summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luc Verhaegen <libv@skynet.be>, 2010-04-02 12:46:10 +0200
committer: Luc Verhaegen <libv@skynet.be>, 2010-04-02 12:46:10 +0200
commit: e8bf66970f1ee08b214e5b88f8f4b62fc1a73509 (patch)
tree: 84830fb169ba2c393ca250d75341bee9eeb5ae6f
parent: 54a8e9cc3988d908b5b846a752679127cacefd3b (diff)
Import i915 and i965 dri drivers from mesa 7.8.0. [refs: HEAD, 7.8.1, 7.8.0, 7.8, master]
-rw-r--r-- i915/intel_tris.c 4
-rw-r--r-- i965/brw_eu_emit.c 18
-rw-r--r-- i965/brw_program.c 14
-rw-r--r-- i965/brw_sf.c 21
-rw-r--r-- i965/brw_sf.h 6
-rw-r--r-- i965/brw_sf_emit.c 135
-rw-r--r-- i965/brw_vs_emit.c 6
-rw-r--r-- i965/brw_wm.h 6
-rw-r--r-- i965/brw_wm_emit.c 12
-rw-r--r-- i965/brw_wm_glsl.c 147
-rw-r--r-- shared/intel_blit.c 2
-rw-r--r-- shared/intel_buffers.c 6
-rw-r--r-- shared/intel_context.c 4
-rw-r--r-- shared/intel_fbo.c 9
-rw-r--r-- shared/intel_mipmap_tree.c 18
-rw-r--r-- shared/intel_pixel_copy.c 32
-rw-r--r-- shared/intel_reg.h 4
-rw-r--r-- shared/intel_regions.c 29
-rw-r--r-- shared/intel_regions.h 4
-rw-r--r-- shared/intel_span.c 16
20 files changed, 228 insertions, 265 deletions
diff --git a/i915/intel_tris.c b/i915/intel_tris.c
index fb191fe..81c4ade 100644
--- a/i915/intel_tris.c
+++ b/i915/intel_tris.c
@@ -251,7 +251,7 @@ void intel_flush_prim(struct intel_context *intel)
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
- assert((offset & !S0_VB_OFFSET_MASK) == 0);
+ assert((offset & ~S0_VB_OFFSET_MASK) == 0);
OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, offset);
OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |
(intel->vertex_size << S1_VERTEX_PITCH_SHIFT));
@@ -270,7 +270,7 @@ void intel_flush_prim(struct intel_context *intel)
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
I1_LOAD_S(0) | I1_LOAD_S(2) | 1);
/* S0 */
- assert((offset & !S0_VB_OFFSET_MASK_830) == 0);
+ assert((offset & ~S0_VB_OFFSET_MASK_830) == 0);
OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
offset | (intel->vertex_size << S0_VB_PITCH_SHIFT_830) |
S0_VB_ENABLE_830);
diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c
index f69d529..82f2fda 100644
--- a/i965/brw_eu_emit.c
+++ b/i965/brw_eu_emit.c
@@ -1290,7 +1290,7 @@ void brw_SAMPLE(struct brw_compile *p,
GLuint simd_mode)
{
GLboolean need_stall = 0;
-
+
if (writemask == 0) {
/*printf("%s: zero writemask??\n", __FUNCTION__); */
return;
@@ -1327,8 +1327,14 @@ void brw_SAMPLE(struct brw_compile *p,
/* printf("need stall %x %x\n", newmask , writemask); */
}
else {
+ GLboolean dispatch_16 = GL_FALSE;
+
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
-
+
+ guess_execution_size(p->current, dest);
+ if (p->current->header.execution_size == BRW_EXECUTE_16)
+ dispatch_16 = GL_TRUE;
+
newmask = ~newmask & WRITEMASK_XYZW;
brw_push_insn_state(p);
@@ -1343,7 +1349,13 @@ void brw_SAMPLE(struct brw_compile *p,
src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
dest = offset(dest, dst_offset);
- response_length = len * 2;
+
+ /* For 16-wide dispatch, masked channels are skipped in the
+ * response. For 8-wide, masked channels still take up slots,
+ * and are just not written to.
+ */
+ if (dispatch_16)
+ response_length = len * 2;
}
}
diff --git a/i965/brw_program.c b/i965/brw_program.c
index c78f7b3..1fd957b 100644
--- a/i965/brw_program.c
+++ b/i965/brw_program.c
@@ -95,9 +95,17 @@ static void brwDeleteProgram( GLcontext *ctx,
struct gl_program *prog )
{
if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
- struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
- struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
- dri_bo_unreference(brw_fprog->const_buffer);
+ struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
+ struct brw_fragment_program *brw_fp = brw_fragment_program(fp);
+
+ dri_bo_unreference(brw_fp->const_buffer);
+ }
+
+ if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
+ struct gl_vertex_program *vp = (struct gl_vertex_program *) prog;
+ struct brw_vertex_program *brw_vp = brw_vertex_program(vp);
+
+ dri_bo_unreference(brw_vp->const_buffer);
}
_mesa_delete_program( ctx, prog );
diff --git a/i965/brw_sf.c b/i965/brw_sf.c
index 8e6839b..57d1c29 100644
--- a/i965/brw_sf.c
+++ b/i965/brw_sf.c
@@ -46,7 +46,6 @@
static void compile_sf_prog( struct brw_context *brw,
struct brw_sf_prog_key *key )
{
- GLcontext *ctx = &brw->intel.ctx;
struct brw_sf_compile c;
const GLuint *program;
GLuint program_size;
@@ -69,20 +68,14 @@ static void compile_sf_prog( struct brw_context *brw,
/* Construct map from attribute number to position in the vertex.
*/
- for (i = idx = 0; i < VERT_RESULT_MAX; i++)
+ for (i = idx = 0; i < VERT_RESULT_MAX; i++) {
if (c.key.attrs & BITFIELD64_BIT(i)) {
c.attr_to_idx[i] = idx;
c.idx_to_attr[idx] = i;
- if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
- c.point_attrs[i].CoordReplace =
- ctx->Point.CoordReplace[i - VERT_RESULT_TEX0];
- }
- else {
- c.point_attrs[i].CoordReplace = GL_FALSE;
- }
idx++;
}
-
+ }
+
/* Which primitive? Or all three?
*/
switch (key->primitive) {
@@ -162,6 +155,14 @@ static void upload_sf_prog(struct brw_context *brw)
}
key.do_point_sprite = ctx->Point.PointSprite;
+ if (key.do_point_sprite) {
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ if (ctx->Point.CoordReplace[i])
+ key.point_sprite_coord_replace |= (1 << i);
+ }
+ }
key.sprite_origin_lower_left = (ctx->Point.SpriteOrigin == GL_LOWER_LEFT);
/* _NEW_LIGHT */
key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
diff --git a/i965/brw_sf.h b/i965/brw_sf.h
index 0ba731f..a0680a5 100644
--- a/i965/brw_sf.h
+++ b/i965/brw_sf.h
@@ -46,6 +46,7 @@
struct brw_sf_prog_key {
GLbitfield64 attrs;
+ uint8_t point_sprite_coord_replace;
GLuint primitive:2;
GLuint do_twoside_color:1;
GLuint do_flat_shading:1;
@@ -56,10 +57,6 @@ struct brw_sf_prog_key {
GLuint pad:24;
};
-struct brw_sf_point_tex {
- GLboolean CoordReplace;
-};
-
struct brw_sf_compile {
struct brw_compile func;
struct brw_sf_prog_key key;
@@ -100,7 +97,6 @@ struct brw_sf_compile {
GLubyte attr_to_idx[VERT_RESULT_MAX];
GLubyte idx_to_attr[VERT_RESULT_MAX];
- struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX];
};
diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c
index bb08055..56f7c98 100644
--- a/i965/brw_sf_emit.c
+++ b/i965/brw_sf_emit.c
@@ -354,6 +354,33 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
return is_last_attr;
}
+/* Calculates the predicate control for which channels of a reg
+ * (containing 2 attrs) to do point sprite coordinate replacement on.
+ */
+static uint16_t
+calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
+{
+ int attr1, attr2;
+ uint16_t pc = 0;
+
+ attr1 = c->idx_to_attr[reg * 2];
+ if (attr1 >= VERT_RESULT_TEX0 && attr1 <= VERT_RESULT_TEX7) {
+ if (c->key.point_sprite_coord_replace & (1 << (attr1 - VERT_RESULT_TEX0)))
+ pc |= 0x0f;
+ }
+
+ if (reg * 2 + 1 < c->nr_setup_attrs) {
+ attr2 = c->idx_to_attr[reg * 2 + 1];
+ if (attr2 >= VERT_RESULT_TEX0 && attr2 <= VERT_RESULT_TEX7) {
+ if (c->key.point_sprite_coord_replace & (1 << (attr2 -
+ VERT_RESULT_TEX0)))
+ pc |= 0xf0;
+ }
+ }
+
+ return pc;
+}
+
void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
@@ -529,22 +556,27 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
copy_z_inv_w(c);
for (i = 0; i < c->nr_setup_regs; i++)
{
- struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]];
struct brw_reg a0 = offset(c->vert[0], i);
- GLushort pc, pc_persp, pc_linear;
+ GLushort pc, pc_persp, pc_linear, pc_coord_replace;
GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
-
- if (pc_persp)
- {
- if (!tex->CoordReplace) {
- brw_set_predicate_control_flag_value(p, pc_persp);
- brw_MUL(p, a0, a0, c->inv_w[0]);
- }
+
+ pc_coord_replace = calculate_point_sprite_mask(c, i);
+ pc_persp &= ~pc_coord_replace;
+
+ if (pc_persp) {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
}
- if (tex->CoordReplace) {
- /* Caculate 1.0/PointWidth */
- brw_math(&c->func,
+ /* Point sprite coordinate replacement: A texcoord with this
+ * enabled gets replaced with the value (x, y, 0, 1) where x and
+ * y vary from 0 to 1 across the horizontal and vertical of the
+ * point.
+ */
+ if (pc_coord_replace) {
+ brw_set_predicate_control_flag_value(p, pc_coord_replace);
+ /* Caculate 1.0/PointWidth */
+ brw_math(&c->func,
c->tmp,
BRW_MATH_FUNCTION_INV,
BRW_MATH_SATURATE_NONE,
@@ -553,50 +585,51 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
BRW_MATH_DATA_SCALAR,
BRW_MATH_PRECISION_FULL);
- if (c->key.sprite_origin_lower_left) {
- brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
- brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
- brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
- brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
- } else {
- brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
- brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
- brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
- brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
- }
- } else {
- brw_MOV(p, c->m1Cx, brw_imm_ud(0));
- brw_MOV(p, c->m2Cy, brw_imm_ud(0));
- }
+ brw_set_access_mode(p, BRW_ALIGN_16);
- {
- brw_set_predicate_control_flag_value(p, pc);
- if (tex->CoordReplace) {
- if (c->key.sprite_origin_lower_left) {
- brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
- brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
- }
- else
- brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+ /* dA/dx, dA/dy */
+ brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
+ brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
+ brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
+ if (c->key.sprite_origin_lower_left) {
+ brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
} else {
- brw_MOV(p, c->m3C0, a0); /* constant value */
+ brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
}
- /* Copy m0..m3 to URB.
- */
- brw_urb_WRITE(p,
- brw_null_reg(),
- 0,
- brw_vec8_grf(0, 0),
- 0, /* allocate */
- 1, /* used */
- 4, /* msg len */
- 0, /* response len */
- last, /* eot */
- last, /* writes complete */
- i*4, /* urb destination offset */
- BRW_URB_SWIZZLE_TRANSPOSE);
+ /* attribute constant offset */
+ brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+ if (c->key.sprite_origin_lower_left) {
+ brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
+ } else {
+ brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
+ }
+
+ brw_set_access_mode(p, BRW_ALIGN_1);
}
+
+ if (pc & ~pc_coord_replace) {
+ brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace);
+ brw_MOV(p, c->m1Cx, brw_imm_ud(0));
+ brw_MOV(p, c->m2Cy, brw_imm_ud(0));
+ brw_MOV(p, c->m3C0, a0); /* constant value */
+ }
+
+
+ brw_set_predicate_control_flag_value(p, pc);
+ /* Copy m0..m3 to URB. */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
}
}
diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c
index a7c4b58..a48804a 100644
--- a/i965/brw_vs_emit.c
+++ b/i965/brw_vs_emit.c
@@ -1717,11 +1717,13 @@ void brw_vs_emit(struct brw_vs_compile *c )
/* patch all the BREAK/CONT instructions from last BEGINLOOP */
while (inst0 > loop_inst[loop_depth]) {
inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+ inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
inst0->bits3.if_else.pop_count = 0;
}
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
inst0->bits3.if_else.pop_count = 0;
}
diff --git a/i965/brw_wm.h b/i965/brw_wm.h
index 88d84ee..47b764d 100644
--- a/i965/brw_wm.h
+++ b/i965/brw_wm.h
@@ -328,6 +328,12 @@ void emit_cinterp(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
const struct brw_reg *arg0);
+void emit_cmp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2);
void emit_ddxy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c
index 9315bca..c7d87b9 100644
--- a/i965/brw_wm_emit.c
+++ b/i965/brw_wm_emit.c
@@ -566,12 +566,12 @@ static void emit_sne( struct brw_compile *p,
emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
}
-static void emit_cmp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1,
- const struct brw_reg *arg2 )
+void emit_cmp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2)
{
GLuint i;
diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c
index 562608e..315b030 100644
--- a/i965/brw_wm_glsl.c
+++ b/i965/brw_wm_glsl.c
@@ -614,112 +614,6 @@ static void invoke_subroutine( struct brw_wm_compile *c,
}
}
-/* Workaround for using brw_wm_emit.c's emit functions, which expect
- * destination regs to be uniquely written. Moves arguments out to
- * temporaries as necessary for instructions which use their destination as
- * a temporary.
- */
-static void
-unalias3(struct brw_wm_compile *c,
- void (*func)(struct brw_compile *c,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1,
- const struct brw_reg *arg2),
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1,
- const struct brw_reg *arg2)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4];
- int i, j;
- int mark = mark_tmps(c);
-
- for (j = 0; j < 4; j++) {
- tmp_arg0[j] = arg0[j];
- tmp_arg1[j] = arg1[j];
- tmp_arg2[j] = arg2[j];
- }
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- for (j = 0; j < 4; j++) {
- if (arg0[j].file == dst[i].file &&
- dst[i].nr == arg0[j].nr) {
- tmp_arg0[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg0[j], arg0[j]);
- }
- if (arg1[j].file == dst[i].file &&
- dst[i].nr == arg1[j].nr) {
- tmp_arg1[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg1[j], arg1[j]);
- }
- if (arg2[j].file == dst[i].file &&
- dst[i].nr == arg2[j].nr) {
- tmp_arg2[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg2[j], arg2[j]);
- }
- }
- }
- }
-
- func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2);
-
- release_tmps(c, mark);
-}
-
-/* Workaround for using brw_wm_emit.c's emit functions, which expect
- * destination regs to be uniquely written. Moves arguments out to
- * temporaries as necessary for instructions which use their destination as
- * a temporary.
- */
-static void
-unalias2(struct brw_wm_compile *c,
- void (*func)(struct brw_compile *c,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1),
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg tmp_arg0[4], tmp_arg1[4];
- int i, j;
- int mark = mark_tmps(c);
-
- for (j = 0; j < 4; j++) {
- tmp_arg0[j] = arg0[j];
- tmp_arg1[j] = arg1[j];
- }
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- for (j = 0; j < 4; j++) {
- if (arg0[j].file == dst[i].file &&
- dst[i].nr == arg0[j].nr) {
- tmp_arg0[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg0[j], arg0[j]);
- }
- if (arg1[j].file == dst[i].file &&
- dst[i].nr == arg1[j].nr) {
- tmp_arg1[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg1[j], arg1[j]);
- }
- }
- }
- }
-
- func(p, dst, mask, tmp_arg0, tmp_arg1);
-
- release_tmps(c, mark);
-}
-
static void emit_arl(struct brw_wm_compile *c,
const struct prog_instruction *inst)
{
@@ -1813,14 +1707,29 @@ static void
get_argument_regs(struct brw_wm_compile *c,
const struct prog_instruction *inst,
int index,
+ struct brw_reg *dst,
struct brw_reg *regs,
int mask)
{
- int i;
+ struct brw_compile *p = &c->func;
+ int i, j;
for (i = 0; i < 4; i++) {
- if (mask & (1 << i))
+ if (mask & (1 << i)) {
regs[i] = get_src_reg(c, inst, index, i);
+
+ /* Unalias destination registers from our sources. */
+ if (regs[i].file == BRW_GENERAL_REGISTER_FILE) {
+ for (j = 0; j < 4; j++) {
+ if (memcmp(&regs[i], &dst[j], sizeof(regs[0])) == 0) {
+ struct brw_reg tmp = alloc_tmp(c);
+ brw_MOV(p, tmp, regs[i]);
+ regs[i] = tmp;
+ break;
+ }
+ }
+ }
+ }
}
}
@@ -1845,6 +1754,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
int dst_flags;
struct brw_reg args[3][4], dst[4];
int j;
+ int mark = mark_tmps( c );
c->cur_inst = i;
@@ -1866,7 +1776,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
}
}
for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++)
- get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW);
+ get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW);
dst_flags = inst->DstReg.WriteMask;
if (inst->SaturateMode == SATURATE_ZERO_ONE)
@@ -1920,8 +1830,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
break;
case OPCODE_LRP:
- unalias3(c, emit_lrp,
- dst, dst_flags, args[0], args[1], args[2]);
+ emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
break;
case OPCODE_TRUNC:
emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
@@ -1960,11 +1869,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_LG2:
emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
break;
+ case OPCODE_CMP:
+ emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
case OPCODE_MIN:
- unalias2(c, emit_min, dst, dst_flags, args[0], args[1]);
+ emit_min(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_MAX:
- unalias2(c, emit_max, dst, dst_flags, args[0], args[1]);
+ emit_max(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_DDX:
case OPCODE_DDY:
@@ -2103,11 +2015,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
/* patch all the BREAK/CONT instructions from last BGNLOOP */
while (inst0 > loop_inst[loop_depth]) {
inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+ inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
inst0->bits3.if_else.pop_count = 0;
}
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
inst0->bits3.if_else.pop_count = 0;
}
@@ -2119,6 +2033,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
inst->Opcode);
}
+ /* Release temporaries containing any unaliased source regs. */
+ release_tmps( c, mark );
+
if (inst->CondUpdate)
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
else
diff --git a/shared/intel_blit.c b/shared/intel_blit.c
index f2769aa..4ad42a7 100644
--- a/shared/intel_blit.c
+++ b/shared/intel_blit.c
@@ -119,8 +119,6 @@ intelEmitCopyBlit(struct intel_context *intel,
break;
} while (pass < 2);
- intel_prepare_render(intel);
-
if (pass >= 2) {
drm_intel_gem_bo_map_gtt(dst_buffer);
drm_intel_gem_bo_map_gtt(src_buffer);
diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c
index b106930..0480770 100644
--- a/shared/intel_buffers.c
+++ b/shared/intel_buffers.c
@@ -226,7 +226,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
* only changes with _NEW_STENCIL (which seems sensible). So flag it
* here since this is the _NEW_BUFFERS path.
*/
- ctx->NewState |= (_NEW_DEPTH | _NEW_STENCIL);
+ intel->NewGLState |= (_NEW_DEPTH | _NEW_STENCIL);
}
intel->vtbl.set_draw_region(intel, colorRegions, depthRegion,
@@ -236,7 +236,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
#ifdef I915
intelCalcViewport(ctx);
#else
- ctx->NewState |= _NEW_VIEWPORT;
+ intel->NewGLState |= _NEW_VIEWPORT;
#endif
/* Set state we know depends on drawable parameters:
*/
@@ -256,7 +256,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
if (ctx->Driver.FrontFace)
ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
else
- ctx->NewState |= _NEW_POLYGON;
+ intel->NewGLState |= _NEW_POLYGON;
}
diff --git a/shared/intel_context.c b/shared/intel_context.c
index d6a1ba6..0a7dcb8 100644
--- a/shared/intel_context.c
+++ b/shared/intel_context.c
@@ -63,7 +63,7 @@ int INTEL_DEBUG = (0);
#endif
-#define DRIVER_DATE "20091221 DEVELOPMENT"
+#define DRIVER_DATE "20100328 2010Q1"
#define DRIVER_DATE_GEM "GEM " DRIVER_DATE
@@ -880,12 +880,12 @@ intelMakeCurrent(__DRIcontext * driContextPriv,
struct gl_framebuffer *fb = driDrawPriv->driverPrivate;
struct gl_framebuffer *readFb = driReadPriv->driverPrivate;
- _mesa_make_current(&intel->ctx, fb, readFb);
intel->driReadDrawable = driReadPriv;
intel->driDrawable = driDrawPriv;
driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
intel_prepare_render(intel);
+ _mesa_make_current(&intel->ctx, fb, readFb);
}
else {
_mesa_make_current(NULL, NULL, NULL);
diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c
index a429f8d..ba3bb8f 100644
--- a/shared/intel_fbo.c
+++ b/shared/intel_fbo.c
@@ -104,7 +104,6 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
struct intel_context *intel = intel_context(ctx);
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
int cpp;
- GLuint pitch;
ASSERT(rb->Name != 0);
@@ -176,15 +175,11 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
/* allocate new memory region/renderbuffer */
- /* Choose a pitch to match hardware requirements:
- */
- pitch = ((cpp * width + 63) & ~63) / cpp;
-
/* alloc hardware renderbuffer */
- DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width, height, pitch);
+ DBG("Allocating %d x %d Intel RBO\n", width, height);
irb->region = intel_region_alloc(intel, I915_TILING_NONE, cpp,
- width, height, pitch, GL_TRUE);
+ width, height, GL_TRUE);
if (!irb->region)
return GL_FALSE; /* out of memory? */
diff --git a/shared/intel_mipmap_tree.c b/shared/intel_mipmap_tree.c
index 4f14946..5b6b4b2 100644
--- a/shared/intel_mipmap_tree.c
+++ b/shared/intel_mipmap_tree.c
@@ -146,8 +146,8 @@ intel_miptree_create(struct intel_context *intel,
mt->cpp,
mt->pitch,
mt->total_height,
- mt->pitch,
expect_accelerated_upload);
+ mt->pitch = mt->region->pitch;
if (!mt->region) {
free(mt);
@@ -177,20 +177,11 @@ intel_miptree_create_for_region(struct intel_context *intel,
I915_TILING_NONE);
if (!mt)
return mt;
-#if 0
- if (mt->pitch != region->pitch) {
- fprintf(stderr,
- "region pitch (%d) doesn't match mipmap tree pitch (%d)\n",
- region->pitch, mt->pitch);
- free(mt);
- return NULL;
- }
-#else
+
/* The mipmap tree pitch is aligned to 64 bytes to make sure render
* to texture works, but we don't need that for texturing from a
* pixmap. Just override it here. */
mt->pitch = region->pitch;
-#endif
intel_region_reference(&mt->region, region);
@@ -520,12 +511,15 @@ intel_miptree_image_copy(struct intel_context *intel,
width = ALIGN(width, align_w);
}
+ intel_prepare_render(intel);
+
for (i = 0; i < depth; i++) {
intel_miptree_get_image_offset(src, level, face, i, &src_x, &src_y);
intel_miptree_get_image_offset(dst, level, face, i, &dst_x, &dst_y);
success = intel_region_copy(intel,
dst->region, 0, dst_x, dst_y,
- src->region, 0, src_x, src_y, width, height,
+ src->region, 0, src_x, src_y,
+ width, height, GL_FALSE,
GL_COPY);
if (!success) {
GLubyte *src_ptr, *dst_ptr;
diff --git a/shared/intel_pixel_copy.c b/shared/intel_pixel_copy.c
index f4f3fd6..56faf07 100644
--- a/shared/intel_pixel_copy.c
+++ b/shared/intel_pixel_copy.c
@@ -108,14 +108,15 @@ do_blit_copypixels(GLcontext * ctx,
GLint dstx, GLint dsty, GLenum type)
{
struct intel_context *intel = intel_context(ctx);
- struct intel_region *dst = intel_drawbuf_region(intel);
- struct intel_region *src = copypix_src_region(intel, type);
+ struct intel_region *dst;
+ struct intel_region *src;
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct gl_framebuffer *read_fb = ctx->ReadBuffer;
GLint orig_dstx;
GLint orig_dsty;
GLint orig_srcx;
GLint orig_srcy;
+ GLboolean flip = GL_FALSE;
if (type == GL_DEPTH || type == GL_STENCIL) {
if (INTEL_DEBUG & DEBUG_FALLBACKS)
@@ -133,15 +134,16 @@ do_blit_copypixels(GLcontext * ctx,
ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F)
return GL_FALSE;
+ intel_prepare_render(intel);
+
+ dst = intel_drawbuf_region(intel);
+ src = copypix_src_region(intel, type);
+
if (!src || !dst)
return GL_FALSE;
intelFlush(&intel->ctx);
- intel_prepare_render(intel);
-
- /* XXX: We fail to handle different inversion between read and draw framebuffer. */
-
/* Clip to destination buffer. */
orig_dstx = dstx;
orig_dsty = dsty;
@@ -164,23 +166,23 @@ do_blit_copypixels(GLcontext * ctx,
dstx += srcx - orig_srcx;
dsty += srcy - orig_srcy;
- /* Convert from GL to hardware coordinates: */
+ /* Flip dest Y if it's a window system framebuffer. */
if (fb->Name == 0) {
- /* copypixels to a system framebuffer */
+ /* copypixels to a window system framebuffer */
dsty = fb->Height - dsty - height;
- } else {
- /* copypixels to a user framebuffer object */
- dsty = dsty;
+ flip = !flip;
}
- /* Flip source Y if it's a system framebuffer. */
- if (read_fb->Name == 0)
- srcy = fb->Height - srcy - height;
+ /* Flip source Y if it's a window system framebuffer. */
+ if (read_fb->Name == 0) {
+ srcy = read_fb->Height - srcy - height;
+ flip = !flip;
+ }
if (!intel_region_copy(intel,
dst, 0, dstx, dsty,
src, 0, srcx, srcy,
- width, height,
+ width, height, flip,
ctx->Color.ColorLogicOpEnabled ?
ctx->Color.LogicOp : GL_COPY)) {
DBG("%s: blit failure\n", __FUNCTION__);
diff --git a/shared/intel_reg.h b/shared/intel_reg.h
index d19f1ba..36d8180 100644
--- a/shared/intel_reg.h
+++ b/shared/intel_reg.h
@@ -70,8 +70,10 @@
/** @{
* 915 definitions
+ *
+ * 915 documents say that bits 31:28 and 1 are "undefined, must be zero."
*/
-#define S0_VB_OFFSET_MASK 0xffffffc0
+#define S0_VB_OFFSET_MASK 0x0ffffffc
#define S0_AUTO_CACHE_INV_DISABLE (1<<0)
/** @} */
diff --git a/shared/intel_regions.c b/shared/intel_regions.c
index f042bcb..1172de9 100644
--- a/shared/intel_regions.c
+++ b/shared/intel_regions.c
@@ -164,7 +164,6 @@ intel_region_alloc_internal(struct intel_context *intel,
/* Default to no tiling */
region->tiling = I915_TILING_NONE;
- region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE;
_DBG("%s <-- %p\n", __FUNCTION__, region);
return region;
@@ -173,7 +172,7 @@ intel_region_alloc_internal(struct intel_context *intel,
struct intel_region *
intel_region_alloc(struct intel_context *intel,
uint32_t tiling,
- GLuint cpp, GLuint width, GLuint height, GLuint pitch,
+ GLuint cpp, GLuint width, GLuint height,
GLboolean expect_accelerated_upload)
{
dri_bo *buffer;
@@ -187,19 +186,10 @@ intel_region_alloc(struct intel_context *intel,
buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region",
width, height, cpp,
&tiling, &aligned_pitch, flags);
- /* We've already chosen a pitch as part of miptree layout. It had
- * better be the same.
- */
- assert(aligned_pitch == pitch * cpp);
region = intel_region_alloc_internal(intel, cpp, width, height,
- pitch, buffer);
-
- if (tiling != I915_TILING_NONE) {
- assert(((pitch * cpp) & 127) == 0);
- drm_intel_bo_set_tiling(buffer, &tiling, pitch * cpp);
- drm_intel_bo_get_tiling(buffer, &region->tiling, &region->bit_6_swizzle);
- }
+ aligned_pitch / cpp, buffer);
+ region->tiling = tiling;
return region;
}
@@ -213,6 +203,7 @@ intel_region_alloc_for_handle(struct intel_context *intel,
struct intel_region *region, *dummy;
dri_bo *buffer;
int ret;
+ uint32_t bit_6_swizzle;
region = _mesa_HashLookup(intel->intelScreen->named_regions, handle);
if (region != NULL) {
@@ -236,7 +227,7 @@ intel_region_alloc_for_handle(struct intel_context *intel,
return region;
ret = dri_bo_get_tiling(region->buffer, &region->tiling,
- &region->bit_6_swizzle);
+ &bit_6_swizzle);
if (ret != 0) {
fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n",
handle, name, strerror(-ret));
@@ -316,7 +307,7 @@ _mesa_copy_rect(GLubyte * dst,
dst += dst_x * cpp;
src += src_x * cpp;
dst += dst_y * dst_pitch;
- src += src_y * dst_pitch;
+ src += src_y * src_pitch;
width *= cpp;
if (width == dst_pitch && width == src_pitch)
@@ -380,8 +371,11 @@ intel_region_copy(struct intel_context *intel,
struct intel_region *src,
GLuint src_offset,
GLuint srcx, GLuint srcy, GLuint width, GLuint height,
+ GLboolean flip,
GLenum logicop)
{
+ uint32_t src_pitch = src->pitch;
+
_DBG("%s\n", __FUNCTION__);
if (intel == NULL)
@@ -397,9 +391,12 @@ intel_region_copy(struct intel_context *intel,
assert(src->cpp == dst->cpp);
+ if (flip)
+ src_pitch = -src_pitch;
+
return intelEmitCopyBlit(intel,
dst->cpp,
- src->pitch, src->buffer, src_offset, src->tiling,
+ src_pitch, src->buffer, src_offset, src->tiling,
dst->pitch, dst->buffer, dst_offset, dst->tiling,
srcx, srcy, dstx, dsty, width, height,
logicop);
diff --git a/shared/intel_regions.h b/shared/intel_regions.h
index 7ee6a98..2459c9a 100644
--- a/shared/intel_regions.h
+++ b/shared/intel_regions.h
@@ -65,7 +65,6 @@ struct intel_region
GLuint draw_x, draw_y; /**< Offset of drawing within the region */
uint32_t tiling; /**< Which tiling mode the region is in */
- uint32_t bit_6_swizzle; /**< GEM flag for address swizzling requirement */
struct intel_buffer_object *pbo; /* zero-copy uploads */
uint32_t name; /**< Global name for the bo */
@@ -79,7 +78,7 @@ struct intel_region
struct intel_region *intel_region_alloc(struct intel_context *intel,
uint32_t tiling,
GLuint cpp, GLuint width,
- GLuint height, GLuint pitch,
+ GLuint height,
GLboolean expect_accelerated_upload);
struct intel_region *
@@ -122,6 +121,7 @@ intel_region_copy(struct intel_context *intel,
struct intel_region *src,
GLuint src_offset,
GLuint srcx, GLuint srcy, GLuint width, GLuint height,
+ GLboolean flip,
GLenum logicop);
/* Helpers for zerocopy uploads, particularly texture image uploads:
diff --git a/shared/intel_span.c b/shared/intel_span.c
index fb5c01b..377f3a8 100644
--- a/shared/intel_span.c
+++ b/shared/intel_span.c
@@ -48,11 +48,11 @@ intel_set_span_functions(struct intel_context *intel,
#define LOCAL_VARS \
struct intel_renderbuffer *irb = intel_renderbuffer(rb); \
- const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
- const GLint yBias = ctx->DrawBuffer->Name ? 0 : irb->Base.Height - 1;\
+ const GLint yScale = rb->Name ? 1 : -1; \
+ const GLint yBias = rb->Name ? 0 : rb->Height - 1; \
int minx = 0, miny = 0; \
- int maxx = ctx->DrawBuffer->Width; \
- int maxy = ctx->DrawBuffer->Height; \
+ int maxx = rb->Width; \
+ int maxy = rb->Height; \
int pitch = irb->region->pitch * irb->region->cpp; \
void *buf = irb->region->buffer->virtual; \
GLuint p; \
@@ -108,11 +108,11 @@ intel_set_span_functions(struct intel_context *intel,
#define LOCAL_DEPTH_VARS \
struct intel_renderbuffer *irb = intel_renderbuffer(rb); \
- const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
- const GLint yBias = ctx->DrawBuffer->Name ? 0 : irb->Base.Height - 1;\
+ const GLint yScale = rb->Name ? 1 : -1; \
+ const GLint yBias = rb->Name ? 0 : rb->Height - 1; \
int minx = 0, miny = 0; \
- int maxx = ctx->DrawBuffer->Width; \
- int maxy = ctx->DrawBuffer->Height; \
+ int maxx = rb->Width; \
+ int maxy = rb->Height; \
int pitch = irb->region->pitch * irb->region->cpp; \
void *buf = irb->region->buffer->virtual; \
(void)buf; (void)pitch; /* unused for non-gttmap. */ \