summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoland Scheidegger <sroland@vmware.com>2010-12-02 04:32:06 +0100
committerRoland Scheidegger <sroland@vmware.com>2010-12-02 04:32:06 +0100
commita45bd509014743d21a532194d7b658a1aeb00cb7 (patch)
treed5fc155ee50d2f41fa9e7d4a253a8f9c51bc9e51
parent1aeca287a827f29206078fa1204715a477072c08 (diff)
parent32e1e591467d9a28c2ac4d2e17af7be2dc429d43 (diff)
Merge remote branch 'origin/master' into gallium-array-texturesgallium-array-textures
Conflicts: src/gallium/drivers/i915/i915_resource_texture.c src/gallium/drivers/i915/i915_state_emit.c src/gallium/drivers/i915/i915_surface.c
-rw-r--r--src/gallium/drivers/i915/TODO25
-rw-r--r--src/gallium/drivers/i915/i915_batch.h5
-rw-r--r--src/gallium/drivers/i915/i915_batchbuffer.h35
-rw-r--r--src/gallium/drivers/i915/i915_blit.c6
-rw-r--r--src/gallium/drivers/i915/i915_context.h3
-rw-r--r--src/gallium/drivers/i915/i915_debug.c2
-rw-r--r--src/gallium/drivers/i915/i915_debug.h1
-rw-r--r--src/gallium/drivers/i915/i915_prim_vbuf.c15
-rw-r--r--src/gallium/drivers/i915/i915_reg.h3
-rw-r--r--src/gallium/drivers/i915/i915_resource.h10
-rw-r--r--src/gallium/drivers/i915/i915_resource_texture.c106
-rw-r--r--src/gallium/drivers/i915/i915_state_emit.c69
-rw-r--r--src/gallium/drivers/i915/i915_state_sampler.c20
-rw-r--r--src/gallium/drivers/i915/i915_surface.c9
-rw-r--r--src/gallium/drivers/i915/i915_winsys.h29
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c1
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h2
-rw-r--r--src/gallium/drivers/r300/r300_reg.h2
-rw-r--r--src/gallium/drivers/r300/r300_screen.c3
-rw-r--r--src/gallium/drivers/r300/r300_state.c4
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c9
-rw-r--r--src/gallium/drivers/r300/r300_texture.c17
-rw-r--r--src/gallium/drivers/r300/r300_texture.h6
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c2
-rw-r--r--src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c110
-rw-r--r--src/gallium/winsys/i915/drm/i915_drm_buffer.c95
-rw-r--r--src/gallium/winsys/i915/drm/i915_drm_winsys.c1
-rw-r--r--src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c2
-rw-r--r--src/gallium/winsys/i915/sw/i915_sw_buffer.c50
-rw-r--r--src/gallium/winsys/i915/sw/i915_sw_winsys.h4
-rw-r--r--src/glsl/Makefile2
-rw-r--r--src/glsl/SConscript2
-rw-r--r--src/glsl/ast_to_hir.cpp14
-rw-r--r--src/glsl/glsl_parser_extras.cpp1
-rw-r--r--src/glsl/ir_optimization.h7
-rw-r--r--src/glsl/lower_discard.cpp198
-rw-r--r--src/glsl/lower_instructions.cpp32
-rw-r--r--src/glsl/lower_jumps.cpp44
-rw-r--r--src/glsl/opt_discard_simplification.cpp180
-rw-r--r--src/mesa/drivers/dri/i915/i915_context.c1
-rw-r--r--src/mesa/drivers/dri/i915/i915_fragprog.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_disasm.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c174
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp54
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h15
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c58
-rw-r--r--src/mesa/drivers/dri/i965/gen6_sf_state.c42
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c7
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c18
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c64
-rw-r--r--src/mesa/main/compiler.h4
-rw-r--r--src/mesa/main/mtypes.h1
-rw-r--r--src/mesa/program/ir_to_mesa.cpp18
56 files changed, 1249 insertions, 351 deletions
diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO
new file mode 100644
index 00000000000..94c428bebf8
--- /dev/null
+++ b/src/gallium/drivers/i915/TODO
@@ -0,0 +1,25 @@
+Random list of problems with i915g:
+
+- Dies with BadDrawable on GLXFBconfig changes/destruction. Makes piglit totally
+ unusable :( Upgrading xserver helped here, it doesn't crash anymore. Still
+ broken, it doesn't update the viewport/get new buffers.
+
+- Tends to hang the chip after a few minutes of openarena. Looks tiling related,
+ at the last frame rendered has tiling corruption over the complete frame.
+
+- Kills the chip in 3D_PRIMITIVE LINELIST with mesa-demos/fbotexture in
+ wireframe mode.
+
+- Tiling is funny: If unlucky, it renders/samples all black. No clue yet what's
+ going on. Seems to depend on tiny details like whethever the sampler
+ relocation is fenced/unfenced (broken _with_ fenced reloc using tiling bits!).
+
+- Y-tiling is even more fun. i915c doesn't use it, maybe there's a reason?
+ Texture sampling from Y-tiled buffers seems to work, though (save above
+ problems).
+
+- Need to validate buffers before usage. Currently do_exec on the batchbuffer
+ can fail with -ENOSPC.
+
+Other bugs can be found here:
+https://bugs.freedesktop.org/buglist.cgi?bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&component=Drivers/Gallium/i915g
diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h
index c411b84ccd4..6e93da76209 100644
--- a/src/gallium/drivers/i915/i915_batch.h
+++ b/src/gallium/drivers/i915/i915_batch.h
@@ -38,7 +38,10 @@
i915_winsys_batchbuffer_dword(i915->batch, dword)
#define OUT_RELOC(buf, usage, offset) \
- i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset)
+ i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset, false)
+
+#define OUT_RELOC_FENCED(buf, usage, offset) \
+ i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset, true)
#define FLUSH_BATCH(fence) \
i915_flush(i915, fence)
diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h
index c1cd314e7b8..d92b2ccb31e 100644
--- a/src/gallium/drivers/i915/i915_batchbuffer.h
+++ b/src/gallium/drivers/i915/i915_batchbuffer.h
@@ -29,42 +29,47 @@
#define I915_BATCHBUFFER_H
#include "i915_winsys.h"
+#include "util/u_debug.h"
struct i915_context;
+static INLINE size_t
+i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch)
+{
+ return batch->size - (batch->ptr - batch->map);
+}
+
static INLINE boolean
i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch,
size_t dwords,
size_t relocs)
{
- return dwords * 4 <= batch->size - (batch->ptr - batch->map) &&
+ return dwords * 4 <= i915_winsys_batchbuffer_space(batch) &&
relocs <= (batch->max_relocs - batch->relocs);
}
-static INLINE size_t
-i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch)
+static INLINE void
+i915_winsys_batchbuffer_dword_unchecked(struct i915_winsys_batchbuffer *batch,
+ unsigned dword)
{
- return batch->size - (batch->ptr - batch->map);
+ *(unsigned *)batch->ptr = dword;
+ batch->ptr += 4;
}
static INLINE void
i915_winsys_batchbuffer_dword(struct i915_winsys_batchbuffer *batch,
unsigned dword)
{
- if (i915_winsys_batchbuffer_space(batch) < 4)
- return;
-
- *(unsigned *)batch->ptr = dword;
- batch->ptr += 4;
+ assert (i915_winsys_batchbuffer_space(batch) >= 4);
+ i915_winsys_batchbuffer_dword_unchecked(batch, dword);
}
static INLINE void
i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch,
- void *data,
- size_t size)
+ void *data,
+ size_t size)
{
- if (i915_winsys_batchbuffer_space(batch) < size)
- return;
+ assert (i915_winsys_batchbuffer_space(batch) >= size);
memcpy(data, batch->ptr, size);
batch->ptr += size;
@@ -74,9 +79,9 @@ static INLINE int
i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch,
struct i915_winsys_buffer *buffer,
enum i915_winsys_buffer_usage usage,
- size_t offset)
+ size_t offset, bool fenced)
{
- return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset);
+ return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset, fenced);
}
#endif
diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c
index cdf20c0055a..97c25665156 100644
--- a/src/gallium/drivers/i915/i915_blit.c
+++ b/src/gallium/drivers/i915/i915_blit.c
@@ -74,7 +74,7 @@ i915_fill_blit(struct i915_context *i915,
OUT_BATCH(BR13);
OUT_BATCH((y << 16) | x);
OUT_BATCH(((y + h) << 16) | (x + w));
- OUT_RELOC(dst_buffer, I915_USAGE_2D_TARGET, dst_offset);
+ OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset);
OUT_BATCH(color);
}
@@ -138,8 +138,8 @@ i915_copy_blit(struct i915_context *i915,
OUT_BATCH(BR13);
OUT_BATCH((dst_y << 16) | dst_x);
OUT_BATCH((dst_y2 << 16) | dst_x2);
- OUT_RELOC(dst_buffer, I915_USAGE_2D_TARGET, dst_offset);
+ OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset);
OUT_BATCH((src_y << 16) | src_x);
OUT_BATCH(((int) src_pitch & 0xffff));
- OUT_RELOC(src_buffer, I915_USAGE_2D_SOURCE, src_offset);
+ OUT_RELOC_FENCED(src_buffer, I915_USAGE_2D_SOURCE, src_offset);
}
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 3ae61d0ea70..7103a1b8c16 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -193,8 +193,7 @@ struct i915_velems_state {
};
-struct i915_context
-{
+struct i915_context {
struct pipe_context base;
struct i915_winsys *iws;
diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c
index 57d3390dea3..d7150c99c4e 100644
--- a/src/gallium/drivers/i915/i915_debug.c
+++ b/src/gallium/drivers/i915/i915_debug.c
@@ -46,10 +46,12 @@ static const struct debug_named_value debug_options[] = {
};
unsigned i915_debug = 0;
+boolean i915_tiling = TRUE;
void i915_debug_init(struct i915_screen *screen)
{
i915_debug = debug_get_flags_option("I915_DEBUG", debug_options, 0);
+ i915_tiling = !debug_get_bool_option("I915_NO_TILING", FALSE);
}
diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h
index fa60799d0c5..11af7662f0a 100644
--- a/src/gallium/drivers/i915/i915_debug.h
+++ b/src/gallium/drivers/i915/i915_debug.h
@@ -46,6 +46,7 @@ struct i915_winsys_batchbuffer;
#define DBG_CONSTANTS 0x20
extern unsigned i915_debug;
+extern boolean i915_tiling;
#ifdef DEBUG
static INLINE boolean
diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c
index bd046bd9058..baebbc7bae3 100644
--- a/src/gallium/drivers/i915/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915/i915_prim_vbuf.c
@@ -172,6 +172,7 @@ i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size)
*
* Side effects:
* Updates hw_offset, sw_offset, index and allocates a new buffer.
+ * Will set i915->vbo to null on buffer allocation.
*/
static void
i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
@@ -179,8 +180,16 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
struct i915_context *i915 = i915_render->i915;
struct i915_winsys *iws = i915->iws;
- if (i915_render->vbo)
+ if (i915_render->vbo) {
iws->buffer_destroy(iws, i915_render->vbo);
+ /*
+ * XXX If buffers where referenced then this should be done in
+ * update_vbo_state but since they arn't and malloc likes to reuse
+ * memory we need to set it to null
+ */
+ i915->vbo = NULL;
+ i915_render->vbo = NULL;
+ }
i915->vbo_flushed = 0;
@@ -198,7 +207,7 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
#endif
i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size,
- 64, I915_NEW_VERTEX);
+ I915_NEW_VERTEX);
}
/**
@@ -726,7 +735,7 @@ i915_vbuf_render_create(struct i915_context *i915)
i915_render->pool_fifo = u_fifo_create(6);
for (i = 0; i < 6; i++)
u_fifo_add(i915_render->pool_fifo,
- iws->buffer_create(iws, i915_render->pool_buffer_size, 64,
+ iws->buffer_create(iws, i915_render->pool_buffer_size,
I915_NEW_VERTEX));
#else
(void)i;
diff --git a/src/gallium/drivers/i915/i915_reg.h b/src/gallium/drivers/i915/i915_reg.h
index cc28891e4ac..5e4e80ddf6b 100644
--- a/src/gallium/drivers/i915/i915_reg.h
+++ b/src/gallium/drivers/i915/i915_reg.h
@@ -753,7 +753,7 @@
#define MT_COMPRESS_DXT1_RGB (4<<3)
#define MS3_USE_FENCE_REGS (1<<2)
#define MS3_TILED_SURFACE (1<<1)
-#define MS3_TILE_WALK (1<<0)
+#define MS3_TILE_WALK_Y (1<<0)
#define MS4_PITCH_SHIFT 21
#define MS4_CUBE_FACE_ENA_NEGX (1<<20)
@@ -851,6 +851,7 @@
#define MI_FLUSH ((0<<29)|(4<<23))
#define FLUSH_MAP_CACHE (1<<0)
#define INHIBIT_FLUSH_RENDER_CACHE (1<<2)
+#define MI_NOOP 0
#define CMD_3D (0x3<<29)
diff --git a/src/gallium/drivers/i915/i915_resource.h b/src/gallium/drivers/i915/i915_resource.h
index 753bd266b12..86620e6a123 100644
--- a/src/gallium/drivers/i915/i915_resource.h
+++ b/src/gallium/drivers/i915/i915_resource.h
@@ -49,6 +49,10 @@ struct i915_buffer {
#define I915_MAX_TEXTURE_3D_LEVELS 8 /* max 128x128x128 */
+struct offset_pair {
+ unsigned short nblocksx;
+ unsigned short nblocksy;
+};
struct i915_texture {
struct u_resource b;
@@ -63,14 +67,18 @@ struct i915_texture {
/* Explicitly store the offset of each image for each cube face or
* depth value.
+ *
+ * Array [depth] off offsets.
*/
- unsigned *image_offset[I915_MAX_TEXTURE_2D_LEVELS]; /**< array [depth] of offsets */
+ struct offset_pair *image_offset[I915_MAX_TEXTURE_2D_LEVELS];
/* The data is held here:
*/
struct i915_winsys_buffer *buffer;
};
+unsigned i915_texture_offset(struct i915_texture *tex,
+ unsigned level, unsigned layer);
void i915_init_screen_resource_functions(struct i915_screen *is);
void i915_init_resource_functions(struct i915_context *i915);
diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c
index 67356abe208..f19106f3414 100644
--- a/src/gallium/drivers/i915/i915_resource_texture.c
+++ b/src/gallium/drivers/i915/i915_resource_texture.c
@@ -106,6 +106,23 @@ get_pot_stride(enum pipe_format format, unsigned width)
return util_next_power_of_two(util_format_get_stride(format, width));
}
+static INLINE const char*
+get_tiling_string(enum i915_winsys_buffer_tile tile)
+{
+ switch(tile) {
+ case I915_TILE_NONE:
+ return "none";
+ case I915_TILE_X:
+ return "x";
+ case I915_TILE_Y:
+ return "y";
+ default:
+ assert(FALSE);
+ return "?";
+ }
+}
+
+
/*
* More advanced helper funcs
*/
@@ -120,28 +137,56 @@ i915_texture_set_level_info(struct i915_texture *tex,
assert(!tex->image_offset[level]);
tex->nr_images[level] = nr_images;
- tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned));
- tex->image_offset[level][0] = 0;
+ tex->image_offset[level] = MALLOC(nr_images * sizeof(struct offset_pair));
+ tex->image_offset[level][0].nblocksx = 0;
+ tex->image_offset[level][0].nblocksy = 0;
+}
+
+INLINE unsigned i915_texture_offset(struct i915_texture *tex,
+ unsigned level, unsigned layer)
+{
+ unsigned x, y;
+ x = tex->image_offset[level][layer].nblocksx
+ * util_format_get_blocksize(tex->b.b.format);
+ y = tex->image_offset[level][layer].nblocksy;
+
+ return y * tex->stride + x;
}
static void
i915_texture_set_image_offset(struct i915_texture *tex,
unsigned level, unsigned img,
- unsigned x, unsigned y)
+ unsigned nblocksx, unsigned nblocksy)
{
/* for the first image and level make sure offset is zero */
- assert(!(img == 0 && level == 0) || (x == 0 && y == 0));
+ assert(!(img == 0 && level == 0) || (nblocksx == 0 && nblocksy == 0));
assert(img < tex->nr_images[level]);
- tex->image_offset[level][img] = y * tex->stride + x * util_format_get_blocksize(tex->b.b.format);
+ tex->image_offset[level][img].nblocksx = nblocksx;
+ tex->image_offset[level][img].nblocksy = nblocksy;
#if DEBUG_TEXTURES
- debug_printf("%s: %p level %u, img %u (%u, %u) %p\n", __FUNCTION__,
- tex, level, img, x, y,
- (void*)(uintptr_t)tex->image_offset[level][img]);
+ debug_printf("%s: %p level %u, img %u (%u, %u)\n", __FUNCTION__,
+ tex, level, img, x, y);
#endif
}
+static enum i915_winsys_buffer_tile
+i915_texture_tiling(struct pipe_resource *pt)
+{
+ if (!i915_tiling)
+ return I915_TILE_NONE;
+
+ if (pt->target == PIPE_TEXTURE_1D)
+ return I915_TILE_NONE;
+
+ if (util_format_is_s3tc(pt->format))
+ /* XXX X-tiling might make sense */
+ return I915_TILE_NONE;
+
+ return I915_TILE_X;
+}
+
/*
* Shared layout functions
@@ -163,9 +208,10 @@ i9x5_scanout_layout(struct i915_texture *tex)
i915_texture_set_image_offset(tex, 0, 0, 0, 0);
if (pt->width0 >= 240) {
- tex->stride = get_pot_stride(pt->format, pt->width0);
+ tex->stride = align(util_format_get_stride(pt->format, pt->width0), 64);
tex->total_nblocksy = align_nblocksy(pt->format, pt->height0, 8);
tex->tiling = I915_TILE_X;
+ /* special case for cursors */
} else if (pt->width0 == 64 && pt->height0 == 64) {
tex->stride = get_pot_stride(pt->format, pt->width0);
tex->total_nblocksy = align_nblocksy(pt->format, pt->height0, 8);
@@ -200,7 +246,7 @@ i9x5_display_target_layout(struct i915_texture *tex)
i915_texture_set_level_info(tex, 0, 1);
i915_texture_set_image_offset(tex, 0, 0, 0, 0);
- tex->stride = get_pot_stride(pt->format, pt->width0);
+ tex->stride = align(util_format_get_stride(pt->format, pt->width0), 64);
tex->total_nblocksy = align_nblocksy(pt->format, pt->height0, 8);
tex->tiling = I915_TILE_X;
@@ -357,6 +403,8 @@ i915_texture_layout(struct i915_texture * tex)
{
struct pipe_resource *pt = &tex->b.b;
+ tex->tiling = i915_texture_tiling(pt);
+
switch (pt->target) {
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
@@ -603,6 +651,8 @@ i945_texture_layout(struct i915_texture * tex)
{
struct pipe_resource *pt = &tex->b.b;
+ tex->tiling = i915_texture_tiling(pt);
+
switch (pt->target) {
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
@@ -687,7 +737,6 @@ i915_texture_get_transfer(struct pipe_context *context,
return transfer;
}
-
static void *
i915_texture_transfer_map(struct pipe_context *pipe,
struct pipe_transfer *transfer)
@@ -703,7 +752,7 @@ i915_texture_transfer_map(struct pipe_context *pipe,
if (resource->target != PIPE_TEXTURE_3D &&
resource->target != PIPE_TEXTURE_CUBE)
assert(box->z == 0);
- offset = tex->image_offset[transfer->level][box->z];
+ offset = i915_texture_offset(tex, transfer->level, box->z);
map = iws->buffer_map(iws, tex->buffer,
(transfer->usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE);
@@ -749,7 +798,6 @@ i915_texture_create(struct pipe_screen *screen,
struct i915_screen *is = i915_screen(screen);
struct i915_winsys *iws = is->iws;
struct i915_texture *tex = CALLOC_STRUCT(i915_texture);
- size_t tex_size;
unsigned buf_usage = 0;
if (!tex)
@@ -768,8 +816,6 @@ i915_texture_create(struct pipe_screen *screen,
goto fail;
}
- tex_size = tex->stride * tex->total_nblocksy;
-
/* for scanouts and cursors, cursors arn't scanouts */
/* XXX: use a custom flag for cursors, don't rely on magically
@@ -780,27 +826,15 @@ i915_texture_create(struct pipe_screen *screen,
else
buf_usage = I915_NEW_TEXTURE;
- tex->buffer = iws->buffer_create(iws, tex_size, 64, buf_usage);
+ tex->buffer = iws->buffer_create_tiled(iws, &tex->stride, tex->total_nblocksy,
+ &tex->tiling, buf_usage);
if (!tex->buffer)
goto fail;
- /* setup any hw fences */
- if (tex->tiling) {
- iws->buffer_set_fence_reg(iws, tex->buffer, tex->stride, tex->tiling);
- }
-
-
-#if 0
- void *ptr = ws->buffer_map(ws, tex->buffer,
- PIPE_BUFFER_USAGE_CPU_WRITE);
- memset(ptr, 0x80, tex_size);
- ws->buffer_unmap(ws, tex->buffer);
-#endif
-
- I915_DBG(DBG_TEXTURE, "%s: %p size %u, stride %u, blocks (%u, %u)\n", __func__,
- tex, (unsigned int)tex_size, tex->stride,
+ I915_DBG(DBG_TEXTURE, "%s: %p stride %u, blocks (%u, %u) tiling %s\n", __func__,
+ tex, tex->stride,
tex->stride / util_format_get_blocksize(tex->b.b.format),
- tex->total_nblocksy);
+ tex->total_nblocksy, get_tiling_string(tex->tiling));
return &tex->b.b;
@@ -819,10 +853,11 @@ i915_texture_from_handle(struct pipe_screen * screen,
struct i915_winsys *iws = is->iws;
struct i915_winsys_buffer *buffer;
unsigned stride;
+ enum i915_winsys_buffer_tile tiling;
assert(screen);
- buffer = iws->buffer_from_handle(iws, whandle, &stride);
+ buffer = iws->buffer_from_handle(iws, whandle, &tiling, &stride);
/* Only supports one type */
if ((template->target != PIPE_TEXTURE_2D &&
@@ -842,6 +877,7 @@ i915_texture_from_handle(struct pipe_screen * screen,
tex->b.b.screen = screen;
tex->stride = stride;
+ tex->tiling = tiling;
tex->total_nblocksy = align_nblocksy(tex->b.b.format, tex->b.b.height0, 8);
i915_texture_set_level_info(tex, 0, 1);
@@ -849,10 +885,10 @@ i915_texture_from_handle(struct pipe_screen * screen,
tex->buffer = buffer;
- I915_DBG(DBG_TEXTURE, "%s: %p stride %u, blocks (%ux%u)\n", __func__,
+ I915_DBG(DBG_TEXTURE, "%s: %p stride %u, blocks (%u, %u) tiling %s\n", __func__,
tex, tex->stride,
tex->stride / util_format_get_blocksize(tex->b.b.format),
- tex->total_nblocksy);
+ tex->total_nblocksy, get_tiling_string(tex->tiling));
return &tex->b.b;
}
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 9292fa00b43..c48d53ffbb2 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -86,6 +86,22 @@ framebuffer_size(const struct pipe_framebuffer_state *fb,
}
}
+static inline uint32_t
+buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling)
+{
+ uint32_t tiling_bits = 0;
+
+ switch (tiling) {
+ case I915_TILE_Y:
+ tiling_bits |= BUF_3D_TILE_WALK_Y;
+ case I915_TILE_X:
+ tiling_bits |= BUF_3D_TILED_SURFACE;
+ case I915_TILE_NONE:
+ break;
+ }
+
+ return tiling_bits;
+}
/* Push the state into the sarea and/or texture memory.
*/
@@ -220,44 +236,39 @@ i915_emit_hardware_state(struct i915_context *i915 )
struct pipe_surface *depth_surface = i915->framebuffer.zsbuf;
if (cbuf_surface) {
- unsigned ctile = BUF_3D_USE_FENCE;
struct i915_texture *tex = i915_texture(cbuf_surface->texture);
- unsigned offset;
assert(tex);
- offset = tex->image_offset[cbuf_surface->u.tex.level][cbuf_surface->u.tex.first_layer];
-
OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
OUT_BATCH(BUF_3D_ID_COLOR_BACK |
BUF_3D_PITCH(tex->stride) | /* pitch in bytes */
- ctile);
+ buf_3d_tiling_bits(tex->tiling));
OUT_RELOC(tex->buffer,
I915_USAGE_RENDER,
- offset);
+ 0);
}
/* What happens if no zbuf??
*/
if (depth_surface) {
- unsigned ztile = BUF_3D_USE_FENCE;
struct i915_texture *tex = i915_texture(depth_surface->texture);
- unsigned offset;
+ unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level,
+ depth_surface->u.tex.first_layer);
assert(tex);
-
- offset = tex->image_offset[depth_surface->u.tex.level][depth_surface->u.tex.first_layer];
+ assert(offset == 0);
OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
assert(tex);
OUT_BATCH(BUF_3D_ID_DEPTH |
BUF_3D_PITCH(tex->stride) | /* pitch in bytes */
- ztile);
+ buf_3d_tiling_bits(tex->tiling));
OUT_RELOC(tex->buffer,
I915_USAGE_RENDER,
- offset);
+ 0);
}
{
@@ -299,12 +310,11 @@ i915_emit_hardware_state(struct i915_context *i915 )
if (enabled & (1 << unit)) {
struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture);
struct i915_winsys_buffer *buf = texture->buffer;
- uint offset = 0;
assert(buf);
count++;
- OUT_RELOC(buf, I915_USAGE_SAMPLER, offset);
+ OUT_RELOC(buf, I915_USAGE_SAMPLER, 0);
OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
}
@@ -397,18 +407,33 @@ i915_emit_hardware_state(struct i915_context *i915 )
#if 01
/* drawing surface size */
/* 6 dwords, 0 relocs */
+ if (i915->hardware_dirty & I915_HW_STATIC)
{
uint w, h;
- boolean k = framebuffer_size(&i915->framebuffer, &w, &h);
- (void)k;
- assert(k);
+ struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
+ struct i915_texture *tex = i915_texture(cbuf_surface->texture);
+ unsigned x, y;
+ int layer;
+ uint32_t draw_offset;
+ boolean ret;
+ ret = framebuffer_size(&i915->framebuffer, &w, &h);
+ assert(ret);
+
+ layer = cbuf_surface->u.tex.first_layer;
+
+ x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx;
+ y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy;
+
+ draw_offset = x | (y << 16);
+
+ /* XXX flush only required when the draw_offset changes! */
+ OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(((w - 1) & 0xffff) | ((h - 1) << 16));
- OUT_BATCH(0);
- OUT_BATCH(0);
+ OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
+ OUT_BATCH(draw_offset);
+ OUT_BATCH((w - 1 + x) | ((h - 1 + y) << 16));
+ OUT_BATCH(draw_offset);
}
#endif
diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c
index 9771274ca11..916cb767536 100644
--- a/src/gallium/drivers/i915/i915_state_sampler.c
+++ b/src/gallium/drivers/i915/i915_state_sampler.c
@@ -243,6 +243,23 @@ static uint translate_texture_format(enum pipe_format pipeFormat)
}
}
+static inline uint32_t
+ms3_tiling_bits(enum i915_winsys_buffer_tile tiling)
+{
+ uint32_t tiling_bits = 0;
+
+ switch (tiling) {
+ case I915_TILE_Y:
+ tiling_bits |= MS3_TILE_WALK_Y;
+ case I915_TILE_X:
+ tiling_bits |= MS3_TILED_SURFACE;
+ case I915_TILE_NONE:
+ break;
+ }
+
+ return tiling_bits;
+}
+
static void update_map(struct i915_context *i915,
uint unit,
const struct i915_texture *tex,
@@ -254,7 +271,6 @@ static void update_map(struct i915_context *i915,
const uint width = pt->width0, height = pt->height0, depth = pt->depth0;
const uint num_levels = pt->last_level;
unsigned max_lod = num_levels * 4;
- unsigned tiled = MS3_USE_FENCE_REGS;
assert(tex);
assert(width);
@@ -272,7 +288,7 @@ static void update_map(struct i915_context *i915,
(((height - 1) << MS3_HEIGHT_SHIFT)
| ((width - 1) << MS3_WIDTH_SHIFT)
| format
- | tiled);
+ | ms3_tiling_bits(tex->tiling));
/*
* XXX When min_filter != mag_filter and there's just one mipmap level,
diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c
index 4ac1f90ef95..becc6e93c2d 100644
--- a/src/gallium/drivers/i915/i915_surface.c
+++ b/src/gallium/drivers/i915/i915_surface.c
@@ -59,13 +59,12 @@ i915_surface_copy(struct pipe_context *pipe,
if (dst->target != PIPE_TEXTURE_CUBE &&
dst->target != PIPE_TEXTURE_3D)
assert(dstz == 0);
- dst_offset = dst_tex->image_offset[dst_level][dstz];
+ dst_offset = i915_texture_offset(dst_tex, dst_level, dstz);
if (src->target != PIPE_TEXTURE_CUBE &&
src->target != PIPE_TEXTURE_3D)
assert(src_box->z == 0);
- src_offset = src_tex->image_offset[src_level][src_box->z];
-
+ src_offset = i915_texture_offset(src_tex, src_level, src_box->z);
assert( dst != src );
assert( util_format_get_blocksize(dpt->format) == util_format_get_blocksize(spt->format) );
@@ -93,7 +92,7 @@ i915_clear_render_target(struct pipe_context *pipe,
struct i915_texture *tex = i915_texture(dst->texture);
struct pipe_resource *pt = &tex->b.b;
union util_color uc;
- unsigned offset = tex->image_offset[dst->u.tex.level][dst->u.tex.first_layer];
+ unsigned offset = i915_texture_offset(tex, dst->u.tex.level, dst->u.tex.first_layer);
assert(util_format_get_blockwidth(pt->format) == 1);
assert(util_format_get_blockheight(pt->format) == 1);
@@ -122,7 +121,7 @@ i915_clear_depth_stencil(struct pipe_context *pipe,
struct pipe_resource *pt = &tex->b.b;
unsigned packedds;
unsigned mask = 0;
- unsigned offset = tex->image_offset[dst->u.tex.level][dst->u.tex.first_layer];
+ unsigned offset = i915_texture_offset(tex, dst->u.tex.level, dst->u.tex.first_layer);
assert(util_format_get_blockwidth(pt->format) == 1);
assert(util_format_get_blockheight(pt->format) == 1);
diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h
index 5385e403d22..24ea416f015 100644
--- a/src/gallium/drivers/i915/i915_winsys.h
+++ b/src/gallium/drivers/i915/i915_winsys.h
@@ -53,6 +53,7 @@ enum i915_winsys_buffer_type
I915_NEW_VERTEX
};
+/* These need to be in sync with the definitions of libdrm-intel! */
enum i915_winsys_buffer_tile
{
I915_TILE_NONE,
@@ -106,7 +107,7 @@ struct i915_winsys {
int (*batchbuffer_reloc)(struct i915_winsys_batchbuffer *batch,
struct i915_winsys_buffer *reloc,
enum i915_winsys_buffer_usage usage,
- unsigned offset);
+ unsigned offset, bool fenced);
/**
* Flush a bufferbatch.
@@ -130,10 +131,24 @@ struct i915_winsys {
*/
struct i915_winsys_buffer *
(*buffer_create)(struct i915_winsys *iws,
- unsigned size, unsigned alignment,
+ unsigned size,
enum i915_winsys_buffer_type type);
/**
+ * Create a tiled buffer.
+ *
+ * *stride, height are in bytes. The winsys tries to allocate the buffer with
+ * the tiling mode provide in *tiling. If tiling is no possible, *tiling will
+ * be set to I915_TILE_NONE. The calculated stride (incorporateing hw/kernel
+ * requirements) is always returned in *stride.
+ */
+ struct i915_winsys_buffer *
+ (*buffer_create_tiled)(struct i915_winsys *iws,
+ unsigned *stride, unsigned height,
+ enum i915_winsys_buffer_tile *tiling,
+ enum i915_winsys_buffer_type type);
+
+ /**
* Creates a buffer from a handle.
* Used to implement pipe_screen::resource_from_handle.
* Also provides the stride information needed for the
@@ -142,6 +157,7 @@ struct i915_winsys {
struct i915_winsys_buffer *
(*buffer_from_handle)(struct i915_winsys *iws,
struct winsys_handle *whandle,
+ enum i915_winsys_buffer_tile *tiling,
unsigned *stride);
/**
@@ -154,15 +170,6 @@ struct i915_winsys {
unsigned stride);
/**
- * Fence a buffer with a fence reg.
- * Not to be confused with pipe_fence_handle.
- */
- int (*buffer_set_fence_reg)(struct i915_winsys *iws,
- struct i915_winsys_buffer *buffer,
- unsigned stride,
- enum i915_winsys_buffer_tile tile);
-
- /**
* Map a buffer.
*/
void *(*buffer_map)(struct i915_winsys *iws,
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index 48c24092114..583e981a4d2 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -424,4 +424,5 @@ void r300_parse_chipset(struct r300_capabilities* caps)
}
caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350;
+ caps->dxtc_swizzle = caps->is_r400 || caps->is_r500;
}
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index e7ca642b4f3..7ea4175dbee 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -79,6 +79,8 @@ struct r300_capabilities {
boolean is_r500;
/* Whether or not the second pixel pipe is accessed with the high bit */
boolean high_second_pipe;
+ /* DXTC texture swizzling. */
+ boolean dxtc_swizzle;
};
/* Enumerations for legibility and telling which card we're running on. */
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 6bea783f697..788c513be75 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -1520,11 +1520,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_TRI_PERF_3_8 (3<<15)
# define R300_ANISO_THRESHOLD_MASK (7<<17)
+# define R400_DXTC_SWIZZLE_ENABLE (1<<21)
# define R500_MACRO_SWITCH (1<<22)
# define R500_TX_MAX_ANISO(x) ((x) << 23)
# define R500_TX_MAX_ANISO_MASK (63 << 23)
# define R500_TX_ANISO_HIGH_QUALITY (1 << 30)
-
# define R500_BORDER_FIX (1<<31)
#define R300_TX_FORMAT0_0 0x4480
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 85de60df0f4..09981cb26b8 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -116,8 +116,9 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
- case PIPE_CAP_TEXTURE_SWIZZLE:
return 1;
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1;
/* Unsupported features (boolean caps). */
case PIPE_CAP_TIMER_QUERY:
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 509e3106a45..0f563703c06 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1347,6 +1347,7 @@ r300_create_sampler_view(struct pipe_context *pipe,
struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view);
struct r300_texture *tex = r300_texture(texture);
boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500;
+ boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle;
if (view) {
view->base = *templ;
@@ -1363,7 +1364,8 @@ r300_create_sampler_view(struct pipe_context *pipe,
view->format = tex->tx_format;
view->format.format1 |= r300_translate_texformat(templ->format,
view->swizzle,
- is_r500);
+ is_r500,
+ dxtc_swizzle);
if (is_r500) {
view->format.format2 |= r500_tx_format_msb_bit(templ->format);
}
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 5722c91ecec..a6d07760515 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -764,13 +764,18 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) {
texstate->format.format1 |=
r300_get_swizzle_combined(depth_swizzle,
- view->swizzle);
+ view->swizzle, FALSE);
} else {
texstate->format.format1 |=
- r300_get_swizzle_combined(depth_swizzle, 0);
+ r300_get_swizzle_combined(depth_swizzle, 0, FALSE);
}
}
+ if (r300->screen->caps.dxtc_swizzle &&
+ util_format_is_compressed(tex->desc.b.b.format)) {
+ texstate->filter1 |= R400_DXTC_SWIZZLE_ENABLE;
+ }
+
/* to emulate 1D textures through 2D ones correctly */
if (tex->desc.b.b.target == PIPE_TEXTURE_1D) {
texstate->filter0 &= ~R300_TX_WRAP_T_MASK;
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index fe8859ab8f0..4b7b3e03564 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -40,7 +40,8 @@
#include "pipe/p_screen.h"
unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
- const unsigned char *swizzle_view)
+ const unsigned char *swizzle_view,
+ boolean dxtc_swizzle)
{
unsigned i;
unsigned char swizzle[4];
@@ -51,10 +52,10 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
R300_TX_FORMAT_B_SHIFT,
R300_TX_FORMAT_A_SHIFT
};
- const uint32_t swizzle_bit[4] = {
- R300_TX_FORMAT_X,
+ uint32_t swizzle_bit[4] = {
+ dxtc_swizzle ? R300_TX_FORMAT_Z : R300_TX_FORMAT_X,
R300_TX_FORMAT_Y,
- R300_TX_FORMAT_Z,
+ dxtc_swizzle ? R300_TX_FORMAT_X : R300_TX_FORMAT_Z,
R300_TX_FORMAT_W
};
@@ -107,7 +108,8 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
* makes available X, Y, Z, W, ZERO, and ONE for swizzling. */
uint32_t r300_translate_texformat(enum pipe_format format,
const unsigned char *swizzle_view,
- boolean is_r500)
+ boolean is_r500,
+ boolean dxtc_swizzle)
{
uint32_t result = 0;
const struct util_format_description *desc;
@@ -169,7 +171,8 @@ uint32_t r300_translate_texformat(enum pipe_format format,
}
}
- result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view);
+ result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view,
+ util_format_is_compressed(format) && dxtc_swizzle);
/* S3TC formats. */
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
@@ -571,7 +574,7 @@ boolean r300_is_zs_format_supported(enum pipe_format format)
boolean r300_is_sampler_format_supported(enum pipe_format format)
{
- return r300_translate_texformat(format, 0, TRUE) != ~0;
+ return r300_translate_texformat(format, 0, TRUE, FALSE) != ~0;
}
void r300_texture_setup_format_state(struct r300_screen *screen,
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index be2740efc3b..0ab22f747e4 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -36,11 +36,13 @@ struct r300_texture;
struct r300_screen;
unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
- const unsigned char *swizzle_view);
+ const unsigned char *swizzle_view,
+ boolean dxtc_swizzle);
uint32_t r300_translate_texformat(enum pipe_format format,
const unsigned char *swizzle_view,
- boolean is_r500);
+ boolean is_r500,
+ boolean dxtc_swizzle);
uint32_t r500_tx_format_msb_bit(enum pipe_format format);
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 33448bf0def..15a323989b2 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -57,7 +57,7 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */
/* gap */
case TGSI_OPCODE_FRC: return RC_OPCODE_FRC;
- /* case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; */
+ case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP;
case TGSI_OPCODE_FLR: return RC_OPCODE_FLR;
/* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */
case TGSI_OPCODE_EX2: return RC_OPCODE_EX2;
diff --git a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
index c6daa52a379..ebe86dcf196 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
+++ b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
@@ -14,9 +14,6 @@
#define INTEL_BATCH_CLIPRECTS 0x2
#undef INTEL_RUN_SYNC
-#undef INTEL_MAP_BATCHBUFFER
-#undef INTEL_MAP_GTT
-#define INTEL_ALWAYS_FLUSH
struct i915_drm_batchbuffer
{
@@ -72,11 +69,7 @@ i915_drm_batchbuffer_create(struct i915_winsys *iws)
batch->actual_size = idws->max_batch_size;
-#ifdef INTEL_MAP_BATCHBUFFER
- batch->base.map = NULL;
-#else
batch->base.map = MALLOC(batch->actual_size);
-#endif
batch->base.ptr = NULL;
batch->base.size = 0;
@@ -94,7 +87,7 @@ static int
i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch,
struct i915_winsys_buffer *buffer,
enum i915_winsys_buffer_usage usage,
- unsigned pre_add)
+ unsigned pre_add, bool fenced)
{
struct i915_drm_batchbuffer *batch = i915_drm_batchbuffer(ibatch);
unsigned write_domain = 0;
@@ -104,37 +97,44 @@ i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch,
assert(batch->base.relocs < batch->base.max_relocs);
- if (usage == I915_USAGE_SAMPLER) {
+ switch (usage) {
+ case I915_USAGE_SAMPLER:
write_domain = 0;
read_domain = I915_GEM_DOMAIN_SAMPLER;
-
- } else if (usage == I915_USAGE_RENDER) {
+ break;
+ case I915_USAGE_RENDER:
write_domain = I915_GEM_DOMAIN_RENDER;
read_domain = I915_GEM_DOMAIN_RENDER;
-
- } else if (usage == I915_USAGE_2D_TARGET) {
+ break;
+ case I915_USAGE_2D_TARGET:
write_domain = I915_GEM_DOMAIN_RENDER;
read_domain = I915_GEM_DOMAIN_RENDER;
-
- } else if (usage == I915_USAGE_2D_SOURCE) {
+ break;
+ case I915_USAGE_2D_SOURCE:
write_domain = 0;
read_domain = I915_GEM_DOMAIN_RENDER;
-
- } else if (usage == I915_USAGE_VERTEX) {
+ break;
+ case I915_USAGE_VERTEX:
write_domain = 0;
read_domain = I915_GEM_DOMAIN_VERTEX;
-
- } else {
+ break;
+ default:
assert(0);
return -1;
}
offset = (unsigned)(batch->base.ptr - batch->base.map);
- ret = drm_intel_bo_emit_reloc(batch->bo, offset,
- intel_bo(buffer), pre_add,
- read_domain,
- write_domain);
+ if (fenced)
+ ret = drm_intel_bo_emit_reloc_fence(batch->bo, offset,
+ intel_bo(buffer), pre_add,
+ read_domain,
+ write_domain);
+ else
+ ret = drm_intel_bo_emit_reloc(batch->bo, offset,
+ intel_bo(buffer), pre_add,
+ read_domain,
+ write_domain);
((uint32_t*)batch->base.ptr)[0] = intel_bo(buffer)->offset + pre_add;
batch->base.ptr += 4;
@@ -150,70 +150,32 @@ i915_drm_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch,
struct pipe_fence_handle **fence)
{
struct i915_drm_batchbuffer *batch = i915_drm_batchbuffer(ibatch);
- unsigned used = 0;
- int ret = 0;
+ unsigned used;
+ int ret;
- assert(i915_winsys_batchbuffer_space(ibatch) >= 0);
+ /* MI_BATCH_BUFFER_END */
+ i915_winsys_batchbuffer_dword_unchecked(ibatch, (0xA<<23));
used = batch->base.ptr - batch->base.map;
- assert((used & 3) == 0);
-
-
-#ifdef INTEL_ALWAYS_FLUSH
- /* MI_FLUSH | FLUSH_MAP_CACHE */
- i915_winsys_batchbuffer_dword(ibatch, (0x4<<23)|(1<<0));
- used += 4;
-#endif
-
- if ((used & 4) == 0) {
+ if (used & 4) {
/* MI_NOOP */
- i915_winsys_batchbuffer_dword(ibatch, 0);
+ i915_winsys_batchbuffer_dword_unchecked(ibatch, 0);
+ used += 4;
}
- /* MI_BATCH_BUFFER_END */
- i915_winsys_batchbuffer_dword(ibatch, (0xA<<23));
-
- used = batch->base.ptr - batch->base.map;
- assert((used & 4) == 0);
-
-#ifdef INTEL_MAP_BATCHBUFFER
-#ifdef INTEL_MAP_GTT
- drm_intel_gem_bo_unmap_gtt(batch->bo);
-#else
- drm_intel_bo_unmap(batch->bo);
-#endif
-#else
- drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map);
-#endif
/* Do the sending to HW */
- if (i915_drm_winsys(ibatch->iws)->send_cmd)
+ ret = drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map);
+ if (ret == 0 && i915_drm_winsys(ibatch->iws)->send_cmd)
ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0);
- else
- ret = 0;
if (ret != 0 || i915_drm_winsys(ibatch->iws)->dump_cmd) {
-#ifdef INTEL_MAP_BATCHBUFFER
-#ifdef INTEL_MAP_GTT
- drm_intel_gem_bo_map_gtt(batch->bo);
-#else
- drm_intel_bo_map(batch->bo, 0);
-#endif
-#endif
i915_dump_batchbuffer(ibatch);
assert(ret == 0);
-#ifdef INTEL_MAP_BATCHBUFFER
-#ifdef INTEL_MAP_GTT
- drm_intel_gem_bo_unmap_gtt(batch->bo);
-#else
- drm_intel_bo_unmap(batch->bo);
-#endif
-#endif
- } else {
+ }
+
#ifdef INTEL_RUN_SYNC
- drm_intel_bo_map(batch->bo, FALSE);
- drm_intel_bo_unmap(batch->bo);
+ drm_intel_bo_wait_rendering(batch->bo);
#endif
- }
if (fence) {
ibatch->iws->fence_reference(ibatch->iws, fence, NULL);
@@ -237,9 +199,7 @@ i915_drm_batchbuffer_destroy(struct i915_winsys_batchbuffer *ibatch)
if (batch->bo)
drm_intel_bo_unreference(batch->bo);
-#ifndef INTEL_MAP_BATCHBUFFER
FREE(batch->base.map);
-#endif
FREE(batch);
}
diff --git a/src/gallium/winsys/i915/drm/i915_drm_buffer.c b/src/gallium/winsys/i915/drm/i915_drm_buffer.c
index 15ec4487457..01dd4bf062f 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_buffer.c
+++ b/src/gallium/winsys/i915/drm/i915_drm_buffer.c
@@ -5,14 +5,31 @@
#include "i915_drm.h"
+static char *i915_drm_type_to_name(enum i915_winsys_buffer_type type)
+{
+ char *name;
+
+ if (type == I915_NEW_TEXTURE) {
+ name = "gallium3d_texture";
+ } else if (type == I915_NEW_VERTEX) {
+ name = "gallium3d_vertex";
+ } else if (type == I915_NEW_SCANOUT) {
+ name = "gallium3d_scanout";
+ } else {
+ assert(0);
+ name = "gallium3d_unknown";
+ }
+
+ return name;
+}
+
static struct i915_winsys_buffer *
i915_drm_buffer_create(struct i915_winsys *iws,
- unsigned size, unsigned alignment,
+ unsigned size,
enum i915_winsys_buffer_type type)
{
struct i915_drm_buffer *buf = CALLOC_STRUCT(i915_drm_buffer);
struct i915_drm_winsys *idws = i915_drm_winsys(iws);
- char *name;
if (!buf)
return NULL;
@@ -21,22 +38,48 @@ i915_drm_buffer_create(struct i915_winsys *iws,
buf->flinked = FALSE;
buf->flink = 0;
- if (type == I915_NEW_TEXTURE) {
- name = "gallium3d_texture";
- } else if (type == I915_NEW_VERTEX) {
- name = "gallium3d_vertex";
- } else if (type == I915_NEW_SCANOUT) {
- name = "gallium3d_scanout";
- } else {
- assert(0);
- name = "gallium3d_unknown";
- }
+ buf->bo = drm_intel_bo_alloc(idws->gem_manager,
+ i915_drm_type_to_name(type), size, 0);
- buf->bo = drm_intel_bo_alloc(idws->gem_manager, name, size, alignment);
+ if (!buf->bo)
+ goto err;
+
+ return (struct i915_winsys_buffer *)buf;
+
+err:
+ assert(0);
+ FREE(buf);
+ return NULL;
+}
+
+static struct i915_winsys_buffer *
+i915_drm_buffer_create_tiled(struct i915_winsys *iws,
+ unsigned *stride, unsigned height,
+ enum i915_winsys_buffer_tile *tiling,
+ enum i915_winsys_buffer_type type)
+{
+ struct i915_drm_buffer *buf = CALLOC_STRUCT(i915_drm_buffer);
+ struct i915_drm_winsys *idws = i915_drm_winsys(iws);
+ unsigned long pitch = 0;
+ uint32_t tiling_mode = *tiling;
+
+ if (!buf)
+ return NULL;
+
+ buf->magic = 0xDEAD1337;
+ buf->flinked = FALSE;
+ buf->flink = 0;
+
+ buf->bo = drm_intel_bo_alloc_tiled(idws->gem_manager,
+ i915_drm_type_to_name(type),
+ *stride, height, 1,
+ &tiling_mode, &pitch, 0);
if (!buf->bo)
goto err;
+ *stride = pitch;
+ *tiling = tiling_mode;
return (struct i915_winsys_buffer *)buf;
err:
@@ -47,8 +90,9 @@ err:
static struct i915_winsys_buffer *
i915_drm_buffer_from_handle(struct i915_winsys *iws,
- struct winsys_handle *whandle,
- unsigned *stride)
+ struct winsys_handle *whandle,
+ enum i915_winsys_buffer_tile *tiling,
+ unsigned *stride)
{
struct i915_drm_winsys *idws = i915_drm_winsys(iws);
struct i915_drm_buffer *buf = CALLOC_STRUCT(i915_drm_buffer);
@@ -68,6 +112,7 @@ i915_drm_buffer_from_handle(struct i915_winsys *iws,
drm_intel_bo_get_tiling(buf->bo, &tile, &swizzle);
*stride = whandle->stride;
+ *tiling = tile;
return (struct i915_winsys_buffer *)buf;
@@ -103,24 +148,6 @@ i915_drm_buffer_get_handle(struct i915_winsys *iws,
return TRUE;
}
-static int
-i915_drm_buffer_set_fence_reg(struct i915_winsys *iws,
- struct i915_winsys_buffer *buffer,
- unsigned stride,
- enum i915_winsys_buffer_tile tile)
-{
- struct i915_drm_buffer *buf = i915_drm_buffer(buffer);
- assert(I915_TILING_NONE == I915_TILE_NONE);
- assert(I915_TILING_X == I915_TILE_X);
- assert(I915_TILING_Y == I915_TILE_Y);
-
- if (tile != I915_TILE_NONE) {
- assert(buf->map_count == 0);
- }
-
- return drm_intel_bo_set_tiling(buf->bo, &tile, stride);
-}
-
static void *
i915_drm_buffer_map(struct i915_winsys *iws,
struct i915_winsys_buffer *buffer,
@@ -190,9 +217,9 @@ void
i915_drm_winsys_init_buffer_functions(struct i915_drm_winsys *idws)
{
idws->base.buffer_create = i915_drm_buffer_create;
+ idws->base.buffer_create_tiled = i915_drm_buffer_create_tiled;
idws->base.buffer_from_handle = i915_drm_buffer_from_handle;
idws->base.buffer_get_handle = i915_drm_buffer_get_handle;
- idws->base.buffer_set_fence_reg = i915_drm_buffer_set_fence_reg;
idws->base.buffer_map = i915_drm_buffer_map;
idws->base.buffer_unmap = i915_drm_buffer_unmap;
idws->base.buffer_write = i915_drm_buffer_write;
diff --git a/src/gallium/winsys/i915/drm/i915_drm_winsys.c b/src/gallium/winsys/i915/drm/i915_drm_winsys.c
index cc0b6a99577..2288b48b2bd 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_winsys.c
+++ b/src/gallium/winsys/i915/drm/i915_drm_winsys.c
@@ -69,6 +69,7 @@ i915_drm_winsys_create(int drmFD)
idws->gem_manager = drm_intel_bufmgr_gem_init(idws->fd, idws->max_batch_size);
drm_intel_bufmgr_gem_enable_reuse(idws->gem_manager);
+ drm_intel_bufmgr_gem_enable_fenced_relocs(idws->gem_manager);
idws->dump_cmd = debug_get_bool_option("I915_DUMP_CMD", FALSE);
idws->send_cmd = !debug_get_bool_option("I915_NO_HW", FALSE);
diff --git a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
index a480cfed57b..44773ae30e7 100644
--- a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
+++ b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
@@ -61,7 +61,7 @@ static int
i915_sw_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch,
struct i915_winsys_buffer *buffer,
enum i915_winsys_buffer_usage usage,
- unsigned pre_add)
+ unsigned pre_add, bool fenced)
{
struct i915_sw_batchbuffer *batch = i915_sw_batchbuffer(ibatch);
int ret = 0;
diff --git a/src/gallium/winsys/i915/sw/i915_sw_buffer.c b/src/gallium/winsys/i915/sw/i915_sw_buffer.c
index df175688861..834805e621d 100644
--- a/src/gallium/winsys/i915/sw/i915_sw_buffer.c
+++ b/src/gallium/winsys/i915/sw/i915_sw_buffer.c
@@ -4,28 +4,15 @@
static struct i915_winsys_buffer *
i915_sw_buffer_create(struct i915_winsys *iws,
- unsigned size, unsigned alignment,
+ unsigned size,
enum i915_winsys_buffer_type type)
{
struct i915_sw_buffer *buf = CALLOC_STRUCT(i915_sw_buffer);
- char *name;
if (!buf)
return NULL;
- if (type == I915_NEW_TEXTURE) {
- name = "gallium3d_texture";
- } else if (type == I915_NEW_VERTEX) {
- name = "gallium3d_vertex";
- } else if (type == I915_NEW_SCANOUT) {
- name = "gallium3d_scanout";
- } else {
- assert(0);
- name = "gallium3d_unknown";
- }
-
buf->magic = 0xDEAD1337;
- buf->name = name;
buf->type = type;
buf->ptr = CALLOC(size, 1);
@@ -40,21 +27,32 @@ err:
return NULL;
}
-static int
-i915_sw_buffer_set_fence_reg(struct i915_winsys *iws,
- struct i915_winsys_buffer *buffer,
- unsigned stride,
- enum i915_winsys_buffer_tile tile)
+static struct i915_winsys_buffer *
+i915_sw_buffer_create_tiled(struct i915_winsys *iws,
+ unsigned *stride, unsigned height,
+ enum i915_winsys_buffer_tile *tiling,
+ enum i915_winsys_buffer_type type)
{
- struct i915_sw_buffer *buf = i915_sw_buffer(buffer);
+ struct i915_sw_buffer *buf = CALLOC_STRUCT(i915_sw_buffer);
+
+ if (!buf)
+ return NULL;
+
+ buf->magic = 0xDEAD1337;
+ buf->type = type;
+ buf->ptr = CALLOC(*stride * height, 1);
+ buf->tiling = *tiling;
+ buf->stride = *stride;
- if (tile != I915_TILE_NONE) {
- assert(buf->map_count == 0);
- }
+ if (!buf->ptr)
+ goto err;
- buf->tile = tile;
+ return (struct i915_winsys_buffer *)buf;
- return 0;
+err:
+ assert(0);
+ FREE(buf);
+ return NULL;
}
static void *
@@ -108,7 +106,7 @@ void
i915_sw_winsys_init_buffer_functions(struct i915_sw_winsys *isws)
{
isws->base.buffer_create = i915_sw_buffer_create;
- isws->base.buffer_set_fence_reg = i915_sw_buffer_set_fence_reg;
+ isws->base.buffer_create_tiled = i915_sw_buffer_create_tiled;
isws->base.buffer_map = i915_sw_buffer_map;
isws->base.buffer_unmap = i915_sw_buffer_unmap;
isws->base.buffer_write = i915_sw_buffer_write;
diff --git a/src/gallium/winsys/i915/sw/i915_sw_winsys.h b/src/gallium/winsys/i915/sw/i915_sw_winsys.h
index b7b43669f30..3af2548419e 100644
--- a/src/gallium/winsys/i915/sw/i915_sw_winsys.h
+++ b/src/gallium/winsys/i915/sw/i915_sw_winsys.h
@@ -43,8 +43,8 @@ struct i915_sw_buffer {
void *ptr;
unsigned map_count;
enum i915_winsys_buffer_type type;
- enum i915_winsys_buffer_tile tile;
- const char *name;
+ enum i915_winsys_buffer_tile tiling;
+ unsigned stride;
};
static INLINE struct i915_sw_buffer *
diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index f5aadc347bd..2674c6ec485 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -52,6 +52,7 @@ CXX_SOURCES = \
loop_analysis.cpp \
loop_controls.cpp \
loop_unroll.cpp \
+ lower_discard.cpp \
lower_if_to_cond_assign.cpp \
lower_instructions.cpp \
lower_jumps.cpp \
@@ -70,6 +71,7 @@ CXX_SOURCES = \
opt_dead_code.cpp \
opt_dead_code_local.cpp \
opt_dead_functions.cpp \
+ opt_discard_simplification.cpp \
opt_function_inlining.cpp \
opt_if_simplification.cpp \
opt_noop_swizzle.cpp \
diff --git a/src/glsl/SConscript b/src/glsl/SConscript
index fd22f668631..b5b1728beef 100644
--- a/src/glsl/SConscript
+++ b/src/glsl/SConscript
@@ -49,6 +49,7 @@ sources = [
'loop_analysis.cpp',
'loop_controls.cpp',
'loop_unroll.cpp',
+ 'lower_discard.cpp',
'lower_if_to_cond_assign.cpp',
'lower_instructions.cpp',
'lower_jumps.cpp',
@@ -66,6 +67,7 @@ sources = [
'opt_dead_code.cpp',
'opt_dead_code_local.cpp',
'opt_dead_functions.cpp',
+ 'opt_discard_simplification.cpp',
'opt_function_inlining.cpp',
'opt_if_simplification.cpp',
'opt_noop_swizzle.cpp',
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 04b221e9b8d..f5b1120f785 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -745,6 +745,16 @@ ast_node::hir(exec_list *instructions,
return NULL;
}
+static void
+mark_whole_array_access(ir_rvalue *access)
+{
+ ir_dereference_variable *deref = access->as_dereference_variable();
+
+ if (deref) {
+ deref->var->max_array_access = deref->type->length - 1;
+ }
+}
+
static ir_rvalue *
do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
{
@@ -780,6 +790,10 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
last = result;
}
}
+
+ mark_whole_array_access(op0);
+ mark_whole_array_access(op1);
+
return last;
}
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 302cfbc5668..8dbe66927dc 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -716,6 +716,7 @@ do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iteration
}
progress = do_structure_splitting(ir) || progress;
progress = do_if_simplification(ir) || progress;
+ progress = do_discard_simplification(ir) || progress;
progress = do_copy_propagation(ir) || progress;
if (linked)
progress = do_dead_code(ir) || progress;
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index fa497a45551..f264265f4b1 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -32,8 +32,9 @@
#define SUB_TO_ADD_NEG 0x01
#define DIV_TO_MUL_RCP 0x02
#define EXP_TO_EXP2 0x04
-#define LOG_TO_LOG2 0x08
-#define MOD_TO_FRACT 0x10
+#define POW_TO_EXP2 0x08
+#define LOG_TO_LOG2 0x10
+#define MOD_TO_FRACT 0x20
bool do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iterations);
@@ -51,6 +52,7 @@ bool do_function_inlining(exec_list *instructions);
bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false);
bool do_lower_texture_projection(exec_list *instructions);
bool do_if_simplification(exec_list *instructions);
+bool do_discard_simplification(exec_list *instructions);
bool do_if_to_cond_assign(exec_list *instructions);
bool do_mat_op_to_vec(exec_list *instructions);
bool do_mod_to_fract(exec_list *instructions);
@@ -61,6 +63,7 @@ bool do_swizzle_swizzle(exec_list *instructions);
bool do_tree_grafting(exec_list *instructions);
bool do_vec_index_to_cond_assign(exec_list *instructions);
bool do_vec_index_to_swizzle(exec_list *instructions);
+bool lower_discard(exec_list *instructions);
bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
bool lower_noise(exec_list *instructions);
bool lower_variable_index_to_cond_assign(exec_list *instructions,
diff --git a/src/glsl/lower_discard.cpp b/src/glsl/lower_discard.cpp
new file mode 100644
index 00000000000..b95313df8c8
--- /dev/null
+++ b/src/glsl/lower_discard.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_discard.cpp
+ *
+ * This pass moves discards out of if-statements.
+ *
+ * Case 1: The "then" branch contains a conditional discard:
+ * ---------------------------------------------------------
+ *
+ * if (cond1) {
+ * s1;
+ * discard cond2;
+ * s2;
+ * } else {
+ * s3;
+ * }
+ *
+ * becomes:
+ *
+ * temp = false;
+ * if (cond1) {
+ * s1;
+ * temp = cond2;
+ * s2;
+ * } else {
+ * s3;
+ * }
+ * discard temp;
+ *
+ * Case 2: The "else" branch contains a conditional discard:
+ * ---------------------------------------------------------
+ *
+ * if (cond1) {
+ * s1;
+ * } else {
+ * s2;
+ * discard cond2;
+ * s3;
+ * }
+ *
+ * becomes:
+ *
+ * temp = false;
+ * if (cond1) {
+ * s1;
+ * } else {
+ * s2;
+ * temp = cond2;
+ * s3;
+ * }
+ * discard temp;
+ *
+ * Case 3: Both branches contain a conditional discard:
+ * ----------------------------------------------------
+ *
+ * if (cond1) {
+ * s1;
+ * discard cond2;
+ * s2;
+ * } else {
+ * s3;
+ * discard cond3;
+ * s4;
+ * }
+ *
+ * becomes:
+ *
+ * temp = false;
+ * if (cond1) {
+ * s1;
+ * temp = cond2;
+ * s2;
+ * } else {
+ * s3;
+ * temp = cond3;
+ * s4;
+ * }
+ * discard temp;
+ *
+ * If there are multiple conditional discards, we need only deal with one of
+ * them. Repeatedly applying this pass will take care of the others.
+ *
+ * Unconditional discards are treated as having a condition of "true".
+ */
+
+#include "glsl_types.h"
+#include "ir.h"
+
+class lower_discard_visitor : public ir_hierarchical_visitor {
+public:
+ lower_discard_visitor()
+ {
+ this->progress = false;
+ }
+
+ ir_visitor_status visit_leave(ir_if *);
+
+ bool progress;
+};
+
+
+bool
+lower_discard(exec_list *instructions)
+{
+ lower_discard_visitor v;
+
+ visit_list_elements(&v, instructions);
+
+ return v.progress;
+}
+
+
+static ir_discard *
+find_discard(exec_list &instructions)
+{
+ foreach_list(n, &instructions) {
+ ir_discard *ir = ((ir_instruction *) n)->as_discard();
+ if (ir != NULL)
+ return ir;
+ }
+ return NULL;
+}
+
+
+static void
+replace_discard(void *mem_ctx, ir_variable *var, ir_discard *ir)
+{
+ ir_rvalue *condition = ir->condition;
+
+ /* For unconditional discards, use "true" as the condition. */
+ if (condition == NULL)
+ condition = new(mem_ctx) ir_constant(true);
+
+ ir_assignment *assignment =
+ new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(var),
+ condition, NULL);
+
+ ir->replace_with(assignment);
+}
+
+
+ir_visitor_status
+lower_discard_visitor::visit_leave(ir_if *ir)
+{
+ ir_discard *then_discard = find_discard(ir->then_instructions);
+ ir_discard *else_discard = find_discard(ir->else_instructions);
+
+ if (then_discard == NULL && else_discard == NULL)
+ return visit_continue;
+
+ void *mem_ctx = talloc_parent(ir);
+
+ ir_variable *temp = new(mem_ctx) ir_variable(glsl_type::bool_type,
+ "discard_cond_temp",
+ ir_var_temporary);
+ ir_assignment *temp_initializer =
+ new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(temp),
+ new(mem_ctx) ir_constant(false), NULL);
+
+ ir->insert_before(temp);
+ ir->insert_before(temp_initializer);
+
+ if (then_discard != NULL)
+ replace_discard(mem_ctx, temp, then_discard);
+
+ if (else_discard != NULL)
+ replace_discard(mem_ctx, temp, else_discard);
+
+ ir_discard *discard = then_discard != NULL ? then_discard : else_discard;
+ discard->condition = new(mem_ctx) ir_dereference_variable(temp);
+ ir->insert_after(discard);
+
+ this->progress = true;
+
+ return visit_continue;
+}
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index d460ba1a97a..a5f61f213d0 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -33,6 +33,7 @@
* - SUB_TO_ADD_NEG
* - DIV_TO_MUL_RCP
* - EXP_TO_EXP2
+ * - POW_TO_EXP2
* - LOG_TO_LOG2
* - MOD_TO_FRACT
*
@@ -61,6 +62,11 @@
* do have base 2 versions, so this pass converts exp and log to exp2
* and log2 operations.
*
+ * POW_TO_EXP2:
+ * -----------
+ * Many older GPUs don't have an x**y instruction. For these GPUs, convert
+ * x**y to 2**(y * log2(x)).
+ *
* MOD_TO_FRACT:
* -------------
* Breaks an ir_unop_mod expression down to (op1 * fract(op0 / op1))
@@ -70,7 +76,7 @@
* opportunity to do things like constant fold the (1.0 / op1) easily.
*/
-#include "main/core.h" /* for M_E */
+#include "main/core.h" /* for M_LOG2E */
#include "glsl_types.h"
#include "ir.h"
#include "ir_optimization.h"
@@ -91,6 +97,7 @@ private:
void div_to_mul_rcp(ir_expression *);
void mod_to_fract(ir_expression *);
void exp_to_exp2(ir_expression *);
+ void pow_to_exp2(ir_expression *);
void log_to_log2(ir_expression *);
};
@@ -172,7 +179,7 @@ lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
void
lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
{
- ir_constant *log2_e = new(ir) ir_constant(log2f(M_E));
+ ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E));
ir->operation = ir_unop_exp2;
ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type,
@@ -181,12 +188,26 @@ lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
}
void
+lower_instructions_visitor::pow_to_exp2(ir_expression *ir)
+{
+ ir_expression *const log2_x =
+ new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
+ ir->operands[0]);
+
+ ir->operation = ir_unop_exp2;
+ ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type,
+ ir->operands[1], log2_x);
+ ir->operands[1] = NULL;
+ this->progress = true;
+}
+
+void
lower_instructions_visitor::log_to_log2(ir_expression *ir)
{
ir->operation = ir_binop_mul;
ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
ir->operands[0], NULL);
- ir->operands[1] = new(ir) ir_constant(1.0f / log2f(M_E));
+ ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E));
this->progress = true;
}
@@ -254,6 +275,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
mod_to_fract(ir);
break;
+ case ir_binop_pow:
+ if (lowering(POW_TO_EXP2))
+ pow_to_exp2(ir);
+ break;
+
default:
return visit_continue;
}
diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp
index e1e7a5b0073..9cd15ef7366 100644
--- a/src/glsl/lower_jumps.cpp
+++ b/src/glsl/lower_jumps.cpp
@@ -23,6 +23,37 @@
/**
* \file lower_jumps.cpp
+ *
+ * This pass lowers jumps (break, continue, and return) to if/else structures.
+ *
+ * It can be asked to:
+ * 1. Pull jumps out of ifs where possible
+ * 2. Remove all "continue"s, replacing them with an "execute flag"
+ * 3. Replace all "break" with a single conditional one at the end of the loop
+ * 4. Replace all "return"s with a single return at the end of the function,
+ * for the main function and/or other functions
+ *
+ * Applying this pass gives several benefits:
+ * 1. All functions can be inlined.
+ * 2. nv40 and other pre-DX10 chips without "continue" can be supported
+ * 3. nv30 and other pre-DX10 chips with no control flow at all are better
+ * supported
+ *
+ * Continues are lowered by adding a per-loop "execute flag", initialized to
+ * true, that when cleared inhibits all execution until the end of the loop.
+ *
+ * Breaks are lowered to continues, plus setting a "break flag" that is checked
+ * at the end of the loop, and trigger the unique "break".
+ *
+ * Returns are lowered to breaks/continues, plus adding a "return flag" that
+ * causes loops to break again out of their enclosing loops until all the
+ * loops are exited: then the "execute flag" logic will ignore everything
+ * until the end of the function.
+ *
+ * Note that "continue" and "return" can also be implemented by adding
+ * a dummy loop and using break.
+ * However, this is bad for hardware with limited nesting depth, and
+ * prevents further optimization, and thus is not currently performed.
*/
#include "glsl_types.h"
@@ -36,7 +67,6 @@ enum jump_strength
strength_continue,
strength_break,
strength_return,
- strength_discard
};
struct block_record
@@ -202,8 +232,6 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor {
virtual void visit(class ir_discard * ir)
{
- truncate_after_instruction(ir);
- this->block.min_strength = strength_discard;
}
enum jump_strength get_jump_strength(ir_instruction* ir)
@@ -217,8 +245,6 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor {
return strength_continue;
} else if(ir->ir_type == ir_type_return)
return strength_return;
- else if(ir->ir_type == ir_type_discard)
- return strength_discard;
else
return strength_none;
}
@@ -253,9 +279,6 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor {
else
lower = lower_sub_return;
break;
- case strength_discard:
- lower = false; /* probably nothing needs this lowered */
- break;
}
return lower;
}
@@ -313,9 +336,8 @@ retry: /* we get here if we put code after the if inside a branch */
/* FINISHME: unify returns with identical expressions */
else if(jump_strengths[0] == strength_return && this->function.signature->return_type->is_void())
ir->insert_after(new(ir) ir_return(NULL));
- /* FINISHME: unify discards */
- else
- unify = false;
+ else
+ unify = false;
if(unify) {
jumps[0]->remove();
diff --git a/src/glsl/opt_discard_simplification.cpp b/src/glsl/opt_discard_simplification.cpp
new file mode 100644
index 00000000000..0e577c478a9
--- /dev/null
+++ b/src/glsl/opt_discard_simplification.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file opt_discard_simplification.cpp
+ *
+ * This pass simplifies if-statements and loops containing unconditional
+ * discards.
+ *
+ * Case 1: Both branches contain unconditional discards:
+ * -----------------------------------------------------
+ *
+ * if (cond) {
+ * s1;
+ * discard;
+ * s2;
+ * } else {
+ * s3;
+ * discard;
+ * s4;
+ * }
+ *
+ * becomes:
+ *
+ * discard
+ *
+ * Case 2: The "then" clause contains an unconditional discard:
+ * ------------------------------------------------------------
+ *
+ * if (cond) {
+ * s1;
+ * discard;
+ * s2;
+ * } else {
+ * s3;
+ * }
+ *
+ * becomes:
+ *
+ * if (cond) {
+ * discard;
+ * } else {
+ * s3;
+ * }
+ *
+ * Case 3: The "else" clause contains an unconditional discard:
+ * ------------------------------------------------------------
+ *
+ * if (cond) {
+ * s1;
+ * } else {
+ * s2;
+ * discard;
+ * s3;
+ * }
+ *
+ * becomes:
+ *
+ * if (cond) {
+ * s1;
+ * } else {
+ * discard;
+ * }
+ */
+
+#include "glsl_types.h"
+#include "ir.h"
+
+class discard_simplifier : public ir_hierarchical_visitor {
+public:
+ discard_simplifier()
+ {
+ this->progress = false;
+ }
+
+ ir_visitor_status visit_enter(ir_if *);
+ ir_visitor_status visit_enter(ir_loop *);
+
+ bool progress;
+};
+
+static ir_discard *
+find_unconditional_discard(exec_list &instructions)
+{
+ foreach_list(n, &instructions) {
+ ir_discard *ir = ((ir_instruction *) n)->as_discard();
+ if (ir != NULL && ir->condition == NULL)
+ return ir;
+ }
+ return NULL;
+}
+
+static bool
+is_only_instruction(ir_discard *discard)
+{
+ return (discard->prev->is_head_sentinel() &&
+ discard->next->is_tail_sentinel());
+}
+
+ir_visitor_status
+discard_simplifier::visit_enter(ir_if *ir)
+{
+ ir_discard *then_discard = find_unconditional_discard(ir->then_instructions);
+ ir_discard *else_discard = find_unconditional_discard(ir->else_instructions);
+
+ if (then_discard == NULL && else_discard == NULL)
+ return visit_continue;
+
+ /* If both branches result in discard, replace whole if with discard. */
+ if (then_discard != NULL && else_discard != NULL) {
+ this->progress = true;
+ ir->replace_with(then_discard);
+ return visit_continue_with_parent;
+ }
+
+ /* Otherwise, one branch has a discard. */
+ if (then_discard != NULL && !is_only_instruction(then_discard)) {
+ this->progress = true;
+ ir->then_instructions.make_empty();
+ ir->then_instructions.push_tail(then_discard);
+ } else if (else_discard != NULL && !is_only_instruction(else_discard)) {
+ this->progress = true;
+ ir->else_instructions.make_empty();
+ ir->else_instructions.push_tail(else_discard);
+ }
+
+ visit_list_elements(this, &ir->then_instructions);
+ return visit_continue_with_parent;
+}
+
+ir_visitor_status
+discard_simplifier::visit_enter(ir_loop *ir)
+{
+ ir_discard *discard = find_unconditional_discard(ir->body_instructions);
+
+ if (discard) {
+ ir->replace_with(discard);
+ return visit_continue_with_parent;
+ }
+
+ return visit_continue;
+}
+
+bool
+do_discard_simplification(exec_list *instructions)
+{
+ /* Look for a top-level unconditional discard */
+ ir_discard *discard = find_unconditional_discard(*instructions);
+ if (discard != NULL) {
+ instructions->make_empty();
+ instructions->push_tail(discard);
+ return true;
+ }
+
+ discard_simplifier v;
+
+ visit_list_elements(&v, instructions);
+
+ return v.progress;
+}
diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c
index f943f81dd05..f32f3cf6020 100644
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -176,6 +176,7 @@ i915CreateContext(int api,
ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = GL_TRUE;
ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoIfs = GL_TRUE;
ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoNoise = GL_TRUE;
+ ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoPow = GL_TRUE;
ctx->Const.MaxDrawBuffers = 1;
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index c00ee415b6b..7a9fb7f088b 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -569,10 +569,14 @@ upload_program(struct i915_fragment_program *p)
if (inst->DstReg.CondMask == COND_TR) {
tmp = i915_get_utemp(p);
+ /* The KIL instruction discards the fragment if any component of
+ * the source is < 0. Emit an immediate operand of {-1}.xywz.
+ */
i915_emit_texld(p, get_live_regs(p, inst),
tmp, A0_DEST_CHANNEL_ALL,
0, /* use a dummy dest reg */
- swizzle(tmp, ONE, ONE, ONE, ONE), /* always */
+ negate(swizzle(tmp, ONE, ONE, ONE, ONE),
+ 1, 1, 1, 1),
T0_TEXKILL);
} else {
p->error = 1;
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index cb0a8b96c9c..28549f2574a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -122,9 +122,6 @@ GLboolean brwCreateContext( int api,
(i == MESA_SHADER_FRAGMENT);
ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
(i == MESA_SHADER_FRAGMENT);
-
- if (intel->gen == 6)
- ctx->ShaderCompilerOptions[i].EmitNoIfs = (i == MESA_SHADER_VERTEX);
}
ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 962c04128b8..6b61f7af15d 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -899,7 +899,8 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
err |= dest (file, inst);
} else if (gen >= 6 && (inst->header.opcode == BRW_OPCODE_IF ||
inst->header.opcode == BRW_OPCODE_ELSE ||
- inst->header.opcode == BRW_OPCODE_ENDIF)) {
+ inst->header.opcode == BRW_OPCODE_ENDIF ||
+ inst->header.opcode == BRW_OPCODE_WHILE)) {
format (file, " %d", inst->bits1.branch_gen6.jump_count);
}
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index b4538e6e8a7..a4904b7098a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -954,6 +954,8 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *patch_insn);
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count);
+struct brw_instruction *brw_CONT_gen6(struct brw_compile *p,
+ struct brw_instruction *do_insn);
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count);
/* Forward jumps:
*/
@@ -1009,6 +1011,7 @@ void brw_math_invert( struct brw_compile *p,
void brw_set_src1( struct brw_instruction *insn,
struct brw_reg reg );
+void brw_set_uip_jip(struct brw_compile *p);
/* brw_optimize.c */
void brw_optimize(struct brw_compile *p);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 9cb941dacfd..945f50d1106 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1029,16 +1029,44 @@ void brw_ENDIF(struct brw_compile *p,
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
+
insn = next_insn(p, BRW_OPCODE_BREAK);
+ if (intel->gen >= 6) {
+ brw_set_dest(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(insn, brw_imm_d(0x0));
+ } else {
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+ insn->bits3.if_else.pad0 = 0;
+ insn->bits3.if_else.pop_count = pop_count;
+ }
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+
+ return insn;
+}
+
+struct brw_instruction *brw_CONT_gen6(struct brw_compile *p,
+ struct brw_instruction *do_insn)
+{
+ struct brw_instruction *insn;
+ int br = 2;
+
+ insn = next_insn(p, BRW_OPCODE_CONTINUE);
+ brw_set_dest(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_dest(insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->bits3.break_cont.uip = br * (do_insn - insn);
+
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
- /* insn->header.mask_control = BRW_MASK_DISABLE; */
- insn->bits3.if_else.pad0 = 0;
- insn->bits3.if_else.pop_count = pop_count;
return insn;
}
@@ -1058,10 +1086,26 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
}
/* DO/WHILE loop:
+ *
+ * The DO/WHILE is just an unterminated loop -- break or continue are
+ * used for control within the loop. We have a few ways they can be
+ * done.
+ *
+ * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
+ * jip and no DO instruction.
+ *
+ * For non-uniform control flow pre-gen6, there's a DO instruction to
+ * push the mask, and a WHILE to jump back, and BREAK to get out and
+ * pop the mask.
+ *
+ * For gen6, there's no more mask stack, so no need for DO. WHILE
+ * just points back to the first instruction of the loop.
*/
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
- if (p->single_program_flow) {
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6 || p->single_program_flow) {
return &p->store[p->nr_insn];
} else {
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
@@ -1094,34 +1138,42 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
if (intel->gen >= 5)
br = 2;
- if (p->single_program_flow)
- insn = next_insn(p, BRW_OPCODE_ADD);
- else
+ if (intel->gen >= 6) {
insn = next_insn(p, BRW_OPCODE_WHILE);
- brw_set_dest(insn, brw_ip_reg());
- brw_set_src0(insn, brw_ip_reg());
- brw_set_src1(insn, brw_imm_d(0x0));
+ brw_set_dest(insn, brw_imm_w(0));
+ insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
+ brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = do_insn->header.execution_size;
+ assert(insn->header.execution_size == BRW_EXECUTE_8);
+ } else {
+ if (p->single_program_flow) {
+ insn = next_insn(p, BRW_OPCODE_ADD);
- if (p->single_program_flow) {
- insn->header.execution_size = BRW_EXECUTE_1;
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16));
+ insn->header.execution_size = BRW_EXECUTE_1;
+ } else {
+ insn = next_insn(p, BRW_OPCODE_WHILE);
- insn->bits3.d = (do_insn - insn) * 16;
- } else {
- insn->header.execution_size = do_insn->header.execution_size;
+ assert(do_insn->header.opcode == BRW_OPCODE_DO);
- assert(do_insn->header.opcode == BRW_OPCODE_DO);
- insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
- insn->bits3.if_else.pop_count = 0;
- insn->bits3.if_else.pad0 = 0;
- }
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0));
-/* insn->header.mask_control = BRW_MASK_ENABLE; */
+ insn->header.execution_size = do_insn->header.execution_size;
+ insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
+ insn->bits3.if_else.pop_count = 0;
+ insn->bits3.if_else.pad0 = 0;
+ }
+ }
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
- /* insn->header.mask_control = BRW_MASK_DISABLE; */
- p->current->header.predicate_control = BRW_PREDICATE_NONE;
return insn;
}
@@ -1989,6 +2041,80 @@ void brw_urb_WRITE(struct brw_compile *p,
swizzle);
}
+static int
+brw_find_next_block_end(struct brw_compile *p, int start)
+{
+ int ip;
+
+ for (ip = start + 1; ip < p->nr_insn; ip++) {
+ struct brw_instruction *insn = &p->store[ip];
+
+ switch (insn->header.opcode) {
+ case BRW_OPCODE_ENDIF:
+ case BRW_OPCODE_ELSE:
+ case BRW_OPCODE_WHILE:
+ return ip;
+ }
+ }
+ assert(!"not reached");
+ return start + 1;
+}
+
+/* There is no DO instruction on gen6, so to find the end of the loop
+ * we have to see if the loop is jumping back before our start
+ * instruction.
+ */
+static int
+brw_find_loop_end(struct brw_compile *p, int start)
+{
+ int ip;
+ int br = 2;
+
+ for (ip = start + 1; ip < p->nr_insn; ip++) {
+ struct brw_instruction *insn = &p->store[ip];
+
+ if (insn->header.opcode == BRW_OPCODE_WHILE) {
+ if (ip + insn->bits1.branch_gen6.jump_count / br < start)
+ return ip;
+ }
+ }
+ assert(!"not reached");
+ return start + 1;
+}
+
+/* After program generation, go back and update the UIP and JIP of
+ * BREAK and CONT instructions to their correct locations.
+ */
+void
+brw_set_uip_jip(struct brw_compile *p)
+{
+ struct intel_context *intel = &p->brw->intel;
+ int ip;
+ int br = 2;
+
+ if (intel->gen < 6)
+ return;
+
+ for (ip = 0; ip < p->nr_insn; ip++) {
+ struct brw_instruction *insn = &p->store[ip];
+
+ switch (insn->header.opcode) {
+ case BRW_OPCODE_BREAK:
+ insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+ insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1);
+ break;
+ case BRW_OPCODE_CONTINUE:
+ /* JIP is set at CONTINUE emit time, since that's when we
+ * know where the start of the loop is.
+ */
+ insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+ assert(insn->bits3.break_cont.uip != 0);
+ assert(insn->bits3.break_cont.jip != 0);
+ break;
+ }
+ }
+}
+
void brw_ff_sync(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 7a8e9812257..ee9ae160bdb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -933,6 +933,10 @@ fs_visitor::visit(ir_expression *ir)
assert(!"not reached: should be handled by lower_noise");
break;
+ case ir_quadop_vector:
+ assert(!"not reached: should be handled by lower_quadop_vector");
+ break;
+
case ir_unop_sqrt:
emit_math(FS_OPCODE_SQRT, this->result, op[0]);
break;
@@ -3375,10 +3379,6 @@ fs_visitor::generate_code()
break;
case BRW_OPCODE_DO:
- /* FINISHME: We need to write the loop instruction support still. */
- if (intel->gen >= 6)
- this->fail = true;
-
loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
if_depth_in_loop[loop_stack_depth] = 0;
break;
@@ -3388,7 +3388,11 @@ fs_visitor::generate_code()
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case BRW_OPCODE_CONTINUE:
- brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+ /* FINISHME: We need to write the loop instruction support still. */
+ if (intel->gen >= 6)
+ brw_CONT_gen6(p, loop_stack[loop_stack_depth - 1]);
+ else
+ brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
@@ -3402,16 +3406,18 @@ fs_visitor::generate_code()
assert(loop_stack_depth > 0);
loop_stack_depth--;
inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
- /* patch all the BREAK/CONT instructions from last BGNLOOP */
- while (inst0 > loop_stack[loop_stack_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ if (intel->gen < 6) {
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_stack[loop_stack_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
}
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ }
}
}
}
@@ -3488,6 +3494,26 @@ fs_visitor::generate_code()
last_native_inst = p->nr_insn;
}
+
+ brw_set_uip_jip(p);
+
+ /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
+ * emit issues, it doesn't get the jump distances into the output,
+ * which is often something we want to debug. So this is here in
+ * case you're doing that.
+ */
+ if (0) {
+ if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
+ for (unsigned int i = 0; i < p->nr_insn; i++) {
+ printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+ ((uint32_t *)&p->store[i])[3],
+ ((uint32_t *)&p->store[i])[2],
+ ((uint32_t *)&p->store[i])[1],
+ ((uint32_t *)&p->store[i])[0]);
+ brw_disasm(stdout, &p->store[i], intel->gen);
+ }
+ }
+ }
}
GLboolean
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 8ce9af9c4fe..8f97bd136fd 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -1539,6 +1539,21 @@ struct brw_instruction
GLuint pad0:12;
} if_else;
+ struct
+ {
+ /* Signed jump distance to the ip to jump to if all channels
+ * are disabled after the break or continue. It should point
+ * to the end of the innermost control flow block, as that's
+ * where some channel could get re-enabled.
+ */
+ int jip:16;
+
+ /* Signed jump distance to the location to resume execution
+ * of this channel if it's enabled for the break or continue.
+ */
+ int uip:16;
+ } break_cont;
+
struct {
GLuint function:4;
GLuint int_type:1;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index b13e0c2a2c8..407358f498f 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -2032,35 +2032,42 @@ void brw_vs_emit(struct brw_vs_compile *c )
break;
case OPCODE_CONT:
brw_set_predicate_control(p, get_predicate(inst));
- brw_CONT(p, if_depth_in_loop[loop_depth]);
+ if (intel->gen >= 6) {
+ brw_CONT_gen6(p, loop_inst[loop_depth - 1]);
+ } else {
+ brw_CONT(p, if_depth_in_loop[loop_depth]);
+ }
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
- case OPCODE_ENDLOOP:
- {
- clear_current_const(c);
- struct brw_instruction *inst0, *inst1;
- GLuint br = 1;
-
- loop_depth--;
-
- if (intel->gen == 5)
- br = 2;
-
- inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
- /* patch all the BREAK/CONT instructions from last BEGINLOOP */
- while (inst0 > loop_inst[loop_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+
+ case OPCODE_ENDLOOP: {
+ clear_current_const(c);
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ loop_depth--;
+
+ if (intel->gen == 5)
+ br = 2;
+
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+
+ if (intel->gen < 6) {
+ /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
- }
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
- }
- }
- }
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ }
+ }
+ }
+ }
break;
+
case OPCODE_BRA:
brw_set_predicate_control(p, get_predicate(inst));
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
@@ -2151,6 +2158,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
}
brw_resolve_cals(p);
+ brw_set_uip_jip(p);
brw_optimize(p);
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 471067e8f02..06ac5d49a0f 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -99,6 +99,48 @@ upload_sf_state(struct brw_context *brw)
if (ctx->Polygon.OffsetFill)
dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
+ if (ctx->Polygon.OffsetLine)
+ dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
+
+ if (ctx->Polygon.OffsetPoint)
+ dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
+
+ switch (ctx->Polygon.FrontMode) {
+ case GL_FILL:
+ dw2 |= GEN6_SF_FRONT_SOLID;
+ break;
+
+ case GL_LINE:
+ dw2 |= GEN6_SF_FRONT_WIREFRAME;
+ break;
+
+ case GL_POINT:
+ dw2 |= GEN6_SF_FRONT_POINT;
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (ctx->Polygon.BackMode) {
+ case GL_FILL:
+ dw2 |= GEN6_SF_BACK_SOLID;
+ break;
+
+ case GL_LINE:
+ dw2 |= GEN6_SF_BACK_WIREFRAME;
+ break;
+
+ case GL_POINT:
+ dw2 |= GEN6_SF_BACK_POINT;
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
/* _NEW_SCISSOR */
if (ctx->Scissor.Enabled)
dw3 |= GEN6_SF_SCISSOR_ENABLE;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index da495a3afaa..113b27632a2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -67,6 +67,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.IsComponentwise = 1
},
{
+ .Opcode = RC_OPCODE_CLAMP,
+ .Name = "CLAMP",
+ .NumSrcRegs = 3,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
.Opcode = RC_OPCODE_CMP,
.Name = "CMP",
.NumSrcRegs = 3,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index d3f639c8701..7e666101276 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -50,6 +50,9 @@ typedef enum {
/** vec4 instruction: dst.c = ceil(src0.c) */
RC_OPCODE_CEIL,
+ /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */
+ RC_OPCODE_CLAMP,
+
/** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
RC_OPCODE_CMP,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 106e03495d8..01c2e74e7b3 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -217,6 +217,22 @@ static void transform_CEIL(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
+static void transform_CLAMP(struct radeon_compiler *c,
+ struct rc_instruction *inst)
+{
+ /* CLAMP dst, src, min, max
+ * into:
+ * MIN tmp, src, max
+ * MAX dst, tmp, min
+ */
+ int tempreg = rc_find_free_temporary(c);
+ emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dstreg(RC_FILE_TEMPORARY, tempreg),
+ inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]);
+ emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[1]);
+ rc_remove_instruction(inst);
+}
+
static void transform_DP2(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -554,6 +570,7 @@ int radeonTransformALU(
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_DST: transform_DST(c, inst); return 1;
@@ -782,6 +799,7 @@ int r300_transform_vertex_alu(
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1;
case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1;
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 2bf24096a0d..1fa559cec1a 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -3334,7 +3334,14 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_CNDGE;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
+ }
pAsm->D.dst.op3 = 1;
tmp = (-1);
@@ -3416,8 +3423,14 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
checkop1(pAsm);
tmp = gethelpr(pAsm);
-
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -3457,7 +3470,14 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
{
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -4742,7 +4762,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
tmp = gethelpr(pAsm);
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -4782,7 +4809,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
{
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -5010,7 +5044,14 @@ GLboolean assemble_SSG(r700_AssemblerBase *pAsm)
GLuint tmp = gethelpr(pAsm);
/* tmp = (src > 0 ? 1 : src) */
- pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_CNDGT;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
+ }
pAsm->D.dst.op3 = 1;
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp;
@@ -5033,7 +5074,14 @@ GLboolean assemble_SSG(r700_AssemblerBase *pAsm)
}
/* dst = (-tmp > 0 ? -1 : tmp) */
- pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_CNDGT;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
+ }
pAsm->D.dst.op3 = 1;
if( GL_FALSE == assemble_dst(pAsm) )
diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
index 800eb839005..5557a3b5cb5 100644
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -358,6 +358,10 @@ static INLINE GLuint CPU_TO_LE32(GLuint x)
#define M_E (2.7182818284590452354)
#endif
+#ifndef M_LOG2E
+#define M_LOG2E (1.4426950408889634074)
+#endif
+
#ifndef ONE_DIV_LN2
#define ONE_DIV_LN2 (1.442695040888963456)
#endif
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 80c20e09d9a..82495714f21 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2197,6 +2197,7 @@ struct gl_shader_compiler_options
GLboolean EmitNoCont; /**< Emit CONT opcode? */
GLboolean EmitNoMainReturn; /**< Emit CONT/RET opcodes? */
GLboolean EmitNoNoise; /**< Emit NOISE opcodes? */
+ GLboolean EmitNoPow; /**< Emit POW opcodes? */
/**
* \name Forms of indirect addressing the driver cannot do.
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 8f75c82c3eb..b274a961b28 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2166,9 +2166,14 @@ ir_to_mesa_visitor::visit(ir_discard *ir)
{
struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
- assert(ir->condition == NULL); /* FINISHME */
+ if (ir->condition) {
+ ir->condition->accept(this);
+ this->result.negate = ~this->result.negate;
+ ir_to_mesa_emit_op1(ir, OPCODE_KIL, ir_to_mesa_undef_dst, this->result);
+ } else {
+ ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV);
+ }
- ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV);
fp->UsesKill = GL_TRUE;
}
@@ -2844,8 +2849,9 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
/* Lowering */
do_mat_op_to_vec(ir);
- lower_instructions(ir, MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
- | LOG_TO_LOG2);
+ lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
+ | LOG_TO_LOG2
+ | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
@@ -2853,8 +2859,10 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
progress = lower_quadop_vector(ir, true) || progress;
- if (options->EmitNoIfs)
+ if (options->EmitNoIfs) {
+ progress = lower_discard(ir) || progress;
progress = do_if_to_cond_assign(ir) || progress;
+ }
if (options->EmitNoNoise)
progress = lower_noise(ir) || progress;