author     Luc Verhaegen <libv@skynet.be>  2010-03-14 01:38:43 +0100
committer  Luc Verhaegen <libv@skynet.be>  2010-03-14 01:38:43 +0100
commit     c8335894f829274c38ab5281234c276f0803d65d (patch)
tree       c1a9664be1d821438fb72e9ffae814e48fd4a8f0
parent     7a3ced9ea22443dcdc83b0f2e180bb7cbe8aa77c (diff)

Import i915 and i965 dri drivers from mesa 7.3.0. (tag: 7.3.0)
-rw-r--r--  configure.ac | 9
-rw-r--r--  i915/Makefile.am | 8
-rw-r--r--  i915/i830_context.c | 5
-rw-r--r--  i915/i830_context.h | 8
-rw-r--r--  i915/i830_metaops.c | 9
-rw-r--r--  i915/i830_reg.h | 8
-rw-r--r--  i915/i830_state.c | 10
-rw-r--r--  i915/i830_tex.c | 22
-rw-r--r--  i915/i830_texblend.c | 18
-rw-r--r--  i915/i830_texstate.c | 7
-rw-r--r--  i915/i830_vtbl.c | 82
-rw-r--r--  i915/i915_context.c | 9
-rw-r--r--  i915/i915_context.h | 11
-rw-r--r--  i915/i915_debug.c | 2
-rw-r--r--  i915/i915_fragprog.c | 32
-rw-r--r--  i915/i915_metaops.c | 9
-rw-r--r--  i915/i915_program.c | 44
-rw-r--r--  i915/i915_reg.h | 118
-rw-r--r--  i915/i915_state.c | 12
-rw-r--r--  i915/i915_tex.c | 22
-rw-r--r--  i915/i915_tex_layout.c | 2
-rw-r--r--  i915/i915_texstate.c | 33
-rw-r--r--  i915/i915_vtbl.c | 102
-rw-r--r--  i915/intel_pixel_read.c | 26
-rw-r--r--  i915/intel_render.c | 44
-rw-r--r--  i915/intel_state.c | 14
-rw-r--r--  i915/intel_tris.c | 248
-rw-r--r--  i915/intel_tris.h | 16
-rw-r--r--  i965/Makefile.am | 9
-rw-r--r--  i965/brw_cc.c | 23
-rw-r--r--  i965/brw_clip.c | 10
-rw-r--r--  i965/brw_clip_line.c | 10
-rw-r--r--  i965/brw_clip_point.c | 8
-rw-r--r--  i965/brw_clip_state.c | 37
-rw-r--r--  i965/brw_clip_tri.c | 10
-rw-r--r--  i965/brw_clip_unfilled.c | 8
-rw-r--r--  i965/brw_clip_util.c | 8
-rw-r--r--  i965/brw_context.c | 31
-rw-r--r--  i965/brw_context.h | 87
-rw-r--r--  i965/brw_curbe.c | 50
-rw-r--r--  i965/brw_defines.h | 71
-rw-r--r--  i965/brw_draw.c | 175
-rw-r--r--  i965/brw_draw.h | 26
-rw-r--r--  i965/brw_draw_upload.c | 152
-rw-r--r--  i965/brw_eu.h | 93
-rw-r--r--  i965/brw_eu_debug.c | 4
-rw-r--r--  i965/brw_eu_emit.c | 25
-rw-r--r--  i965/brw_fallback.c | 23
-rw-r--r--  i965/brw_fallback.h | 2
-rw-r--r--  i965/brw_gs.c | 13
-rw-r--r--  i965/brw_gs_emit.c | 6
-rw-r--r--  i965/brw_gs_state.c | 15
-rw-r--r--  i965/brw_metaops.c | 6
-rw-r--r--  i965/brw_misc_state.c | 138
-rw-r--r--  i965/brw_program.c | 8
-rw-r--r--  i965/brw_queryobj.c | 259
-rw-r--r--  i965/brw_sf.c | 20
-rw-r--r--  i965/brw_sf_emit.c | 6
-rw-r--r--  i965/brw_sf_state.c | 68
-rw-r--r--  i965/brw_state.h | 24
-rw-r--r--  i965/brw_state_batch.c | 4
-rw-r--r--  i965/brw_state_cache.c | 12
-rw-r--r--  i965/brw_state_dump.c | 2
-rw-r--r--  i965/brw_state_upload.c | 187
-rw-r--r--  i965/brw_structs.h | 4
-rw-r--r--  i965/brw_tex.c | 20
-rw-r--r--  i965/brw_tex_layout.c | 2
-rw-r--r--  i965/brw_urb.c | 66
-rw-r--r--  i965/brw_util.c | 2
-rw-r--r--  i965/brw_util.h | 2
-rw-r--r--  i965/brw_vs.c | 3
-rw-r--r--  i965/brw_vs.h | 4
-rw-r--r--  i965/brw_vs_constval.c | 5
-rw-r--r--  i965/brw_vs_emit.c | 140
-rw-r--r--  i965/brw_vs_state.c | 31
-rw-r--r--  i965/brw_vs_tnl.c | 1709
-rw-r--r--  i965/brw_vtbl.c | 61
-rw-r--r--  i965/brw_wm.c | 66
-rw-r--r--  i965/brw_wm.h | 7
-rw-r--r--  i965/brw_wm_debug.c | 4
-rw-r--r--  i965/brw_wm_emit.c | 89
-rw-r--r--  i965/brw_wm_fp.c | 80
-rw-r--r--  i965/brw_wm_glsl.c | 1186
-rw-r--r--  i965/brw_wm_iz.c | 66
-rw-r--r--  i965/brw_wm_sampler_state.c | 47
-rw-r--r--  i965/brw_wm_state.c | 58
-rw-r--r--  i965/brw_wm_surface_state.c | 146
-rw-r--r--  i965/intel_state.c | 12
-rw-r--r--  shared/intel_batchbuffer.c | 149
-rw-r--r--  shared/intel_batchbuffer.h | 65
-rw-r--r--  shared/intel_blit.c | 223
-rw-r--r--  shared/intel_blit.h | 10
-rw-r--r--  shared/intel_buffer_objects.c | 11
-rw-r--r--  shared/intel_buffer_objects.h | 2
-rw-r--r--  shared/intel_buffers.c | 455
-rw-r--r--  shared/intel_buffers.h | 9
-rw-r--r--  shared/intel_bufmgr_ttm.c | 1122
-rw-r--r--  shared/intel_bufmgr_ttm.h | 28
-rw-r--r--  shared/intel_chipset.h | 6
-rw-r--r--  shared/intel_context.c | 596
-rw-r--r--  shared/intel_context.h | 61
-rw-r--r--  shared/intel_decode.c | 783
-rw-r--r--  shared/intel_depthstencil.c | 111
-rw-r--r--  shared/intel_fbo.c | 101
-rw-r--r--  shared/intel_fbo.h | 34
-rw-r--r--  shared/intel_ioctl.c | 223
-rw-r--r--  shared/intel_ioctl.h | 46
-rw-r--r--  shared/intel_mipmap_tree.c | 15
-rw-r--r--  shared/intel_pixel.c | 180
-rw-r--r--  shared/intel_pixel.h | 11
-rw-r--r--  shared/intel_pixel_bitmap.c | 334
-rw-r--r--  shared/intel_pixel_copy.c | 154
-rw-r--r--  shared/intel_pixel_draw.c | 579
-rw-r--r--  shared/intel_reg.h | 153
-rw-r--r--  shared/intel_regions.c | 188
-rw-r--r--  shared/intel_regions.h | 29
-rw-r--r--  shared/intel_screen.c | 351
-rw-r--r--  shared/intel_screen.h | 14
-rw-r--r--  shared/intel_span.c | 690
-rw-r--r--  shared/intel_span.h | 6
-rw-r--r--  shared/intel_tex.c | 14
-rw-r--r--  shared/intel_tex.h | 12
-rw-r--r--  shared/intel_tex_copy.c | 125
-rw-r--r--  shared/intel_tex_format.c | 14
-rw-r--r--  shared/intel_tex_image.c | 60
-rw-r--r--  shared/intel_tex_layout.c | 2
-rw-r--r--  shared/intel_tex_layout.h | 2
-rw-r--r--  shared/intel_tex_subimage.c | 25
-rw-r--r--  shared/intel_tex_validate.c | 23
129 files changed, 6682 insertions, 6704 deletions
diff --git a/configure.ac b/configure.ac
index 4ca4296..c2215a0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
# Process this file with autoconf to produce a configure script
AC_PREREQ(2.57)
-AC_INIT([mesa-dri-i9xx], 7.2.0, [], mesa-dri-i9xx)
+AC_INIT([mesa-dri-i9xx], 7.3.0, [], mesa-dri-i9xx)
AM_INIT_AUTOMAKE([dist-bzip2])
@@ -15,10 +15,9 @@ AC_PROG_CC
# Checks for header files.
AC_HEADER_STDC
-PKG_CHECK_MODULES([DRM], [libdrm >= 2.3.0])
-# requires MESA_FORMAT_S8_Z24.
-PKG_CHECK_MODULES([DRI], [libmesadri >= 7.2.0 libmesadri < 7.3.0
- libmesadricommon >= 7.2.0 libmesadricommon < 7.3.0])
+PKG_CHECK_MODULES([DRM], [libdrm >= 2.4.3])
+PKG_CHECK_MODULES([DRI], [libmesadri >= 7.3.0 libmesadri < 7.4.0
+ libmesadricommon >= 7.3.0 libmesadricommon < 7.4.0])
AC_OUTPUT([
Makefile
diff --git a/i915/Makefile.am b/i915/Makefile.am
index e483d0c..75475cc 100644
--- a/i915/Makefile.am
+++ b/i915/Makefile.am
@@ -5,7 +5,7 @@ I915_CFLAGS = -I../shared -I../shared/server -DI915
i915_dri_la_LTLIBRARIES = i915_dri.la
i915_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(I915_CFLAGS)
i915_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \
- $(DRM_LIBS) $(DRI_LIBS)
+ $(DRM_LIBS) -ldrm_intel $(DRI_LIBS)
i915_dri_ladir = @libdir@/dri
i915_dri_la_SOURCES = \
i830_context.c \
@@ -31,8 +31,8 @@ i915_dri_la_SOURCES = \
../shared/intel_pixel.c \
../shared/intel_pixel_bitmap.c \
../shared/intel_pixel_copy.c \
- intel_pixel_read.c \
../shared/intel_pixel_draw.c \
+ intel_pixel_read.c \
../shared/intel_buffers.c \
../shared/intel_blit.c \
i915_tex.c \
@@ -47,11 +47,9 @@ i915_dri_la_SOURCES = \
i915_vtbl.c \
../shared/intel_context.c \
../shared/intel_decode.c \
- ../shared/intel_ioctl.c \
../shared/intel_screen.c \
../shared/intel_span.c \
intel_state.c \
intel_tris.c \
../shared/intel_fbo.c \
- ../shared/intel_depthstencil.c \
- ../shared/intel_bufmgr_ttm.c
+ ../shared/intel_depthstencil.c
diff --git a/i915/i830_context.c b/i915/i830_context.c
index 16c8a8d..09b1ec9 100644
--- a/i915/i830_context.c
+++ b/i915/i830_context.c
@@ -26,7 +26,7 @@
**************************************************************************/
#include "i830_context.h"
-#include "imports.h"
+#include "main/imports.h"
#include "texmem.h"
#include "intel_tex.h"
#include "tnl/tnl.h"
@@ -81,6 +81,9 @@ i830CreateContext(const __GLcontextModes * mesaVis,
_tnl_destroy_pipeline(ctx);
_tnl_install_pipeline(ctx, intel_pipeline);
+ if (intel->no_rast)
+ FALLBACK(intel, INTEL_FALLBACK_USER, 1);
+
intel->ctx.Const.MaxTextureUnits = I830_TEX_UNITS;
intel->ctx.Const.MaxTextureImageUnits = I830_TEX_UNITS;
intel->ctx.Const.MaxTextureCoordUnits = I830_TEX_UNITS;
diff --git a/i915/i830_context.h b/i915/i830_context.h
index a298c14..1bdb320 100644
--- a/i915/i830_context.h
+++ b/i915/i830_context.h
@@ -57,7 +57,13 @@
#define I830_DESTREG_SR0 7
#define I830_DESTREG_SR1 8
#define I830_DESTREG_SR2 9
-#define I830_DEST_SETUP_SIZE 10
+#define I830_DESTREG_DRAWRECT0 10
+#define I830_DESTREG_DRAWRECT1 11
+#define I830_DESTREG_DRAWRECT2 12
+#define I830_DESTREG_DRAWRECT3 13
+#define I830_DESTREG_DRAWRECT4 14
+#define I830_DESTREG_DRAWRECT5 15
+#define I830_DEST_SETUP_SIZE 16
#define I830_CTXREG_STATE1 0
#define I830_CTXREG_STATE2 1
diff --git a/i915/i830_metaops.c b/i915/i830_metaops.c
index 13e4ab3..2cce661 100644
--- a/i915/i830_metaops.c
+++ b/i915/i830_metaops.c
@@ -25,15 +25,14 @@
*
**************************************************************************/
-#include "glheader.h"
-#include "enums.h"
-#include "mtypes.h"
-#include "macros.h"
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
#include "utils.h"
#include "intel_screen.h"
#include "intel_batchbuffer.h"
-#include "intel_ioctl.h"
#include "intel_regions.h"
#include "i830_context.h"
diff --git a/i915/i830_reg.h b/i915/i830_reg.h
index 41280bc..d210c2d 100644
--- a/i915/i830_reg.h
+++ b/i915/i830_reg.h
@@ -494,10 +494,6 @@
#define VFT1_TEX0_FMT(x) (x)
#define VFT1_TEX0_MASK 3
#define VFT1_TEX1_SHIFT 2
-#define TEXCOORDFMT_2D 0
-#define TEXCOORDFMT_3D 1
-#define TEXCOORDFMT_4D 2
-#define TEXCOORDFMT_1D 3
/*New stuff picked up along the way */
@@ -635,8 +631,4 @@
#define ENABLE_TEX_STREAM_MAP_IDX (1<<3)
#define TEX_STREAM_MAP_IDX(x) (x)
-
-#define MI_FLUSH ((0<<29)|(4<<23))
-#define FLUSH_MAP_CACHE (1<<0)
-
#endif
diff --git a/i915/i830_state.c b/i915/i830_state.c
index e44a7df..d9cad0c 100644
--- a/i915/i830_state.c
+++ b/i915/i830_state.c
@@ -26,11 +26,11 @@
**************************************************************************/
-#include "glheader.h"
-#include "context.h"
-#include "macros.h"
-#include "enums.h"
-#include "dd.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "main/dd.h"
#include "texmem.h"
diff --git a/i915/i830_tex.c b/i915/i830_tex.c
index 79b0fcf..34ac42a 100644
--- a/i915/i830_tex.c
+++ b/i915/i830_tex.c
@@ -25,20 +25,18 @@
*
**************************************************************************/
-#include "glheader.h"
-#include "mtypes.h"
-#include "imports.h"
-#include "simple_list.h"
-#include "enums.h"
-#include "image.h"
-#include "texstore.h"
-#include "texformat.h"
-#include "texmem.h"
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/simple_list.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/mm.h"
+#include "main/texstore.h"
+#include "main/texformat.h"
#include "swrast/swrast.h"
-#include "mm.h"
-
-#include "intel_ioctl.h"
+#include "texmem.h"
#include "i830_context.h"
#include "i830_reg.h"
diff --git a/i915/i830_texblend.c b/i915/i830_texblend.c
index 58f220e..09f7f37 100644
--- a/i915/i830_texblend.c
+++ b/i915/i830_texblend.c
@@ -25,18 +25,16 @@
*
**************************************************************************/
-#include "glheader.h"
-#include "macros.h"
-#include "mtypes.h"
-#include "simple_list.h"
-#include "enums.h"
-#include "texformat.h"
-#include "texstore.h"
-
-#include "mm.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/simple_list.h"
+#include "main/enums.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
+#include "main/mm.h"
#include "intel_screen.h"
-#include "intel_ioctl.h"
#include "intel_tex.h"
#include "i830_context.h"
diff --git a/i915/i830_texstate.c b/i915/i830_texstate.c
index 4e9b022..c718bb0 100644
--- a/i915/i830_texstate.c
+++ b/i915/i830_texstate.c
@@ -25,10 +25,9 @@
*
**************************************************************************/
-#include "mtypes.h"
-#include "enums.h"
-#include "texformat.h"
-#include "dri_bufmgr.h"
+#include "main/mtypes.h"
+#include "main/enums.h"
+#include "main/texformat.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
diff --git a/i915/i830_vtbl.c b/i915/i830_vtbl.c
index c5a85fe..8fc8aa5 100644
--- a/i915/i830_vtbl.c
+++ b/i915/i830_vtbl.c
@@ -25,12 +25,13 @@
*
**************************************************************************/
-#include "glapi.h"
+#include "glapi/glapi.h"
#include "i830_context.h"
#include "i830_reg.h"
#include "intel_batchbuffer.h"
#include "intel_regions.h"
+#include "intel_tris.h"
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
@@ -296,7 +297,7 @@ i830_emit_invarient_state(struct intel_context *intel)
{
BATCH_LOCALS;
- BEGIN_BATCH(40, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(30, IGNORE_CLIPRECTS);
OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
OUT_BATCH(0);
@@ -419,10 +420,12 @@ i830_emit_state(struct intel_context *intel)
{
struct i830_context *i830 = i830_context(&intel->ctx);
struct i830_hw_state *state = i830->current;
- int i, ret, count;
+ int i, count;
GLuint dirty;
GET_CURRENT_CONTEXT(ctx);
BATCH_LOCALS;
+ dri_bo *aper_array[3 + I830_TEX_UNITS];
+ int aper_count;
/* We don't hold the lock at this point, so want to make sure that
* there won't be a buffer wrap between the state emits and the primitive
@@ -435,26 +438,29 @@ i830_emit_state(struct intel_context *intel)
* Set the space as LOOP_CLIPRECTS now, since that's what our primitives
* will be emitted under.
*/
- intel_batchbuffer_require_space(intel->batch, get_state_size(state) + 8,
+ intel_batchbuffer_require_space(intel->batch,
+ get_state_size(state) + INTEL_PRIM_EMIT_SIZE,
LOOP_CLIPRECTS);
count = 0;
again:
+ aper_count = 0;
dirty = get_dirty(state);
- ret = 0;
+ aper_array[aper_count++] = intel->batch->buf;
if (dirty & I830_UPLOAD_BUFFERS) {
- ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer);
- ret |= dri_bufmgr_check_aperture_space(state->depth_region->buffer);
+ aper_array[aper_count++] = state->draw_region->buffer;
+ if (state->depth_region)
+ aper_array[aper_count++] = state->depth_region->buffer;
}
-
+
for (i = 0; i < I830_TEX_UNITS; i++)
if (dirty & I830_UPLOAD_TEX(i)) {
if (state->tex_buffer[i]) {
- ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]);
+ aper_array[aper_count++] = state->tex_buffer[i];
}
}
- if (ret) {
+ if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) {
if (count == 0) {
count++;
intel_batchbuffer_flush(intel->batch);
@@ -485,19 +491,28 @@ i830_emit_state(struct intel_context *intel)
}
if (dirty & I830_UPLOAD_BUFFERS) {
+ GLuint count = 9;
+
DBG("I830_UPLOAD_BUFFERS:\n");
- BEGIN_BATCH(I830_DEST_SETUP_SIZE + 2, IGNORE_CLIPRECTS);
+
+ if (state->depth_region)
+ count += 3;
+
+ if (intel->constant_cliprect)
+ count += 6;
+
+ BEGIN_BATCH(count, IGNORE_CLIPRECTS);
OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR0]);
OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR1]);
OUT_RELOC(state->draw_region->buffer,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
state->draw_region->draw_offset);
if (state->depth_region) {
OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR0]);
OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR1]);
OUT_RELOC(state->depth_region->buffer,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
state->depth_region->draw_offset);
}
@@ -507,6 +522,16 @@ i830_emit_state(struct intel_context *intel)
OUT_BATCH(state->Buffer[I830_DESTREG_SR0]);
OUT_BATCH(state->Buffer[I830_DESTREG_SR1]);
OUT_BATCH(state->Buffer[I830_DESTREG_SR2]);
+
+ if (intel->constant_cliprect) {
+ assert(state->Buffer[I830_DESTREG_DRAWRECT0] != MI_NOOP);
+ OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT0]);
+ OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT1]);
+ OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT2]);
+ OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT3]);
+ OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT4]);
+ OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT5]);
+ }
ADVANCE_BATCH();
}
@@ -524,7 +549,7 @@ i830_emit_state(struct intel_context *intel)
if (state->tex_buffer[i]) {
OUT_RELOC(state->tex_buffer[i],
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+ I915_GEM_DOMAIN_SAMPLER, 0,
state->tex_offset[i] | TM0S0_USE_FENCE);
}
else if (state == &i830->meta) {
@@ -541,6 +566,8 @@ i830_emit_state(struct intel_context *intel)
OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S4]);
OUT_BATCH(state->Tex[i][I830_TEXREG_MCS]);
OUT_BATCH(state->Tex[i][I830_TEXREG_CUBE]);
+
+ ADVANCE_BATCH();
}
if (dirty & I830_UPLOAD_TEXBLEND(i)) {
@@ -561,6 +588,13 @@ i830_destroy_context(struct intel_context *intel)
GLuint i;
struct i830_context *i830 = i830_context(&intel->ctx);
+ intel_region_release(&i830->state.draw_region);
+ intel_region_release(&i830->state.depth_region);
+ intel_region_release(&i830->meta.draw_region);
+ intel_region_release(&i830->meta.depth_region);
+ intel_region_release(&i830->initial.draw_region);
+ intel_region_release(&i830->initial.depth_region);
+
for (i = 0; i < I830_TEX_UNITS; i++) {
if (i830->state.tex_buffer[i] != NULL) {
dri_bo_unreference(i830->state.tex_buffer[i]);
@@ -579,6 +613,7 @@ i830_state_draw_region(struct intel_context *intel,
struct intel_region *depth_region)
{
struct i830_context *i830 = i830_context(&intel->ctx);
+ GLcontext *ctx = &intel->ctx;
GLuint value;
ASSERT(state == &i830->state || state == &i830->meta);
@@ -631,6 +666,24 @@ i830_state_draw_region(struct intel_context *intel,
}
state->Buffer[I830_DESTREG_DV1] = value;
+ if (intel->constant_cliprect) {
+ state->Buffer[I830_DESTREG_DRAWRECT0] = _3DSTATE_DRAWRECT_INFO;
+ state->Buffer[I830_DESTREG_DRAWRECT1] = 0;
+ state->Buffer[I830_DESTREG_DRAWRECT2] = 0; /* xmin, ymin */
+ state->Buffer[I830_DESTREG_DRAWRECT3] =
+ (ctx->DrawBuffer->Width & 0xffff) |
+ (ctx->DrawBuffer->Height << 16);
+ state->Buffer[I830_DESTREG_DRAWRECT4] = 0; /* xoff, yoff */
+ state->Buffer[I830_DESTREG_DRAWRECT5] = 0;
+ } else {
+ state->Buffer[I830_DESTREG_DRAWRECT0] = MI_NOOP;
+ state->Buffer[I830_DESTREG_DRAWRECT1] = MI_NOOP;
+ state->Buffer[I830_DESTREG_DRAWRECT2] = MI_NOOP;
+ state->Buffer[I830_DESTREG_DRAWRECT3] = MI_NOOP;
+ state->Buffer[I830_DESTREG_DRAWRECT4] = MI_NOOP;
+ state->Buffer[I830_DESTREG_DRAWRECT5] = MI_NOOP;
+ }
+
I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
@@ -717,4 +770,5 @@ i830InitVtbl(struct i830_context *i830)
i830->intel.vtbl.render_prevalidate = i830_render_prevalidate;
i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty;
i830->intel.vtbl.note_unlock = i830_note_unlock;
+ i830->intel.vtbl.finish_batch = intel_finish_vb;
}
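
The i830_emit_state() hunks above replace the old per-buffer dri_bufmgr_check_aperture_space() calls with a single check over an array of every buffer the state emit will reference: the batch buffer, the draw and depth regions, and any dirty texture buffers. If the set does not fit in the aperture, the batch is flushed once and the collection is retried from the again: label. A minimal sketch of that flush-and-retry pattern, with stub types standing in for dri_bo and the batchbuffer (the helper names here are illustrative, not driver functions):

    #include <stdbool.h>

    struct bo;                                /* stand-in for dri_bo */

    /* Stand-ins for dri_bufmgr_check_aperture_space() (nonzero when the
     * buffers would not all fit) and intel_batchbuffer_flush(). */
    int  check_aperture_space(struct bo **array, int count);
    void flush_batch(void);

    /* Gather every buffer the upcoming emit references and validate the
     * whole set at once; flush and retry exactly once, mirroring the
     * "again:" loop in i830_emit_state(). */
    static bool
    reserve_state_buffers(struct bo *batch, struct bo *draw, struct bo *depth,
                          struct bo *tex[], int num_tex)
    {
       struct bo *aper_array[3 + 16];         /* fixed slots + texture units */
       int aper_count, i, tries = 0;

    again:
       aper_count = 0;
       aper_array[aper_count++] = batch;
       aper_array[aper_count++] = draw;
       if (depth)
          aper_array[aper_count++] = depth;
       for (i = 0; i < num_tex && i < 16; i++)
          if (tex[i])
             aper_array[aper_count++] = tex[i];

       if (check_aperture_space(aper_array, aper_count)) {
          if (tries++ == 0) {
             flush_batch();                   /* make room and try again */
             goto again;
          }
          return false;                       /* still does not fit */
       }
       return true;
    }
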
diff --git a/i915/i915_context.c b/i915/i915_context.c
index bd9f1d5..3d6af38 100644
--- a/i915/i915_context.c
+++ b/i915/i915_context.c
@@ -26,7 +26,7 @@
**************************************************************************/
#include "i915_context.h"
-#include "imports.h"
+#include "main/imports.h"
#include "intel_tex.h"
#include "intel_tris.h"
#include "tnl/t_context.h"
@@ -55,9 +55,8 @@ static const struct dri_extension i915_extensions[] = {
{"GL_ARB_fragment_program", NULL},
{"GL_ARB_shadow", NULL},
{"GL_ARB_texture_non_power_of_two", NULL},
+ {"GL_ATI_texture_env_combine3", NULL},
{"GL_EXT_shadow_funcs", NULL},
- /* ARB extn won't work if not enabled */
- {"GL_SGIX_depth_texture", NULL},
{NULL, NULL}
};
@@ -138,6 +137,9 @@ i915CreateContext(const __GLcontextModes * mesaVis,
_tnl_destroy_pipeline(ctx);
_tnl_install_pipeline(ctx, intel_pipeline);
+ if (intel->no_rast)
+ FALLBACK(intel, INTEL_FALLBACK_USER, 1);
+
ctx->Const.MaxTextureUnits = I915_TEX_UNITS;
ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS;
ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS;
@@ -169,7 +171,6 @@ i915CreateContext(const __GLcontextModes * mesaVis,
ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* I don't think we have one */
ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
- ctx->FragmentProgram._UseTexEnvProgram = GL_TRUE;
driInitExtensions(ctx, i915_extensions, GL_FALSE);
diff --git a/i915/i915_context.h b/i915/i915_context.h
index c6958dd..87bbf5f 100644
--- a/i915/i915_context.h
+++ b/i915/i915_context.h
@@ -65,7 +65,13 @@
#define I915_DESTREG_SR0 9
#define I915_DESTREG_SR1 10
#define I915_DESTREG_SR2 11
-#define I915_DEST_SETUP_SIZE 12
+#define I915_DESTREG_DRAWRECT0 12
+#define I915_DESTREG_DRAWRECT1 13
+#define I915_DESTREG_DRAWRECT2 14
+#define I915_DESTREG_DRAWRECT3 15
+#define I915_DESTREG_DRAWRECT4 16
+#define I915_DESTREG_DRAWRECT5 17
+#define I915_DEST_SETUP_SIZE 18
#define I915_CTXREG_STATE4 0
#define I915_CTXREG_LI 1
@@ -125,6 +131,9 @@ struct i915_fragment_program
GLboolean on_hardware;
GLboolean error; /* If program is malformed for any reason. */
+ /** Record of which phases R registers were last written in. */
+ GLuint register_phases[16];
+ GLuint indirections;
GLuint nr_tex_indirect;
GLuint nr_tex_insn;
GLuint nr_alu_insn;
diff --git a/i915/i915_debug.c b/i915/i915_debug.c
index eb1a052..f7bb7ea 100644
--- a/i915/i915_debug.c
+++ b/i915/i915_debug.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "imports.h"
+#include "main/imports.h"
#include "i915_reg.h"
#include "i915_context.h"
diff --git a/i915/i915_fragprog.c b/i915/i915_fragprog.c
index 1876218..f091d60 100644
--- a/i915/i915_fragprog.c
+++ b/i915/i915_fragprog.c
@@ -25,14 +25,15 @@
*
**************************************************************************/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "shader/prog_instruction.h"
#include "shader/prog_parameter.h"
#include "shader/program.h"
#include "shader/programopt.h"
+#include "shader/prog_print.h"
#include "tnl/tnl.h"
#include "tnl/t_context.h"
@@ -1048,6 +1049,9 @@ i915ProgramStringNotify(GLcontext * ctx,
_mesa_append_fog_code(ctx, &p->FragProg);
p->FragProg.FogOption = GL_NONE;
}
+
+ if (INTEL_DEBUG & DEBUG_STATE)
+ _mesa_print_program(prog);
}
_tnl_program_string(ctx, target, prog);
@@ -1105,30 +1109,14 @@ i915ValidateFragmentProgram(struct i915_context *i915)
EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4);
}
- if ((inputsRead & (FRAG_BIT_COL1 | FRAG_BIT_FOGC)) ||
- i915->vertex_fog != I915_FOG_NONE) {
-
- if (inputsRead & FRAG_BIT_COL1) {
- intel->specoffset = offset / 4;
- EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3);
- }
- else
- EMIT_PAD(3);
-
- if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE)
- EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1);
- else
- EMIT_PAD(1);
+ if (inputsRead & FRAG_BIT_COL1) {
+ intel->specoffset = offset / 4;
+ EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_4UB_4F_BGRA, S4_VFMT_SPEC_FOG, 4);
}
- /* XXX this was disabled, but enabling this code helped fix the Glean
- * tfragprog1 fog tests.
- */
-#if 1
if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) {
EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4);
}
-#endif
for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
if (inputsRead & FRAG_BIT_TEX(i)) {
diff --git a/i915/i915_metaops.c b/i915/i915_metaops.c
index 73aa634..90a78c6 100644
--- a/i915/i915_metaops.c
+++ b/i915/i915_metaops.c
@@ -25,15 +25,14 @@
*
**************************************************************************/
-#include "glheader.h"
-#include "enums.h"
-#include "mtypes.h"
-#include "macros.h"
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
#include "utils.h"
#include "intel_screen.h"
#include "intel_batchbuffer.h"
-#include "intel_ioctl.h"
#include "intel_regions.h"
#include "i915_context.h"
diff --git a/i915/i915_program.c b/i915/i915_program.c
index f79d00d..e87700f 100644
--- a/i915/i915_program.c
+++ b/i915/i915_program.c
@@ -27,9 +27,9 @@
#include <strings.h>
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "tnl/t_context.h"
#include "intel_batchbuffer.h"
@@ -190,6 +190,9 @@ i915_emit_arith(struct i915_fragment_program * p,
*(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1));
*(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2));
+ if (GET_UREG_TYPE(dest) == REG_TYPE_R)
+ p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect;
+
p->nr_alu_insn++;
return dest;
}
@@ -237,10 +240,35 @@ GLuint i915_emit_texld( struct i915_fragment_program *p,
else {
assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)));
+ /* Can't use unsaved temps for coords, as the phase boundary would result
+ * in the contents becoming undefined.
+ */
+ assert(GET_UREG_TYPE(coord) != REG_TYPE_U);
+
+ if ((GET_UREG_TYPE(coord) != REG_TYPE_R) &&
+ (GET_UREG_TYPE(coord) != REG_TYPE_OC) &&
+ (GET_UREG_TYPE(coord) != REG_TYPE_OD) &&
+ (GET_UREG_TYPE(coord) != REG_TYPE_T)) {
+ GLuint tmpCoord = get_free_rreg(p, live_regs);
+
+ if (tmpCoord == UREG_BAD)
+ return 0;
+
+ i915_emit_arith(p, A0_MOV, tmpCoord, A0_DEST_CHANNEL_ALL, 0, coord, 0, 0);
+ coord = tmpCoord;
+ }
- if (GET_UREG_TYPE(coord) != REG_TYPE_T) {
+ /* Output register being oC or oD defines a phase boundary */
+ if (GET_UREG_TYPE(dest) == REG_TYPE_OC ||
+ GET_UREG_TYPE(dest) == REG_TYPE_OD)
+ p->nr_tex_indirect++;
+
+ /* Reading from an r# register whose contents depend on output of the
+ * current phase defines a phase boundary.
+ */
+ if (GET_UREG_TYPE(coord) == REG_TYPE_R &&
+ p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect)
p->nr_tex_indirect++;
- }
*(p->csr++) = (op |
T0_DEST( dest ) |
@@ -249,6 +277,9 @@ GLuint i915_emit_texld( struct i915_fragment_program *p,
*(p->csr++) = T1_ADDRESS_REG( coord );
*(p->csr++) = T2_MBZ;
+ if (GET_UREG_TYPE(dest) == REG_TYPE_R)
+ p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect;
+
p->nr_tex_insn++;
return dest;
}
@@ -413,7 +444,8 @@ i915_init_program(struct i915_context *i915, struct i915_fragment_program *p)
p->on_hardware = 0;
p->error = 0;
- p->nr_tex_indirect = 1; /* correct? */
+ memset(&p->register_phases, 0, sizeof(p->register_phases));
+ p->nr_tex_indirect = 1;
p->nr_tex_insn = 0;
p->nr_alu_insn = 0;
p->nr_decl_insn = 0;
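
The i915_program.c hunks above implement the i915 "phase" rules for texture indirections: writing oC or oD closes the current phase, sampling with a coordinate that was computed in the current phase also forces a new phase, and register_phases[] records the phase in which each R register was last written so that dependency can be detected. Coordinates held in unsaved U temporaries are first copied into a saved R register, since their contents do not survive a phase boundary. A compressed sketch of that bookkeeping (same logic as the hunks, with the register-file details reduced; reg types and numbers stand in for GET_UREG_TYPE/GET_UREG_NR):

    enum reg_type { TYPE_R, TYPE_T, TYPE_OC, TYPE_OD, TYPE_U, TYPE_OTHER };

    struct prog {
       int nr_tex_indirect;        /* current phase, starts at 1           */
       int register_phases[16];    /* phase in which R[n] was last written */
    };

    static void
    emit_texld(struct prog *p, enum reg_type dest_type, int dest_nr,
               enum reg_type coord_type, int coord_nr)
    {
       /* Writing the output color/depth registers closes the phase. */
       if (dest_type == TYPE_OC || dest_type == TYPE_OD)
          p->nr_tex_indirect++;

       /* Sampling with a coordinate written in the current phase is a
        * texture indirection: it too starts a new phase. */
       if (coord_type == TYPE_R &&
           p->register_phases[coord_nr] == p->nr_tex_indirect)
          p->nr_tex_indirect++;

       /* ... emit the TEXLD instruction here ... */

       /* Remember which phase this result register was written in. */
       if (dest_type == TYPE_R)
          p->register_phases[dest_nr] = p->nr_tex_indirect;
    }
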
diff --git a/i915/i915_reg.h b/i915/i915_reg.h
index b5585e7..8891e11 100644
--- a/i915/i915_reg.h
+++ b/i915/i915_reg.h
@@ -325,118 +325,6 @@
#define SCISSOR_RECT_0_YMAX(x) ((x)<<16)
#define SCISSOR_RECT_0_XMAX(x) (x)
-/* p189 */
-#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16))
-#define I1_LOAD_S(n) (1<<(4+n))
-
-#define S0_VB_OFFSET_MASK 0xffffffc
-#define S0_AUTO_CACHE_INV_DISABLE (1<<0)
-
-#define S1_VERTEX_WIDTH_SHIFT 24
-#define S1_VERTEX_WIDTH_MASK (0x3f<<24)
-#define S1_VERTEX_PITCH_SHIFT 16
-#define S1_VERTEX_PITCH_MASK (0x3f<<16)
-
-#define TEXCOORDFMT_2D 0x0
-#define TEXCOORDFMT_3D 0x1
-#define TEXCOORDFMT_4D 0x2
-#define TEXCOORDFMT_1D 0x3
-#define TEXCOORDFMT_2D_16 0x4
-#define TEXCOORDFMT_4D_16 0x5
-#define TEXCOORDFMT_NOT_PRESENT 0xf
-#define S2_TEXCOORD_FMT0_MASK 0xf
-#define S2_TEXCOORD_FMT1_SHIFT 4
-#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4))
-#define S2_TEXCOORD_NONE (~0)
-
-/* S3 not interesting */
-
-#define S4_POINT_WIDTH_SHIFT 23
-#define S4_POINT_WIDTH_MASK (0x1ff<<23)
-#define S4_LINE_WIDTH_SHIFT 19
-#define S4_LINE_WIDTH_ONE (0x2<<19)
-#define S4_LINE_WIDTH_MASK (0xf<<19)
-#define S4_FLATSHADE_ALPHA (1<<18)
-#define S4_FLATSHADE_FOG (1<<17)
-#define S4_FLATSHADE_SPECULAR (1<<16)
-#define S4_FLATSHADE_COLOR (1<<15)
-#define S4_CULLMODE_BOTH (0<<13)
-#define S4_CULLMODE_NONE (1<<13)
-#define S4_CULLMODE_CW (2<<13)
-#define S4_CULLMODE_CCW (3<<13)
-#define S4_CULLMODE_MASK (3<<13)
-#define S4_VFMT_POINT_WIDTH (1<<12)
-#define S4_VFMT_SPEC_FOG (1<<11)
-#define S4_VFMT_COLOR (1<<10)
-#define S4_VFMT_DEPTH_OFFSET (1<<9)
-#define S4_VFMT_XYZ (1<<6)
-#define S4_VFMT_XYZW (2<<6)
-#define S4_VFMT_XY (3<<6)
-#define S4_VFMT_XYW (4<<6)
-#define S4_VFMT_XYZW_MASK (7<<6)
-#define S4_FORCE_DEFAULT_DIFFUSE (1<<5)
-#define S4_FORCE_DEFAULT_SPECULAR (1<<4)
-#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3)
-#define S4_VFMT_FOG_PARAM (1<<2)
-#define S4_SPRITE_POINT_ENABLE (1<<1)
-#define S4_LINE_ANTIALIAS_ENABLE (1<<0)
-
-#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \
- S4_VFMT_SPEC_FOG | \
- S4_VFMT_COLOR | \
- S4_VFMT_DEPTH_OFFSET | \
- S4_VFMT_XYZW_MASK | \
- S4_VFMT_FOG_PARAM)
-
-
-#define S5_WRITEDISABLE_ALPHA (1<<31)
-#define S5_WRITEDISABLE_RED (1<<30)
-#define S5_WRITEDISABLE_GREEN (1<<29)
-#define S5_WRITEDISABLE_BLUE (1<<28)
-#define S5_WRITEDISABLE_MASK (0xf<<28)
-#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27)
-#define S5_LAST_PIXEL_ENABLE (1<<26)
-#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25)
-#define S5_FOG_ENABLE (1<<24)
-#define S5_STENCIL_REF_SHIFT 16
-#define S5_STENCIL_REF_MASK (0xff<<16)
-#define S5_STENCIL_TEST_FUNC_SHIFT 13
-#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13)
-#define S5_STENCIL_FAIL_SHIFT 10
-#define S5_STENCIL_FAIL_MASK (0x7<<10)
-#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7
-#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7)
-#define S5_STENCIL_PASS_Z_PASS_SHIFT 4
-#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4)
-#define S5_STENCIL_WRITE_ENABLE (1<<3)
-#define S5_STENCIL_TEST_ENABLE (1<<2)
-#define S5_COLOR_DITHER_ENABLE (1<<1)
-#define S5_LOGICOP_ENABLE (1<<0)
-
-
-#define S6_ALPHA_TEST_ENABLE (1<<31)
-#define S6_ALPHA_TEST_FUNC_SHIFT 28
-#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28)
-#define S6_ALPHA_REF_SHIFT 20
-#define S6_ALPHA_REF_MASK (0xff<<20)
-#define S6_DEPTH_TEST_ENABLE (1<<19)
-#define S6_DEPTH_TEST_FUNC_SHIFT 16
-#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16)
-#define S6_CBUF_BLEND_ENABLE (1<<15)
-#define S6_CBUF_BLEND_FUNC_SHIFT 12
-#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12)
-#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8
-#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8)
-#define S6_CBUF_DST_BLEND_FACT_SHIFT 4
-#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4)
-#define S6_DEPTH_WRITE_ENABLE (1<<3)
-#define S6_COLOR_WRITE_ENABLE (1<<2)
-#define S6_TRISTRIP_PV_SHIFT 0
-#define S6_TRISTRIP_PV_MASK (0x3<<0)
-
-#define S7_DEPTH_OFFSET_CONST_MASK ~0
-
-
/* Helper macros for blend factors
*/
#define DST_BLND_FACT(f) ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT)
@@ -855,10 +743,4 @@
#define _3DSTATE_DEFAULT_DIFFUSE ((0x3<<29)|(0x1d<<24)|(0x99<<16))
#define _3DSTATE_DEFAULT_SPECULAR ((0x3<<29)|(0x1d<<24)|(0x9a<<16))
-
-#define MI_FLUSH ((0<<29)|(4<<23))
-#define FLUSH_MAP_CACHE (1<<0)
-#define INHIBIT_FLUSH_RENDER_CACHE (1<<2)
-
-
#endif
diff --git a/i915/i915_state.c b/i915/i915_state.c
index c814f8d..a53f120 100644
--- a/i915/i915_state.c
+++ b/i915/i915_state.c
@@ -26,11 +26,11 @@
**************************************************************************/
-#include "glheader.h"
-#include "context.h"
-#include "macros.h"
-#include "enums.h"
-#include "dd.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "main/dd.h"
#include "tnl/tnl.h"
#include "tnl/t_context.h"
@@ -569,7 +569,7 @@ i915_update_fog(GLcontext * ctx)
GLboolean enabled;
GLboolean try_pixel_fog;
- if (ctx->FragmentProgram._Active) {
+ if (ctx->FragmentProgram._Current) {
/* Pull in static fog state from program */
mode = ctx->FragmentProgram._Current->FogOption;
enabled = (mode != GL_NONE);
diff --git a/i915/i915_tex.c b/i915/i915_tex.c
index 386617a..e38d8fe 100644
--- a/i915/i915_tex.c
+++ b/i915/i915_tex.c
@@ -25,20 +25,18 @@
*
**************************************************************************/
-#include "glheader.h"
-#include "mtypes.h"
-#include "imports.h"
-#include "simple_list.h"
-#include "enums.h"
-#include "image.h"
-#include "texstore.h"
-#include "texformat.h"
-#include "texmem.h"
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/simple_list.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/mm.h"
+#include "main/texstore.h"
+#include "main/texformat.h"
#include "swrast/swrast.h"
-#include "mm.h"
-
-#include "intel_ioctl.h"
+#include "texmem.h"
#include "i915_context.h"
#include "i915_reg.h"
diff --git a/i915/i915_tex_layout.c b/i915/i915_tex_layout.c
index b5085f4..d44a2f4 100644
--- a/i915/i915_tex_layout.c
+++ b/i915/i915_tex_layout.c
@@ -31,7 +31,7 @@
#include "intel_mipmap_tree.h"
#include "intel_tex_layout.h"
-#include "macros.h"
+#include "main/macros.h"
#include "intel_context.h"
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
diff --git a/i915/i915_texstate.c b/i915/i915_texstate.c
index 1b1122c..adbb52a 100644
--- a/i915/i915_texstate.c
+++ b/i915/i915_texstate.c
@@ -25,10 +25,9 @@
*
**************************************************************************/
-#include "mtypes.h"
-#include "enums.h"
-#include "texformat.h"
-#include "dri_bufmgr.h"
+#include "main/mtypes.h"
+#include "main/enums.h"
+#include "main/texformat.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
@@ -296,6 +295,13 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
wt == GL_CLAMP_TO_BORDER || wr == GL_CLAMP_TO_BORDER))
return GL_FALSE;
+ /* Only support TEXCOORDMODE_CLAMP_EDGE and TEXCOORDMODE_CUBE (not
+ * used) when using cube map texture coordinates
+ */
+ if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB &&
+ (((ws != GL_CLAMP) && (ws != GL_CLAMP_TO_EDGE)) ||
+ ((wt != GL_CLAMP) && (wt != GL_CLAMP_TO_EDGE))))
+ return GL_FALSE;
state[I915_TEXREG_SS3] = ss3; /* SS3_NORMALIZED_COORDS */
@@ -308,10 +314,21 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
}
- state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(tObj->_BorderChan[0],
- tObj->_BorderChan[1],
- tObj->_BorderChan[2],
- tObj->_BorderChan[3]);
+ if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+ /* GL specs that border color for depth textures is taken from the
+ * R channel, while the hardware uses A. Spam R into all the channels
+ * for safety.
+ */
+ state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(tObj->_BorderChan[0],
+ tObj->_BorderChan[0],
+ tObj->_BorderChan[0],
+ tObj->_BorderChan[0]);
+ } else {
+ state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(tObj->_BorderChan[0],
+ tObj->_BorderChan[1],
+ tObj->_BorderChan[2],
+ tObj->_BorderChan[3]);
+ }
I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), GL_TRUE);
diff --git a/i915/i915_vtbl.c b/i915/i915_vtbl.c
index 135bfaa..3f6d282 100644
--- a/i915/i915_vtbl.c
+++ b/i915/i915_vtbl.c
@@ -27,11 +27,11 @@
-#include "glheader.h"
-#include "mtypes.h"
-#include "imports.h"
-#include "macros.h"
-#include "colormac.h"
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/colormac.h"
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
@@ -39,11 +39,12 @@
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_regions.h"
+#include "intel_tris.h"
#include "i915_reg.h"
#include "i915_context.h"
-#include "glapi.h"
+#include "glapi/glapi.h"
static void
i915_render_prevalidate(struct intel_context *intel)
@@ -172,7 +173,7 @@ i915_emit_invarient_state(struct intel_context *intel)
{
BATCH_LOCALS;
- BEGIN_BATCH(200, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(20, IGNORE_CLIPRECTS);
OUT_BATCH(_3DSTATE_AA_CMD |
AA_LINE_ECAAR_WIDTH_ENABLE |
@@ -296,9 +297,9 @@ i915_emit_state(struct intel_context *intel)
{
struct i915_context *i915 = i915_context(&intel->ctx);
struct i915_hw_state *state = i915->current;
- int i;
- int ret, count;
+ int i, count, aper_count;
GLuint dirty;
+ dri_bo *aper_array[3 + I915_TEX_UNITS];
GET_CURRENT_CONTEXT(ctx);
BATCH_LOCALS;
@@ -313,28 +314,32 @@ i915_emit_state(struct intel_context *intel)
* Set the space as LOOP_CLIPRECTS now, since that's what our primitives
* will be emitted under.
*/
- intel_batchbuffer_require_space(intel->batch, get_state_size(state) + 8,
+ intel_batchbuffer_require_space(intel->batch,
+ get_state_size(state) + INTEL_PRIM_EMIT_SIZE,
LOOP_CLIPRECTS);
count = 0;
again:
+ aper_count = 0;
dirty = get_dirty(state);
- ret = 0;
+ aper_array[aper_count++] = intel->batch->buf;
if (dirty & I915_UPLOAD_BUFFERS) {
- ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer);
- if (state->depth_region)
- ret |= dri_bufmgr_check_aperture_space(state->depth_region->buffer);
+ aper_array[aper_count++] = state->draw_region->buffer;
+ if (state->depth_region)
+ aper_array[aper_count++] = state->depth_region->buffer;
}
if (dirty & I915_UPLOAD_TEX_ALL) {
- for (i = 0; i < I915_TEX_UNITS; i++)
- if (dirty & I915_UPLOAD_TEX(i)) {
- if (state->tex_buffer[i]) {
- ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]);
- }
- }
+ for (i = 0; i < I915_TEX_UNITS; i++) {
+ if (dirty & I915_UPLOAD_TEX(i)) {
+ if (state->tex_buffer[i]) {
+ aper_array[aper_count++] = state->tex_buffer[i];
+ }
+ }
+ }
}
- if (ret) {
+
+ if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) {
if (count == 0) {
count++;
intel_batchbuffer_flush(intel->batch);
@@ -371,20 +376,29 @@ i915_emit_state(struct intel_context *intel)
}
if (dirty & I915_UPLOAD_BUFFERS) {
+ GLuint count = 9;
+
if (INTEL_DEBUG & DEBUG_STATE)
fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
- BEGIN_BATCH(I915_DEST_SETUP_SIZE + 2, IGNORE_CLIPRECTS);
+
+ if (state->depth_region)
+ count += 3;
+
+ if (intel->constant_cliprect)
+ count += 6;
+
+ BEGIN_BATCH(count, IGNORE_CLIPRECTS);
OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
OUT_RELOC(state->draw_region->buffer,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
state->draw_region->draw_offset);
if (state->depth_region) {
OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
OUT_RELOC(state->depth_region->buffer,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
state->depth_region->draw_offset);
}
@@ -394,6 +408,17 @@ i915_emit_state(struct intel_context *intel)
OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
+
+ if (intel->constant_cliprect) {
+ assert(state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP);
+ OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT0]);
+ OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT1]);
+ OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT2]);
+ OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT3]);
+ OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT4]);
+ OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT5]);
+ }
+
ADVANCE_BATCH();
}
@@ -427,7 +452,7 @@ i915_emit_state(struct intel_context *intel)
if (state->tex_buffer[i]) {
OUT_RELOC(state->tex_buffer[i],
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+ I915_GEM_DOMAIN_SAMPLER, 0,
state->tex_offset[i]);
}
else if (state == &i915->meta) {
@@ -485,6 +510,13 @@ i915_destroy_context(struct intel_context *intel)
GLuint i;
struct i915_context *i915 = i915_context(&intel->ctx);
+ intel_region_release(&i915->state.draw_region);
+ intel_region_release(&i915->state.depth_region);
+ intel_region_release(&i915->meta.draw_region);
+ intel_region_release(&i915->meta.depth_region);
+ intel_region_release(&i915->initial.draw_region);
+ intel_region_release(&i915->initial.depth_region);
+
for (i = 0; i < I915_TEX_UNITS; i++) {
if (i915->state.tex_buffer[i] != NULL) {
dri_bo_unreference(i915->state.tex_buffer[i]);
@@ -509,6 +541,7 @@ i915_state_draw_region(struct intel_context *intel,
struct intel_region *depth_region)
{
struct i915_context *i915 = i915_context(&intel->ctx);
+ GLcontext *ctx = &intel->ctx;
GLuint value;
ASSERT(state == &i915->state || state == &i915->meta);
@@ -561,6 +594,24 @@ i915_state_draw_region(struct intel_context *intel,
}
state->Buffer[I915_DESTREG_DV1] = value;
+ if (intel->constant_cliprect) {
+ state->Buffer[I915_DESTREG_DRAWRECT0] = _3DSTATE_DRAWRECT_INFO;
+ state->Buffer[I915_DESTREG_DRAWRECT1] = 0;
+ state->Buffer[I915_DESTREG_DRAWRECT2] = 0; /* xmin, ymin */
+ state->Buffer[I915_DESTREG_DRAWRECT3] =
+ (ctx->DrawBuffer->Width & 0xffff) |
+ (ctx->DrawBuffer->Height << 16);
+ state->Buffer[I915_DESTREG_DRAWRECT4] = 0; /* xoff, yoff */
+ state->Buffer[I915_DESTREG_DRAWRECT5] = 0;
+ } else {
+ state->Buffer[I915_DESTREG_DRAWRECT0] = MI_NOOP;
+ state->Buffer[I915_DESTREG_DRAWRECT1] = MI_NOOP;
+ state->Buffer[I915_DESTREG_DRAWRECT2] = MI_NOOP;
+ state->Buffer[I915_DESTREG_DRAWRECT3] = MI_NOOP;
+ state->Buffer[I915_DESTREG_DRAWRECT4] = MI_NOOP;
+ state->Buffer[I915_DESTREG_DRAWRECT5] = MI_NOOP;
+ }
+
I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
}
@@ -629,4 +680,5 @@ i915InitVtbl(struct i915_context *i915)
i915->intel.vtbl.flush_cmd = i915_flush_cmd;
i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty;
i915->intel.vtbl.note_unlock = i915_note_unlock;
+ i915->intel.vtbl.finish_batch = intel_finish_vb;
}
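
Both vtbl files above grow six I9x5_DESTREG_DRAWRECT0..5 slots: when intel->constant_cliprect is set, i.e. the drawing rectangle does not have to be re-emitted per cliprect, a _3DSTATE_DRAWRECT_INFO packet covering the whole drawable is stored in the buffer-state block and emitted with the rest of the destination-buffer state; otherwise the slots are filled with MI_NOOP and skipped at emit time (the assert guards against emitting unfilled slots). A small sketch of how the six dwords are packed for a full-drawable rectangle, mirroring the values stored in the hunks (fill_drawrect() and drawrect_cmd are illustrative, not driver names; the real opcode comes from intel_reg.h):

    #include <stdint.h>

    static void
    fill_drawrect(uint32_t buf[6], uint32_t drawrect_cmd,
                  uint32_t width, uint32_t height)
    {
       buf[0] = drawrect_cmd;                        /* _3DSTATE_DRAWRECT_INFO */
       buf[1] = 0;                                   /* flags                  */
       buf[2] = 0;                                   /* xmin, ymin             */
       buf[3] = (width & 0xffff) | (height << 16);   /* xmax, ymax             */
       buf[4] = 0;                                   /* xoff, yoff (origin)    */
       buf[5] = 0;
    }
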
diff --git a/i915/intel_pixel_read.c b/i915/intel_pixel_read.c
index 2e31656..56087aa 100644
--- a/i915/intel_pixel_read.c
+++ b/i915/intel_pixel_read.c
@@ -25,17 +25,16 @@
*
**************************************************************************/
-#include "glheader.h"
-#include "enums.h"
-#include "mtypes.h"
-#include "macros.h"
-#include "image.h"
-#include "bufferobj.h"
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/image.h"
+#include "main/bufferobj.h"
#include "swrast/swrast.h"
#include "intel_screen.h"
#include "intel_context.h"
-#include "intel_ioctl.h"
#include "intel_batchbuffer.h"
#include "intel_blit.h"
#include "intel_buffers.h"
@@ -173,7 +172,6 @@ do_blit_readpixels(GLcontext * ctx,
struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj);
GLuint dst_offset;
GLuint rowLength;
- dri_fence *fence = NULL;
if (INTEL_DEBUG & DEBUG_PIXEL)
_mesa_printf("%s\n", __FUNCTION__);
@@ -264,7 +262,7 @@ do_blit_readpixels(GLcontext * ctx,
intelEmitCopyBlit(intel,
src->cpp,
- src->pitch, src->buffer, 0, src->tiled,
+ src->pitch, src->buffer, 0, src->tiling,
rowLength, dst_buffer, dst_offset, GL_FALSE,
rect.x1,
rect.y1,
@@ -273,19 +271,9 @@ do_blit_readpixels(GLcontext * ctx,
rect.x2 - rect.x1, rect.y2 - rect.y1,
GL_COPY);
}
-
- intel_batchbuffer_flush(intel->batch);
- fence = intel->batch->last_fence;
- dri_fence_reference(fence);
-
}
UNLOCK_HARDWARE(intel);
- if (fence) {
- dri_fence_wait(fence);
- dri_fence_unreference(fence);
- }
-
if (INTEL_DEBUG & DEBUG_PIXEL)
_mesa_printf("%s - DONE\n", __FUNCTION__);
diff --git a/i915/intel_render.c b/i915/intel_render.c
index 5e6500c..410052b 100644
--- a/i915/intel_render.c
+++ b/i915/intel_render.c
@@ -30,12 +30,12 @@
* dma buffers. Use strip/fan hardware acceleration where possible.
*
*/
-#include "glheader.h"
-#include "context.h"
-#include "macros.h"
-#include "imports.h"
-#include "mtypes.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/enums.h"
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
@@ -67,7 +67,7 @@
#define HAVE_ELTS 0
-static GLuint hw_prim[GL_POLYGON + 1] = {
+static uint32_t hw_prim[GL_POLYGON + 1] = {
0,
PRIM3D_LINELIST,
PRIM3D_LINESTRIP,
@@ -114,9 +114,29 @@ intelDmaPrimitive(struct intel_context *intel, GLenum prim)
fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim));
INTEL_FIREVERTICES(intel);
intel->vtbl.reduced_primitive_state(intel, reduced_prim[prim]);
- intelStartInlinePrimitive(intel, hw_prim[prim], LOOP_CLIPRECTS);
+ intel_set_prim(intel, hw_prim[prim]);
}
+static inline GLuint intel_get_vb_max(struct intel_context *intel)
+{
+ GLuint ret;
+
+ if (intel->intelScreen->no_vbo)
+ ret = intel->batch->size - 1500;
+ else
+ ret = INTEL_VB_SIZE;
+ ret /= (intel->vertex_size * 4);
+ return ret;
+}
+
+static inline GLuint intel_get_current_max(struct intel_context *intel)
+{
+
+ if (intel->intelScreen->no_vbo)
+ return intel_get_vb_max(intel);
+ else
+ return (INTEL_VB_SIZE - intel->prim.current_offset) / (intel->vertex_size * 4);
+}
#define LOCAL_VARS struct intel_context *intel = intel_context(ctx)
#define INIT( prim ) \
@@ -126,12 +146,10 @@ do { \
#define FLUSH() INTEL_FIREVERTICES(intel)
-#define GET_SUBSEQUENT_VB_MAX_VERTS() \
- ((intel->batch->size - 1500) / (intel->vertex_size*4))
-#define GET_CURRENT_VB_MAX_VERTS() GET_SUBSEQUENT_VB_MAX_VERTS()
+#define GET_SUBSEQUENT_VB_MAX_VERTS() intel_get_vb_max(intel)
+#define GET_CURRENT_VB_MAX_VERTS() intel_get_current_max(intel)
-#define ALLOC_VERTS( nr ) \
- intelExtendInlinePrimitive( intel, (nr) * intel->vertex_size )
+#define ALLOC_VERTS(nr) intel_get_prim_space(intel, nr)
#define EMIT_VERTS( ctx, j, nr, buf ) \
_tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf )
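
The two helpers above size primitives against the available vertex-buffer space: intel_get_vb_max() divides the budget (the batch size minus 1500 bytes of slack when VBOs are disabled, INTEL_VB_SIZE otherwise) by the vertex size in bytes (vertex_size is counted in dwords, hence the multiply by 4), and intel_get_current_max() applies the same division to what remains after prim.current_offset. As a worked example for the new path: with INTEL_VB_SIZE = 32 * 1024 and an 8-dword (32-byte) vertex, at most 32768 / 32 = 1024 vertices fit in one VB.
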
diff --git a/i915/intel_state.c b/i915/intel_state.c
index d1ca11d..4aa43e5 100644
--- a/i915/intel_state.c
+++ b/i915/intel_state.c
@@ -26,12 +26,12 @@
**************************************************************************/
-#include "glheader.h"
-#include "context.h"
-#include "macros.h"
-#include "enums.h"
-#include "colormac.h"
-#include "dd.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "main/colormac.h"
+#include "main/dd.h"
#include "intel_screen.h"
#include "intel_context.h"
@@ -267,6 +267,8 @@ intelViewport(GLcontext * ctx,
GLint x, GLint y, GLsizei width, GLsizei height)
{
intelCalcViewport(ctx);
+
+ intel_viewport(ctx, x, y, width, height);
}
static void
diff --git a/i915/intel_tris.c b/i915/intel_tris.c
index bbb4e0f..e809965 100644
--- a/i915/intel_tris.c
+++ b/i915/intel_tris.c
@@ -25,13 +25,19 @@
*
**************************************************************************/
-#include "glheader.h"
-#include "context.h"
-#include "macros.h"
-#include "enums.h"
-#include "texobj.h"
-#include "state.h"
-#include "dd.h"
+/** @file intel_tris.c
+ *
+ * This file contains functions for managing the vertex buffer and emitting
+ * primitives into it.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "main/texobj.h"
+#include "main/state.h"
+#include "main/dd.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
@@ -47,13 +53,14 @@
#include "intel_reg.h"
#include "intel_span.h"
#include "intel_tex.h"
+#include "intel_chipset.h"
+#include "i830_context.h"
+#include "i830_reg.h"
static void intelRenderPrimitive(GLcontext * ctx, GLenum prim);
static void intelRasterPrimitive(GLcontext * ctx, GLenum rprim,
GLuint hwprim);
-/*
- */
static void
intel_flush_inline_primitive(struct intel_context *intel)
{
@@ -80,22 +87,16 @@ intel_flush_inline_primitive(struct intel_context *intel)
intel->prim.flush = 0;
}
-
-/* Emit a primitive referencing vertices in a vertex buffer.
- */
-void
-intelStartInlinePrimitive(struct intel_context *intel,
- GLuint prim, GLuint batch_flags)
+static void intel_start_inline(struct intel_context *intel, uint32_t prim)
{
BATCH_LOCALS;
-
- intel_wait_flips(intel);
+ uint32_t batch_flags = LOOP_CLIPRECTS;
intel->vtbl.emit_state(intel);
intel->no_batch_wrap = GL_TRUE;
-/* _mesa_printf("%s *", __progname); */
+ /*_mesa_printf("%s *", __progname);*/
/* Emit a slot which will be filled with the inline primitive
* command later.
@@ -113,24 +114,19 @@ intelStartInlinePrimitive(struct intel_context *intel,
ADVANCE_BATCH();
intel->no_batch_wrap = GL_FALSE;
-
/* _mesa_printf(">"); */
}
-
-void
-intelWrapInlinePrimitive(struct intel_context *intel)
+static void intel_wrap_inline(struct intel_context *intel)
{
GLuint prim = intel->prim.primitive;
- enum cliprect_mode cliprect_mode = intel->batch->cliprect_mode;
intel_flush_inline_primitive(intel);
intel_batchbuffer_flush(intel->batch);
- intelStartInlinePrimitive(intel, prim, cliprect_mode); /* ??? */
+ intel_start_inline(intel, prim); /* ??? */
}
-GLuint *
-intelExtendInlinePrimitive(struct intel_context *intel, GLuint dwords)
+static GLuint *intel_extend_inline(struct intel_context *intel, GLuint dwords)
{
GLuint sz = dwords * sizeof(GLuint);
GLuint *ptr;
@@ -138,7 +134,7 @@ intelExtendInlinePrimitive(struct intel_context *intel, GLuint dwords)
assert(intel->prim.flush == intel_flush_inline_primitive);
if (intel_batchbuffer_space(intel->batch) < sz)
- intelWrapInlinePrimitive(intel);
+ intel_wrap_inline(intel);
/* _mesa_printf("."); */
@@ -150,7 +146,184 @@ intelExtendInlinePrimitive(struct intel_context *intel, GLuint dwords)
return ptr;
}
+/** Sets the primitive type for a primitive sequence, flushing as needed. */
+void intel_set_prim(struct intel_context *intel, uint32_t prim)
+{
+ /* if we have no VBOs */
+ if (intel->intelScreen->no_vbo) {
+ intel_start_inline(intel, prim);
+ return;
+ }
+ if (prim != intel->prim.primitive) {
+ INTEL_FIREVERTICES(intel);
+ intel->prim.primitive = prim;
+ }
+}
+
+/** Returns mapped VB space for the given number of vertices */
+uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count)
+{
+ uint32_t *addr;
+
+ if (intel->intelScreen->no_vbo) {
+ return intel_extend_inline(intel, count * intel->vertex_size);
+ }
+
+ /* Check for space in the existing VB */
+ if (intel->prim.vb_bo == NULL ||
+ (intel->prim.current_offset +
+ count * intel->vertex_size * 4) > INTEL_VB_SIZE ||
+ (intel->prim.count + count) >= (1 << 16)) {
+ /* Flush existing prim if any */
+ INTEL_FIREVERTICES(intel);
+
+ intel_finish_vb(intel);
+
+ /* Start a new VB */
+ if (intel->prim.vb == NULL)
+ intel->prim.vb = malloc(INTEL_VB_SIZE);
+ intel->prim.vb_bo = dri_bo_alloc(intel->bufmgr, "vb",
+ INTEL_VB_SIZE, 4);
+ intel->prim.start_offset = 0;
+ intel->prim.current_offset = 0;
+ }
+
+ intel->prim.flush = intel_flush_prim;
+
+ addr = (uint32_t *)(intel->prim.vb + intel->prim.current_offset);
+ intel->prim.current_offset += intel->vertex_size * 4 * count;
+ intel->prim.count += count;
+
+ return addr;
+}
+
+/** Dispatches the accumulated primitive to the batchbuffer. */
+void intel_flush_prim(struct intel_context *intel)
+{
+ BATCH_LOCALS;
+ dri_bo *aper_array[2];
+ dri_bo *vb_bo;
+ unsigned int offset, count;
+
+ /* Must be called after an intel_start_prim. */
+ assert(intel->prim.primitive != ~0);
+
+ if (intel->prim.count == 0)
+ return;
+
+ /* Clear the current prims out of the context state so that a batch flush
+ * flush triggered by emit_state doesn't loop back to flush_prim again.
+ */
+ vb_bo = intel->prim.vb_bo;
+ dri_bo_reference(vb_bo);
+ count = intel->prim.count;
+ intel->prim.count = 0;
+ offset = intel->prim.start_offset;
+ intel->prim.start_offset = intel->prim.current_offset;
+ if (!IS_9XX(intel->intelScreen->deviceID))
+ intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);
+ intel->prim.flush = NULL;
+
+ intel->vtbl.emit_state(intel);
+
+ aper_array[0] = intel->batch->buf;
+ aper_array[1] = vb_bo;
+ if (dri_bufmgr_check_aperture_space(aper_array, 2)) {
+ intel_batchbuffer_flush(intel->batch);
+ intel->vtbl.emit_state(intel);
+ }
+
+ /* Ensure that we don't start a new batch for the following emit, which
+ * depends on the state just emitted. emit_state should be making sure we
+ * have the space for this.
+ */
+ intel->no_batch_wrap = GL_TRUE;
+
+ /* Check that we actually emitted the state into this batch, using the
+ * UPLOAD_CTX bit as the signal.
+ */
+ assert((intel->batch->dirty_state & (1<<1)) == 0);
+
+#if 0
+ printf("emitting %d..%d=%d vertices size %d\n", offset,
+ intel->prim.current_offset, count,
+ intel->vertex_size * 4);
+#endif
+
+ if (IS_9XX(intel->intelScreen->deviceID)) {
+ BEGIN_BATCH(5, LOOP_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+ I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
+ assert((offset & !S0_VB_OFFSET_MASK) == 0);
+ OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, offset);
+ OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |
+ (intel->vertex_size << S1_VERTEX_PITCH_SHIFT));
+
+ OUT_BATCH(_3DPRIMITIVE |
+ PRIM_INDIRECT |
+ PRIM_INDIRECT_SEQUENTIAL |
+ intel->prim.primitive |
+ count);
+ OUT_BATCH(0); /* Beginning vertex index */
+ ADVANCE_BATCH();
+ } else {
+ struct i830_context *i830 = i830_context(&intel->ctx);
+
+ BEGIN_BATCH(5, LOOP_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+ I1_LOAD_S(0) | I1_LOAD_S(2) | 1);
+ /* S0 */
+ assert((offset & !S0_VB_OFFSET_MASK_830) == 0);
+ OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
+ offset | (intel->vertex_size << S0_VB_PITCH_SHIFT_830) |
+ S0_VB_ENABLE_830);
+ /* S2
+ * This is somewhat unfortunate -- VB width is tied up with
+ * vertex format data that we've already uploaded through
+ * _3DSTATE_VFT[01]_CMD. We may want to replace emits of VFT state with
+ * STATE_IMMEDIATE_1 like this to avoid duplication.
+ */
+ OUT_BATCH((i830->state.Ctx[I830_CTXREG_VF] & VFT0_TEX_COUNT_MASK) >>
+ VFT0_TEX_COUNT_SHIFT << S2_TEX_COUNT_SHIFT_830 |
+ (i830->state.Ctx[I830_CTXREG_VF2] << 16) |
+ intel->vertex_size << S2_VERTEX_0_WIDTH_SHIFT_830);
+
+ OUT_BATCH(_3DPRIMITIVE |
+ PRIM_INDIRECT |
+ PRIM_INDIRECT_SEQUENTIAL |
+ intel->prim.primitive |
+ count);
+ OUT_BATCH(0); /* Beginning vertex index */
+ ADVANCE_BATCH();
+ }
+
+ intel->no_batch_wrap = GL_FALSE;
+
+ dri_bo_unreference(vb_bo);
+}
+
+/**
+ * Uploads the locally-accumulated VB into the buffer object.
+ *
+ * This avoids us thrashing the cachelines in and out as the buffer gets
+ * filled, dispatched, then reused as the hardware completes rendering from it,
+ * and also lets us clflush less if we dispatch with a partially-filled VB.
+ *
+ * This is called normally from get_space when we're finishing a BO, but also
+ * at batch flush time so that we don't try accessing the contents of a
+ * just-dispatched buffer.
+ */
+void intel_finish_vb(struct intel_context *intel)
+{
+ if (intel->prim.vb_bo == NULL)
+ return;
+
+ dri_bo_subdata(intel->prim.vb_bo, 0, intel->prim.start_offset,
+ intel->prim.vb);
+ dri_bo_unreference(intel->prim.vb_bo);
+ intel->prim.vb_bo = NULL;
+}
/***********************************************************************
* Emit primitives as inline vertices *
@@ -182,7 +355,7 @@ intel_draw_quad(struct intel_context *intel,
intelVertexPtr v1, intelVertexPtr v2, intelVertexPtr v3)
{
GLuint vertsize = intel->vertex_size;
- GLuint *vb = intelExtendInlinePrimitive(intel, 6 * vertsize);
+ GLuint *vb = intel_get_prim_space(intel, 6);
int j;
COPY_DWORDS(j, vb, vertsize, v0);
@@ -210,7 +383,7 @@ intel_draw_triangle(struct intel_context *intel,
intelVertexPtr v0, intelVertexPtr v1, intelVertexPtr v2)
{
GLuint vertsize = intel->vertex_size;
- GLuint *vb = intelExtendInlinePrimitive(intel, 3 * vertsize);
+ GLuint *vb = intel_get_prim_space(intel, 3);
int j;
COPY_DWORDS(j, vb, vertsize, v0);
@@ -224,7 +397,7 @@ intel_draw_line(struct intel_context *intel,
intelVertexPtr v0, intelVertexPtr v1)
{
GLuint vertsize = intel->vertex_size;
- GLuint *vb = intelExtendInlinePrimitive(intel, 2 * vertsize);
+ GLuint *vb = intel_get_prim_space(intel, 2);
int j;
COPY_DWORDS(j, vb, vertsize, v0);
@@ -236,7 +409,7 @@ static void
intel_draw_point(struct intel_context *intel, intelVertexPtr v0)
{
GLuint vertsize = intel->vertex_size;
- GLuint *vb = intelExtendInlinePrimitive(intel, vertsize);
+ GLuint *vb = intel_get_prim_space(intel, 1);
int j;
/* Adjust for sub pixel position -- still required for conform. */
@@ -745,7 +918,7 @@ intelFastRenderClippedPoly(GLcontext * ctx, const GLuint * elts, GLuint n)
{
struct intel_context *intel = intel_context(ctx);
const GLuint vertsize = intel->vertex_size;
- GLuint *vb = intelExtendInlinePrimitive(intel, (n - 2) * 3 * vertsize);
+ GLuint *vb = intel_get_prim_space(intel, (n - 2) * 3);
GLubyte *vertptr = (GLubyte *) intel->verts;
const GLuint *start = (const GLuint *) V(elts[0]);
int i, j;
@@ -950,7 +1123,7 @@ intelRasterPrimitive(GLcontext * ctx, GLenum rprim, GLuint hwprim)
if (hwprim != intel->prim.primitive) {
INTEL_FIREVERTICES(intel);
- intelStartInlinePrimitive(intel, hwprim, LOOP_CLIPRECTS);
+ intel_set_prim(intel, hwprim);
}
}
@@ -1083,15 +1256,18 @@ intel_meta_draw_poly(struct intel_context *intel,
union fi *vb;
GLint i;
GLboolean was_locked = intel->locked;
+ unsigned int saved_vertex_size = intel->vertex_size;
if (!was_locked)
LOCK_HARDWARE(intel);
+ intel->vertex_size = 6;
+
/* All 3d primitives should be emitted with LOOP_CLIPRECTS,
* otherwise the drawing origin (DR4) might not be set correctly.
*/
- intelStartInlinePrimitive(intel, PRIM3D_TRIFAN, LOOP_CLIPRECTS);
- vb = (union fi *) intelExtendInlinePrimitive(intel, n * 6);
+ intel_set_prim(intel, PRIM3D_TRIFAN);
+ vb = (union fi *) intel_get_prim_space(intel, n);
for (i = 0; i < n; i++) {
vb[0].f = xy[i][0];
@@ -1105,6 +1281,8 @@ intel_meta_draw_poly(struct intel_context *intel,
INTEL_FIREVERTICES(intel);
+ intel->vertex_size = saved_vertex_size;
+
if (!was_locked)
UNLOCK_HARDWARE(intel);
}
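
The new intel_tris.c path above accumulates vertices into a malloc'ed shadow copy (intel->prim.vb) and only copies the accumulated range into the buffer object with dri_bo_subdata() when the VB is finished, as the comment on intel_finish_vb() explains; intel_flush_prim() then points S0 at the buffer with a relocation and issues an indirect, sequential _3DPRIMITIVE for the accumulated count. A condensed sketch of that accumulate-then-upload pattern (types and the emit step are stubbed; buffer_alloc/buffer_subdata stand in for dri_bo_alloc/dri_bo_subdata, and the upload, which the driver defers to intel_finish_vb() at batch-flush time, is folded into the flush here for brevity):

    #include <stdlib.h>

    #define VB_SIZE (32 * 1024)          /* INTEL_VB_SIZE in the driver */

    struct vb_state {
       void     *shadow;                 /* CPU-side copy being filled   */
       void     *bo;                     /* stand-in for the dri_bo      */
       unsigned  start, current;         /* byte offsets into the VB     */
    };

    /* Stand-ins for dri_bo_alloc()/dri_bo_subdata() and the primitive emit. */
    void *buffer_alloc(unsigned size);
    void  buffer_subdata(void *bo, unsigned offset, unsigned size, const void *data);
    void  emit_indirect_prim(void *bo, unsigned offset, unsigned count);

    /* Return space for 'count' vertices, starting a new VB when the current
     * one is full (compare intel_get_prim_space()). */
    static void *
    get_prim_space(struct vb_state *vb, unsigned count, unsigned vertex_bytes)
    {
       void *addr;

       if (vb->bo == NULL || vb->current + count * vertex_bytes > VB_SIZE) {
          if (vb->shadow == NULL)
             vb->shadow = malloc(VB_SIZE);
          vb->bo = buffer_alloc(VB_SIZE);
          vb->start = vb->current = 0;
       }
       addr = (char *)vb->shadow + vb->current;
       vb->current += count * vertex_bytes;
       return addr;
    }

    /* Upload what was accumulated and draw it.  Writing the bo once per
     * range avoids thrashing cachelines while the hardware may still be
     * reading an older VB. */
    static void
    flush_prim(struct vb_state *vb, unsigned vertex_bytes)
    {
       unsigned bytes = vb->current - vb->start;

       if (bytes == 0)
          return;
       buffer_subdata(vb->bo, vb->start, bytes, (char *)vb->shadow + vb->start);
       emit_indirect_prim(vb->bo, vb->start, bytes / vertex_bytes);
       vb->start = vb->current;
    }
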
diff --git a/i915/intel_tris.h b/i915/intel_tris.h
index 021e5c6..55b60a4 100644
--- a/i915/intel_tris.h
+++ b/i915/intel_tris.h
@@ -28,9 +28,11 @@
#ifndef INTELTRIS_INC
#define INTELTRIS_INC
-#include "mtypes.h"
-
+#include "main/mtypes.h"
+#define INTEL_VB_SIZE (32 * 1024)
+/** 3 dwords of state_immediate and 2 of 3dprim, in intel_flush_prim */
+#define INTEL_PRIM_EMIT_SIZE (5 * 4)
#define _INTEL_NEW_RENDERSTATE (_DD_NEW_LINE_STIPPLE | \
_DD_NEW_TRI_UNFILLED | \
@@ -44,11 +46,9 @@ extern void intelInitTriFuncs(GLcontext * ctx);
extern void intelChooseRenderState(GLcontext * ctx);
-extern void intelStartInlinePrimitive(struct intel_context *intel,
- GLuint prim, GLuint flags);
-extern void intelWrapInlinePrimitive(struct intel_context *intel);
-
-GLuint *intelExtendInlinePrimitive(struct intel_context *intel,
- GLuint dwords);
+void intel_set_prim(struct intel_context *intel, uint32_t prim);
+GLuint *intel_get_prim_space(struct intel_context *intel, unsigned int count);
+void intel_flush_prim(struct intel_context *intel);
+void intel_finish_vb(struct intel_context *intel);
#endif
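
intel_tris.h now also fixes the shared vertex buffer at 32kB (INTEL_VB_SIZE) and reserves a five-dword batch footprint per flush (INTEL_PRIM_EMIT_SIZE). A hedged illustration of the sizing rule those defines imply; the exact accounting lives in intel_get_prim_space()/intel_flush_prim() in intel_tris.c and is not part of this hunk.

/* Hypothetical helper, not driver code: 'count' vertices fit if they fit in
 * the 32kB VB and the batch still has room for the 5-dword state_immediate
 * plus 3DPRIMITIVE that intel_flush_prim() will emit. */
#define INTEL_VB_SIZE        (32 * 1024)
#define INTEL_PRIM_EMIT_SIZE (5 * 4)

static int prim_fits(unsigned vb_used_bytes, unsigned batch_free_bytes,
                     unsigned count, unsigned vertex_size_dwords)
{
   unsigned vb_bytes = count * vertex_size_dwords * 4;

   return (vb_used_bytes + vb_bytes <= INTEL_VB_SIZE) &&
          (batch_free_bytes >= INTEL_PRIM_EMIT_SIZE);
}
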
diff --git a/i965/Makefile.am b/i965/Makefile.am
index b1b816c..4fb59be 100644
--- a/i965/Makefile.am
+++ b/i965/Makefile.am
@@ -5,26 +5,25 @@ I965_CFLAGS = -I../shared -I../shared/server
i965_dri_la_LTLIBRARIES = i965_dri.la
i965_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(I965_CFLAGS)
i965_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl\
- $(DRM_LIBS) $(DRI_LIBS)
+ $(DRM_LIBS) -ldrm_intel $(DRI_LIBS)
i965_dri_ladir = @libdir@/dri
i965_dri_la_SOURCES = \
../shared/intel_batchbuffer.c \
../shared/intel_blit.c \
../shared/intel_buffer_objects.c \
../shared/intel_buffers.c \
- ../shared/intel_bufmgr_ttm.c \
../shared/intel_context.c \
../shared/intel_decode.c \
../shared/intel_depthstencil.c \
../shared/intel_fbo.c \
- ../shared/intel_ioctl.c \
../shared/intel_mipmap_tree.c \
../shared/intel_regions.c \
../shared/intel_screen.c \
../shared/intel_span.c \
../shared/intel_pixel.c \
- ../shared/intel_pixel_copy.c \
../shared/intel_pixel_bitmap.c \
+ ../shared/intel_pixel_copy.c \
+ ../shared/intel_pixel_draw.c \
intel_state.c \
../shared/intel_tex.c \
../shared/intel_tex_copy.c \
@@ -56,6 +55,7 @@ i965_dri_la_SOURCES = \
brw_metaops.c \
brw_misc_state.c \
brw_program.c \
+ brw_queryobj.c \
brw_sf.c \
brw_sf_emit.c \
brw_sf_state.c \
@@ -71,7 +71,6 @@ i965_dri_la_SOURCES = \
brw_vs_constval.c \
brw_vs_emit.c \
brw_vs_state.c \
- brw_vs_tnl.c \
brw_vtbl.c \
brw_wm.c \
brw_wm_debug.c \
diff --git a/i965/brw_cc.c b/i965/brw_cc.c
index 9d8984f..fa8121e 100644
--- a/i965/brw_cc.c
+++ b/i965/brw_cc.c
@@ -34,10 +34,10 @@
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
-#include "macros.h"
-#include "enums.h"
+#include "main/macros.h"
+#include "main/enums.h"
-static int upload_cc_vp( struct brw_context *brw )
+static void prepare_cc_vp( struct brw_context *brw )
{
struct brw_cc_viewport ccv;
@@ -48,7 +48,6 @@ static int upload_cc_vp( struct brw_context *brw )
dri_bo_unreference(brw->cc.vp_bo);
brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
- return dri_bufmgr_check_aperture_space(brw->cc.vp_bo);
}
const struct brw_tracked_state brw_cc_vp = {
@@ -57,7 +56,7 @@ const struct brw_tracked_state brw_cc_vp = {
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
- .prepare = upload_cc_vp
+ .prepare = prepare_cc_vp
};
struct brw_cc_unit_key {
@@ -256,16 +255,17 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
NULL, NULL);
/* Emit CC viewport relocation */
- dri_emit_reloc(bo,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- 0,
- offsetof(struct brw_cc_unit_state, cc4),
- brw->cc.vp_bo);
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ 0,
+ 0,
+ offsetof(struct brw_cc_unit_state, cc4),
+ brw->cc.vp_bo);
return bo;
}
-static int prepare_cc_unit( struct brw_context *brw )
+static void prepare_cc_unit( struct brw_context *brw )
{
struct brw_cc_unit_key key;
@@ -279,7 +279,6 @@ static int prepare_cc_unit( struct brw_context *brw )
if (brw->cc.state_bo == NULL)
brw->cc.state_bo = cc_unit_create_from_key(brw, &key);
- return dri_bufmgr_check_aperture_space(brw->cc.state_bo);
}
const struct brw_tracked_state brw_cc_unit = {
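
brw_cc.c shows the two changes repeated across the i965 state atoms in this import: prepare hooks drop their int return (per-atom aperture checks go away in favour of a validated-BO list checked at draw time), and relocations carry GEM read/write domains instead of TTM flags. A sketch of the resulting tracked-state shape, with simplified stand-in types rather than the driver's real ones.

/* Sketch of the tracked-state contract after this patch: prepare() builds or
 * looks up state BOs (and, in later hunks, registers them with
 * brw_add_validated_bo()); emit() writes the batch commands that reference
 * them.  fake_brw stands in for struct brw_context. */
struct fake_brw;

struct fake_tracked_state {
   struct { unsigned mesa, brw, cache; } dirty;
   void (*prepare)(struct fake_brw *brw);   /* was: int (*prepare)() returning an aperture status */
   void (*emit)(struct fake_brw *brw);
};
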
diff --git a/i965/brw_clip.c b/i965/brw_clip.c
index 540108e..38d8b70 100644
--- a/i965/brw_clip.c
+++ b/i965/brw_clip.c
@@ -29,9 +29,9 @@
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "intel_batchbuffer.h"
@@ -131,7 +131,7 @@ static void compile_clip_prog( struct brw_context *brw,
/* Calculate interpolants for triangle and line rasterization.
*/
-static int upload_clip_prog( struct brw_context *brw )
+static void upload_clip_prog(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_clip_prog_key key;
@@ -242,8 +242,6 @@ static int upload_clip_prog( struct brw_context *brw )
&brw->clip.prog_data);
if (brw->clip.prog_bo == NULL)
compile_clip_prog( brw, &key );
-
- return dri_bufmgr_check_aperture_space(brw->clip.prog_bo);
}
diff --git a/i965/brw_clip_line.c b/i965/brw_clip_line.c
index 0930e6a..c45d48d 100644
--- a/i965/brw_clip_line.c
+++ b/i965/brw_clip_line.c
@@ -29,11 +29,11 @@
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "shader/program.h"
+
#include "intel_batchbuffer.h"
#include "brw_defines.h"
@@ -148,7 +148,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_clip_init_clipmask(c);
/* -ve rhw workaround */
- if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw))) {
+ if (!BRW_IS_G4X(p->brw)) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
diff --git a/i965/brw_clip_point.c b/i965/brw_clip_point.c
index 2346980..d17b199 100644
--- a/i965/brw_clip_point.c
+++ b/i965/brw_clip_point.c
@@ -29,11 +29,11 @@
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "shader/program.h"
+
#include "intel_batchbuffer.h"
#include "brw_defines.h"
diff --git a/i965/brw_clip_state.c b/i965/brw_clip_state.c
index 2d0b24c..9b0d7ea 100644
--- a/i965/brw_clip_state.c
+++ b/i965/brw_clip_state.c
@@ -32,7 +32,7 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
-#include "macros.h"
+#include "main/macros.h"
struct brw_clip_unit_key {
unsigned int total_grf;
@@ -88,7 +88,21 @@ clip_unit_create_from_key(struct brw_context *brw,
clip.thread4.nr_urb_entries = key->nr_urb_entries;
clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
- clip.thread4.max_threads = 1; /* 2 threads */
+ /* If we have enough clip URB entries to run two threads, do so.
+ */
+ if (key->nr_urb_entries >= 10) {
+ /* Half of the URB entries go to each thread, and it has to be an
+ * even number.
+ */
+ assert(key->nr_urb_entries % 2 == 0);
+ clip.thread4.max_threads = 2 - 1;
+ } else {
+ assert(key->nr_urb_entries >= 5);
+ clip.thread4.max_threads = 1 - 1;
+ }
+
+ if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+ clip.thread4.max_threads = 0;
if (INTEL_DEBUG & DEBUG_STATS)
clip.thread4.stats_enable = 1;
@@ -102,7 +116,7 @@ clip_unit_create_from_key(struct brw_context *brw,
clip.clip5.api_mode = BRW_CLIP_API_OGL;
clip.clip5.clip_mode = key->clip_mode;
- if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw))
+ if (BRW_IS_G4X(brw))
clip.clip5.negative_w_clip_test = 1;
clip.clip6.clipper_viewport_state_ptr = 0;
@@ -119,19 +133,19 @@ clip_unit_create_from_key(struct brw_context *brw,
/* Emit clip program relocation */
assert(brw->clip.prog_bo);
- dri_emit_reloc(bo,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- clip.thread0.grf_reg_count << 1,
- offsetof(struct brw_clip_unit_state, thread0),
- brw->clip.prog_bo);
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ 0,
+ clip.thread0.grf_reg_count << 1,
+ offsetof(struct brw_clip_unit_state, thread0),
+ brw->clip.prog_bo);
return bo;
}
-static int upload_clip_unit( struct brw_context *brw )
+static void upload_clip_unit( struct brw_context *brw )
{
struct brw_clip_unit_key key;
- int ret = 0;
clip_unit_populate_key(brw, &key);
@@ -143,9 +157,6 @@ static int upload_clip_unit( struct brw_context *brw )
if (brw->clip.state_bo == NULL) {
brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
}
-
- ret = dri_bufmgr_check_aperture_space(brw->clip.state_bo);
- return ret;
}
const struct brw_tracked_state brw_clip_unit = {
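
The clip unit gains a second thread when the URB can feed it: with at least ten clip URB entries they are split evenly between two threads, otherwise a single thread is used, and DEBUG_SINGLE_THREAD forces one. A standalone restatement of that selection; max_threads is stored as "threads minus one", which is why the packet values above are 2 - 1 and 1 - 1.

/* Returns the value to store in clip.thread4.max_threads (hardware counts
 * threads from zero). */
#include <assert.h>

static unsigned clip_max_threads(unsigned nr_urb_entries, int debug_single_thread)
{
   unsigned threads;

   if (nr_urb_entries >= 10) {
      /* Each thread gets half of the URB entries, so the count must be even. */
      assert(nr_urb_entries % 2 == 0);
      threads = 2;
   } else {
      assert(nr_urb_entries >= 5);
      threads = 1;
   }

   if (debug_single_thread)
      threads = 1;

   return threads - 1;
}
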
diff --git a/i965/brw_clip_tri.c b/i965/brw_clip_tri.c
index 0003901..1dbba37 100644
--- a/i965/brw_clip_tri.c
+++ b/i965/brw_clip_tri.c
@@ -29,11 +29,11 @@
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "shader/program.h"
+
#include "intel_batchbuffer.h"
#include "brw_defines.h"
@@ -526,7 +526,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
/* if -ve rhw workaround bit is set,
do cliptest */
- if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw))) {
+ if (!BRW_IS_G4X(p->brw)) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
diff --git a/i965/brw_clip_unfilled.c b/i965/brw_clip_unfilled.c
index 6f20d79..d7ca517 100644
--- a/i965/brw_clip_unfilled.c
+++ b/i965/brw_clip_unfilled.c
@@ -29,11 +29,11 @@
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "shader/program.h"
+
#include "intel_batchbuffer.h"
#include "brw_defines.h"
diff --git a/i965/brw_clip_util.c b/i965/brw_clip_util.c
index c32bd4e..9d3b0be 100644
--- a/i965/brw_clip_util.c
+++ b/i965/brw_clip_util.c
@@ -30,11 +30,11 @@
*/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "shader/program.h"
+
#include "intel_batchbuffer.h"
#include "brw_defines.h"
diff --git a/i965/brw_context.c b/i965/brw_context.c
index 1c7ad5c..d7a2bd9 100644
--- a/i965/brw_context.c
+++ b/i965/brw_context.c
@@ -30,11 +30,18 @@
*/
+#include "main/imports.h"
+#include "main/api_noop.h"
+#include "main/macros.h"
+#include "main/vtxfmt.h"
+#include "main/simple_list.h"
+#include "shader/shader_api.h"
+
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_draw.h"
+#include "brw_state.h"
#include "brw_vs.h"
-#include "imports.h"
#include "intel_tex.h"
#include "intel_blit.h"
#include "intel_batchbuffer.h"
@@ -43,10 +50,7 @@
#include "tnl/t_pipeline.h"
#include "utils.h"
-#include "api_noop.h"
-#include "vtxfmt.h"
-#include "shader/shader_api.h"
/***************************************
* Mesa's Driver Functions
@@ -67,6 +71,9 @@ static void brwInitDriverFunctions( struct dd_function_table *functions )
brwInitFragProgFuncs( functions );
brwInitProgFuncs( functions );
+ brw_init_queryobj_functions(functions);
+
+ functions->Viewport = intel_viewport;
}
@@ -122,9 +129,10 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
- ctx->Const.MaxTextureUnits = BRW_MAX_TEX_UNIT;
ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
- ctx->Const.MaxTextureCoordUnits = BRW_MAX_TEX_UNIT;
+ ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
+ ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
+ ctx->Const.MaxTextureImageUnits);
ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */
/* Advertise the full hardware capabilities. The new memory
@@ -134,8 +142,10 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
ctx->Const.Max3DTextureLevels = 9;
ctx->Const.MaxCubeTextureLevels = 12;
ctx->Const.MaxTextureRectSize = (1<<11);
- ctx->Const.MaxTextureUnits = BRW_MAX_TEX_UNIT;
+ /* if conformance mode is set, swrast can handle any size AA point */
+ ctx->Const.MaxPointSizeAA = 255.0;
+
/* ctx->Const.MaxNativeVertexProgramTemps = 32; */
brw_init_attribs( brw );
@@ -147,11 +157,12 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
brw->emit_state_always = 0;
- ctx->FragmentProgram._MaintainTexEnvProgram = 1;
+ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+ ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
- brw_draw_init( brw );
+ make_empty_list(&brw->query.active_head);
- brw_ProgramCacheInit( ctx );
+ brw_draw_init( brw );
return GL_TRUE;
}
diff --git a/i965/brw_context.h b/i965/brw_context.h
index 32e0554..5d3f99e 100644
--- a/i965/brw_context.h
+++ b/i965/brw_context.h
@@ -35,7 +35,7 @@
#include "intel_context.h"
#include "brw_structs.h"
-#include "imports.h"
+#include "main/imports.h"
/* Glossary:
@@ -130,18 +130,19 @@ struct brw_context;
#define BRW_NEW_CONTEXT 0x80
#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100
#define BRW_NEW_INPUT_VARYING 0x200
-#define BRW_NEW_TNL_PROGRAM 0x400
#define BRW_NEW_PSP 0x800
#define BRW_NEW_METAOPS 0x1000
#define BRW_NEW_FENCE 0x2000
-#define BRW_NEW_LOCK 0x4000
+#define BRW_NEW_INDICES 0x4000
+#define BRW_NEW_VERTICES 0x8000
/**
* Used for any batch entry with a relocated pointer that will be used
* by any 3D rendering.
*/
-#define BRW_NEW_BATCH 0x8000
+#define BRW_NEW_BATCH 0x10000
/** brw->depth_region updated */
-#define BRW_NEW_DEPTH_BUFFER 0x10000
+#define BRW_NEW_DEPTH_BUFFER 0x20000
+#define BRW_NEW_NR_SURFACES 0x40000
struct brw_state_flags {
/** State update flags signalled by mesa internals */
@@ -157,7 +158,6 @@ struct brw_state_flags {
struct brw_vertex_program {
struct gl_vertex_program program;
GLuint id;
- GLuint param_state; /* flags indicating state tracked by params */
};
@@ -165,7 +165,6 @@ struct brw_vertex_program {
struct brw_fragment_program {
struct gl_fragment_program program;
GLuint id;
- GLuint param_state; /* flags indicating state tracked by params */
};
@@ -239,7 +238,7 @@ struct brw_vs_ouput_sizes {
};
-#define BRW_MAX_TEX_UNIT 8
+#define BRW_MAX_TEX_UNIT 16
#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + MAX_DRAW_BUFFERS
enum brw_cache_id {
@@ -332,7 +331,7 @@ struct brw_state_pointers {
*/
struct brw_tracked_state {
struct brw_state_flags dirty;
- int (*prepare)( struct brw_context *brw );
+ void (*prepare)( struct brw_context *brw );
void (*emit)( struct brw_context *brw );
};
@@ -409,7 +408,22 @@ struct brw_tnl_cache {
GLuint size, n_items;
};
+struct brw_query_object {
+ struct gl_query_object Base;
+ /** Doubly linked list of active query objects in the context. */
+ struct brw_query_object *prev, *next;
+
+ /** Last query BO associated with this query. */
+ dri_bo *bo;
+ /** First index in bo with query data for this object. */
+ int first_index;
+ /** Last index in bo with query data for this object. */
+ int last_index;
+
+ /* Total count of pixels from previous BOs */
+ unsigned int count;
+};
struct brw_context
{
@@ -417,7 +431,6 @@ struct brw_context
GLuint primitive;
GLboolean emit_state_always;
- GLboolean wrap;
GLboolean tmp_fallback;
GLboolean no_batch_wrap;
@@ -429,6 +442,19 @@ struct brw_context
GLuint nr_draw_regions;
struct intel_region *draw_regions[MAX_DRAW_BUFFERS];
struct intel_region *depth_region;
+
+ /**
+ * List of buffers accumulated in brw_validate_state to receive
+ * dri_bo_check_aperture treatment before exec, so we can know if we
+ * should flush the batch and try again before emitting primitives.
+ *
+ * This can be a fixed number as we only have a limited number of
+ * objects referenced from the batchbuffer in a primitive emit,
+ * consisting of the vertex buffers, pipelined state pointers,
+ * the CURBE, the depth buffer, and a query BO.
+ */
+ dri_bo *validated_bos[VERT_ATTRIB_MAX + 16];
+ int validated_bo_count;
} state;
struct brw_state_pointers attribs;
@@ -450,9 +476,22 @@ struct brw_context
* for changes to this state:
*/
struct brw_vertex_info info;
+ unsigned int min_index, max_index;
} vb;
struct {
+ /**
+ * Index buffer for this draw_prims call.
+ *
+ * Updates are signaled by BRW_NEW_INDICES.
+ */
+ const struct _mesa_index_buffer *ib;
+
+ dri_bo *bo;
+ unsigned int offset;
+ } ib;
+
+ struct {
/* Will be allocated on demand if needed.
*/
struct brw_state_pointers attribs;
@@ -473,10 +512,6 @@ struct brw_context
GLboolean active;
} metaops;
- /* Track fixed function t&l in a vertex program:
- */
- struct gl_vertex_program *tnl_program;
- struct brw_tnl_cache tnl_program_cache;
/* Active vertex program:
*/
@@ -542,6 +577,11 @@ struct brw_context
GLfloat *last_buf;
GLuint last_bufsz;
+ /**
+ * Whether we should create a new bo instead of reusing the old one
+ * (if we just dispatched the batch pointing at the old one).
+ */
+ GLboolean need_new_bo;
} curbe;
struct {
@@ -611,7 +651,12 @@ struct brw_context
dri_bo *vp_bo;
} cc;
-
+ struct {
+ struct brw_query_object active_head;
+ dri_bo *bo;
+ int index;
+ GLboolean active;
+ } query;
/* Used to give every program string a unique id
*/
GLuint program_id;
@@ -636,15 +681,13 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
__DRIcontextPrivate *driContextPriv,
void *sharedContextPrivate);
-
-
/*======================================================================
- * brw_state.c
+ * brw_queryobj.c
*/
-int brw_validate_state( struct brw_context *brw );
-void brw_init_state( struct brw_context *brw );
-void brw_destroy_state( struct brw_context *brw );
-
+void brw_init_queryobj_functions(struct dd_function_table *functions);
+void brw_prepare_query_begin(struct brw_context *brw);
+void brw_emit_query_begin(struct brw_context *brw);
+void brw_emit_query_end(struct brw_context *brw);
/*======================================================================
* brw_state_dump.c
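
brw_context.h also adds the occlusion-query plumbing used by the new brw_queryobj.c: every brw_query_object sits on a doubly linked list of active queries and remembers which slice of the current query BO belongs to it, plus a running pixel total from BOs that were already read back. A hedged sketch of how such an object's result could be accumulated once its BO is mapped; the (begin, end) pair layout per index is an assumption here, since brw_queryobj.c itself is not shown in this section.

/* Hedged sketch of query result accumulation.  Assumes each index in the
 * mapped query BO holds a (begin, end) pair of 64-bit depth counters; fake
 * types only. */
#include <stdint.h>

struct fake_query {
   uint64_t (*results)[2];     /* mapped query BO: results[i][0] = begin, [1] = end */
   int first_index, last_index;
   unsigned count;             /* pixels accumulated from previous BOs */
};

static unsigned query_total(const struct fake_query *q)
{
   unsigned total = q->count;
   int i;

   for (i = q->first_index; i <= q->last_index; i++)
      total += (unsigned)(q->results[i][1] - q->results[i][0]);

   return total;
}
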
diff --git a/i965/brw_curbe.c b/i965/brw_curbe.c
index 5ff4e29..fbf473a 100644
--- a/i965/brw_curbe.c
+++ b/i965/brw_curbe.c
@@ -31,10 +31,10 @@
-#include "glheader.h"
-#include "context.h"
-#include "macros.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "shader/prog_parameter.h"
#include "shader/prog_statevars.h"
#include "intel_batchbuffer.h"
@@ -46,7 +46,7 @@
/* Partition the CURBE between the various users of constant values:
*/
-static int calculate_curbe_offsets( struct brw_context *brw )
+static void calculate_curbe_offsets( struct brw_context *brw )
{
/* CACHE_NEW_WM_PROG */
GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
@@ -117,7 +117,6 @@ static int calculate_curbe_offsets( struct brw_context *brw )
brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
}
- return 0;
}
@@ -156,19 +155,7 @@ void brw_upload_constant_buffer_state(struct brw_context *brw)
assert(brw->urb.nr_cs_entries);
BRW_CACHED_BATCH_STRUCT(brw, &cbs);
-}
-
-#if 0
-const struct brw_tracked_state brw_constant_buffer_state = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_URB_FENCE,
- .cache = 0
- },
- .update = brw_upload_constant_buffer_state
-};
-#endif
-
+}
static GLfloat fixed_plane[6][4] = {
{ 0, 0, -1, 1 },
@@ -183,7 +170,7 @@ static GLfloat fixed_plane[6][4] = {
* cache mechanism, but maybe would benefit from a comparison against
* the current uploaded set of constants.
*/
-static int prepare_constant_buffer(struct brw_context *brw)
+static void prepare_constant_buffer(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
@@ -197,8 +184,8 @@ static int prepare_constant_buffer(struct brw_context *brw)
* function will also be called whenever fp or vp changes.
*/
brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION);
- brw->curbe.tracked_state.dirty.mesa |= vp->param_state;
- brw->curbe.tracked_state.dirty.mesa |= fp->param_state;
+ brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags;
+ brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags;
if (sz == 0) {
@@ -207,8 +194,8 @@ static int prepare_constant_buffer(struct brw_context *brw)
brw->curbe.last_buf = NULL;
brw->curbe.last_bufsz = 0;
}
-
- return 0;
+
+ return;
}
buf = (GLfloat *)malloc(bufsz);
@@ -295,7 +282,8 @@ static int prepare_constant_buffer(struct brw_context *brw)
brw->curbe.last_bufsz = bufsz;
if (brw->curbe.curbe_bo != NULL &&
- brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)
+ (brw->curbe.need_new_bo ||
+ brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size))
{
dri_bo_unreference(brw->curbe.curbe_bo);
brw->curbe.curbe_bo = NULL;
@@ -306,10 +294,7 @@ static int prepare_constant_buffer(struct brw_context *brw)
* They're generally around 64b.
*/
brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
- 4096, 1 << 6,
- DRM_BO_FLAG_MEM_LOCAL |
- DRM_BO_FLAG_CACHED |
- DRM_BO_FLAG_CACHED_MAPPED);
+ 4096, 1 << 6);
brw->curbe.curbe_next_offset = 0;
}
@@ -322,6 +307,7 @@ static int prepare_constant_buffer(struct brw_context *brw)
dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf);
}
+ brw_add_validated_bo(brw, brw->curbe.curbe_bo);
/* Because this provokes an action (ie copy the constants into the
* URB), it shouldn't be shortcircuited if identical to the
@@ -336,9 +322,6 @@ static int prepare_constant_buffer(struct brw_context *brw)
* flushes as necessary when doublebuffering of CURBEs isn't
* possible.
*/
-
- /* check aperture space for this bo */
- return dri_bufmgr_check_aperture_space(brw->curbe.curbe_bo);
}
@@ -353,7 +336,8 @@ static void emit_constant_buffer(struct brw_context *brw)
OUT_BATCH(0);
} else {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
- OUT_RELOC(brw->curbe.curbe_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+ OUT_RELOC(brw->curbe.curbe_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
(sz - 1) + brw->curbe.curbe_offset);
}
ADVANCE_BATCH();
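
The CURBE path keeps sub-allocating constant uploads out of a single 4kB BO; a new BO is taken when curbe.need_new_bo says the old one has already been handed to the kernel, or when the next upload would overflow it. A simplified sketch of that allocate-or-reuse step with stand-in types; when need_new_bo gets cleared again is not visible in this hunk, so clearing it on reallocation below is an assumption.

/* Stand-in types; malloc()/free() replace dri_bo_alloc()/dri_bo_unreference()
 * and there is no error handling -- sketch only. */
#include <stdlib.h>

struct fake_bo { size_t size; };

struct fake_curbe {
   struct fake_bo *bo;
   size_t next_offset;      /* where the next constant block would land */
   int need_new_bo;         /* set once the old BO was dispatched with a batch */
};

static size_t curbe_get_offset(struct fake_curbe *c, size_t bufsz)
{
   if (c->bo != NULL &&
       (c->need_new_bo || c->next_offset + bufsz > c->bo->size)) {
      free(c->bo);
      c->bo = NULL;
   }

   if (c->bo == NULL) {
      c->bo = malloc(sizeof(*c->bo));   /* dri_bo_alloc(bufmgr, "CURBE", 4096, 1 << 6) */
      c->bo->size = 4096;
      c->next_offset = 0;
      c->need_new_bo = 0;               /* assumption, see lead-in */
   }

   c->next_offset += bufsz;
   return c->next_offset - bufsz;       /* offset to pass to dri_bo_subdata() */
}
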
diff --git a/i965/brw_defines.h b/i965/brw_defines.h
index 92c058a..39c3225 100644
--- a/i965/brw_defines.h
+++ b/i965/brw_defines.h
@@ -33,69 +33,6 @@
#ifndef BRW_DEFINES_H
#define BRW_DEFINES_H
-/*
- */
-#define MI_NOOP 0x00
-#define MI_USER_INTERRUPT 0x02
-#define MI_WAIT_FOR_EVENT 0x03
-#define MI_FLUSH 0x04
-#define MI_REPORT_HEAD 0x07
-#define MI_ARB_ON_OFF 0x08
-#define MI_BATCH_BUFFER_END 0x0A
-#define MI_OVERLAY_FLIP 0x11
-#define MI_LOAD_SCAN_LINES_INCL 0x12
-#define MI_LOAD_SCAN_LINES_EXCL 0x13
-#define MI_DISPLAY_BUFFER_INFO 0x14
-#define MI_SET_CONTEXT 0x18
-#define MI_STORE_DATA_IMM 0x20
-#define MI_STORE_DATA_INDEX 0x21
-#define MI_LOAD_REGISTER_IMM 0x22
-#define MI_STORE_REGISTER_MEM 0x24
-#define MI_BATCH_BUFFER_START 0x31
-
-#define MI_SYNCHRONOUS_FLIP 0x0
-#define MI_ASYNCHRONOUS_FLIP 0x1
-
-#define MI_BUFFER_SECURE 0x0
-#define MI_BUFFER_NONSECURE 0x1
-
-#define MI_ARBITRATE_AT_CHAIN_POINTS 0x0
-#define MI_ARBITRATE_BETWEEN_INSTS 0x1
-#define MI_NO_ARBITRATION 0x3
-
-#define MI_CONDITION_CODE_WAIT_DISABLED 0x0
-#define MI_CONDITION_CODE_WAIT_0 0x1
-#define MI_CONDITION_CODE_WAIT_1 0x2
-#define MI_CONDITION_CODE_WAIT_2 0x3
-#define MI_CONDITION_CODE_WAIT_3 0x4
-#define MI_CONDITION_CODE_WAIT_4 0x5
-
-#define MI_DISPLAY_PIPE_A 0x0
-#define MI_DISPLAY_PIPE_B 0x1
-
-#define MI_DISPLAY_PLANE_A 0x0
-#define MI_DISPLAY_PLANE_B 0x1
-#define MI_DISPLAY_PLANE_C 0x2
-
-#define MI_STANDARD_FLIP 0x0
-#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD 0x1
-#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE 0x2
-#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER 0x3
-
-#define MI_PHYSICAL_ADDRESS 0x0
-#define MI_VIRTUAL_ADDRESS 0x1
-
-#define MI_BUFFER_MEMORY_MAIN 0x0
-#define MI_BUFFER_MEMORY_GTT 0x2
-#define MI_BUFFER_MEMORY_PER_PROCESS_GTT 0x3
-
-#define MI_FLIP_CONTINUE 0x0
-#define MI_FLIP_ON 0x1
-#define MI_FLIP_OFF 0x2
-
-#define MI_UNTRUSTED_REGISTER_SPACE 0x0
-#define MI_TRUSTED_REGISTER_SPACE 0x1
-
/* 3D state:
*/
#define _3DOP_3DSTATE_PIPELINED 0x0
@@ -119,7 +56,6 @@
#define _3DSTATE_LINE_STIPPLE 0x08
#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
#define _3DCONTROL 0x00
-#define _3DPRIMITIVE 0x00
#define PIPE_CONTROL_NOWRITE 0x00
#define PIPE_CONTROL_WRITEIMMEDIATE 0x01
@@ -862,10 +798,9 @@
#include "intel_chipset.h"
-#define BRW_IS_GM45(brw) (IS_GM45_GM((brw)->intel.intelScreen->deviceID))
#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID))
-#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
-#define CMD_VF_STATISTICS(brw) ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
-#define URB_SIZES(brw) ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? 384 : 256) /* 512 bit unit */
+#define CMD_PIPELINE_SELECT(brw) (BRW_IS_G4X(brw) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
+#define CMD_VF_STATISTICS(brw) (BRW_IS_G4X(brw) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
+#define URB_SIZES(brw) (BRW_IS_G4X(brw) ? 384 : 256) /* 512 bit units */
#endif
diff --git a/i965/brw_draw.c b/i965/brw_draw.c
index f90c5f7..785fb78 100644
--- a/i965/brw_draw.c
+++ b/i965/brw_draw.c
@@ -27,11 +27,11 @@
#include <stdlib.h>
-#include "glheader.h"
-#include "context.h"
-#include "state.h"
-#include "api_validate.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/api_validate.h"
+#include "main/enums.h"
#include "brw_draw.h"
#include "brw_defines.h"
@@ -39,7 +39,6 @@
#include "brw_state.h"
#include "brw_fallback.h"
-#include "intel_ioctl.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
@@ -50,7 +49,7 @@
#define FILE_DEBUG_FLAG DEBUG_BATCH
-static GLuint hw_prim[GL_POLYGON+1] = {
+static GLuint prim_to_hw_prim[GL_POLYGON+1] = {
_3DPRIM_POINTLIST,
_3DPRIM_LINELIST,
_3DPRIM_LINELOOP,
@@ -83,9 +82,8 @@ static const GLenum reduced_prim[GL_POLYGON+1] = {
* programs be immune to the active primitive (ie. cope with all
* possibilities). That may not be realistic however.
*/
-static GLuint brw_set_prim(struct brw_context *brw, GLenum prim, GLboolean *need_flush)
+static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
{
- int ret;
if (INTEL_DEBUG & DEBUG_PRIMS)
_mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
@@ -105,13 +103,9 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim, GLboolean *need
brw->intel.reduced_primitive = reduced_prim[prim];
brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
}
-
- ret = brw_validate_state(brw);
- if (ret)
- *need_flush = GL_TRUE;
}
- return hw_prim[prim];
+ return prim_to_hw_prim[prim];
}
@@ -126,12 +120,11 @@ static GLuint trim(GLenum prim, GLuint length)
}
-static void brw_emit_prim( struct brw_context *brw,
- const struct _mesa_prim *prim )
-
+static void brw_emit_prim(struct brw_context *brw,
+ const struct _mesa_prim *prim,
+ uint32_t hw_prim)
{
struct brw_3d_primitive prim_packet;
- GLboolean need_flush = GL_FALSE;
if (INTEL_DEBUG & DEBUG_PRIMS)
_mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
@@ -140,7 +133,7 @@ static void brw_emit_prim( struct brw_context *brw,
prim_packet.header.opcode = CMD_3D_PRIM;
prim_packet.header.length = sizeof(prim_packet)/4 - 2;
prim_packet.header.pad = 0;
- prim_packet.header.topology = brw_set_prim(brw, prim->mode, &need_flush);
+ prim_packet.header.topology = hw_prim;
prim_packet.header.indexed = prim->indexed;
prim_packet.verts_per_instance = trim(prim->mode, prim->count);
@@ -149,22 +142,25 @@ static void brw_emit_prim( struct brw_context *brw,
prim_packet.start_instance_location = 0;
prim_packet.base_vert_location = 0;
+ /* Can't wrap here, since we rely on the validated state. */
+ brw->no_batch_wrap = GL_TRUE;
if (prim_packet.verts_per_instance) {
intel_batchbuffer_data( brw->intel.batch, &prim_packet,
sizeof(prim_packet), LOOP_CLIPRECTS);
}
-
- assert(need_flush == GL_FALSE);
+ brw->no_batch_wrap = GL_FALSE;
}
static void brw_merge_inputs( struct brw_context *brw,
const struct gl_client_array *arrays[])
{
- struct brw_vertex_element *inputs = brw->vb.inputs;
struct brw_vertex_info old = brw->vb.info;
GLuint i;
- memset(inputs, 0, sizeof(*inputs));
+ for (i = 0; i < VERT_ATTRIB_MAX; i++)
+ dri_bo_unreference(brw->vb.inputs[i].bo);
+
+ memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs));
memset(&brw->vb.info, 0, sizeof(brw->vb.info));
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
@@ -175,7 +171,8 @@ static void brw_merge_inputs( struct brw_context *brw,
if (arrays[i]->StrideB != 0)
brw->vb.info.varying |= 1 << i;
- brw->vb.info.sizes[i/16] |= (inputs[i].glarray->Size - 1) << ((i%16) * 2);
+ brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) <<
+ ((i%16) * 2);
}
}
@@ -257,21 +254,29 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
struct intel_context *intel = intel_context(ctx);
struct brw_context *brw = brw_context(ctx);
GLboolean retval = GL_FALSE;
+ GLboolean warn = GL_FALSE;
+ GLboolean first_time = GL_TRUE;
GLuint i;
- GLuint ib_offset;
- dri_bo *ib_bo;
- GLboolean force_flush = GL_FALSE;
- int ret;
if (ctx->NewState)
_mesa_update_state( ctx );
+ if (check_fallbacks(brw, prim, nr_prims))
+ return GL_FALSE;
+
brw_validate_textures( brw );
/* Bind all inputs, derive varying and size information:
*/
brw_merge_inputs( brw, arrays );
-
+
+ brw->ib.ib = ib;
+ brw->state.dirty.brw |= BRW_NEW_INDICES;
+
+ brw->vb.min_index = min_index;
+ brw->vb.max_index = max_index;
+ brw->state.dirty.brw |= BRW_NEW_VERTICES;
+
/* Have to validate state quite late. Will rebuild tnl_program,
* which depends on varying information.
*
@@ -281,12 +286,14 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
LOCK_HARDWARE(intel);
- if (brw->intel.numClipRects == 0) {
+ if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) {
UNLOCK_HARDWARE(intel);
return GL_TRUE;
}
- {
+ for (i = 0; i < nr_prims; i++) {
+ uint32_t hw_prim;
+
/* Flush the batch if it's approaching full, so that we don't wrap while
* we've got validated state that needs to be in the same batch as the
* primitives. This fraction is just a guess (minimal full state plus
@@ -294,76 +301,58 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
* an upper bound of how much we might emit in a single
* brw_try_draw_prims().
*/
- flush:
- if (force_flush)
- brw->no_batch_wrap = GL_FALSE;
+ intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4,
+ LOOP_CLIPRECTS);
- if (intel->batch->ptr - intel->batch->map > intel->batch->size * 3 / 4
- /* brw_emit_prim may change the cliprect_mode to LOOP_CLIPRECTS */
- || intel->batch->cliprect_mode != LOOP_CLIPRECTS || (force_flush == GL_TRUE))
- intel_batchbuffer_flush(intel->batch);
+ hw_prim = brw_set_prim(brw, prim[i].mode);
- force_flush = GL_FALSE;
- brw->no_batch_wrap = GL_TRUE;
+ if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) {
+ first_time = GL_FALSE;
- /* Set the first primitive early, ahead of validate_state:
- */
- brw_set_prim(brw, prim[0].mode, &force_flush);
+ brw_validate_state(brw);
- /* XXX: Need to separate validate and upload of state.
- */
- ret = brw_validate_state( brw );
- if (ret) {
- force_flush = GL_TRUE;
- goto flush;
- }
-
- /* Various fallback checks:
- */
- if (brw->intel.Fallback)
- goto out;
-
- if (check_fallbacks( brw, prim, nr_prims ))
- goto out;
-
- /* need to account for index buffer and vertex buffer */
- if (ib) {
- ret = brw_prepare_indices( brw, ib , &ib_bo, &ib_offset);
- if (ret) {
- force_flush = GL_TRUE;
- goto flush;
- }
- }
+ /* Various fallback checks: */
+ if (brw->intel.Fallback)
+ goto out;
- ret = brw_prepare_vertices( brw, min_index, max_index);
- if (ret < 0)
- goto out;
-
- if (ret > 0) {
- force_flush = GL_TRUE;
- goto flush;
+ /* Check that we can fit our state in with our existing batchbuffer, or
+ * flush otherwise.
+ */
+ if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+ brw->state.validated_bo_count)) {
+ static GLboolean warned;
+ intel_batchbuffer_flush(intel->batch);
+
+ /* Validate the state after we flushed the batch (which would have
+ * changed the set of dirty state). If we still fail to
+ * check_aperture, warn of what's happening, but attempt to continue
+ * on since it may succeed anyway, and the user would probably rather
+ * see a failure and a warning than a fallback.
+ */
+ brw_validate_state(brw);
+ if (!warned &&
+ dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+ brw->state.validated_bo_count)) {
+ warn = GL_TRUE;
+ warned = GL_TRUE;
+ }
+ }
+
+ brw_upload_state(brw);
}
-
- /* Upload index, vertex data:
- */
- if (ib)
- brw_emit_indices( brw, ib, ib_bo, ib_offset);
- brw_emit_vertices( brw, min_index, max_index);
-
- for (i = 0; i < nr_prims; i++) {
- brw_emit_prim(brw, &prim[i]);
- }
+ brw_emit_prim(brw, &prim[i], hw_prim);
retval = GL_TRUE;
}
out:
-
- brw->no_batch_wrap = GL_FALSE;
-
UNLOCK_HARDWARE(intel);
+ if (warn)
+ fprintf(stderr, "i965: Single primitive emit potentially exceeded "
+ "available aperture space\n");
+
if (!retval)
DBG("%s failed\n", __FUNCTION__);
@@ -420,7 +409,6 @@ void brw_draw_prims( GLcontext *ctx,
return;
}
-
/* Make a first attempt at drawing:
*/
retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
@@ -433,6 +421,7 @@ void brw_draw_prims( GLcontext *ctx,
_swsetup_Wakeup(ctx);
_tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
}
+
}
void brw_draw_init( struct brw_context *brw )
@@ -447,8 +436,18 @@ void brw_draw_init( struct brw_context *brw )
void brw_draw_destroy( struct brw_context *brw )
{
+ int i;
+
if (brw->vb.upload.bo != NULL) {
dri_bo_unreference(brw->vb.upload.bo);
brw->vb.upload.bo = NULL;
}
+
+ for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+ dri_bo_unreference(brw->vb.inputs[i].bo);
+ brw->vb.inputs[i].bo = NULL;
+ }
+
+ dri_bo_unreference(brw->ib.bo);
+ brw->ib.bo = NULL;
}
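
brw_try_draw_prims() now validates state whenever the primitive changes, asks the buffer manager whether everything on the validated-BO list fits in the aperture, and on failure flushes the batch, revalidates and, if it still does not fit, warns once and attempts the emit anyway instead of falling back. A condensed sketch of that flush-and-retry shape; validate(), check_aperture() and flush_batch() are stand-ins for brw_validate_state(), dri_bufmgr_check_aperture_space() and intel_batchbuffer_flush().

/* Condensed sketch of the aperture handling in brw_try_draw_prims(); the
 * stubs just simulate one failing check that succeeds after a flush. */
#include <stdio.h>

static int aperture_full = 1;
static void validate(void) { }
static int  check_aperture(void) { return aperture_full; }   /* nonzero = does not fit */
static void flush_batch(void) { aperture_full = 0; }

static void prepare_for_prim(void)
{
   static int warned;

   validate();

   if (check_aperture()) {
      flush_batch();
      /* Flushing changed the dirty state, so validate again; if the BOs
       * still do not fit, warn once and try the emit anyway. */
      validate();
      if (!warned && check_aperture()) {
         fprintf(stderr, "i965: Single primitive emit potentially exceeded "
                 "available aperture space\n");
         warned = 1;
      }
   }
}
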
diff --git a/i965/brw_draw.h b/i965/brw_draw.h
index b354740..9aebbdb 100644
--- a/i965/brw_draw.h
+++ b/i965/brw_draw.h
@@ -28,10 +28,9 @@
#ifndef BRW_DRAW_H
#define BRW_DRAW_H
-#include "mtypes.h" /* for GLcontext... */
+#include "main/mtypes.h" /* for GLcontext... */
#include "vbo/vbo.h"
-#include "dri_bufmgr.h"
struct brw_context;
@@ -51,27 +50,4 @@ void brw_draw_destroy( struct brw_context *brw );
void brw_init_current_values(GLcontext *ctx,
struct gl_client_array *arrays);
-
-/* brw_draw_upload.c
- */
-int brw_prepare_indices( struct brw_context *brw,
- const struct _mesa_index_buffer *index_buffer,
- dri_bo **bo_return,
- GLuint *offset_return);
-
-void brw_emit_indices( struct brw_context *brw,
- const struct _mesa_index_buffer *index_buffer,
- dri_bo *bo,
- GLuint offset);
-
-int brw_prepare_vertices( struct brw_context *brw,
- GLuint min_index,
- GLuint max_index );
-
-void brw_emit_vertices( struct brw_context *brw,
- GLuint min_index,
- GLuint max_index );
-
-
-
#endif
diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c
index 7946ffd..73d6dea 100644
--- a/i965/brw_draw_upload.c
+++ b/i965/brw_draw_upload.c
@@ -27,11 +27,11 @@
#include <stdlib.h>
-#include "glheader.h"
-#include "context.h"
-#include "state.h"
-#include "api_validate.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/api_validate.h"
+#include "main/enums.h"
#include "brw_draw.h"
#include "brw_defines.h"
@@ -39,7 +39,6 @@
#include "brw_state.h"
#include "brw_fallback.h"
-#include "intel_ioctl.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_tex.h"
@@ -228,10 +227,7 @@ static void wrap_buffers( struct brw_context *brw,
if (brw->vb.upload.bo != NULL)
dri_bo_unreference(brw->vb.upload.bo);
brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO",
- size, 1,
- DRM_BO_FLAG_MEM_LOCAL |
- DRM_BO_FLAG_CACHED |
- DRM_BO_FLAG_CACHED_MAPPED);
+ size, 1);
/* Set the internal VBO to no-backing-store. We only use it as a
* temporary within a brw_try_draw_prims while the lock is held.
@@ -254,10 +250,10 @@ static void get_space( struct brw_context *brw,
wrap_buffers(brw, size);
}
+ assert(*bo_return == NULL);
dri_bo_reference(brw->vb.upload.bo);
*bo_return = brw->vb.upload.bo;
*offset_return = brw->vb.upload.offset;
-
brw->vb.upload.offset += size;
}
@@ -304,9 +300,7 @@ copy_array_to_vbo_array( struct brw_context *brw,
}
}
-int brw_prepare_vertices( struct brw_context *brw,
- GLuint min_index,
- GLuint max_index )
+static void brw_prepare_vertices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
@@ -314,7 +308,8 @@ int brw_prepare_vertices( struct brw_context *brw,
GLuint i;
const unsigned char *ptr = NULL;
GLuint interleave = 0;
- int ret = 0;
+ unsigned int min_index = brw->vb.min_index;
+ unsigned int max_index = brw->vb.max_index;
struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
GLuint nr_enabled = 0;
@@ -342,8 +337,10 @@ int brw_prepare_vertices( struct brw_context *brw,
* cases with > 17 vertex attributes enabled, so it probably
* isn't an issue at this point.
*/
- if (nr_enabled >= BRW_VEP_MAX)
- return -1;
+ if (nr_enabled >= BRW_VEP_MAX) {
+ intel->Fallback = 1;
+ return;
+ }
for (i = 0; i < nr_enabled; i++) {
struct brw_vertex_element *input = enabled[i];
@@ -356,22 +353,31 @@ int brw_prepare_vertices( struct brw_context *brw,
intel_buffer_object(input->glarray->BufferObj);
/* Named buffer object: Just reference its contents directly. */
+ dri_bo_unreference(input->bo);
input->bo = intel_bufferobj_buffer(intel, intel_buffer,
INTEL_READ);
dri_bo_reference(input->bo);
input->offset = (unsigned long)input->glarray->Ptr;
input->stride = input->glarray->StrideB;
-
- ret |= dri_bufmgr_check_aperture_space(input->bo);
} else {
+ if (input->bo != NULL) {
+ /* Already-uploaded vertex data is present from a previous
+ * prepare_vertices, but we had to re-validate state due to
+ * check_aperture failing and a new batch being produced.
+ */
+ continue;
+ }
+
/* Queue the buffer object up to be uploaded in the next pass,
* when we've decided if we're doing interleaved or not.
*/
if (i == 0) {
/* Position array not properly enabled:
*/
- if (input->glarray->StrideB == 0)
- return -1;
+ if (input->glarray->StrideB == 0) {
+ intel->Fallback = 1;
+ return;
+ }
interleave = input->glarray->StrideB;
ptr = input->glarray->Ptr;
@@ -403,7 +409,6 @@ int brw_prepare_vertices( struct brw_context *brw,
*/
copy_array_to_vbo_array(brw, upload[0], interleave);
- ret |= dri_bufmgr_check_aperture_space(upload[0]->bo);
for (i = 1; i < nr_uploads; i++) {
/* Then, just point upload[i] at upload[0]'s buffer. */
upload[i]->stride = interleave;
@@ -417,23 +422,19 @@ int brw_prepare_vertices( struct brw_context *brw,
/* Upload non-interleaved arrays */
for (i = 0; i < nr_uploads; i++) {
copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size);
- if (upload[i]->bo) {
- ret |= dri_bufmgr_check_aperture_space(upload[i]->bo);
- }
}
}
+ brw_prepare_query_begin(brw);
- if (ret)
- return 1;
-
+ for (i = 0; i < nr_enabled; i++) {
+ struct brw_vertex_element *input = enabled[i];
- return 0;
+ brw_add_validated_bo(brw, input->bo);
+ }
}
-void brw_emit_vertices( struct brw_context *brw,
- GLuint min_index,
- GLuint max_index )
+static void brw_emit_vertices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
@@ -451,6 +452,7 @@ void brw_emit_vertices( struct brw_context *brw,
enabled[nr_enabled++] = input;
}
+ brw_emit_query_begin(brw);
/* Now emit VB and VEP state packets.
*
@@ -469,16 +471,10 @@ void brw_emit_vertices( struct brw_context *brw,
BRW_VB0_ACCESS_VERTEXDATA |
(input->stride << BRW_VB0_PITCH_SHIFT));
OUT_RELOC(input->bo,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+ I915_GEM_DOMAIN_VERTEX, 0,
input->offset);
- OUT_BATCH(max_index);
+ OUT_BATCH(brw->vb.max_index);
OUT_BATCH(0); /* Instance data step rate */
-
- /* Unreference the buffer so it can get freed, now that we won't
- * touch it any more.
- */
- dri_bo_unreference(input->bo);
- input->bo = NULL;
}
ADVANCE_BATCH();
@@ -515,18 +511,31 @@ void brw_emit_vertices( struct brw_context *brw,
ADVANCE_BATCH();
}
-int brw_prepare_indices( struct brw_context *brw,
- const struct _mesa_index_buffer *index_buffer,
- dri_bo **bo_return,
- GLuint *offset_return)
+const struct brw_tracked_state brw_vertices = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES,
+ .cache = 0,
+ },
+ .prepare = brw_prepare_vertices,
+ .emit = brw_emit_vertices,
+};
+
+static void brw_prepare_indices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = &brw->intel;
- GLuint ib_size = get_size(index_buffer->type) * index_buffer->count;
- dri_bo *bo;
- struct gl_buffer_object *bufferobj = index_buffer->obj;
- GLuint offset = (GLuint)index_buffer->ptr;
- int ret;
+ const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
+ GLuint ib_size;
+ dri_bo *bo = NULL;
+ struct gl_buffer_object *bufferobj;
+ GLuint offset;
+
+ if (index_buffer == NULL)
+ return;
+
+ ib_size = get_size(index_buffer->type) * index_buffer->count;
+ bufferobj = index_buffer->obj;
/* Turn into a proper VBO:
*/
@@ -540,6 +549,8 @@ int brw_prepare_indices( struct brw_context *brw,
*/
dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
} else {
+ offset = (GLuint)index_buffer->ptr;
+
/* If the index buffer isn't aligned to its element size, we have to
* rebase it into a temporary.
*/
@@ -562,19 +573,24 @@ int brw_prepare_indices( struct brw_context *brw,
}
}
- *bo_return = bo;
- *offset_return = offset;
- ret = dri_bufmgr_check_aperture_space(bo);
- return ret;
+ dri_bo_unreference(brw->ib.bo);
+ brw->ib.bo = bo;
+ brw->ib.offset = offset;
+
+ brw_add_validated_bo(brw, brw->ib.bo);
}
-void brw_emit_indices(struct brw_context *brw,
- const struct _mesa_index_buffer *index_buffer,
- dri_bo *bo,
- GLuint offset)
+static void brw_emit_indices(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- GLuint ib_size = get_size(index_buffer->type) * index_buffer->count;
+ const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
+ GLuint ib_size;
+
+ if (index_buffer == NULL)
+ return;
+
+ ib_size = get_size(index_buffer->type) * index_buffer->count;
+
/* Emit the indexbuffer packet:
*/
{
@@ -590,13 +606,23 @@ void brw_emit_indices(struct brw_context *brw,
BEGIN_BATCH(4, IGNORE_CLIPRECTS);
OUT_BATCH( ib.header.dword );
- OUT_RELOC( bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, offset);
- OUT_RELOC( bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- offset + ib_size);
+ OUT_RELOC(brw->ib.bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ brw->ib.offset);
+ OUT_RELOC(brw->ib.bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ brw->ib.offset + ib_size);
OUT_BATCH( 0 );
ADVANCE_BATCH();
-
- dri_bo_unreference(bo);
}
}
+const struct brw_tracked_state brw_indices = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH | BRW_NEW_INDICES,
+ .cache = 0,
+ },
+ .prepare = brw_prepare_indices,
+ .emit = brw_emit_indices,
+};
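
Client vertex arrays are still copied into a shared "temporary VBO" that is sub-allocated linearly and replaced (wrapped) when a request does not fit; what changes in this file is that already-uploaded input BOs are kept referenced across a re-validate instead of being dropped after emit, and the prepare/emit halves become tracked-state atoms. A stand-alone sketch of the wrap-or-suballocate step behind get_space()/wrap_buffers(); the wrap condition is not shown in the hunk above, so the simple overflow test here is an assumption.

/* Stand-in types; malloc()/free() replace dri_bo_alloc()/dri_bo_unreference(),
 * no error handling -- sketch only. */
#include <stdlib.h>

struct fake_bo { size_t size; };

struct fake_upload {
   struct fake_bo *bo;
   size_t offset;          /* next free byte in the upload BO */
};

static void wrap_buffers(struct fake_upload *u, size_t size)
{
   free(u->bo);                          /* the real code unreferences the old BO */
   u->bo = malloc(sizeof(*u->bo));       /* dri_bo_alloc(bufmgr, "temporary VBO", size, 1) */
   u->bo->size = size;
   u->offset = 0;
}

static size_t get_space(struct fake_upload *u, size_t size)
{
   if (u->bo == NULL || u->offset + size > u->bo->size)   /* assumed wrap condition */
      wrap_buffers(u, size);

   u->offset += size;
   return u->offset - size;              /* offset at which the array gets copied */
}
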
diff --git a/i965/brw_eu.h b/i965/brw_eu.h
index 207b8b7..9e2b39a 100644
--- a/i965/brw_eu.h
+++ b/i965/brw_eu.h
@@ -65,7 +65,7 @@ struct brw_reg
GLuint abs:1; /* source only */
GLuint vstride:4; /* source only */
GLuint width:3; /* src only, align1 only */
- GLuint hstride:2; /* src only, align1 only */
+ GLuint hstride:2; /* align1 only */
GLuint address_mode:1; /* relative addressing, hopefully! */
GLuint pad0:1;
@@ -129,17 +129,28 @@ static INLINE int type_sz( GLuint type )
}
}
+/**
+ * Construct a brw_reg.
+ * \param file one of the BRW_x_REGISTER_FILE values
+ * \param nr register number/index
+ * \param subnr register sub number
+ * \param type one of BRW_REGISTER_TYPE_x
+ * \param vstride one of BRW_VERTICAL_STRIDE_x
+ * \param width one of BRW_WIDTH_x
+ * \param hstride one of BRW_HORIZONTAL_STRIDE_x
+ * \param swizzle one of BRW_SWIZZLE_x
+ * \param writemask WRITEMASK_X/Y/Z/W bitfield
+ */
static INLINE struct brw_reg brw_reg( GLuint file,
- GLuint nr,
- GLuint subnr,
- GLuint type,
- GLuint vstride,
- GLuint width,
- GLuint hstride,
- GLuint swizzle,
- GLuint writemask)
-{
-
+ GLuint nr,
+ GLuint subnr,
+ GLuint type,
+ GLuint vstride,
+ GLuint width,
+ GLuint hstride,
+ GLuint swizzle,
+ GLuint writemask )
+{
struct brw_reg reg;
reg.type = type;
reg.file = file;
@@ -166,6 +177,7 @@ static INLINE struct brw_reg brw_reg( GLuint file,
return reg;
}
+/** Construct float[16] register */
static INLINE struct brw_reg brw_vec16_reg( GLuint file,
GLuint nr,
GLuint subnr )
@@ -181,6 +193,7 @@ static INLINE struct brw_reg brw_vec16_reg( GLuint file,
WRITEMASK_XYZW);
}
+/** Construct float[8] register */
static INLINE struct brw_reg brw_vec8_reg( GLuint file,
GLuint nr,
GLuint subnr )
@@ -196,7 +209,7 @@ static INLINE struct brw_reg brw_vec8_reg( GLuint file,
WRITEMASK_XYZW);
}
-
+/** Construct float[4] register */
static INLINE struct brw_reg brw_vec4_reg( GLuint file,
GLuint nr,
GLuint subnr )
@@ -212,7 +225,7 @@ static INLINE struct brw_reg brw_vec4_reg( GLuint file,
WRITEMASK_XYZW);
}
-
+/** Construct float[2] register */
static INLINE struct brw_reg brw_vec2_reg( GLuint file,
GLuint nr,
GLuint subnr )
@@ -228,6 +241,7 @@ static INLINE struct brw_reg brw_vec2_reg( GLuint file,
WRITEMASK_XY);
}
+/** Construct float[1] register */
static INLINE struct brw_reg brw_vec1_reg( GLuint file,
GLuint nr,
GLuint subnr )
@@ -277,6 +291,7 @@ static INLINE struct brw_reg byte_offset( struct brw_reg reg,
}
+/** Construct unsigned word[16] register */
static INLINE struct brw_reg brw_uw16_reg( GLuint file,
GLuint nr,
GLuint subnr )
@@ -284,6 +299,7 @@ static INLINE struct brw_reg brw_uw16_reg( GLuint file,
return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
+/** Construct unsigned word[8] register */
static INLINE struct brw_reg brw_uw8_reg( GLuint file,
GLuint nr,
GLuint subnr )
@@ -291,6 +307,7 @@ static INLINE struct brw_reg brw_uw8_reg( GLuint file,
return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
+/** Construct unsigned word[1] register */
static INLINE struct brw_reg brw_uw1_reg( GLuint file,
GLuint nr,
GLuint subnr )
@@ -311,6 +328,7 @@ static INLINE struct brw_reg brw_imm_reg( GLuint type )
0);
}
+/** Construct float immediate register */
static INLINE struct brw_reg brw_imm_f( GLfloat f )
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
@@ -318,6 +336,7 @@ static INLINE struct brw_reg brw_imm_f( GLfloat f )
return imm;
}
+/** Construct integer immediate register */
static INLINE struct brw_reg brw_imm_d( GLint d )
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
@@ -325,6 +344,7 @@ static INLINE struct brw_reg brw_imm_d( GLint d )
return imm;
}
+/** Construct uint immediate register */
static INLINE struct brw_reg brw_imm_ud( GLuint ud )
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
@@ -332,6 +352,7 @@ static INLINE struct brw_reg brw_imm_ud( GLuint ud )
return imm;
}
+/** Construct ushort immediate register */
static INLINE struct brw_reg brw_imm_uw( GLushort uw )
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
@@ -339,6 +360,7 @@ static INLINE struct brw_reg brw_imm_uw( GLushort uw )
return imm;
}
+/** Construct short immediate register */
static INLINE struct brw_reg brw_imm_w( GLshort w )
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
@@ -350,8 +372,7 @@ static INLINE struct brw_reg brw_imm_w( GLshort w )
* numbers alias with _V and _VF below:
*/
-/* Vector of eight signed half-byte values:
- */
+/** Construct vector of eight signed half-byte values */
static INLINE struct brw_reg brw_imm_v( GLuint v )
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
@@ -362,8 +383,7 @@ static INLINE struct brw_reg brw_imm_v( GLuint v )
return imm;
}
-/* Vector of four 8-bit float values:
- */
+/** Construct vector of four 8-bit float values */
static INLINE struct brw_reg brw_imm_vf( GLuint v )
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
@@ -400,38 +420,43 @@ static INLINE struct brw_reg brw_address( struct brw_reg reg )
return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
}
-
-static INLINE struct brw_reg brw_vec1_grf( GLuint nr,
- GLuint subnr )
+/** Construct float[1] general-purpose register */
+static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr )
{
return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
-static INLINE struct brw_reg brw_vec8_grf( GLuint nr,
- GLuint subnr )
+/** Construct float[2] general-purpose register */
+static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr )
{
- return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+ return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
-static INLINE struct brw_reg brw_vec4_grf( GLuint nr,
- GLuint subnr )
+/** Construct float[4] general-purpose register */
+static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr )
{
return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
-
-static INLINE struct brw_reg brw_vec2_grf( GLuint nr,
- GLuint subnr )
+/** Construct float[8] general-purpose register */
+static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr )
{
- return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+ return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
-static INLINE struct brw_reg brw_uw8_grf( GLuint nr,
- GLuint subnr )
+
+static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr )
{
return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
+static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr )
+{
+ return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+
+/** Construct null register (usually used for setting condition codes) */
static INLINE struct brw_reg brw_null_reg( void )
{
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
@@ -518,13 +543,13 @@ static INLINE struct brw_reg stride( struct brw_reg reg,
GLuint width,
GLuint hstride )
{
-
reg.vstride = cvt(vstride);
reg.width = cvt(width) - 1;
reg.hstride = cvt(hstride);
return reg;
}
+
static INLINE struct brw_reg vec16( struct brw_reg reg )
{
return stride(reg, 16,16,1);
@@ -550,6 +575,7 @@ static INLINE struct brw_reg vec1( struct brw_reg reg )
return stride(reg, 0,1,0);
}
+
static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt )
{
return vec1(suboffset(reg, elt));
@@ -681,7 +707,7 @@ static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset
static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
{
- return &p->store[p->nr_insn];
+ return &p->store[p->nr_insn];
}
void brw_pop_insn_state( struct brw_compile *p );
@@ -727,6 +753,7 @@ ALU2(ADD)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
+ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
diff --git a/i965/brw_eu_debug.c b/i965/brw_eu_debug.c
index 2dff1ad..91dbbd5 100644
--- a/i965/brw_eu_debug.c
+++ b/i965/brw_eu_debug.c
@@ -30,9 +30,9 @@
*/
-#include "mtypes.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
#include "brw_eu.h"
-#include "imports.h"
void brw_print_reg( struct brw_reg hwreg )
{
diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c
index 0bfbec9..4e099b5 100644
--- a/i965/brw_eu_emit.c
+++ b/i965/brw_eu_emit.c
@@ -64,7 +64,9 @@ static void brw_set_dest( struct brw_instruction *insn,
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits1.da1.dest_subreg_nr = dest.subnr;
- insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ insn->bits1.da1.dest_horiz_stride = dest.hstride;
}
else {
insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
@@ -78,7 +80,9 @@ static void brw_set_dest( struct brw_instruction *insn,
*/
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
- insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ insn->bits1.ia1.dest_horiz_stride = dest.hstride;
}
else {
insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
@@ -329,14 +333,14 @@ static void brw_set_sampler_message(struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) {
- insn->bits3.sampler_gm45_g4x.binding_table_index = binding_table_index;
- insn->bits3.sampler_gm45_g4x.sampler = sampler;
- insn->bits3.sampler_gm45_g4x.msg_type = msg_type;
- insn->bits3.sampler_gm45_g4x.response_length = response_length;
- insn->bits3.sampler_gm45_g4x.msg_length = msg_length;
- insn->bits3.sampler_gm45_g4x.end_of_thread = eot;
- insn->bits3.sampler_gm45_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+ if (BRW_IS_G4X(brw)) {
+ insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
+ insn->bits3.sampler_g4x.sampler = sampler;
+ insn->bits3.sampler_g4x.msg_type = msg_type;
+ insn->bits3.sampler_g4x.response_length = response_length;
+ insn->bits3.sampler_g4x.msg_length = msg_length;
+ insn->bits3.sampler_g4x.end_of_thread = eot;
+ insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
} else {
insn->bits3.sampler.binding_table_index = binding_table_index;
insn->bits3.sampler.sampler = sampler;
@@ -435,6 +439,7 @@ ALU2(ADD)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
+ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
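
brw_set_dest() stops hard-coding a destination horizontal stride of one: the stride now comes from the brw_reg itself, and only a stride of zero (meaningless for a destination) is coerced up to one in align1 mode. A one-function restatement of that coercion; the two encodings below mirror the driver's BRW_HORIZONTAL_STRIDE_* values only as an assumption, since the stride defines are not shown in this section.

/* Destination horizontal stride handling as added to brw_set_dest(). */
#define FAKE_HORIZONTAL_STRIDE_0 0   /* assumed encodings, see lead-in */
#define FAKE_HORIZONTAL_STRIDE_1 1

static unsigned dest_horiz_stride(unsigned hstride)
{
   /* A destination cannot broadcast like a scalar source; treat stride 0 as 1. */
   if (hstride == FAKE_HORIZONTAL_STRIDE_0)
      hstride = FAKE_HORIZONTAL_STRIDE_1;
   return hstride;
}
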
diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c
index 8a8fb50..4ea660a 100644
--- a/i965/brw_fallback.c
+++ b/i965/brw_fallback.c
@@ -25,19 +25,20 @@
*
**************************************************************************/
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+
#include "swrast_setup/swrast_setup.h"
#include "swrast/swrast.h"
#include "tnl/tnl.h"
-#include "context.h"
#include "brw_context.h"
#include "brw_fallback.h"
-#include "glheader.h"
-#include "enums.h"
-#include "glapi.h"
-#include "imports.h"
-#include "macros.h"
-#include "mtypes.h"
+#include "glapi/glapi.h"
#define FILE_DEBUG_FLAG DEBUG_FALLBACKS
@@ -73,10 +74,7 @@ static GLboolean do_check_fallback(struct brw_context *brw)
if (texUnit->_ReallyEnabled) {
struct intel_texture_object *intelObj = intel_texture_object(texUnit->_Current);
struct gl_texture_image *texImage = intelObj->base.Image[0][intelObj->firstLevel];
- if (texImage->Border ||
- ((texImage->_BaseFormat == GL_DEPTH_COMPONENT) &&
- ((texImage->TexObject->WrapS == GL_CLAMP_TO_BORDER) ||
- (texImage->TexObject->WrapT == GL_CLAMP_TO_BORDER)))) {
+ if (texImage->Border) {
DBG("FALLBACK: texture border\n");
return GL_TRUE;
}
@@ -95,10 +93,9 @@ static GLboolean do_check_fallback(struct brw_context *brw)
return GL_FALSE;
}
-static int check_fallback(struct brw_context *brw)
+static void check_fallback(struct brw_context *brw)
{
brw->intel.Fallback = do_check_fallback(brw);
- return 0;
}
const struct brw_tracked_state brw_check_fallback = {
diff --git a/i965/brw_fallback.h b/i965/brw_fallback.h
index 684a46c..50dcdac 100644
--- a/i965/brw_fallback.h
+++ b/i965/brw_fallback.h
@@ -28,7 +28,7 @@
#ifndef BRW_FALLBACK_H
#define BRW_FALLBACK_H
-#include "mtypes.h" /* for GLcontext... */
+#include "main/mtypes.h" /* for GLcontext... */
struct brw_context;
struct vbo_prim;
diff --git a/i965/brw_gs.c b/i965/brw_gs.c
index 9419315..a8b74a0 100644
--- a/i965/brw_gs.c
+++ b/i965/brw_gs.c
@@ -29,9 +29,9 @@
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "intel_batchbuffer.h"
@@ -162,10 +162,9 @@ static void populate_key( struct brw_context *brw,
/* Calculate interpolants for triangle and line rasterization.
*/
-static int prepare_gs_prog( struct brw_context *brw )
+static void prepare_gs_prog(struct brw_context *brw)
{
struct brw_gs_prog_key key;
- int ret = 0;
/* Populate the key:
*/
populate_key(brw, &key);
@@ -183,11 +182,7 @@ static int prepare_gs_prog( struct brw_context *brw )
&brw->gs.prog_data);
if (brw->gs.prog_bo == NULL)
compile_gs_prog( brw, &key );
-
- ret |= dri_bufmgr_check_aperture_space(brw->gs.prog_bo);
}
-
- return ret;
}
diff --git a/i965/brw_gs_emit.c b/i965/brw_gs_emit.c
index 9abb94d..22e0d25 100644
--- a/i965/brw_gs_emit.c
+++ b/i965/brw_gs_emit.c
@@ -30,9 +30,9 @@
*/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "shader/program.h"
#include "intel_batchbuffer.h"
diff --git a/i965/brw_gs_state.c b/i965/brw_gs_state.c
index f1f9e01..27023cf 100644
--- a/i965/brw_gs_state.c
+++ b/i965/brw_gs_state.c
@@ -34,7 +34,7 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
-#include "macros.h"
+#include "main/macros.h"
struct brw_gs_unit_key {
unsigned int total_grf;
@@ -106,17 +106,17 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
if (key->prog_active) {
/* Emit GS program relocation */
- dri_emit_reloc(bo,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- gs.thread0.grf_reg_count << 1,
- offsetof(struct brw_gs_unit_state, thread0),
- brw->gs.prog_bo);
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ gs.thread0.grf_reg_count << 1,
+ offsetof(struct brw_gs_unit_state, thread0),
+ brw->gs.prog_bo);
}
return bo;
}
-static int prepare_gs_unit( struct brw_context *brw )
+static void prepare_gs_unit(struct brw_context *brw)
{
struct brw_gs_unit_key key;
@@ -130,7 +130,6 @@ static int prepare_gs_unit( struct brw_context *brw )
if (brw->gs.state_bo == NULL) {
brw->gs.state_bo = gs_unit_create_from_key(brw, &key);
}
- return dri_bufmgr_check_aperture_space(brw->gs.state_bo);
}
const struct brw_tracked_state brw_gs_unit = {
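
The unit-state relocations in this patch consistently pass (read domains, write domain, delta, offset within the state BO, target BO) to dri_bo_emit_reloc(), as read from the call sites above; the delta is a small value added to the relocated pointer, here the GRF register count packed into the low bits of thread0. A hedged sketch of what such a relocation resolves to once the kernel has placed the target buffer.

/* Hedged sketch: the dword at 'offset' inside the state BO ends up holding
 * the target BO's final GPU address plus the delta.  Fake types only. */
#include <stdint.h>

struct fake_bo { uint32_t gpu_address; };

static uint32_t reloc_value(const struct fake_bo *target, uint32_t delta)
{
   return target->gpu_address + delta;
}
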
diff --git a/i965/brw_metaops.c b/i965/brw_metaops.c
index 252a899..41bfa2e 100644
--- a/i965/brw_metaops.c
+++ b/i965/brw_metaops.c
@@ -32,9 +32,9 @@
-#include "glheader.h"
-#include "context.h"
-#include "macros.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
#include "shader/arbprogparse.h"
diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c
index 62df259..627705f 100644
--- a/i965/brw_misc_state.c
+++ b/i965/brw_misc_state.c
@@ -71,6 +71,38 @@ const struct brw_tracked_state brw_blend_constant_color = {
.emit = upload_blend_constant_color
};
+/* Constant single cliprect for framebuffer object or DRI2 drawing */
+static void upload_drawing_rect(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ GLcontext *ctx = &intel->ctx;
+
+ if (!intel->constant_cliprect)
+ return;
+
+ BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
+ OUT_BATCH(0); /* xmin, ymin */
+ OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
+ ((ctx->DrawBuffer->Height - 1) << 16));
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_drawing_rect = {
+ .dirty = {
+ .mesa = _NEW_BUFFERS,
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_drawing_rect
+};
+
+static void prepare_binding_table_pointers(struct brw_context *brw)
+{
+ brw_add_validated_bo(brw, brw->wm.bind_bo);
+}
+
/**
* Upload the binding table pointers, which point each stage's array of surface
* state pointers.
@@ -88,7 +120,9 @@ static void upload_binding_table_pointers(struct brw_context *brw)
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
- OUT_RELOC(brw->wm.bind_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+ OUT_RELOC(brw->wm.bind_bo,
+ I915_GEM_DOMAIN_SAMPLER, 0,
+ 0);
ADVANCE_BATCH();
}
@@ -98,6 +132,7 @@ const struct brw_tracked_state brw_binding_table_pointers = {
.brw = BRW_NEW_BATCH,
.cache = CACHE_NEW_SURF_BIND,
},
+ .prepare = prepare_binding_table_pointers,
.emit = upload_binding_table_pointers,
};
@@ -114,40 +149,32 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
BEGIN_BATCH(7, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
- OUT_RELOC(brw->vs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+ OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
if (brw->gs.prog_active)
- OUT_RELOC(brw->gs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1);
+ OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
else
OUT_BATCH(0);
if (!brw->metaops.active)
- OUT_RELOC(brw->clip.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1);
+ OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
else
OUT_BATCH(0);
- OUT_RELOC(brw->sf.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
- OUT_RELOC(brw->wm.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
- OUT_RELOC(brw->cc.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+ OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
ADVANCE_BATCH();
brw->state.dirty.brw |= BRW_NEW_PSP;
}
-#if 0
-/* Combined into brw_psp_urb_cbs */
-const struct brw_tracked_state brw_pipelined_state_pointers = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_METAOPS | BRW_NEW_BATCH,
- .cache = (CACHE_NEW_VS_UNIT |
- CACHE_NEW_GS_UNIT |
- CACHE_NEW_GS_PROG |
- CACHE_NEW_CLIP_UNIT |
- CACHE_NEW_SF_UNIT |
- CACHE_NEW_WM_UNIT |
- CACHE_NEW_CC_UNIT)
- },
- .emit = upload_pipelined_state_pointers
-};
-#endif
+
+static void prepare_psp_urb_cbs(struct brw_context *brw)
+{
+ brw_add_validated_bo(brw, brw->vs.state_bo);
+ brw_add_validated_bo(brw, brw->gs.state_bo);
+ brw_add_validated_bo(brw, brw->clip.state_bo);
+ brw_add_validated_bo(brw, brw->wm.state_bo);
+ brw_add_validated_bo(brw, brw->cc.state_bo);
+}
static void upload_psp_urb_cbs(struct brw_context *brw )
{
@@ -156,7 +183,6 @@ static void upload_psp_urb_cbs(struct brw_context *brw )
brw_upload_constant_buffer_state(brw);
}
-
const struct brw_tracked_state brw_psp_urb_cbs = {
.dirty = {
.mesa = 0,
@@ -169,30 +195,23 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
CACHE_NEW_WM_UNIT |
CACHE_NEW_CC_UNIT)
},
+ .prepare = prepare_psp_urb_cbs,
.emit = upload_psp_urb_cbs,
};
-/**
- * Upload the depthbuffer offset and format.
- *
- * We have to do this per state validation as we need to emit the relocation
- * in the batch buffer.
- */
-
-static int prepare_depthbuffer(struct brw_context *brw)
+static void prepare_depthbuffer(struct brw_context *brw)
{
struct intel_region *region = brw->state.depth_region;
- if (!region || !region->buffer)
- return 0;
- return dri_bufmgr_check_aperture_space(region->buffer);
+ if (region != NULL)
+ brw_add_validated_bo(brw, region->buffer);
}
static void emit_depthbuffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct intel_region *region = brw->state.depth_region;
- unsigned int len = (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? sizeof(struct brw_depthbuffer_gm45_g4x) / 4 : sizeof(struct brw_depthbuffer) / 4;
+ unsigned int len = BRW_IS_G4X(brw) ? 6 : 5;
if (region == NULL) {
BEGIN_BATCH(len, IGNORE_CLIPRECTS);
@@ -203,7 +222,7 @@ static void emit_depthbuffer(struct brw_context *brw)
OUT_BATCH(0);
OUT_BATCH(0);
- if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw))
+ if (BRW_IS_G4X(brw))
OUT_BATCH(0);
ADVANCE_BATCH();
@@ -230,16 +249,17 @@ static void emit_depthbuffer(struct brw_context *brw)
OUT_BATCH(((region->pitch * region->cpp) - 1) |
(format << 18) |
(BRW_TILEWALK_YMAJOR << 26) |
- (region->tiled << 27) |
+ ((region->tiling != I915_TILING_NONE) << 27) |
(BRW_SURFACE_2D << 29));
OUT_RELOC(region->buffer,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0);
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0);
OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
((region->pitch - 1) << 6) |
((region->height - 1) << 19));
OUT_BATCH(0);
- if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw))
+ if (BRW_IS_G4X(brw))
OUT_BATCH(0);
ADVANCE_BATCH();
@@ -324,7 +344,7 @@ static void upload_aa_line_parameters(struct brw_context *brw)
{
struct brw_aa_line_parameters balp;
- if (!(BRW_IS_GM45(brw) || BRW_IS_G4X(brw)))
+ if (!BRW_IS_G4X(brw))
return;
/* use legacy aa line coverage computation */
@@ -380,40 +400,6 @@ const struct brw_tracked_state brw_line_stipple = {
};
-
-/***********************************************************************
- * Misc constant state packets
- */
-
-static void upload_pipe_control(struct brw_context *brw)
-{
- struct brw_pipe_control pc;
-
- return;
-
- memset(&pc, 0, sizeof(pc));
-
- pc.header.opcode = CMD_PIPE_CONTROL;
- pc.header.length = sizeof(pc)/4 - 2;
- pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE;
-
- pc.header.instruction_state_cache_flush_enable = 1;
-
- pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL;
-
- BRW_BATCH_STRUCT(brw, &pc);
-}
-
-const struct brw_tracked_state brw_pipe_control = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH,
- .cache = 0
- },
- .emit = upload_pipe_control
-};
-
-
/***********************************************************************
 * Misc invariant state packets
*/
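
A recurring pattern in the brw_misc_state.c hunks above -- prepare_binding_table_pointers(), prepare_psp_urb_cbs(), prepare_depthbuffer() -- replaces the old per-atom dri_bufmgr_check_aperture_space() return codes with a "prepare" phase that simply collects every buffer object the atom will later relocate against. A minimal, self-contained sketch of that bookkeeping, using stand-in types that are not part of the patch (the real brw_add_validated_bo() is added to i965/brw_state.h further down in this diff):

    #include <assert.h>
    #include <stddef.h>

    /* Stand-ins for the real libdrm/driver types -- illustration only. */
    typedef struct dri_bo { int refcount; } dri_bo;
    static void dri_bo_reference(dri_bo *bo)   { bo->refcount++; }
    static void dri_bo_unreference(dri_bo *bo) { if (bo) bo->refcount--; }

    #define MAX_VALIDATED_BOS 32

    struct validated_list {
        dri_bo *bos[MAX_VALIDATED_BOS];
        int count;
    };

    /* prepare phase: each state atom adds the BOs it will relocate against. */
    static void add_validated_bo(struct validated_list *l, dri_bo *bo)
    {
        assert(l->count < MAX_VALIDATED_BOS);
        if (bo != NULL) {
            dri_bo_reference(bo);
            l->bos[l->count++] = bo;
        }
    }

    /* Start of the next validation round (or a flush): drop the references. */
    static void clear_validated_bos(struct validated_list *l)
    {
        for (int i = 0; i < l->count; i++) {
            dri_bo_unreference(l->bos[i]);
            l->bos[i] = NULL;
        }
        l->count = 0;
    }

    int main(void)
    {
        struct validated_list list = { { NULL }, 0 };
        dri_bo a = { 1 }, b = { 1 };

        add_validated_bo(&list, &a);   /* e.g. from prepare_depthbuffer()  */
        add_validated_bo(&list, &b);   /* e.g. from prepare_psp_urb_cbs()  */
        /* ...a single aperture check over list.bos[0..count-1] goes here... */
        clear_validated_bos(&list);
        return 0;
    }

With the full list in hand, the driver can ask the buffer manager once whether everything fits before emitting any packets, instead of OR-ing per-atom return values as the removed code did.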
diff --git a/i965/brw_program.c b/i965/brw_program.c
index c38610b..0c86911 100644
--- a/i965/brw_program.c
+++ b/i965/brw_program.c
@@ -111,13 +111,18 @@ static void brwProgramStringNotify( GLcontext *ctx,
struct gl_program *prog )
{
if (target == GL_FRAGMENT_PROGRAM_ARB) {
+ struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
struct brw_context *brw = brw_context(ctx);
struct brw_fragment_program *p = (struct brw_fragment_program *)prog;
struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
+ if (fprog->FogOption) {
+ _mesa_append_fog_code(ctx, fprog);
+ fprog->FogOption = GL_NONE;
+ }
+
if (p == fp)
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
p->id = brw->program_id++;
- p->param_state = p->program.Base.Parameters->StateFlags;
}
else if (target == GL_VERTEX_PROGRAM_ARB) {
struct brw_context *brw = brw_context(ctx);
@@ -129,7 +134,6 @@ static void brwProgramStringNotify( GLcontext *ctx,
_mesa_insert_mvp_code(ctx, &p->program);
}
p->id = brw->program_id++;
- p->param_state = p->program.Base.Parameters->StateFlags;
/* Also tell tnl about it:
*/
diff --git a/i965/brw_queryobj.c b/i965/brw_queryobj.c
new file mode 100644
index 0000000..cb9169e
--- /dev/null
+++ b/i965/brw_queryobj.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file support for ARB_query_object
+ *
+ * ARB_query_object is implemented by using the PIPE_CONTROL command to stall
+ * execution on the completion of previous depth tests, and write the
+ * current PS_DEPTH_COUNT to a buffer object.
+ *
+ * We use before and after counts when drawing during a query so that
+ * we don't pick up other clients' query data in ours. To reduce overhead,
+ * a single BO is used to record the query data for all active queries at
+ * once. This also gives us a simple bound on how much batchbuffer space is
+ * required for handling queries, so that we can be sure that we won't
+ * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT.
+ */
+#include "main/simple_list.h"
+#include "main/imports.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+
+/** Waits on the query object's BO and totals the results for this query */
+static void
+brw_queryobj_get_results(struct brw_query_object *query)
+{
+ int i;
+ uint64_t *results;
+
+ if (query->bo == NULL)
+ return;
+
+ /* Map and count the pixels from the current query BO */
+ dri_bo_map(query->bo, GL_FALSE);
+ results = query->bo->virtual;
+ for (i = query->first_index; i <= query->last_index; i++) {
+ query->Base.Result += results[i * 2 + 1] - results[i * 2];
+ }
+ dri_bo_unmap(query->bo);
+
+ dri_bo_unreference(query->bo);
+ query->bo = NULL;
+}
+
+static struct gl_query_object *
+brw_new_query_object(GLcontext *ctx, GLuint id)
+{
+ struct brw_query_object *query;
+
+ query = _mesa_calloc(sizeof(struct brw_query_object));
+
+ query->Base.Id = id;
+ query->Base.Result = 0;
+ query->Base.Active = GL_FALSE;
+ query->Base.Ready = GL_TRUE;
+
+ return &query->Base;
+}
+
+static void
+brw_delete_query(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ dri_bo_unreference(query->bo);
+ _mesa_free(query);
+}
+
+static void
+brw_begin_query(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct intel_context *intel = intel_context(ctx);
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ /* Reset our driver's tracking of query state. */
+ dri_bo_unreference(query->bo);
+ query->bo = NULL;
+ query->first_index = -1;
+ query->last_index = -1;
+
+ insert_at_head(&brw->query.active_head, query);
+ intel->stats_wm++;
+}
+
+/**
+ * Begin the ARB_occlusion_query query on a query object.
+ */
+static void
+brw_end_query(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct intel_context *intel = intel_context(ctx);
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ /* Flush the batchbuffer in case it has writes to our query BO.
+ * Have later queries write to a new query BO so that further rendering
+ * doesn't delay the collection of our results.
+ */
+ if (query->bo) {
+ brw_emit_query_end(brw);
+ intel_batchbuffer_flush(intel->batch);
+
+ dri_bo_unreference(brw->query.bo);
+ brw->query.bo = NULL;
+ }
+
+ remove_from_list(query);
+
+ intel->stats_wm--;
+}
+
+static void brw_wait_query(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ brw_queryobj_get_results(query);
+ query->Base.Ready = GL_TRUE;
+}
+
+static void brw_check_query(GLcontext *ctx, struct gl_query_object *q)
+{
+ /* XXX: Need to expose dri_bo_is_idle from bufmgr. */
+#if 0
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ if (dri_bo_is_idle(query->bo)) {
+ brw_queryobj_get_results(query);
+ query->Base.Ready = GL_TRUE;
+ }
+#else
+ brw_wait_query(ctx, q);
+#endif
+}
+
+/** Called to set up the query BO and account for its aperture space */
+void
+brw_prepare_query_begin(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+
+ /* Skip if we're not doing any queries. */
+ if (is_empty_list(&brw->query.active_head))
+ return;
+
+ /* Get a new query BO if we're going to need it. */
+ if (brw->query.bo == NULL ||
+ brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
+ dri_bo_unreference(brw->query.bo);
+ brw->query.bo = NULL;
+
+ brw->query.bo = dri_bo_alloc(intel->bufmgr, "query", 4096, 1);
+ brw->query.index = 0;
+ }
+
+ brw_add_validated_bo(brw, brw->query.bo);
+}
+
+/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */
+void
+brw_emit_query_begin(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ struct brw_query_object *query;
+
+ /* Skip if we're not doing any queries, or we've emitted the start. */
+ if (brw->query.active || is_empty_list(&brw->query.active_head))
+ return;
+
+ BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT);
+ /* This object could be mapped cacheable, but we don't have an exposed
+ * mechanism to support that. Since it's going uncached, tell GEM that
+ * we're writing to it. The usual clflush should be all that's required
+ * to pick up the results.
+ */
+ OUT_RELOC(brw->query.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ PIPE_CONTROL_GLOBAL_GTT_WRITE |
+ ((brw->query.index * 2) * sizeof(uint64_t)));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ foreach(query, &brw->query.active_head) {
+ if (query->bo != brw->query.bo) {
+ if (query->bo != NULL)
+ brw_queryobj_get_results(query);
+ dri_bo_reference(brw->query.bo);
+ query->bo = brw->query.bo;
+ query->first_index = brw->query.index;
+ }
+ query->last_index = brw->query.index;
+ }
+ brw->query.active = GL_TRUE;
+}
+
+/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */
+void
+brw_emit_query_end(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+
+ if (!brw->query.active)
+ return;
+
+ BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT);
+ OUT_RELOC(brw->query.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ PIPE_CONTROL_GLOBAL_GTT_WRITE |
+ ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ brw->query.active = GL_FALSE;
+ brw->query.index++;
+}
+
+void brw_init_queryobj_functions(struct dd_function_table *functions)
+{
+ functions->NewQueryObject = brw_new_query_object;
+ functions->DeleteQuery = brw_delete_query;
+ functions->BeginQuery = brw_begin_query;
+ functions->EndQuery = brw_end_query;
+ functions->CheckQuery = brw_check_query;
+ functions->WaitQuery = brw_wait_query;
+}
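
The file comment at the top of brw_queryobj.c describes the scheme: every draw call issued while an occlusion query is active brackets its rendering with two PS_DEPTH_COUNT snapshots written into a shared 4096-byte BO, and brw_queryobj_get_results() later sums the (end - begin) pairs. A self-contained sketch of that accumulation step, assuming the same layout of one uint64_t pair per draw:

    #include <stdint.h>
    #include <stdio.h>

    /* Sum pixel counts from paired depth-count snapshots.
     * snapshots[2*i]   = PS_DEPTH_COUNT before draw i
     * snapshots[2*i+1] = PS_DEPTH_COUNT after draw i
     */
    static uint64_t sum_query_pairs(const uint64_t *snapshots,
                                    int first_index, int last_index)
    {
        uint64_t result = 0;
        for (int i = first_index; i <= last_index; i++)
            result += snapshots[i * 2 + 1] - snapshots[i * 2];
        return result;
    }

    int main(void)
    {
        /* Two draws: 100 and 42 samples passed the depth test. */
        uint64_t snapshots[] = { 1000, 1100, 1100, 1142 };
        printf("samples passed: %llu\n",
               (unsigned long long) sum_query_pairs(snapshots, 0, 1));
        return 0;
    }

Because other rendering also advances PS_DEPTH_COUNT, only the differences between a query's own before/after pairs are meaningful, which is why the driver records both ends of every draw rather than a single final counter value.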
diff --git a/i965/brw_sf.c b/i965/brw_sf.c
index 0b61748..1a11d54 100644
--- a/i965/brw_sf.c
+++ b/i965/brw_sf.c
@@ -30,9 +30,9 @@
*/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "intel_batchbuffer.h"
@@ -73,10 +73,12 @@ static void compile_sf_prog( struct brw_context *brw,
c.attr_to_idx[i] = idx;
c.idx_to_attr[idx] = i;
if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
- c.point_attrs[i].CoordReplace =
- brw->attribs.Point->CoordReplace[i - VERT_RESULT_TEX0];
- } else
- c.point_attrs[i].CoordReplace = GL_FALSE;
+ c.point_attrs[i].CoordReplace =
+ brw->attribs.Point->CoordReplace[i - VERT_RESULT_TEX0];
+ }
+ else {
+ c.point_attrs[i].CoordReplace = GL_FALSE;
+ }
idx++;
}
@@ -106,7 +108,6 @@ static void compile_sf_prog( struct brw_context *brw,
assert(0);
return;
}
-
/* get the program
*/
@@ -125,7 +126,7 @@ static void compile_sf_prog( struct brw_context *brw,
/* Calculate interpolants for triangle and line rasterization.
*/
-static int upload_sf_prog( struct brw_context *brw )
+static void upload_sf_prog(struct brw_context *brw)
{
struct brw_sf_prog_key key;
@@ -174,7 +175,6 @@ static int upload_sf_prog( struct brw_context *brw )
&brw->sf.prog_data);
if (brw->sf.prog_bo == NULL)
compile_sf_prog( brw, &key );
- return dri_bufmgr_check_aperture_space(brw->sf.prog_bo);
}
diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c
index 6fba8c8..ffdb0ae 100644
--- a/i965/brw_sf_emit.c
+++ b/i965/brw_sf_emit.c
@@ -30,9 +30,9 @@
*/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "intel_batchbuffer.h"
diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c
index 24388b7..242b704 100644
--- a/i965/brw_sf_state.c
+++ b/i965/brw_sf_state.c
@@ -34,30 +34,23 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
-#include "macros.h"
+#include "main/macros.h"
#include "intel_fbo.h"
-static int upload_sf_vp(struct brw_context *brw)
+static void upload_sf_vp(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
struct brw_sf_viewport sfv;
- struct intel_renderbuffer *irb =
- intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]);
GLfloat y_scale, y_bias;
memset(&sfv, 0, sizeof(sfv));
- if (ctx->DrawBuffer->Name) {
- /* User-created FBO */
- if (irb && !irb->RenderToTexture) {
- y_scale = -1.0;
- y_bias = ctx->DrawBuffer->Height;
- } else {
- y_scale = 1.0;
- y_bias = 0;
- }
- } else {
+ if (intel_rendering_to_texture(ctx)) {
+ y_scale = 1.0;
+ y_bias = 0;
+ }
+ else {
y_scale = -1.0;
y_bias = ctx->DrawBuffer->Height;
}
@@ -98,8 +91,6 @@ static int upload_sf_vp(struct brw_context *brw)
dri_bo_unreference(brw->sf.vp_bo);
brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
-
- return dri_bufmgr_check_aperture_space(brw->sf.vp_bo);
}
const struct brw_tracked_state brw_sf_vp = {
@@ -122,6 +113,7 @@ struct brw_sf_unit_key {
GLboolean scissor, line_smooth, point_sprite, point_attenuated;
float line_width;
float point_size;
+ GLboolean render_to_texture;
};
static void
@@ -152,6 +144,8 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
key->point_sprite = brw->attribs.Point->PointSprite;
key->point_size = brw->attribs.Point->Size;
key->point_attenuated = brw->attribs.Point->_Attenuated;
+
+ key->render_to_texture = intel_rendering_to_texture(&brw->intel.ctx);
}
static dri_bo *
@@ -174,7 +168,8 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
sf.thread4.nr_urb_entries = key->nr_urb_entries;
sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
- sf.thread4.max_threads = MIN2(12, key->nr_urb_entries / 2) - 1;
+ /* Each SF thread produces 1 PUE, and there can be up to 24 threads */
+ sf.thread4.max_threads = MIN2(24, key->nr_urb_entries) - 1;
if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
sf.thread4.max_threads = 0;
@@ -197,6 +192,11 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
else
sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+ /* The viewport is inverted for rendering to texture, and that inverts
+ * polygon front/back orientation.
+ */
+ sf.sf5.front_winding ^= key->render_to_texture;
+
switch (key->cull_face) {
case GL_FRONT:
sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
@@ -230,7 +230,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
/* XXX clamp max depends on AA vs. non-AA */
sf.sf7.sprite_point = key->point_sprite;
- sf.sf7.point_size = CLAMP(nearbyint(key->point_size), 1, 255) * (1<<3);
+ sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
sf.sf7.use_point_size_state = !key->point_attenuated;
sf.sf7.aa_line_distance_mode = 0;
@@ -253,27 +253,26 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
NULL, NULL);
/* Emit SF program relocation */
- dri_emit_reloc(bo,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- sf.thread0.grf_reg_count << 1,
- offsetof(struct brw_sf_unit_state, thread0),
- brw->sf.prog_bo);
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ sf.thread0.grf_reg_count << 1,
+ offsetof(struct brw_sf_unit_state, thread0),
+ brw->sf.prog_bo);
/* Emit SF viewport relocation */
- dri_emit_reloc(bo,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
- offsetof(struct brw_sf_unit_state, sf5),
- brw->sf.vp_bo);
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
+ offsetof(struct brw_sf_unit_state, sf5),
+ brw->sf.vp_bo);
return bo;
}
-static int upload_sf_unit( struct brw_context *brw )
+static void upload_sf_unit( struct brw_context *brw )
{
struct brw_sf_unit_key key;
dri_bo *reloc_bufs[2];
- int ret = 0;
sf_unit_populate_key(brw, &key);
@@ -288,15 +287,6 @@ static int upload_sf_unit( struct brw_context *brw )
if (brw->sf.state_bo == NULL) {
brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs);
}
-
- if (reloc_bufs[0])
- ret |= dri_bufmgr_check_aperture_space(reloc_bufs[0]);
-
- if (reloc_bufs[1])
- ret |= dri_bufmgr_check_aperture_space(reloc_bufs[1]);
-
- ret |= dri_bufmgr_check_aperture_space(brw->sf.state_bo);
- return ret;
}
const struct brw_tracked_state brw_sf_unit = {
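
In the brw_sf_state.c hunks above, upload_sf_vp() flips the viewport Y axis except when rendering to a texture, and sf_unit_create_from_key() XORs the front-face winding with the render-to-texture flag to compensate, since mirroring Y reverses the apparent orientation of triangles. A small standalone sketch of the same arithmetic, with a plain flag standing in for intel_rendering_to_texture():

    #include <stdio.h>

    /* Map a GL window-space Y coordinate (origin at the bottom) to a buffer
     * row, mirroring the y_scale/y_bias selection in upload_sf_vp().
     */
    static float window_y_to_buffer_y(float gl_y, int fb_height,
                                      int render_to_texture)
    {
        float y_scale = render_to_texture ? 1.0f : -1.0f;
        float y_bias  = render_to_texture ? 0.0f : (float) fb_height;
        return y_bias + y_scale * gl_y;
    }

    int main(void)
    {
        int render_to_texture = 1;   /* stand-in for intel_rendering_to_texture() */
        int front_winding_ccw = 1;   /* 1 ~ BRW_FRONTWINDING_CCW, 0 ~ CW */

        /* As in sf_unit_create_from_key(): inverting the viewport for
         * render-to-texture also inverts polygon front/back orientation,
         * so the winding bit is XOR'd with the render-to-texture flag.
         */
        front_winding_ccw ^= render_to_texture;

        printf("row for gl_y=0: %.0f, front winding ccw: %d\n",
               window_y_to_buffer_y(0.0f, 480, render_to_texture),
               front_winding_ccw);
        return 0;
    }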
diff --git a/i965/brw_state.h b/i965/brw_state.h
index d1fca05..df839c5 100644
--- a/i965/brw_state.h
+++ b/i965/brw_state.h
@@ -35,6 +35,16 @@
#include "brw_context.h"
+static inline void
+brw_add_validated_bo(struct brw_context *brw, dri_bo *bo)
+{
+ assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos));
+
+ if (bo != NULL) {
+ dri_bo_reference(bo);
+ brw->state.validated_bos[brw->state.validated_bo_count++] = bo;
+ }
+};
const struct brw_tracked_state brw_blend_constant_color;
const struct brw_tracked_state brw_cc_unit;
@@ -73,13 +83,23 @@ const struct brw_tracked_state brw_wm_unit;
const struct brw_tracked_state brw_psp_urb_cbs;
-const struct brw_tracked_state brw_active_vertprog;
-const struct brw_tracked_state brw_tnl_vertprog;
const struct brw_tracked_state brw_pipe_control;
const struct brw_tracked_state brw_clear_surface_cache;
const struct brw_tracked_state brw_clear_batch_cache;
+const struct brw_tracked_state brw_drawing_rect;
+const struct brw_tracked_state brw_indices;
+const struct brw_tracked_state brw_vertices;
+
+/***********************************************************************
+ * brw_state.c
+ */
+void brw_validate_state(struct brw_context *brw);
+void brw_upload_state(struct brw_context *brw);
+void brw_init_state(struct brw_context *brw);
+void brw_destroy_state(struct brw_context *brw);
+
/***********************************************************************
* brw_state_cache.c
*/
diff --git a/i965/brw_state_batch.c b/i965/brw_state_batch.c
index 77e2736..dc87859 100644
--- a/i965/brw_state_batch.c
+++ b/i965/brw_state_batch.c
@@ -33,7 +33,7 @@
#include "brw_state.h"
#include "intel_batchbuffer.h"
-#include "imports.h"
+#include "main/imports.h"
@@ -97,8 +97,6 @@ void brw_clear_batch_cache_flush( struct brw_context *brw )
{
clear_batch_cache(brw);
- brw->wrap = 0;
-
/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */
brw->state.dirty.mesa |= ~0;
diff --git a/i965/brw_state_cache.c b/i965/brw_state_cache.c
index d617650..d5b5166 100644
--- a/i965/brw_state_cache.c
+++ b/i965/brw_state_cache.c
@@ -58,7 +58,7 @@
#include "brw_state.h"
#include "intel_batchbuffer.h"
-#include "imports.h"
+#include "main/imports.h"
/* XXX: Fixme - have to include these to get the sizes of the prog_key
* structs:
@@ -214,10 +214,7 @@ brw_upload_cache( struct brw_cache *cache,
/* Create the buffer object to contain the data */
bo = dri_bo_alloc(cache->brw->intel.bufmgr,
- cache->name[cache_id], data_size, 1 << 6,
- DRM_BO_FLAG_MEM_LOCAL |
- DRM_BO_FLAG_CACHED |
- DRM_BO_FLAG_CACHED_MAPPED);
+ cache->name[cache_id], data_size, 1 << 6);
/* Set up the memory containing the key, aux_data, and reloc_bufs */
@@ -500,9 +497,10 @@ void brw_destroy_cache( struct brw_context *brw )
GLuint i;
brw_clear_cache(brw);
- for (i = 0; i < BRW_MAX_CACHE; i++)
+ for (i = 0; i < BRW_MAX_CACHE; i++) {
+ dri_bo_unreference(brw->cache.last_bo[i]);
free(brw->cache.name[i]);
-
+ }
free(brw->cache.items);
brw->cache.items = NULL;
brw->cache.size = 0;
diff --git a/i965/brw_state_dump.c b/i965/brw_state_dump.c
index 3a93f9f..b28c57c 100644
--- a/i965/brw_state_dump.c
+++ b/i965/brw_state_dump.c
@@ -25,7 +25,7 @@
*
*/
-#include "mtypes.h"
+#include "main/mtypes.h"
#include "brw_context.h"
#include "brw_state.h"
diff --git a/i965/brw_state_upload.c b/i965/brw_state_upload.c
index 3b2ccd4..4845859 100644
--- a/i965/brw_state_upload.c
+++ b/i965/brw_state_upload.c
@@ -33,7 +33,6 @@
#include "brw_context.h"
#include "brw_state.h"
-#include "dri_bufmgr.h"
#include "intel_batchbuffer.h"
/* This is used to initialize brw->state.atoms[]. We could use this
@@ -46,8 +45,6 @@ const struct brw_tracked_state *atoms[] =
{
&brw_check_fallback,
- &brw_tnl_vertprog,
- &brw_active_vertprog,
&brw_wm_input_sizes,
&brw_vs_prog,
&brw_gs_prog,
@@ -80,7 +77,6 @@ const struct brw_tracked_state *atoms[] =
*/
&brw_invarient_state,
&brw_state_base_address,
- &brw_pipe_control,
&brw_binding_table_pointers,
&brw_blend_constant_color,
@@ -102,6 +98,9 @@ const struct brw_tracked_state *atoms[] =
&brw_psp_urb_cbs,
#endif
+ &brw_drawing_rect,
+ &brw_indices,
+ &brw_vertices,
NULL, /* brw_constant_buffer */
};
@@ -169,48 +168,172 @@ static void xor_states( struct brw_state_flags *result,
result->cache = a->cache ^ b->cache;
}
+static void
+brw_clear_validated_bos(struct brw_context *brw)
+{
+ int i;
+
+ /* Clear the last round of validated bos */
+ for (i = 0; i < brw->state.validated_bo_count; i++) {
+ dri_bo_unreference(brw->state.validated_bos[i]);
+ brw->state.validated_bos[i] = NULL;
+ }
+ brw->state.validated_bo_count = 0;
+}
+
+struct dirty_bit_map {
+ uint32_t bit;
+ char *name;
+ uint32_t count;
+};
+
+#define DEFINE_BIT(name) {name, #name, 0}
+
+static struct dirty_bit_map mesa_bits[] = {
+ DEFINE_BIT(_NEW_MODELVIEW),
+ DEFINE_BIT(_NEW_PROJECTION),
+ DEFINE_BIT(_NEW_TEXTURE_MATRIX),
+ DEFINE_BIT(_NEW_COLOR_MATRIX),
+ DEFINE_BIT(_NEW_ACCUM),
+ DEFINE_BIT(_NEW_COLOR),
+ DEFINE_BIT(_NEW_DEPTH),
+ DEFINE_BIT(_NEW_EVAL),
+ DEFINE_BIT(_NEW_FOG),
+ DEFINE_BIT(_NEW_HINT),
+ DEFINE_BIT(_NEW_LIGHT),
+ DEFINE_BIT(_NEW_LINE),
+ DEFINE_BIT(_NEW_PIXEL),
+ DEFINE_BIT(_NEW_POINT),
+ DEFINE_BIT(_NEW_POLYGON),
+ DEFINE_BIT(_NEW_POLYGONSTIPPLE),
+ DEFINE_BIT(_NEW_SCISSOR),
+ DEFINE_BIT(_NEW_STENCIL),
+ DEFINE_BIT(_NEW_TEXTURE),
+ DEFINE_BIT(_NEW_TRANSFORM),
+ DEFINE_BIT(_NEW_VIEWPORT),
+ DEFINE_BIT(_NEW_PACKUNPACK),
+ DEFINE_BIT(_NEW_ARRAY),
+ DEFINE_BIT(_NEW_RENDERMODE),
+ DEFINE_BIT(_NEW_BUFFERS),
+ DEFINE_BIT(_NEW_MULTISAMPLE),
+ DEFINE_BIT(_NEW_TRACK_MATRIX),
+ DEFINE_BIT(_NEW_PROGRAM),
+ {0, 0, 0}
+};
+
+static struct dirty_bit_map brw_bits[] = {
+ DEFINE_BIT(BRW_NEW_URB_FENCE),
+ DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
+ DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
+ DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS),
+ DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
+ DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
+ DEFINE_BIT(BRW_NEW_PRIMITIVE),
+ DEFINE_BIT(BRW_NEW_CONTEXT),
+ DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
+ DEFINE_BIT(BRW_NEW_INPUT_VARYING),
+ DEFINE_BIT(BRW_NEW_PSP),
+ DEFINE_BIT(BRW_NEW_METAOPS),
+ DEFINE_BIT(BRW_NEW_FENCE),
+ DEFINE_BIT(BRW_NEW_INDICES),
+ DEFINE_BIT(BRW_NEW_VERTICES),
+ DEFINE_BIT(BRW_NEW_BATCH),
+ DEFINE_BIT(BRW_NEW_DEPTH_BUFFER),
+ {0, 0, 0}
+};
+
+static struct dirty_bit_map cache_bits[] = {
+ DEFINE_BIT(CACHE_NEW_CC_VP),
+ DEFINE_BIT(CACHE_NEW_CC_UNIT),
+ DEFINE_BIT(CACHE_NEW_WM_PROG),
+ DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR),
+ DEFINE_BIT(CACHE_NEW_SAMPLER),
+ DEFINE_BIT(CACHE_NEW_WM_UNIT),
+ DEFINE_BIT(CACHE_NEW_SF_PROG),
+ DEFINE_BIT(CACHE_NEW_SF_VP),
+ DEFINE_BIT(CACHE_NEW_SF_UNIT),
+ DEFINE_BIT(CACHE_NEW_VS_UNIT),
+ DEFINE_BIT(CACHE_NEW_VS_PROG),
+ DEFINE_BIT(CACHE_NEW_GS_UNIT),
+ DEFINE_BIT(CACHE_NEW_GS_PROG),
+ DEFINE_BIT(CACHE_NEW_CLIP_VP),
+ DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
+ DEFINE_BIT(CACHE_NEW_CLIP_PROG),
+ DEFINE_BIT(CACHE_NEW_SURFACE),
+ DEFINE_BIT(CACHE_NEW_SURF_BIND),
+ {0, 0, 0}
+};
+
+
+static void
+brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+ int i;
+
+ for (i = 0; i < 32; i++) {
+ if (bit_map[i].bit == 0)
+ return;
+
+ if (bit_map[i].bit & bits)
+ bit_map[i].count++;
+ }
+}
+
+static void
+brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+ int i;
+
+ for (i = 0; i < 32; i++) {
+ if (bit_map[i].bit == 0)
+ return;
+
+ fprintf(stderr, "0x%08x: %12d (%s)\n",
+ bit_map[i].bit, bit_map[i].count, bit_map[i].name);
+ }
+}
/***********************************************************************
* Emit all state:
*/
-int brw_validate_state( struct brw_context *brw )
+void brw_validate_state( struct brw_context *brw )
{
+ struct intel_context *intel = &brw->intel;
struct brw_state_flags *state = &brw->state.dirty;
- GLuint i, ret, count;
+ GLuint i;
+
+ brw_clear_validated_bos(brw);
state->mesa |= brw->intel.NewGLState;
brw->intel.NewGLState = 0;
- if (brw->wrap)
- state->brw |= BRW_NEW_CONTEXT;
+ brw_add_validated_bo(brw, intel->batch->buf);
if (brw->emit_state_always) {
state->mesa |= ~0;
state->brw |= ~0;
}
- /* texenv program needs to notify us somehow when this happens:
- * Some confusion about which state flag should represent this change.
- */
if (brw->fragment_program != brw->attribs.FragmentProgram->_Current) {
brw->fragment_program = brw->attribs.FragmentProgram->_Current;
- brw->state.dirty.mesa |= _NEW_PROGRAM;
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
}
+ if (brw->vertex_program != brw->attribs.VertexProgram->_Current) {
+ brw->vertex_program = brw->attribs.VertexProgram->_Current;
+ brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+ }
if (state->mesa == 0 &&
state->cache == 0 &&
state->brw == 0)
- return 0;
+ return;
if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
brw_clear_batch_cache_flush(brw);
brw->intel.Fallback = 0;
- count = 0;
-
/* do prepare stage for all atoms */
for (i = 0; i < Elements(atoms); i++) {
const struct brw_tracked_state *atom = brw->state.atoms[i];
@@ -220,15 +343,20 @@ int brw_validate_state( struct brw_context *brw )
if (check_state(state, &atom->dirty)) {
if (atom->prepare) {
- ret = atom->prepare(brw);
- if (ret)
- return ret;
+ atom->prepare(brw);
}
}
}
+}
+
- if (brw->intel.Fallback)
- return 0;
+void brw_upload_state(struct brw_context *brw)
+{
+ struct brw_state_flags *state = &brw->state.dirty;
+ int i;
+ static int dirty_count = 0;
+
+ brw_clear_validated_bos(brw);
if (INTEL_DEBUG) {
/* Debug version which enforces various sanity checks on the
@@ -251,8 +379,9 @@ int brw_validate_state( struct brw_context *brw )
break;
if (check_state(state, &atom->dirty)) {
- if (atom->emit)
+ if (atom->emit) {
atom->emit( brw );
+ }
}
accumulate_state(&examined, &atom->dirty);
@@ -274,13 +403,25 @@ int brw_validate_state( struct brw_context *brw )
break;
if (check_state(state, &atom->dirty)) {
- if (atom->emit)
+ if (atom->emit) {
atom->emit( brw );
+ }
}
}
}
+ if (INTEL_DEBUG & DEBUG_STATE) {
+ brw_update_dirty_count(mesa_bits, state->mesa);
+ brw_update_dirty_count(brw_bits, state->brw);
+ brw_update_dirty_count(cache_bits, state->cache);
+ if (dirty_count++ % 1000 == 0) {
+ brw_print_dirty_count(mesa_bits, state->mesa);
+ brw_print_dirty_count(brw_bits, state->brw);
+ brw_print_dirty_count(cache_bits, state->cache);
+ fprintf(stderr, "\n");
+ }
+ }
+
if (!brw->intel.Fallback)
memset(state, 0, sizeof(*state));
- return 0;
}
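
The new DEBUG_STATE path in brw_upload_state() keeps a per-bit histogram of which _NEW_*, BRW_NEW_* and CACHE_NEW_* flags are triggering state re-uploads and prints it every 1000 uploads. The mechanism is just a table built with a stringifying macro; a self-contained sketch using two made-up flags (not flags from the patch):

    #include <stdint.h>
    #include <stdio.h>

    struct dirty_bit_map {
        uint32_t    bit;
        const char *name;
        uint32_t    count;
    };

    /* Same trick as the driver: record the flag value and its name together. */
    #define DEFINE_BIT(name) { name, #name, 0 }

    #define EXAMPLE_NEW_COLOR   (1 << 0)   /* hypothetical flags for the sketch */
    #define EXAMPLE_NEW_TEXTURE (1 << 1)

    static struct dirty_bit_map example_bits[] = {
        DEFINE_BIT(EXAMPLE_NEW_COLOR),
        DEFINE_BIT(EXAMPLE_NEW_TEXTURE),
        { 0, 0, 0 }
    };

    static void update_dirty_count(struct dirty_bit_map *map, uint32_t bits)
    {
        for (int i = 0; map[i].bit != 0; i++)
            if (map[i].bit & bits)
                map[i].count++;
    }

    static void print_dirty_count(const struct dirty_bit_map *map)
    {
        for (int i = 0; map[i].bit != 0; i++)
            fprintf(stderr, "0x%08x: %12u (%s)\n",
                    (unsigned) map[i].bit, (unsigned) map[i].count, map[i].name);
    }

    int main(void)
    {
        update_dirty_count(example_bits, EXAMPLE_NEW_COLOR);
        update_dirty_count(example_bits, EXAMPLE_NEW_COLOR | EXAMPLE_NEW_TEXTURE);
        print_dirty_count(example_bits);
        return 0;
    }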
diff --git a/i965/brw_structs.h b/i965/brw_structs.h
index ec865c9..4e577d0 100644
--- a/i965/brw_structs.h
+++ b/i965/brw_structs.h
@@ -175,7 +175,7 @@ struct brw_depthbuffer
} dword4;
};
-struct brw_depthbuffer_gm45_g4x
+struct brw_depthbuffer_g4x
{
union header_union header;
@@ -1405,7 +1405,7 @@ struct brw_instruction
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
- } sampler_gm45_g4x;
+ } sampler_g4x;
struct brw_urb_immediate urb;
diff --git a/i965/brw_tex.c b/i965/brw_tex.c
index 258c626..0bb6f17 100644
--- a/i965/brw_tex.c
+++ b/i965/brw_tex.c
@@ -30,19 +30,19 @@
*/
-#include "glheader.h"
-#include "mtypes.h"
-#include "imports.h"
-#include "simple_list.h"
-#include "enums.h"
-#include "image.h"
-#include "teximage.h"
-#include "texstore.h"
-#include "texformat.h"
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/simple_list.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/teximage.h"
+#include "main/texstore.h"
+#include "main/texformat.h"
+
#include "texmem.h"
#include "intel_context.h"
-#include "intel_ioctl.h"
#include "intel_regions.h"
#include "intel_tex.h"
#include "brw_context.h"
diff --git a/i965/brw_tex_layout.c b/i965/brw_tex_layout.c
index e437c41..51a617f 100644
--- a/i965/brw_tex_layout.c
+++ b/i965/brw_tex_layout.c
@@ -36,7 +36,7 @@
#include "intel_mipmap_tree.h"
#include "intel_tex_layout.h"
#include "intel_context.h"
-#include "macros.h"
+#include "main/macros.h"
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
diff --git a/i965/brw_urb.c b/i965/brw_urb.c
index c423dbe..7673dd3 100644
--- a/i965/brw_urb.c
+++ b/i965/brw_urb.c
@@ -42,7 +42,44 @@
#define SF 3
#define CS 4
-/* XXX: Are the min_entry_size numbers useful?
+/** @file brw_urb.c
+ *
+ * Manages the division of the URB space between the various fixed-function
+ * units.
+ *
+ * See the Thread Initiation Management section of the GEN4 B-Spec, and
+ * the individual *_STATE structures for restrictions on numbers of
+ * entries and threads.
+ */
+
+/*
+ * Generally, a unit requires a min_nr_entries based on how many entries
+ * it produces before the downstream unit gets unblocked and can use and
+ * dereference some of its handles.
+ *
+ * The SF unit preallocates a PUE at the start of thread dispatch, and only
+ * uses that one. So it requires one entry per thread.
+ *
+ * For CLIP, the SF unit will hold the previous primitive while the
+ * next is getting assembled, meaning that linestrips require 3 CLIP VUEs
+ * (vertices) to ensure continued processing, trifans require 4, and tristrips
+ * require 5. There can be 1 or 2 threads, and each has the same requirement.
+ *
+ * GS has the same requirement as CLIP, but it never handles tristrips,
+ * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
+ * We only run it single-threaded.
+ *
+ * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
+ * Each thread processes 2 preallocated VUEs (vertices) at a time, and they
+ * get streamed down as soon as threads processing earlier vertices get
+ * theirs accepted.
+ *
+ * Each unit will take the number of URB entries we give it (based on the
+ * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
+ * and brw_curbe.c for the CURBEs) and decide the maximum number of
+ * threads it can support based on that, in brw_*_state.c.
+ *
+ * XXX: Are the min_entry_size numbers useful?
* XXX: Verify min_nr_entries, esp for VS.
* XXX: Verify SF min_entry_size.
*/
@@ -54,7 +91,7 @@ static const struct {
} limits[CS+1] = {
{ 16, 32, 1, 5 }, /* vs */
{ 4, 8, 1, 5 }, /* gs */
- { 6, 8, 1, 5 }, /* clp */
+ { 5, 10, 1, 5 }, /* clp */
{ 1, 8, 1, 12 }, /* sf */
{ 1, 4, 1, 32 } /* cs */
};
@@ -74,7 +111,7 @@ static GLboolean check_urb_layout( struct brw_context *brw )
/* Most minimal update, forces re-emit of URB fence packet after GS
* unit turned on/off.
*/
-static int recalculate_urb_fence( struct brw_context *brw )
+static void recalculate_urb_fence( struct brw_context *brw )
{
GLuint csize = brw->curbe.total_size;
GLuint vsize = brw->vs.prog_data->urb_entry_size;
@@ -92,9 +129,9 @@ static int recalculate_urb_fence( struct brw_context *brw )
if (brw->urb.vsize < vsize ||
brw->urb.sfsize < sfsize ||
brw->urb.csize < csize ||
- (brw->urb.constrained && (brw->urb.vsize > brw->urb.vsize ||
- brw->urb.sfsize > brw->urb.sfsize ||
- brw->urb.csize > brw->urb.csize))) {
+ (brw->urb.constrained && (brw->urb.vsize > vsize ||
+ brw->urb.sfsize > sfsize ||
+ brw->urb.csize > csize))) {
brw->urb.csize = csize;
@@ -114,6 +151,10 @@ static int recalculate_urb_fence( struct brw_context *brw )
brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
+ /* Mark us as operating with constrained nr_entries, so that next
+ * time we recalculate we'll resize the fences in the hope of
+ * escaping constrained mode and getting back to normal performance.
+ */
brw->urb.constrained = 1;
if (!check_urb_layout(brw)) {
@@ -142,7 +183,6 @@ static int recalculate_urb_fence( struct brw_context *brw )
brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
}
- return 0;
}
@@ -187,15 +227,3 @@ void brw_upload_urb_fence(struct brw_context *brw)
BRW_BATCH_STRUCT(brw, &uf);
}
-
-
-#if 0
-const struct brw_tracked_state brw_urb_fence = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_URB_FENCE | BRW_NEW_PSP,
- .cache = 0
- },
- .update = brw_upload_urb_fence
-};
-#endif
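
recalculate_urb_fence() above divides the URB between the VS, GS, CLIP, SF and CS units by picking an entry count per unit and laying the regions out back to back; if the chosen counts don't fit, it drops each unit to its minimum and marks the layout "constrained" so a later recalculation can try to grow again. A simplified, self-contained sketch of that layout check -- the entry sizes, counts and total URB size below are made-up numbers, not values from the patch:

    #include <stdio.h>

    struct urb_unit {
        const char *name;
        int nr_entries;      /* how many URB entries the unit gets */
        int entry_size;      /* size of one entry, in URB rows     */
    };

    /* Lay the per-unit regions out back to back and check they fit. */
    static int check_urb_layout(const struct urb_unit *units, int n, int urb_rows)
    {
        int offset = 0;
        for (int i = 0; i < n; i++) {
            int size = units[i].nr_entries * units[i].entry_size;
            printf("%-4s fence at row %4d, %4d rows\n",
                   units[i].name, offset, size);
            offset += size;
        }
        return offset <= urb_rows;
    }

    int main(void)
    {
        /* Hypothetical sizes purely for illustration. */
        struct urb_unit units[] = {
            { "vs", 32, 4 }, { "gs", 4, 4 }, { "clp", 6, 4 },
            { "sf", 24, 2 }, { "cs", 4, 2 },
        };
        int urb_rows = 256;   /* assumed total, not a hardware figure */

        if (check_urb_layout(units, 5, urb_rows))
            printf("layout fits\n");
        else
            printf("doesn't fit: fall back to minimum entry counts (constrained)\n");
        return 0;
    }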
diff --git a/i965/brw_util.c b/i965/brw_util.c
index d8d35c5..ce21aa4 100644
--- a/i965/brw_util.c
+++ b/i965/brw_util.c
@@ -30,7 +30,7 @@
*/
-#include "mtypes.h"
+#include "main/mtypes.h"
#include "shader/prog_parameter.h"
#include "brw_util.h"
#include "brw_defines.h"
diff --git a/i965/brw_util.h b/i965/brw_util.h
index bd6cc0a..33e7cd8 100644
--- a/i965/brw_util.h
+++ b/i965/brw_util.h
@@ -33,7 +33,7 @@
#ifndef BRW_UTIL_H
#define BRW_UTIL_H
-#include "mtypes.h"
+#include "main/mtypes.h"
extern GLuint brw_count_bits( GLuint val );
extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList);
diff --git a/i965/brw_vs.c b/i965/brw_vs.c
index f89b0e1..1db7cee 100644
--- a/i965/brw_vs.c
+++ b/i965/brw_vs.c
@@ -83,7 +83,7 @@ static void do_vs_prog( struct brw_context *brw,
}
-static int brw_upload_vs_prog( struct brw_context *brw )
+static void brw_upload_vs_prog(struct brw_context *brw)
{
struct brw_vs_prog_key key;
struct brw_vertex_program *vp =
@@ -115,7 +115,6 @@ static int brw_upload_vs_prog( struct brw_context *brw )
&brw->vs.prog_data);
if (brw->vs.prog_bo == NULL)
do_vs_prog(brw, vp, &key);
- return dri_bufmgr_check_aperture_space(brw->vs.prog_bo);
}
diff --git a/i965/brw_vs.h b/i965/brw_vs.h
index 41a33ff..22388ec 100644
--- a/i965/brw_vs.h
+++ b/i965/brw_vs.h
@@ -80,8 +80,4 @@ struct brw_vs_compile {
void brw_vs_emit( struct brw_vs_compile *c );
-
-void brw_ProgramCacheDestroy( GLcontext *ctx );
-void brw_ProgramCacheInit( GLcontext *ctx );
-
#endif
diff --git a/i965/brw_vs_constval.c b/i965/brw_vs_constval.c
index a0106b8..6fbac02 100644
--- a/i965/brw_vs_constval.c
+++ b/i965/brw_vs_constval.c
@@ -30,7 +30,7 @@
*/
-#include "macros.h"
+#include "main/macros.h"
#include "brw_context.h"
#include "brw_vs.h"
@@ -166,7 +166,7 @@ static GLuint get_input_size(struct brw_context *brw,
/* Calculate sizes of vertex program outputs. Size is the largest
* component index which might vary from [0,0,0,1]
*/
-static int calc_wm_input_sizes( struct brw_context *brw )
+static void calc_wm_input_sizes( struct brw_context *brw )
{
/* BRW_NEW_VERTEX_PROGRAM */
struct brw_vertex_program *vp =
@@ -210,7 +210,6 @@ static int calc_wm_input_sizes( struct brw_context *brw )
memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks));
brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS;
}
- return 0;
}
const struct brw_tracked_state brw_wm_input_sizes = {
diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c
index 8759826..174331a 100644
--- a/i965/brw_vs_emit.c
+++ b/i965/brw_vs_emit.c
@@ -73,8 +73,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
c->prog_data.curb_read_length = reg - 1;
-
-
/* Allocate input regs:
*/
c->nr_inputs = 0;
@@ -84,8 +82,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0);
reg++;
}
- }
-
+ }
/* Allocate outputs: TODO: could organize the non-position outputs
* to go straight into message regs.
@@ -196,6 +193,7 @@ static void unalias1( struct brw_vs_compile *c,
struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
func(c, tmp, arg0);
brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
}
else {
func(c, dst, arg0);
@@ -217,12 +215,38 @@ static void unalias2( struct brw_vs_compile *c,
struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
func(c, tmp, arg0, arg1);
brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
}
else {
func(c, dst, arg0, arg1);
}
}
+static void unalias3( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ struct brw_reg arg2,
+ void (*func)( struct brw_vs_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg ))
+{
+ if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
+ (dst.file == arg1.file && dst.nr == arg1.nr) ||
+ (dst.file == arg2.file && dst.nr == arg2.nr)) {
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+ func(c, tmp, arg0, arg1, arg2);
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+ else {
+ func(c, dst, arg0, arg1, arg2);
+ }
+}
+
static void emit_sop( struct brw_compile *p,
struct brw_reg dst,
struct brw_reg arg0,
@@ -339,6 +363,7 @@ static void emit_math1( struct brw_vs_compile *c,
}
}
+
static void emit_math2( struct brw_vs_compile *c,
GLuint function,
struct brw_reg dst,
@@ -370,7 +395,6 @@ static void emit_math2( struct brw_vs_compile *c,
release_tmp(c, tmp);
}
}
-
static void emit_exp_noalias( struct brw_vs_compile *c,
@@ -420,7 +444,7 @@ static void emit_exp_noalias( struct brw_vs_compile *c,
BRW_MATH_FUNCTION_EXP,
brw_writemask(dst, WRITEMASK_Z),
brw_swizzle1(arg0, 0),
- BRW_MATH_PRECISION_PARTIAL);
+ BRW_MATH_PRECISION_FULL);
}
if (dst.dw1.bits.writemask & WRITEMASK_W) {
@@ -521,8 +545,6 @@ static void emit_log_noalias( struct brw_vs_compile *c,
}
-
-
/* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
*/
static void emit_dst_noalias( struct brw_vs_compile *c,
@@ -544,6 +566,7 @@ static void emit_dst_noalias( struct brw_vs_compile *c,
brw_MOV(p, brw_writemask(dst, WRITEMASK_W), arg1);
}
+
static void emit_xpd( struct brw_compile *p,
struct brw_reg dst,
struct brw_reg t,
@@ -554,7 +577,6 @@ static void emit_xpd( struct brw_compile *p,
}
-
static void emit_lit_noalias( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0 )
@@ -595,8 +617,42 @@ static void emit_lit_noalias( struct brw_vs_compile *c,
brw_ENDIF(p, if_insn);
}
+static void emit_lrp_noalias(struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ struct brw_reg arg2)
+{
+ struct brw_compile *p = &c->func;
+
+ brw_ADD(p, dst, negate(arg0), brw_imm_f(1.0));
+ brw_MUL(p, brw_null_reg(), dst, arg2);
+ brw_MAC(p, dst, arg0, arg1);
+}
+
+/** 3 or 4-component vector normalization */
+static void emit_nrm( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ int num_comps)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = get_tmp(c);
+
+ /* tmp = dot(arg0, arg0) */
+ if (num_comps == 3)
+ brw_DP3(p, tmp, arg0, arg0);
+ else
+ brw_DP4(p, tmp, arg0, arg0);
+
+ /* tmp = 1 / sqrt(tmp) */
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL);
+ /* dst = arg0 * tmp */
+ brw_MUL(p, dst, arg0, tmp);
+ release_tmp(c, tmp);
+}
/* TODO: relative addressing!
@@ -634,7 +690,6 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
}
-
static struct brw_reg deref( struct brw_vs_compile *c,
struct brw_reg arg,
GLint offset)
@@ -728,8 +783,6 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
}
-
-
static void emit_swz( struct brw_vs_compile *c,
struct brw_reg dst,
struct prog_src_register src )
@@ -801,8 +854,8 @@ static void emit_swz( struct brw_vs_compile *c,
}
-
-/* Post-vertex-program processing. Send the results to the URB.
+/**
+ * Post-vertex-program processing. Send the results to the URB.
*/
static void emit_vertex_write( struct brw_vs_compile *c)
{
@@ -817,9 +870,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG));
}
-
- /* Build ndc coords? TODO: Shortcircuit when w is known to be one.
- */
+ /* Build ndc coords */
if (!c->key.know_w_is_one) {
ndc = get_tmp(c);
emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
@@ -829,12 +880,12 @@ static void emit_vertex_write( struct brw_vs_compile *c)
ndc = pos;
}
- /* This includes the workaround for -ve rhw, so is no longer an
- * optional step:
+ /* Update the header for point size, user clipping flags, and -ve rhw
+ * workaround.
*/
if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) ||
c->key.nr_userclip ||
- !c->key.know_w_is_one)
+ (!BRW_IS_G4X(p->brw) && !c->key.know_w_is_one))
{
struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
GLuint i;
@@ -849,7 +900,6 @@ static void emit_vertex_write( struct brw_vs_compile *c)
brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
}
-
for (i = 0; i < c->key.nr_userclip; i++) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
@@ -857,7 +907,6 @@ static void emit_vertex_write( struct brw_vs_compile *c)
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
-
/* i965 clipping workaround:
* 1) Test for -ve rhw
* 2) If set,
@@ -867,7 +916,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* Later, clipping will detect ucp[6] and ensure the primitive is
* clipped against all fixed planes.
*/
- if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw)) && !c->key.know_w_is_one) {
+ if (!BRW_IS_G4X(p->brw) && !c->key.know_w_is_one) {
brw_CMP(p,
vec8(brw_null_reg()),
BRW_CONDITIONAL_L,
@@ -889,14 +938,12 @@ static void emit_vertex_write( struct brw_vs_compile *c)
brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
}
-
/* Emit the (interleaved) headers for the two vertices - an 8-reg
* of zeros followed by two sets of NDC coordinates:
*/
brw_set_access_mode(p, BRW_ALIGN_1);
brw_MOV(p, offset(m0, 2), ndc);
brw_MOV(p, offset(m0, 3), pos);
-
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
@@ -910,9 +957,9 @@ static void emit_vertex_write( struct brw_vs_compile *c)
1, /* writes complete */
0, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
-
}
+
static void
post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst )
{
@@ -959,7 +1006,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
GLuint file;
if (INTEL_DEBUG & DEBUG_VS) {
- _mesa_printf("\n\n\nvs-emit:\n");
+ _mesa_printf("vs-emit:\n");
_mesa_print_program(&c->vp->program.Base);
_mesa_printf("\n");
}
@@ -1017,6 +1064,11 @@ void brw_vs_emit(struct brw_vs_compile *c )
else
dst = get_dst(c, inst->DstReg);
+ if (inst->SaturateMode != SATURATE_OFF) {
+ _mesa_problem(NULL, "Unsupported saturate %d in vertex shader",
+ inst->SaturateMode);
+ }
+
switch (inst->Opcode) {
case OPCODE_ABS:
brw_MOV(p, dst, brw_abs(args[0]));
@@ -1024,6 +1076,9 @@ void brw_vs_emit(struct brw_vs_compile *c )
case OPCODE_ADD:
brw_ADD(p, dst, args[0], args[1]);
break;
+ case OPCODE_COS:
+ emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
case OPCODE_DP3:
brw_DP3(p, dst, args[0], args[1]);
break;
@@ -1033,6 +1088,12 @@ void brw_vs_emit(struct brw_vs_compile *c )
case OPCODE_DPH:
brw_DPH(p, dst, args[0], args[1]);
break;
+ case OPCODE_NRM3:
+ emit_nrm(c, dst, args[0], 3);
+ break;
+ case OPCODE_NRM4:
+ emit_nrm(c, dst, args[0], 4);
+ break;
case OPCODE_DST:
unalias2(c, dst, args[0], args[1], emit_dst_noalias);
break;
@@ -1060,6 +1121,9 @@ void brw_vs_emit(struct brw_vs_compile *c )
case OPCODE_LIT:
unalias1(c, dst, args[0], emit_lit_noalias);
break;
+ case OPCODE_LRP:
+ unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
+ break;
case OPCODE_MAD:
brw_MOV(p, brw_acc_reg(), args[2]);
brw_MAC(p, dst, args[0], args[1]);
@@ -1089,6 +1153,9 @@ void brw_vs_emit(struct brw_vs_compile *c )
case OPCODE_SEQ:
emit_seq(p, dst, args[0], args[1]);
break;
+ case OPCODE_SIN:
+ emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
case OPCODE_SNE:
emit_sne(p, dst, args[0], args[1]);
break;
@@ -1097,7 +1164,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
break;
case OPCODE_SGT:
emit_sgt(p, dst, args[0], args[1]);
- break;
+ break;
case OPCODE_SLT:
emit_slt(p, dst, args[0], args[1]);
break;
@@ -1113,6 +1180,10 @@ void brw_vs_emit(struct brw_vs_compile *c )
*/
emit_swz(c, dst, inst->SrcReg[0] );
break;
+ case OPCODE_TRUNC:
+ /* round toward zero */
+ brw_RNDZ(p, dst, args[0]);
+ break;
case OPCODE_XPD:
emit_xpd(p, dst, args[0], args[1]);
break;
@@ -1131,7 +1202,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
brw_set_predicate_control_flag_value(p, 0xff);
- break;
+ break;
case OPCODE_CAL:
brw_set_access_mode(p, BRW_ALIGN_1);
brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
@@ -1140,7 +1211,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
get_addr_reg(stack_index), brw_imm_d(4));
inst->Data = &p->store[p->nr_insn];
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- break;
+ break;
case OPCODE_RET:
brw_ADD(p, get_addr_reg(stack_index),
get_addr_reg(stack_index), brw_imm_d(-4));
@@ -1149,14 +1220,17 @@ void brw_vs_emit(struct brw_vs_compile *c )
brw_set_access_mode(p, BRW_ALIGN_16);
case OPCODE_END:
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- break;
+ break;
case OPCODE_PRINT:
case OPCODE_BGNSUB:
case OPCODE_ENDSUB:
+ /* no-op instructions */
break;
default:
- _mesa_printf("Unsupport opcode %d in vertex shader\n", inst->Opcode);
- break;
+ _mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader",
+ inst->Opcode, inst->Opcode < MAX_OPCODE ?
+ _mesa_opcode_string(inst->Opcode) :
+ "unknown");
}
if ((inst->DstReg.File == PROGRAM_OUTPUT)
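
Two of the newly handled opcodes above are expanded into short instruction sequences rather than single hardware ops: OPCODE_LRP becomes an ADD/MUL/MAC sequence computing arg0*arg1 + (1-arg0)*arg2, and OPCODE_NRM3/NRM4 become a dot product, an RSQ, and a multiply. The same arithmetic on plain floats, as a quick sanity check of what those sequences compute:

    #include <math.h>
    #include <stdio.h>

    /* What emit_lrp_noalias() computes per channel:
     * ADD dst, -a, 1.0 ; MUL acc, dst, c ; MAC dst, a, b  ==>  a*b + (1-a)*c
     */
    static float lrp(float a, float b, float c)
    {
        return a * b + (1.0f - a) * c;
    }

    /* What emit_nrm() computes: tmp = dot(v, v); tmp = rsq(tmp); dst = v * tmp */
    static void nrm3(const float v[3], float out[3])
    {
        float tmp = v[0] * v[0] + v[1] * v[1] + v[2] * v[2];
        tmp = 1.0f / sqrtf(tmp);
        for (int i = 0; i < 3; i++)
            out[i] = v[i] * tmp;
    }

    int main(void)
    {
        float v[3] = { 3.0f, 0.0f, 4.0f }, n[3];
        nrm3(v, n);
        printf("lrp(0.25, 8, 4) = %g\n", lrp(0.25f, 8.0f, 4.0f));   /* 5 */
        printf("nrm3 = (%g, %g, %g)\n", n[0], n[1], n[2]);          /* 0.6 0 0.8 */
        return 0;
    }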
diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c
index 2a64f3d..9425816 100644
--- a/i965/brw_vs_state.c
+++ b/i965/brw_vs_state.c
@@ -34,7 +34,7 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
-#include "macros.h"
+#include "main/macros.h"
struct brw_vs_unit_key {
unsigned int total_grf;
@@ -77,12 +77,19 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
{
struct brw_vs_unit_state vs;
dri_bo *bo;
+ int chipset_max_threads;
memset(&vs, 0, sizeof(vs));
vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ /* Choosing multiple program flow means that we may get 2-vertex threads,
+ * which will have the channel mask for dwords 4-7 enabled in the thread,
+ * and those dwords will be written to the second URB handle when we
+ * brw_urb_WRITE() results.
+ */
+ vs.thread1.single_program_flow = 0;
vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
vs.thread3.dispatch_grf_start_reg = 1;
@@ -91,8 +98,13 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
vs.thread4.nr_urb_entries = key->nr_urb_entries;
vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
- vs.thread4.max_threads = MIN2(MAX2(0, (key->nr_urb_entries - 6) / 2 - 1),
- 15);
+
+ if (BRW_IS_G4X(brw))
+ chipset_max_threads = 32;
+ else
+ chipset_max_threads = 16;
+ vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
+ 1, chipset_max_threads) - 1;
if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
vs.thread4.max_threads = 0;
@@ -115,16 +127,16 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
NULL, NULL);
/* Emit VS program relocation */
- dri_emit_reloc(bo,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- vs.thread0.grf_reg_count << 1,
- offsetof(struct brw_vs_unit_state, thread0),
- brw->vs.prog_bo);
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ vs.thread0.grf_reg_count << 1,
+ offsetof(struct brw_vs_unit_state, thread0),
+ brw->vs.prog_bo);
return bo;
}
-static int prepare_vs_unit( struct brw_context *brw )
+static void prepare_vs_unit(struct brw_context *brw)
{
struct brw_vs_unit_key key;
@@ -138,7 +150,6 @@ static int prepare_vs_unit( struct brw_context *brw )
if (brw->vs.state_bo == NULL) {
brw->vs.state_bo = vs_unit_create_from_key(brw, &key);
}
- return dri_bufmgr_check_aperture_space(brw->vs.state_bo);
}
const struct brw_tracked_state brw_vs_unit = {
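
The VS unit setup above replaces the old max_threads formula with CLAMP(nr_urb_entries / 2, 1, chipset_max_threads) - 1, where the chipset limit is 32 threads on G4X and 16 otherwise, and the hardware field stores "threads - 1". Spelled out on plain integers:

    #include <stdio.h>

    #define MIN2(a, b)       ((a) < (b) ? (a) : (b))
    #define MAX2(a, b)       ((a) > (b) ? (a) : (b))
    #define CLAMP(x, lo, hi) MIN2(MAX2(x, lo), hi)

    /* Each VS thread works on two URB entries (vertices) at a time, so the
     * usable thread count is nr_urb_entries / 2, clamped to the chipset limit.
     */
    static int vs_max_threads_field(int nr_urb_entries, int is_g4x)
    {
        int chipset_max_threads = is_g4x ? 32 : 16;
        return CLAMP(nr_urb_entries / 2, 1, chipset_max_threads) - 1;
    }

    int main(void)
    {
        printf("32 entries, pre-G4X: field = %d\n", vs_max_threads_field(32, 0));
        printf("64 entries, G4X:     field = %d\n", vs_max_threads_field(64, 1));
        return 0;
    }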
diff --git a/i965/brw_vs_tnl.c b/i965/brw_vs_tnl.c
deleted file mode 100644
index e409620..0000000
--- a/i965/brw_vs_tnl.c
+++ /dev/null
@@ -1,1709 +0,0 @@
-/*
- * Mesa 3-D graphics library
- * Version: 6.3
- *
- * Copyright (C) 2005 Tungsten Graphics All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * TUNGSTEN GRAPHICS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file t_vp_build.c
- * Create a vertex program to execute the current fixed function T&L pipeline.
- * \author Keith Whitwell
- */
-
-
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "brw_vs.h"
-#include "brw_state.h"
-
-
-struct state_key {
- unsigned light_global_enabled:1;
- unsigned light_local_viewer:1;
- unsigned light_twoside:1;
- unsigned light_color_material:1;
- unsigned light_color_material_mask:12;
- unsigned light_material_mask:12;
- unsigned normalize:1;
- unsigned rescale_normals:1;
- unsigned fog_source_is_depth:1;
- unsigned tnl_do_vertex_fog:1;
- unsigned separate_specular:1;
- unsigned fog_option:2;
- unsigned point_attenuated:1;
- unsigned texture_enabled_global:1;
- unsigned fragprog_inputs_read:12;
-
- struct {
- unsigned light_enabled:1;
- unsigned light_eyepos3_is_zero:1;
- unsigned light_spotcutoff_is_180:1;
- unsigned light_attenuated:1;
- unsigned texunit_really_enabled:1;
- unsigned texmat_enabled:1;
- unsigned texgen_enabled:4;
- unsigned texgen_mode0:4;
- unsigned texgen_mode1:4;
- unsigned texgen_mode2:4;
- unsigned texgen_mode3:4;
- } unit[8];
-};
-
-
-
-#define FOG_NONE 0
-#define FOG_LINEAR 1
-#define FOG_EXP 2
-#define FOG_EXP2 3
-
-static GLuint translate_fog_mode( GLenum mode )
-{
- switch (mode) {
- case GL_LINEAR: return FOG_LINEAR;
- case GL_EXP: return FOG_EXP;
- case GL_EXP2: return FOG_EXP2;
- default: return FOG_NONE;
- }
-}
-
-#define TXG_NONE 0
-#define TXG_OBJ_LINEAR 1
-#define TXG_EYE_LINEAR 2
-#define TXG_SPHERE_MAP 3
-#define TXG_REFLECTION_MAP 4
-#define TXG_NORMAL_MAP 5
-
-static GLuint translate_texgen( GLboolean enabled, GLenum mode )
-{
- if (!enabled)
- return TXG_NONE;
-
- switch (mode) {
- case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR;
- case GL_EYE_LINEAR: return TXG_EYE_LINEAR;
- case GL_SPHERE_MAP: return TXG_SPHERE_MAP;
- case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP;
- case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP;
- default: return TXG_NONE;
- }
-}
-
-static void make_state_key( GLcontext *ctx, struct state_key *key )
-{
- struct brw_context *brw = brw_context(ctx);
- const struct gl_fragment_program *fp = brw->fragment_program;
- GLuint i;
-
- /* This now relies on texenvprogram.c being active:
- */
- assert(fp);
-
- memset(key, 0, sizeof(*key));
-
- /* BRW_NEW_FRAGMENT_PROGRAM */
- key->fragprog_inputs_read = fp->Base.InputsRead;
-
- /* _NEW_LIGHT */
- key->separate_specular = (brw->attribs.Light->Model.ColorControl ==
- GL_SEPARATE_SPECULAR_COLOR);
-
- /* _NEW_LIGHT */
- if (brw->attribs.Light->Enabled) {
- key->light_global_enabled = 1;
-
- if (brw->attribs.Light->Model.LocalViewer)
- key->light_local_viewer = 1;
-
- if (brw->attribs.Light->Model.TwoSide)
- key->light_twoside = 1;
-
- if (brw->attribs.Light->ColorMaterialEnabled) {
- key->light_color_material = 1;
- key->light_color_material_mask = brw->attribs.Light->ColorMaterialBitmask;
- }
-
- /* BRW_NEW_INPUT_VARYING */
-
- /* For these programs, material values are stuffed into the
- * generic slots:
- */
- for (i = 0 ; i < MAT_ATTRIB_MAX ; i++)
- if (brw->vb.info.varying & (1<<(VERT_ATTRIB_GENERIC0 + i)))
- key->light_material_mask |= 1<<i;
-
- for (i = 0; i < MAX_LIGHTS; i++) {
- struct gl_light *light = &brw->attribs.Light->Light[i];
-
- if (light->Enabled) {
- key->unit[i].light_enabled = 1;
-
- if (light->EyePosition[3] == 0.0)
- key->unit[i].light_eyepos3_is_zero = 1;
-
- if (light->SpotCutoff == 180.0)
- key->unit[i].light_spotcutoff_is_180 = 1;
-
- if (light->ConstantAttenuation != 1.0 ||
- light->LinearAttenuation != 0.0 ||
- light->QuadraticAttenuation != 0.0)
- key->unit[i].light_attenuated = 1;
- }
- }
- }
-
- /* _NEW_TRANSFORM */
- if (brw->attribs.Transform->Normalize)
- key->normalize = 1;
-
- if (brw->attribs.Transform->RescaleNormals)
- key->rescale_normals = 1;
-
- /* BRW_NEW_FRAGMENT_PROGRAM */
- key->fog_option = translate_fog_mode(fp->FogOption);
- if (key->fog_option)
- key->fragprog_inputs_read |= FRAG_BIT_FOGC;
-
- /* _NEW_FOG */
- if (brw->attribs.Fog->FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT)
- key->fog_source_is_depth = 1;
-
- /* _NEW_HINT, ??? */
- if (1)
- key->tnl_do_vertex_fog = 1;
-
- /* _NEW_POINT */
- if (brw->attribs.Point->_Attenuated)
- key->point_attenuated = 1;
-
- /* _NEW_TEXTURE */
- if (brw->attribs.Texture->_TexGenEnabled ||
- brw->attribs.Texture->_TexMatEnabled ||
- brw->attribs.Texture->_EnabledUnits)
- key->texture_enabled_global = 1;
-
- for (i = 0; i < MAX_TEXTURE_UNITS; i++) {
- struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i];
-
- if (texUnit->_ReallyEnabled)
- key->unit[i].texunit_really_enabled = 1;
-
- if (brw->attribs.Texture->_TexMatEnabled & ENABLE_TEXMAT(i))
- key->unit[i].texmat_enabled = 1;
-
- if (texUnit->TexGenEnabled) {
- key->unit[i].texgen_enabled = 1;
-
- key->unit[i].texgen_mode0 =
- translate_texgen( texUnit->TexGenEnabled & (1<<0),
- texUnit->GenModeS );
- key->unit[i].texgen_mode1 =
- translate_texgen( texUnit->TexGenEnabled & (1<<1),
- texUnit->GenModeT );
- key->unit[i].texgen_mode2 =
- translate_texgen( texUnit->TexGenEnabled & (1<<2),
- texUnit->GenModeR );
- key->unit[i].texgen_mode3 =
- translate_texgen( texUnit->TexGenEnabled & (1<<3),
- texUnit->GenModeQ );
- }
- }
-}
-
-
-
-/* Very useful debugging tool - produces annotated listing of
- * generated program with line/function references for each
- * instruction back into this file:
- */
-#define DISASSEM 0
-
-/* Should be tunable by the driver - do we want to do matrix
- * multiplications with DP4's or with MUL/MAD's? SSE works better
- * with the latter, drivers may differ.
- */
-#define PREFER_DP4 1
-
-
-/* Use uregs to represent registers internally, translate to Mesa's
- * expected formats on emit.
- *
- * NOTE: These are passed by value extensively in this file rather
- * than as usual by pointer reference. If this disturbs you, try
- * remembering they are just 32bits in size.
- *
- * GCC is smart enough to deal with these dword-sized structures in
- * much the same way as if I had defined them as dwords and was using
- * macros to access and set the fields. This is much nicer and easier
- * to evolve.
- */
-struct ureg {
- GLuint file:4;
- GLint idx:8; /* relative addressing may be negative */
- GLuint negate:1;
- GLuint swz:12;
- GLuint pad:7;
-};
-
-
-struct tnl_program {
- const struct state_key *state;
- struct gl_vertex_program *program;
-
- GLuint nr_instructions;
- GLuint temp_in_use;
- GLuint temp_reserved;
-
- struct ureg eye_position;
- struct ureg eye_position_normalized;
- struct ureg eye_normal;
- struct ureg identity;
-
- GLuint materials;
- GLuint color_materials;
-};
-
-
-const static struct ureg undef = {
- PROGRAM_UNDEFINED,
- ~0,
- 0,
- 0,
- 0
-};
-
-/* Local shorthand:
- */
-#define X SWIZZLE_X
-#define Y SWIZZLE_Y
-#define Z SWIZZLE_Z
-#define W SWIZZLE_W
-
-
-/* Construct a ureg:
- */
-static struct ureg make_ureg(GLuint file, GLint idx)
-{
- struct ureg reg;
- reg.file = file;
- reg.idx = idx;
- reg.negate = 0;
- reg.swz = SWIZZLE_NOOP;
- reg.pad = 0;
- return reg;
-}
-
-
-
-static struct ureg ureg_negate( struct ureg reg )
-{
- reg.negate ^= 1;
- return reg;
-}
-
-
-static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
-{
- reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
- GET_SWZ(reg.swz, y),
- GET_SWZ(reg.swz, z),
- GET_SWZ(reg.swz, w));
-
- return reg;
-}
-
-static struct ureg swizzle1( struct ureg reg, int x )
-{
- return swizzle(reg, x, x, x, x);
-}
-
-static struct ureg get_temp( struct tnl_program *p )
-{
- int bit = ffs( ~p->temp_in_use );
- if (!bit) {
- fprintf(stderr, "%s: out of temporaries\n", __FILE__);
- assert(0);
- }
-
- if (bit > p->program->Base.NumTemporaries)
- p->program->Base.NumTemporaries = bit;
-
- p->temp_in_use |= 1<<(bit-1);
- return make_ureg(PROGRAM_TEMPORARY, bit-1);
-}
-
-static struct ureg reserve_temp( struct tnl_program *p )
-{
- struct ureg temp = get_temp( p );
- p->temp_reserved |= 1<<temp.idx;
- return temp;
-}
-
-static void release_temp( struct tnl_program *p, struct ureg reg )
-{
- if (reg.file == PROGRAM_TEMPORARY) {
- p->temp_in_use &= ~(1<<reg.idx);
- p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
- }
-}
-
-static void release_temps( struct tnl_program *p )
-{
- p->temp_in_use = p->temp_reserved;
-}
-
-
-
-static struct ureg register_input( struct tnl_program *p, GLuint input )
-{
- assert(input < 32);
-
- p->program->Base.InputsRead |= (1<<input);
- return make_ureg(PROGRAM_INPUT, input);
-}
-
-static struct ureg register_output( struct tnl_program *p, GLuint output )
-{
- p->program->Base.OutputsWritten |= (1<<output);
- return make_ureg(PROGRAM_OUTPUT, output);
-}
-
-static struct ureg register_const4f( struct tnl_program *p,
- GLfloat s0,
- GLfloat s1,
- GLfloat s2,
- GLfloat s3)
-{
- GLfloat values[4];
- GLint idx;
- GLuint swizzle;
- values[0] = s0;
- values[1] = s1;
- values[2] = s2;
- values[3] = s3;
- idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
- &swizzle);
- assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
- return make_ureg(PROGRAM_STATE_VAR, idx);
-}
-
-#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1)
-#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0)
-#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1)
-#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
-
-static GLboolean is_undef( struct ureg reg )
-{
- return reg.file == PROGRAM_UNDEFINED;
-}
-
-static struct ureg get_identity_param( struct tnl_program *p )
-{
- if (is_undef(p->identity))
- p->identity = register_const4f(p, 0,0,0,1);
-
- return p->identity;
-}
-
-static struct ureg register_param5( struct tnl_program *p,
- GLint s0,
- GLint s1,
- GLint s2,
- GLint s3,
- GLint s4)
-{
- gl_state_index tokens[STATE_LENGTH];
- GLint idx;
- tokens[0] = s0;
- tokens[1] = s1;
- tokens[2] = s2;
- tokens[3] = s3;
- tokens[4] = s4;
- idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens );
- return make_ureg(PROGRAM_STATE_VAR, idx);
-}
-
-
-#define register_param1(p,s0) register_param5(p,s0,0,0,0,0)
-#define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0)
-#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0)
-#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
-
-
-static void register_matrix_param5( struct tnl_program *p,
- GLint s0, /* matrix name */
- GLint s1, /* texture matrix number */
- GLint s2, /* first row */
- GLint s3, /* last row */
- GLint s4, /* modifier */
- struct ureg *matrix )
-{
- GLint i;
-
- /* This is a bit sad as the support is there to pull the whole
- * matrix out in one go:
- */
- for (i = 0; i <= s3 - s2; i++)
- matrix[i] = register_param5( p, s0, s1, i, i, s4 );
-}
-
-
-static void emit_arg( struct prog_src_register *src,
- struct ureg reg )
-{
- src->File = reg.file;
- src->Index = reg.idx;
- src->Swizzle = reg.swz;
- src->RelAddr = 0;
- src->NegateBase = reg.negate;
- src->Abs = 0;
- src->NegateAbs = 0;
-}
-
-static void emit_dst( struct prog_dst_register *dst,
- struct ureg reg, GLuint mask )
-{
- dst->File = reg.file;
- dst->Index = reg.idx;
- /* allow zero as a shorthand for xyzw */
- dst->WriteMask = mask ? mask : WRITEMASK_XYZW;
- dst->CondMask = 0;
- dst->CondSwizzle = 0;
- dst->CondSrc = 0;
- dst->pad = 0;
-}
-
-static void debug_insn( struct prog_instruction *inst, const char *fn,
- GLuint line )
-{
- if (DISASSEM) {
- static const char *last_fn;
-
- if (fn != last_fn) {
- last_fn = fn;
- _mesa_printf("%s:\n", fn);
- }
-
- _mesa_printf("%d:\t", line);
- _mesa_print_instruction(inst);
- }
-}
-
-
-static void emit_op3fn(struct tnl_program *p,
- GLuint op,
- struct ureg dest,
- GLuint mask,
- struct ureg src0,
- struct ureg src1,
- struct ureg src2,
- const char *fn,
- GLuint line)
-{
- GLuint nr = p->program->Base.NumInstructions++;
-
- if (nr >= p->nr_instructions) {
- int new_nr_instructions = p->nr_instructions * 2;
-
- p->program->Base.Instructions =
- _mesa_realloc(p->program->Base.Instructions,
- sizeof(struct prog_instruction) * p->nr_instructions,
- sizeof(struct prog_instruction) * new_nr_instructions);
- p->nr_instructions = new_nr_instructions;
- }
-
- {
- struct prog_instruction *inst = &p->program->Base.Instructions[nr];
- memset(inst, 0, sizeof(*inst));
- inst->Opcode = op;
- inst->StringPos = 0;
- inst->Data = 0;
-
- emit_arg( &inst->SrcReg[0], src0 );
- emit_arg( &inst->SrcReg[1], src1 );
- emit_arg( &inst->SrcReg[2], src2 );
-
- emit_dst( &inst->DstReg, dest, mask );
-
- debug_insn(inst, fn, line);
- }
-}
-
-
-
-#define emit_op3(p, op, dst, mask, src0, src1, src2) \
- emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__)
-
-#define emit_op2(p, op, dst, mask, src0, src1) \
- emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__)
-
-#define emit_op1(p, op, dst, mask, src0) \
- emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__)
-
-
-static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
-{
- if (reg.file == PROGRAM_TEMPORARY &&
- !(p->temp_reserved & (1<<reg.idx)))
- return reg;
- else {
- struct ureg temp = get_temp(p);
- emit_op1(p, OPCODE_MOV, temp, 0, reg);
- return temp;
- }
-}
-
-
-/* Currently no tracking performed of input/output/register size or
- * active elements. Could be used to reduce these operations, as
- * could the matrix type.
- */
-static void emit_matrix_transform_vec4( struct tnl_program *p,
- struct ureg dest,
- const struct ureg *mat,
- struct ureg src)
-{
- emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
- emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
- emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
- emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
-}
-
-/* This version is much easier to implement if writemasks are not
- * supported natively on the target or (like SSE), the target doesn't
- * have a clean/obvious dotproduct implementation.
- */
-static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
- struct ureg dest,
- const struct ureg *mat,
- struct ureg src)
-{
- struct ureg tmp;
-
- if (dest.file != PROGRAM_TEMPORARY)
- tmp = get_temp(p);
- else
- tmp = dest;
-
- emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
- emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
- emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
- emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
-
- if (dest.file != PROGRAM_TEMPORARY)
- release_temp(p, tmp);
-}
-
-static void emit_matrix_transform_vec3( struct tnl_program *p,
- struct ureg dest,
- const struct ureg *mat,
- struct ureg src)
-{
- emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
- emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
- emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
-}
-
-
-static void emit_normalize_vec3( struct tnl_program *p,
- struct ureg dest,
- struct ureg src )
-{
- emit_op2(p, OPCODE_DP3, dest, WRITEMASK_W, src, src);
- emit_op1(p, OPCODE_RSQ, dest, WRITEMASK_W, swizzle1(dest,W));
- emit_op2(p, OPCODE_MUL, dest, WRITEMASK_XYZ, src, swizzle1(dest,W));
-}
-
-static void emit_passthrough( struct tnl_program *p,
- GLuint input,
- GLuint output )
-{
- struct ureg out = register_output(p, output);
- emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
-}
-
-static struct ureg get_eye_position( struct tnl_program *p )
-{
- if (is_undef(p->eye_position)) {
- struct ureg pos = register_input( p, VERT_ATTRIB_POS );
- struct ureg modelview[4];
-
- p->eye_position = reserve_temp(p);
-
- if (PREFER_DP4) {
- register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
- 0, modelview );
-
- emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
- }
- else {
- register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
- STATE_MATRIX_TRANSPOSE, modelview );
-
- emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
- }
- }
-
- return p->eye_position;
-}
-
-
-#if 0
-static struct ureg get_eye_z( struct tnl_program *p )
-{
- if (!is_undef(p->eye_position)) {
- return swizzle1(p->eye_position, Z);
- }
- else if (!is_undef(p->eye_z)) {
- struct ureg pos = register_input( p, BRW_ATTRIB_POS );
- struct ureg modelview2;
-
- p->eye_z = reserve_temp(p);
-
- register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 2, 1,
- STATE_MATRIX, &modelview2 );
-
- emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
- emit_op2(p, OPCODE_DP4, p->eye_z, WRITEMASK_Z, pos, modelview2);
- }
-
- return swizzle1(p->eye_z, Z)
-}
-#endif
-
-
-
-static struct ureg get_eye_position_normalized( struct tnl_program *p )
-{
- if (is_undef(p->eye_position_normalized)) {
- struct ureg eye = get_eye_position(p);
- p->eye_position_normalized = reserve_temp(p);
- emit_normalize_vec3(p, p->eye_position_normalized, eye);
- }
-
- return p->eye_position_normalized;
-}
-
-
-static struct ureg get_eye_normal( struct tnl_program *p )
-{
- if (is_undef(p->eye_normal)) {
- struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
- struct ureg mvinv[3];
-
- register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
- STATE_MATRIX_INVTRANS, mvinv );
-
- p->eye_normal = reserve_temp(p);
-
- /* Transform to eye space:
- */
- emit_matrix_transform_vec3( p, p->eye_normal, mvinv, normal );
-
- /* Normalize/Rescale:
- */
- if (p->state->normalize) {
- emit_normalize_vec3( p, p->eye_normal, p->eye_normal );
- }
- else if (p->state->rescale_normals) {
- struct ureg rescale = register_param2(p, STATE_INTERNAL,
- STATE_NORMAL_SCALE);
-
- emit_op2( p, OPCODE_MUL, p->eye_normal, 0, p->eye_normal,
- swizzle1(rescale, X));
- }
- }
-
- return p->eye_normal;
-}
-
-
-
-static void build_hpos( struct tnl_program *p )
-{
- struct ureg pos = register_input( p, VERT_ATTRIB_POS );
- struct ureg hpos = register_output( p, VERT_RESULT_HPOS );
- struct ureg mvp[4];
-
- if (PREFER_DP4) {
- register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
- 0, mvp );
- emit_matrix_transform_vec4( p, hpos, mvp, pos );
- }
- else {
- register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
- STATE_MATRIX_TRANSPOSE, mvp );
- emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
- }
-}
-
-
-static GLuint material_attrib( GLuint side, GLuint property )
-{
- return (property - STATE_AMBIENT) * 2 + side;
-}
-
-/* Get a bitmask of which material values vary on a per-vertex basis.
- */
-static void set_material_flags( struct tnl_program *p )
-{
- p->color_materials = 0;
- p->materials = 0;
-
- if (p->state->light_color_material) {
- p->materials =
- p->color_materials = p->state->light_color_material_mask;
- }
-
- p->materials |= p->state->light_material_mask;
-}
-
-
-static struct ureg get_material( struct tnl_program *p, GLuint side,
- GLuint property )
-{
- GLuint attrib = material_attrib(side, property);
-
- if (p->color_materials & (1<<attrib))
- return register_input(p, VERT_ATTRIB_COLOR0);
- else if (p->materials & (1<<attrib))
- return register_input( p, attrib + _TNL_ATTRIB_MAT_FRONT_AMBIENT );
- else
- return register_param3( p, STATE_MATERIAL, side, property );
-}
-
-#define SCENE_COLOR_BITS(side) ((MAT_BIT_FRONT_EMISSION | \
- MAT_BIT_FRONT_AMBIENT | \
- MAT_BIT_FRONT_DIFFUSE) << (side))
-
-/* Either return a precalculated constant value or emit code to
- * calculate these values dynamically in the case where material calls
- * are present between begin/end pairs.
- *
- * Probably want to shift this to the program compilation phase - if
- * we always emitted the calculation here, a smart compiler could
- * detect that it was constant (given a certain set of inputs), and
- * lift it out of the main loop. That way the programs created here
- * would be independent of the vertex_buffer details.
- */
-static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
-{
- if (p->materials & SCENE_COLOR_BITS(side)) {
- struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
- struct ureg material_emission = get_material(p, side, STATE_EMISSION);
- struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
- struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
- struct ureg tmp = make_temp(p, material_diffuse);
- emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient,
- material_ambient, material_emission);
- return tmp;
- }
- else
- return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
-}
-
-
-static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
- GLuint side, GLuint property )
-{
- GLuint attrib = material_attrib(side, property);
- if (p->materials & (1<<attrib)) {
- struct ureg light_value =
- register_param3(p, STATE_LIGHT, light, property);
- struct ureg material_value = get_material(p, side, property);
- struct ureg tmp = get_temp(p);
- emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value);
- return tmp;
- }
- else
- return register_param4(p, STATE_LIGHTPROD, light, side, property);
-}
-
-static struct ureg calculate_light_attenuation( struct tnl_program *p,
- GLuint i,
- struct ureg VPpli,
- struct ureg dist )
-{
- struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
- STATE_ATTENUATION);
- struct ureg att = get_temp(p);
-
- /* Calculate spot attenuation:
- */
- if (!p->state->unit[i].light_spotcutoff_is_180) {
- struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
- STATE_SPOT_DIR_NORMALIZED, i);
- struct ureg spot = get_temp(p);
- struct ureg slt = get_temp(p);
-
- emit_op2(p, OPCODE_DP3, spot, 0, ureg_negate(VPpli), spot_dir_norm);
- emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
- emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
- emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
-
- release_temp(p, spot);
- release_temp(p, slt);
- }
-
- /* Calculate distance attenuation:
- */
- if (p->state->unit[i].light_attenuated) {
-
- /* 1/d,d,d,1/d */
- emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
- /* 1,d,d*d,1/d */
- emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
- /* 1/dist-atten */
- emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
-
- if (!p->state->unit[i].light_spotcutoff_is_180) {
- /* dist-atten */
- emit_op1(p, OPCODE_RCP, dist, 0, dist);
- /* spot-atten * dist-atten */
- emit_op2(p, OPCODE_MUL, att, 0, dist, att);
- } else {
- /* dist-atten */
- emit_op1(p, OPCODE_RCP, att, 0, dist);
- }
- }
-
- return att;
-}
-
-
-
-
-
-/* Need to add some additional parameters to allow lighting in object
- * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
- * space lighting.
- */
-static void build_lighting( struct tnl_program *p )
-{
- const GLboolean twoside = p->state->light_twoside;
- const GLboolean separate = p->state->separate_specular;
- GLuint nr_lights = 0, count = 0;
- struct ureg normal = get_eye_normal(p);
- struct ureg lit = get_temp(p);
- struct ureg dots = get_temp(p);
- struct ureg _col0 = undef, _col1 = undef;
- struct ureg _bfc0 = undef, _bfc1 = undef;
- GLuint i;
-
- for (i = 0; i < MAX_LIGHTS; i++)
- if (p->state->unit[i].light_enabled)
- nr_lights++;
-
- set_material_flags(p);
-
- {
- struct ureg shininess = get_material(p, 0, STATE_SHININESS);
- emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
- release_temp(p, shininess);
-
- _col0 = make_temp(p, get_scenecolor(p, 0));
- if (separate)
- _col1 = make_temp(p, get_identity_param(p));
- else
- _col1 = _col0;
-
- }
-
- if (twoside) {
- struct ureg shininess = get_material(p, 1, STATE_SHININESS);
- emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
- ureg_negate(swizzle1(shininess,X)));
- release_temp(p, shininess);
-
- _bfc0 = make_temp(p, get_scenecolor(p, 1));
- if (separate)
- _bfc1 = make_temp(p, get_identity_param(p));
- else
- _bfc1 = _bfc0;
- }
-
-
- /* If no lights, still need to emit the scenecolor.
- */
- /* KW: changed to do this always - v1.17 "Fix lighting alpha result"?
- */
- if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
- {
- struct ureg res0 = register_output( p, VERT_RESULT_COL0 );
- emit_op1(p, OPCODE_MOV, res0, 0, _col0);
-
- if (twoside) {
- struct ureg res0 = register_output( p, VERT_RESULT_BFC0 );
- emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
- }
- }
-
- if (separate && (p->state->fragprog_inputs_read & FRAG_BIT_COL1)) {
-
- struct ureg res1 = register_output( p, VERT_RESULT_COL1 );
- emit_op1(p, OPCODE_MOV, res1, 0, _col1);
-
- if (twoside) {
- struct ureg res1 = register_output( p, VERT_RESULT_BFC1 );
- emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
- }
- }
-
- if (nr_lights == 0) {
- release_temps(p);
- return;
- }
-
-
- for (i = 0; i < MAX_LIGHTS; i++) {
- if (p->state->unit[i].light_enabled) {
- struct ureg half = undef;
- struct ureg att = undef, VPpli = undef;
-
- count++;
-
- if (p->state->unit[i].light_eyepos3_is_zero) {
-	 /* Can use precomputed constants in this case.
- * Attenuation never applies to infinite lights.
- */
- VPpli = register_param3(p, STATE_LIGHT, i,
- STATE_POSITION_NORMALIZED);
- if (p->state->light_local_viewer) {
- struct ureg eye_hat = get_eye_position_normalized(p);
- half = get_temp(p);
- emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
- emit_normalize_vec3(p, half, half);
- } else {
- half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
- }
- }
- else {
- struct ureg Ppli = register_param3(p, STATE_LIGHT, i,
- STATE_POSITION);
- struct ureg V = get_eye_position(p);
- struct ureg dist = get_temp(p);
- struct ureg tmpPpli = get_temp(p);
-
- VPpli = get_temp(p);
- half = get_temp(p);
-
- /* In homogeneous object coordinates
- */
- emit_op1(p, OPCODE_RCP, dist, 0, swizzle1(Ppli, W));
- emit_op2(p, OPCODE_MUL, tmpPpli, 0, Ppli, dist);
-
-	 /* Calculate VPpli vector
- */
- emit_op2(p, OPCODE_SUB, VPpli, 0, tmpPpli, V);
-
- /* Normalize VPpli. The dist value also used in
- * attenuation below.
- */
- emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
- emit_op1(p, OPCODE_RSQ, dist, 0, dist);
- emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
-
-
- /* Calculate attenuation:
- */
- if (!p->state->unit[i].light_spotcutoff_is_180 ||
- p->state->unit[i].light_attenuated) {
- att = calculate_light_attenuation(p, i, VPpli, dist);
- }
-
-
- /* Calculate viewer direction, or use infinite viewer:
- */
- if (p->state->light_local_viewer) {
- struct ureg eye_hat = get_eye_position_normalized(p);
- emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
- }
- else {
- struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
- emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
- }
-
- emit_normalize_vec3(p, half, half);
-
- release_temp(p, dist);
- release_temp(p, tmpPpli);
- }
-
- /* Calculate dot products:
- */
- emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
- emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
-
-
- /* Front face lighting:
- */
- {
- struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
- struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
- struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
- struct ureg res0, res1;
- GLuint mask0, mask1;
-
- emit_op1(p, OPCODE_LIT, lit, 0, dots);
-
- if (!is_undef(att))
- emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
-
-
- mask0 = 0;
- mask1 = 0;
- res0 = _col0;
- res1 = _col1;
-
- if (count == nr_lights) {
- if (separate) {
- mask0 = WRITEMASK_XYZ;
- mask1 = WRITEMASK_XYZ;
-
- if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
- res0 = register_output( p, VERT_RESULT_COL0 );
-
- if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
- res1 = register_output( p, VERT_RESULT_COL1 );
- }
- else {
- mask1 = WRITEMASK_XYZ;
-
- if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
- res1 = register_output( p, VERT_RESULT_COL0 );
- }
- }
-
- emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
- emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
- emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
-
- release_temp(p, ambient);
- release_temp(p, diffuse);
- release_temp(p, specular);
- }
-
- /* Back face lighting:
- */
- if (twoside) {
- struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
- struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
- struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
- struct ureg res0, res1;
- GLuint mask0, mask1;
-
- emit_op1(p, OPCODE_LIT, lit, 0, ureg_negate(swizzle(dots,X,Y,W,Z)));
-
- if (!is_undef(att))
- emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
-
- mask0 = 0;
- mask1 = 0;
- res0 = _bfc0;
- res1 = _bfc1;
-
- if (count == nr_lights) {
- if (separate) {
- mask0 = WRITEMASK_XYZ;
- mask1 = WRITEMASK_XYZ;
- if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
- res0 = register_output( p, VERT_RESULT_BFC0 );
-
- if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
- res1 = register_output( p, VERT_RESULT_BFC1 );
- }
- else {
- mask1 = WRITEMASK_XYZ;
-
- if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
- res1 = register_output( p, VERT_RESULT_BFC0 );
- }
- }
-
- emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
- emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
- emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
-
- release_temp(p, ambient);
- release_temp(p, diffuse);
- release_temp(p, specular);
- }
-
- release_temp(p, half);
- release_temp(p, VPpli);
- release_temp(p, att);
- }
- }
-
- release_temps( p );
-}
-
-
-static void build_fog( struct tnl_program *p )
-{
- struct ureg fog = register_output(p, VERT_RESULT_FOGC);
- struct ureg input;
- GLuint useabs = p->state->fog_source_is_depth && p->state->fog_option &&
- (p->state->fog_option != FOG_EXP2);
-
- if (p->state->fog_source_is_depth) {
- input = swizzle1(get_eye_position(p), Z);
- }
- else {
- input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
- if (p->state->fog_option &&
- p->state->tnl_do_vertex_fog)
- input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
- else
- input = register_input(p, VERT_ATTRIB_FOG);
- }
-
- if (p->state->fog_option &&
- p->state->tnl_do_vertex_fog) {
- struct ureg params = register_param2(p, STATE_INTERNAL,
- STATE_FOG_PARAMS_OPTIMIZED);
- struct ureg tmp = get_temp(p);
- struct ureg id = get_identity_param(p);
-
- emit_op1(p, OPCODE_MOV, fog, 0, id);
-
- if (useabs) {
- emit_op1(p, OPCODE_ABS, tmp, 0, input);
- }
-
- switch (p->state->fog_option) {
- case FOG_LINEAR: {
- emit_op3(p, OPCODE_MAD, tmp, 0, useabs ? tmp : input,
- swizzle1(params,X), swizzle1(params,Y));
- emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */
- emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W));
- break;
- }
- case FOG_EXP:
- emit_op2(p, OPCODE_MUL, tmp, 0, useabs ? tmp : input,
- swizzle1(params,Z));
- emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp));
- break;
- case FOG_EXP2:
- emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W));
- emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp);
- emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp));
- break;
- }
-
- release_temp(p, tmp);
- }
- else {
- /* results = incoming fog coords (compute fog per-fragment later)
- *
- * KW: Is it really necessary to do anything in this case?
- */
- emit_op1(p, useabs ? OPCODE_ABS : OPCODE_MOV, fog, 0, input);
- }
-}
-
-static void build_reflect_texgen( struct tnl_program *p,
- struct ureg dest,
- GLuint writemask )
-{
- struct ureg normal = get_eye_normal(p);
- struct ureg eye_hat = get_eye_position_normalized(p);
- struct ureg tmp = get_temp(p);
-
- /* n.u */
- emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
- /* 2n.u */
- emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
- /* (-2n.u)n + u */
- emit_op3(p, OPCODE_MAD, dest, writemask, ureg_negate(tmp), normal, eye_hat);
-
- release_temp(p, tmp);
-}
-
-static void build_sphere_texgen( struct tnl_program *p,
- struct ureg dest,
- GLuint writemask )
-{
- struct ureg normal = get_eye_normal(p);
- struct ureg eye_hat = get_eye_position_normalized(p);
- struct ureg tmp = get_temp(p);
- struct ureg half = register_scalar_const(p, .5);
- struct ureg r = get_temp(p);
- struct ureg inv_m = get_temp(p);
- struct ureg id = get_identity_param(p);
-
- /* Could share the above calculations, but it would be
- * a fairly odd state for someone to set (both sphere and
- * reflection active for different texture coordinate
-    * components).  Of course - if two texture units enable
-    * reflect and/or sphere, things start to tilt in favour
-    * of separating this out:
- */
-
- /* n.u */
- emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
- /* 2n.u */
- emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
- /* (-2n.u)n + u */
- emit_op3(p, OPCODE_MAD, r, 0, ureg_negate(tmp), normal, eye_hat);
- /* r + 0,0,1 */
- emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
- /* rx^2 + ry^2 + (rz+1)^2 */
- emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
- /* 2/m */
- emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
- /* 1/m */
- emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
- /* r/m + 1/2 */
- emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
-
- release_temp(p, tmp);
- release_temp(p, r);
- release_temp(p, inv_m);
-}
-
-
-static void build_texture_transform( struct tnl_program *p )
-{
- GLuint i, j;
-
- for (i = 0; i < MAX_TEXTURE_UNITS; i++) {
-
- if (!(p->state->fragprog_inputs_read & (FRAG_BIT_TEX0<<i)))
- continue;
-
- if (p->state->unit[i].texgen_enabled ||
- p->state->unit[i].texmat_enabled) {
-
- GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
- struct ureg out = register_output(p, VERT_RESULT_TEX0 + i);
- struct ureg out_texgen = undef;
-
- if (p->state->unit[i].texgen_enabled) {
- GLuint copy_mask = 0;
- GLuint sphere_mask = 0;
- GLuint reflect_mask = 0;
- GLuint normal_mask = 0;
- GLuint modes[4];
-
- if (texmat_enabled)
- out_texgen = get_temp(p);
- else
- out_texgen = out;
-
- modes[0] = p->state->unit[i].texgen_mode0;
- modes[1] = p->state->unit[i].texgen_mode1;
- modes[2] = p->state->unit[i].texgen_mode2;
- modes[3] = p->state->unit[i].texgen_mode3;
-
- for (j = 0; j < 4; j++) {
- switch (modes[j]) {
- case TXG_OBJ_LINEAR: {
- struct ureg obj = register_input(p, VERT_ATTRIB_POS);
- struct ureg plane =
- register_param3(p, STATE_TEXGEN, i,
- STATE_TEXGEN_OBJECT_S + j);
-
- emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
- obj, plane );
- break;
- }
- case TXG_EYE_LINEAR: {
- struct ureg eye = get_eye_position(p);
- struct ureg plane =
- register_param3(p, STATE_TEXGEN, i,
- STATE_TEXGEN_EYE_S + j);
-
- emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
- eye, plane );
- break;
- }
- case TXG_SPHERE_MAP:
- sphere_mask |= WRITEMASK_X << j;
- break;
- case TXG_REFLECTION_MAP:
- reflect_mask |= WRITEMASK_X << j;
- break;
- case TXG_NORMAL_MAP:
- normal_mask |= WRITEMASK_X << j;
- break;
- case TXG_NONE:
- copy_mask |= WRITEMASK_X << j;
- }
-
- }
-
-
- if (sphere_mask) {
- build_sphere_texgen(p, out_texgen, sphere_mask);
- }
-
- if (reflect_mask) {
- build_reflect_texgen(p, out_texgen, reflect_mask);
- }
-
- if (normal_mask) {
- struct ureg normal = get_eye_normal(p);
- emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
- }
-
- if (copy_mask) {
- struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
- emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
- }
- }
-
- if (texmat_enabled) {
- struct ureg texmat[4];
- struct ureg in = (!is_undef(out_texgen) ?
- out_texgen :
- register_input(p, VERT_ATTRIB_TEX0+i));
- if (PREFER_DP4) {
- register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
- 0, texmat );
- emit_matrix_transform_vec4( p, out, texmat, in );
- }
- else {
- register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
- STATE_MATRIX_TRANSPOSE, texmat );
- emit_transpose_matrix_transform_vec4( p, out, texmat, in );
- }
- }
-
- release_temps(p);
- }
- else {
- emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i);
- }
- }
-}
-
-
-/* Seems like it could be tighter:
- */
-static void build_pointsize( struct tnl_program *p )
-{
- struct ureg eye = get_eye_position(p);
- struct ureg state_size = register_param1(p, STATE_POINT_SIZE);
- struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
- struct ureg out = register_output(p, VERT_RESULT_PSIZ);
- struct ureg ut = get_temp(p);
-
- /* 1, Z, Z * Z, 1 */
- emit_op1(p, OPCODE_MOV, ut, WRITEMASK_XW, swizzle1(get_identity_param(p), W));
- emit_op1(p, OPCODE_ABS, ut, WRITEMASK_YZ, swizzle1(eye, Z));
- emit_op2(p, OPCODE_MUL, ut, WRITEMASK_Z, ut, ut);
-
-
- /* p1 + p2 * dist + p3 * dist * dist, 0 */
- emit_op2(p, OPCODE_DP3, ut, WRITEMASK_X, ut, state_attenuation);
-
- /* 1 / sqrt(factor) */
- emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
-
- /* ut = pointSize / factor */
- emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
-
- /* Clamp to min/max - state_size.[yz]
- */
- emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
- emit_op2(p, OPCODE_MIN, out, 0, swizzle1(ut, X), swizzle1(state_size, Z));
-
- release_temp(p, ut);
-}
-
-static void build_tnl_program( struct tnl_program *p )
-{
- /* Emit the program, starting with modelviewproject:
- */
- build_hpos(p);
-
- /* Lighting calculations:
- */
- if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) {
- if (p->state->light_global_enabled)
- build_lighting(p);
- else {
- if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
- emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0);
-
- if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
- emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1);
- }
- }
-
- if ((p->state->fragprog_inputs_read & FRAG_BIT_FOGC) ||
- p->state->fog_option != FOG_NONE)
- build_fog(p);
-
- if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY)
- build_texture_transform(p);
-
- if (p->state->point_attenuated)
- build_pointsize(p);
-
- /* Finish up:
- */
- emit_op1(p, OPCODE_END, undef, 0, undef);
-
- /* Disassemble:
- */
- if (DISASSEM) {
- _mesa_printf ("\n");
- }
-}
-
-
-static void build_new_tnl_program( const struct state_key *key,
- struct gl_vertex_program *program,
- GLuint max_temps)
-{
- struct tnl_program p;
-
- _mesa_memset(&p, 0, sizeof(p));
- p.state = key;
- p.program = program;
- p.eye_position = undef;
- p.eye_position_normalized = undef;
- p.eye_normal = undef;
- p.identity = undef;
- p.temp_in_use = 0;
- p.nr_instructions = 16;
-
- if (max_temps >= sizeof(int) * 8)
- p.temp_reserved = 0;
- else
- p.temp_reserved = ~((1<<max_temps)-1);
-
- p.program->Base.Instructions =
- _mesa_malloc(sizeof(struct prog_instruction) * p.nr_instructions);
- p.program->Base.String = 0;
- p.program->Base.NumInstructions =
- p.program->Base.NumTemporaries =
- p.program->Base.NumParameters =
- p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0;
- p.program->Base.Parameters = _mesa_new_parameter_list();
- p.program->Base.InputsRead = 0;
- p.program->Base.OutputsWritten = 0;
-
- build_tnl_program( &p );
-}
-
-static void *search_cache( struct brw_tnl_cache *cache,
- GLuint hash,
- const void *key,
- GLuint keysize)
-{
- struct brw_tnl_cache_item *c;
-
- for (c = cache->items[hash % cache->size]; c; c = c->next) {
- if (c->hash == hash && memcmp(c->key, key, keysize) == 0)
- return c->data;
- }
-
- return NULL;
-}
-
-static void rehash( struct brw_tnl_cache *cache )
-{
- struct brw_tnl_cache_item **items;
- struct brw_tnl_cache_item *c, *next;
- GLuint size, i;
-
- size = cache->size * 3;
- items = (struct brw_tnl_cache_item**) _mesa_malloc(size * sizeof(*items));
- _mesa_memset(items, 0, size * sizeof(*items));
-
- for (i = 0; i < cache->size; i++)
- for (c = cache->items[i]; c; c = next) {
- next = c->next;
- c->next = items[c->hash % size];
- items[c->hash % size] = c;
- }
-
- FREE(cache->items);
- cache->items = items;
- cache->size = size;
-}
-
-static void cache_item( struct brw_tnl_cache *cache,
- GLuint hash,
- const struct state_key *key,
- void *data )
-{
- struct brw_tnl_cache_item *c = MALLOC(sizeof(*c));
- c->hash = hash;
-
- c->key = malloc(sizeof(*key));
- memcpy(c->key, key, sizeof(*key));
-
- c->data = data;
-
- if (++cache->n_items > cache->size * 1.5)
- rehash(cache);
-
- c->next = cache->items[hash % cache->size];
- cache->items[hash % cache->size] = c;
-}
-
-
-static GLuint hash_key( struct state_key *key )
-{
- GLuint *ikey = (GLuint *)key;
- GLuint hash = 0, i;
-
- /* I'm sure this can be improved on, but speed is important:
- */
- for (i = 0; i < sizeof(*key)/sizeof(GLuint); i++)
- hash += ikey[i];
-
- return hash;
-}
-
-static int prepare_tnl_program( struct brw_context *brw )
-{
- GLcontext *ctx = &brw->intel.ctx;
- struct state_key key;
- GLuint hash;
- struct gl_vertex_program *old = brw->tnl_program;
-
- /* _NEW_PROGRAM */
- if (brw->attribs.VertexProgram->_Current)
- return 0;
-
-   /* Grab all the relevant state and put it in a single structure:
- */
- make_state_key(ctx, &key);
- hash = hash_key(&key);
-
- /* Look for an already-prepared program for this state:
- */
- brw->tnl_program = (struct gl_vertex_program *)
- search_cache( &brw->tnl_program_cache, hash, &key, sizeof(key) );
-
- /* OK, we'll have to build a new one:
- */
- if (!brw->tnl_program) {
- brw->tnl_program = (struct gl_vertex_program *)
- ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0);
-
- build_new_tnl_program( &key, brw->tnl_program,
-/* ctx->Const.MaxVertexProgramTemps */
- 32
- );
-
- if (ctx->Driver.ProgramStringNotify)
- ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB,
- &brw->tnl_program->Base );
-
- cache_item( &brw->tnl_program_cache,
- hash, &key, brw->tnl_program );
- }
-
- if (old != brw->tnl_program)
- brw->state.dirty.brw |= BRW_NEW_TNL_PROGRAM;
- return 0;
-}
-
-/* Note: See brw_draw.c - the vertex program must not rely on
- * brw->primitive or brw->reduced_prim.
- */
-const struct brw_tracked_state brw_tnl_vertprog = {
- .dirty = {
- .mesa = (_NEW_PROGRAM |
- _NEW_LIGHT |
- _NEW_TRANSFORM |
- _NEW_FOG |
- _NEW_HINT |
- _NEW_POINT |
- _NEW_TEXTURE |
- _NEW_TEXTURE_MATRIX),
- .brw = (BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_INPUT_VARYING),
- .cache = 0
- },
- .prepare = prepare_tnl_program
-};
-
-
-
-
-static int prepare_active_vertprog( struct brw_context *brw )
-{
- const struct gl_vertex_program *prev = brw->vertex_program;
-
- /* NEW_PROGRAM */
- if (brw->attribs.VertexProgram->_Current) {
- brw->vertex_program = brw->attribs.VertexProgram->_Current;
- }
- else {
- /* BRW_NEW_TNL_PROGRAM */
- brw->vertex_program = brw->tnl_program;
- }
-
- if (brw->vertex_program != prev)
- brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
-
- return 0;
-}
-
-
-
-const struct brw_tracked_state brw_active_vertprog = {
- .dirty = {
- .mesa = _NEW_PROGRAM,
- .brw = BRW_NEW_TNL_PROGRAM,
- .cache = 0
- },
- .prepare = prepare_active_vertprog
-};
-
-
-void brw_ProgramCacheInit( GLcontext *ctx )
-{
- struct brw_context *brw = brw_context(ctx);
-
- brw->tnl_program_cache.size = 17;
- brw->tnl_program_cache.n_items = 0;
- brw->tnl_program_cache.items = (struct brw_tnl_cache_item **)
- _mesa_calloc(brw->tnl_program_cache.size *
- sizeof(struct brw_tnl_cache_item));
-}
-
-void brw_ProgramCacheDestroy( GLcontext *ctx )
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_tnl_cache_item *c, *next;
- GLuint i;
-
- for (i = 0; i < brw->tnl_program_cache.size; i++)
- for (c = brw->tnl_program_cache.items[i]; c; c = next) {
- next = c->next;
- FREE(c->key);
- FREE(c->data);
- FREE(c);
- }
-
- FREE(brw->tnl_program_cache.items);
-}
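
The build_fog() path in the file deleted above evaluated the classic GL fog factor per vertex, working from the precomputed STATE_FOG_PARAMS_OPTIMIZED constants. A minimal standalone C sketch of the same math, assuming start/end/density come straight from the GL fog state (the fog_factor() helper below is purely illustrative and not part of the driver):

#include <math.h>

/* Per-vertex fog factor as build_fog() emitted it, one case per FOG_* mode.
 * z is the eye-space fog coordinate (or |z| when fog_source_is_depth).
 */
static float fog_factor(int mode, float z, float start, float end, float density)
{
    float f;
    switch (mode) {
    case 1: /* FOG_LINEAR: MAD with -1/(end-start) and end/(end-start) */
        f = (end - z) / (end - start);
        break;
    case 2: /* FOG_EXP: EX2 of -(density/ln 2) * z */
        f = expf(-density * z);
        break;
    case 3: /* FOG_EXP2: EX2 of -((density/sqrt(ln 2)) * z) squared */
        f = expf(-(density * z) * (density * z));
        break;
    default: /* FOG_NONE: no fog, full visibility */
        f = 1.0f;
        break;
    }
    if (f < 0.0f) f = 0.0f;   /* build_fog() clamps the LINEAR case with MAX/MIN */
    if (f > 1.0f) f = 1.0f;
    return f;
}
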
diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c
index 31e96a2..f7293ef 100644
--- a/i965/brw_vtbl.c
+++ b/i965/brw_vtbl.c
@@ -32,11 +32,11 @@
-#include "glheader.h"
-#include "mtypes.h"
-#include "imports.h"
-#include "macros.h"
-#include "colormac.h"
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/colormac.h"
#include "intel_batchbuffer.h"
#include "intel_regions.h"
@@ -51,20 +51,52 @@
#include "brw_vs.h"
#include <stdarg.h>
+static void
+dri_bo_release(dri_bo **bo)
+{
+ dri_bo_unreference(*bo);
+ *bo = NULL;
+}
/* called from intelDestroyContext()
*/
static void brw_destroy_context( struct intel_context *intel )
{
- GLcontext *ctx = &intel->ctx;
struct brw_context *brw = brw_context(&intel->ctx);
+ int i;
brw_destroy_metaops(brw);
brw_destroy_state(brw);
brw_draw_destroy( brw );
- brw_ProgramCacheDestroy( ctx );
brw_FrameBufferTexDestroy( brw );
+
+ for (i = 0; i < brw->state.nr_draw_regions; i++)
+ intel_region_release(&brw->state.draw_regions[i]);
+ brw->state.nr_draw_regions = 0;
+ intel_region_release(&brw->state.depth_region);
+
+ dri_bo_release(&brw->curbe.curbe_bo);
+ dri_bo_release(&brw->vs.prog_bo);
+ dri_bo_release(&brw->vs.state_bo);
+ dri_bo_release(&brw->gs.prog_bo);
+ dri_bo_release(&brw->gs.state_bo);
+ dri_bo_release(&brw->clip.prog_bo);
+ dri_bo_release(&brw->clip.state_bo);
+ dri_bo_release(&brw->clip.vp_bo);
+ dri_bo_release(&brw->sf.prog_bo);
+ dri_bo_release(&brw->sf.state_bo);
+ dri_bo_release(&brw->sf.vp_bo);
+ for (i = 0; i < BRW_MAX_TEX_UNIT; i++)
+ dri_bo_release(&brw->wm.sdc_bo[i]);
+ dri_bo_release(&brw->wm.bind_bo);
+ for (i = 0; i < BRW_WM_MAX_SURF; i++)
+ dri_bo_release(&brw->wm.surf_bo[i]);
+ dri_bo_release(&brw->wm.prog_bo);
+ dri_bo_release(&brw->wm.state_bo);
+ dri_bo_release(&brw->cc.prog_bo);
+ dri_bo_release(&brw->cc.state_bo);
+ dri_bo_release(&brw->cc.vp_bo);
}
/* called from intelDrawBuffer()
@@ -87,6 +119,15 @@ static void brw_set_draw_region( struct intel_context *intel,
brw->state.nr_draw_regions = num_regions;
}
+/* called from intel_batchbuffer_flush and children before sending a
+ * batchbuffer off.
+ */
+static void brw_finish_batch(struct intel_context *intel)
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ brw_emit_query_end(brw);
+}
/* called from intelFlushBatchLocked
*/
@@ -97,8 +138,7 @@ static void brw_new_batch( struct intel_context *intel )
/* Check that we didn't just wrap our batchbuffer at a bad time. */
assert(!brw->no_batch_wrap);
- dri_bo_unreference(brw->curbe.curbe_bo);
- brw->curbe.curbe_bo = NULL;
+ brw->curbe.need_new_bo = GL_TRUE;
/* Mark all context state as needing to be re-emitted.
* This is probably not as severe as on 915, since almost all of our state
@@ -131,8 +171,6 @@ static void brw_note_unlock( struct intel_context *intel )
struct brw_context *brw = brw_context(&intel->ctx);
brw_state_cache_check_size(brw);
-
- brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_LOCK;
}
@@ -186,6 +224,7 @@ void brwInitVtbl( struct brw_context *brw )
brw->intel.vtbl.note_fence = brw_note_fence;
brw->intel.vtbl.note_unlock = brw_note_unlock;
brw->intel.vtbl.new_batch = brw_new_batch;
+ brw->intel.vtbl.finish_batch = brw_finish_batch;
brw->intel.vtbl.destroy = brw_destroy_context;
brw->intel.vtbl.set_draw_region = brw_set_draw_region;
brw->intel.vtbl.flush_cmd = brw_flush_cmd;
diff --git a/i965/brw_wm.c b/i965/brw_wm.c
index a470a25..c50b0d2 100644
--- a/i965/brw_wm.c
+++ b/i965/brw_wm.c
@@ -36,63 +36,24 @@
#include "brw_state.h"
+/** Return number of src args for given instruction */
GLuint brw_wm_nr_args( GLuint opcode )
{
switch (opcode) {
-
case WM_PIXELXY:
- case OPCODE_ABS:
- case OPCODE_FLR:
- case OPCODE_FRC:
- case OPCODE_SWZ:
- case OPCODE_MOV:
- case OPCODE_COS:
- case OPCODE_EX2:
- case OPCODE_LG2:
- case OPCODE_RCP:
- case OPCODE_RSQ:
- case OPCODE_SIN:
- case OPCODE_SCS:
- case OPCODE_TEX:
- case OPCODE_TXB:
- case OPCODE_TXP:
- case OPCODE_KIL:
- case OPCODE_LIT:
- case WM_CINTERP:
- case WM_WPOSXY:
+ case WM_CINTERP:
+ case WM_WPOSXY:
return 1;
-
- case OPCODE_POW:
- case OPCODE_SUB:
- case OPCODE_SGE:
- case OPCODE_SGT:
- case OPCODE_SLE:
- case OPCODE_SLT:
- case OPCODE_SEQ:
- case OPCODE_SNE:
- case OPCODE_ADD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MUL:
- case OPCODE_XPD:
- case OPCODE_DP3:
- case OPCODE_DP4:
- case OPCODE_DPH:
- case OPCODE_DST:
- case WM_LINTERP:
+ case WM_LINTERP:
case WM_DELTAXY:
case WM_PIXELW:
return 2;
-
case WM_FB_WRITE:
- case WM_PINTERP:
- case OPCODE_MAD:
- case OPCODE_CMP:
- case OPCODE_LRP:
+ case WM_PINTERP:
return 3;
-
default:
- return 0;
+ assert(opcode < MAX_OPCODE);
+ return _mesa_num_inst_src_regs(opcode);
}
}
@@ -175,6 +136,9 @@ static void do_wm_prog( struct brw_context *brw,
*/
brw_wm_emit(c);
}
+ if (INTEL_DEBUG & DEBUG_WM)
+ fprintf(stderr, "\n");
+
/* get the program
*/
program = brw_get_program(&c->func, &program_size);
@@ -230,12 +194,6 @@ static void brw_wm_populate_key( struct brw_context *brw,
lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
}
- /* XXX: when should this be disabled?
- */
- if (1)
- lookup |= IZ_EARLY_DEPTH_TEST_BIT;
-
-
line_aa = AA_NEVER;
/* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
@@ -322,7 +280,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
}
-static int brw_prepare_wm_prog( struct brw_context *brw )
+static void brw_prepare_wm_prog(struct brw_context *brw)
{
struct brw_wm_prog_key key;
struct brw_fragment_program *fp = (struct brw_fragment_program *)
@@ -339,8 +297,6 @@ static int brw_prepare_wm_prog( struct brw_context *brw )
&brw->wm.prog_data);
if (brw->wm.prog_bo == NULL)
do_wm_prog(brw, fp, &key);
-
- return dri_bufmgr_check_aperture_space(brw->wm.prog_bo);
}
diff --git a/i965/brw_wm.h b/i965/brw_wm.h
index 297617e..ded0796 100644
--- a/i965/brw_wm.h
+++ b/i965/brw_wm.h
@@ -49,8 +49,7 @@
#define IZ_DEPTH_TEST_ENABLE_BIT 0x8
#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10
#define IZ_STENCIL_TEST_ENABLE_BIT 0x20
-#define IZ_EARLY_DEPTH_TEST_BIT 0x40
-#define IZ_BIT_MAX 0x80
+#define IZ_BIT_MAX 0x40
#define AA_NEVER 0
#define AA_SOMETIMES 1
@@ -157,6 +156,7 @@ struct brw_wm_instruction {
#define BRW_WM_MAX_PARAM 256
#define BRW_WM_MAX_CONST 256
#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
+#define BRW_WM_MAX_SUBROUTINE 16
@@ -246,7 +246,10 @@ struct brw_wm_compile {
struct brw_reg stack;
struct brw_reg emit_mask_reg;
GLuint reg_index;
+ GLuint tmp_regs[BRW_WM_MAX_GRF];
GLuint tmp_index;
+ GLuint tmp_max;
+ GLuint subroutines[BRW_WM_MAX_SUBROUTINE];
};
diff --git a/i965/brw_wm_debug.c b/i965/brw_wm_debug.c
index f31d097..8f07f89 100644
--- a/i965/brw_wm_debug.c
+++ b/i965/brw_wm_debug.c
@@ -163,9 +163,9 @@ void brw_wm_print_program( struct brw_wm_compile *c,
{
GLuint insn;
- _mesa_printf("\n\n\n%s:\n", stage);
+ _mesa_printf("%s:\n", stage);
for (insn = 0; insn < c->nr_insns; insn++)
brw_wm_print_insn(c, &c->instruction[insn]);
- _mesa_printf("\n\n\n");
+ _mesa_printf("\n");
}
diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c
index 9b919b9..b5050a3 100644
--- a/i965/brw_wm_emit.c
+++ b/i965/brw_wm_emit.c
@@ -30,7 +30,7 @@
*/
-#include "macros.h"
+#include "main/macros.h"
#include "brw_context.h"
#include "brw_wm.h"
@@ -194,7 +194,7 @@ static void emit_linterp( struct brw_compile *p,
interp[2] = brw_vec1_grf(nr+1, 0);
interp[3] = brw_vec1_grf(nr+1, 4);
- for(i = 0; i < 4; i++ ) {
+ for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
@@ -219,42 +219,40 @@ static void emit_pinterp( struct brw_compile *p,
interp[2] = brw_vec1_grf(nr+1, 0);
interp[3] = brw_vec1_grf(nr+1, 4);
- for(i = 0; i < 4; i++ ) {
+ for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
}
}
- for(i = 0; i < 4; i++ ) {
+ for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
brw_MUL(p, dst[i], dst[i], w[3]);
}
}
}
+
static void emit_cinterp( struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
const struct brw_reg *arg0 )
{
- struct brw_reg interp[4];
- GLuint nr = arg0[0].nr;
- GLuint i;
-
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
-
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
- }
- }
-}
-
+ struct brw_reg interp[4];
+ GLuint nr = arg0[0].nr;
+ GLuint i;
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
+ }
+ }
+}
static void emit_alu1( struct brw_compile *p,
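
For context on the interpolators being tidied here: emit_pinterp() evaluates the attribute's plane equation at each pixel's screen-space delta and then multiplies by the per-pixel W, which is how this path gets perspective-correct values from the attr/w coefficients set up earlier in the pipeline. A rough sketch of that arithmetic, with illustrative names only:

/* attr/w is interpolated linearly in screen space (the LINE + MAC pair),
 * then scaled back by the pixel's W (the final MUL in emit_pinterp).
 */
static float interp_perspective(const float coef[3], float dx, float dy, float pixel_w)
{
    float attr_over_w = coef[0] + coef[1] * dx + coef[2] * dy;
    return attr_over_w * pixel_w;
}
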
@@ -280,6 +278,7 @@ static void emit_alu1( struct brw_compile *p,
brw_set_saturate(p, 0);
}
+
static void emit_alu2( struct brw_compile *p,
struct brw_instruction *(*func)(struct brw_compile *,
struct brw_reg,
@@ -351,6 +350,7 @@ static void emit_lrp( struct brw_compile *p,
}
}
}
+
static void emit_sop( struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
@@ -376,7 +376,7 @@ static void emit_slt( struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *arg1 )
{
- emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
}
static void emit_sle( struct brw_compile *p,
@@ -385,7 +385,7 @@ static void emit_sle( struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *arg1 )
{
- emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
}
static void emit_sgt( struct brw_compile *p,
@@ -394,7 +394,7 @@ static void emit_sgt( struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *arg1 )
{
- emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
}
static void emit_sge( struct brw_compile *p,
@@ -403,7 +403,7 @@ static void emit_sge( struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *arg1 )
{
- emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
}
static void emit_seq( struct brw_compile *p,
@@ -412,7 +412,7 @@ static void emit_seq( struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *arg1 )
{
- emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
}
static void emit_sne( struct brw_compile *p,
@@ -421,7 +421,7 @@ static void emit_sne( struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *arg1 )
{
- emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
}
static void emit_cmp( struct brw_compile *p,
@@ -505,7 +505,7 @@ static void emit_dp3( struct brw_compile *p,
const struct brw_reg *arg1 )
{
if (!(mask & WRITEMASK_XYZW))
- return; /* Do not emit dead code*/
+ return; /* Do not emit dead code */
assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
@@ -525,7 +525,7 @@ static void emit_dp4( struct brw_compile *p,
const struct brw_reg *arg1 )
{
if (!(mask & WRITEMASK_XYZW))
- return; /* Do not emit dead code*/
+ return; /* Do not emit dead code */
assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
@@ -546,7 +546,7 @@ static void emit_dph( struct brw_compile *p,
const struct brw_reg *arg1 )
{
if (!(mask & WRITEMASK_XYZW))
- return; /* Do not emit dead code*/
+ return; /* Do not emit dead code */
assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
@@ -592,7 +592,7 @@ static void emit_math1( struct brw_compile *p,
const struct brw_reg *arg0 )
{
if (!(mask & WRITEMASK_XYZW))
- return; /* Do not emit dead code*/
+ return; /* Do not emit dead code */
//assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
// function == BRW_MATH_FUNCTION_SINCOS);
@@ -619,7 +619,7 @@ static void emit_math2( struct brw_compile *p,
const struct brw_reg *arg1)
{
if (!(mask & WRITEMASK_XYZW))
- return; /* Do not emit dead code*/
+ return; /* Do not emit dead code */
assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
@@ -760,7 +760,6 @@ static void emit_txb( struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(8), arg[3]);
msgLength = 9;
-
brw_SAMPLE(p,
retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
1,
@@ -772,7 +771,6 @@ static void emit_txb( struct brw_wm_compile *c,
8, /* responseLength */
msgLength,
0);
-
}
@@ -823,7 +821,6 @@ static void emit_kil( struct brw_wm_compile *c,
struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
GLuint i;
-
/* XXX - usually won't need 4 compares!
*/
for (i = 0; i < 4; i++) {
@@ -836,6 +833,7 @@ static void emit_kil( struct brw_wm_compile *c,
}
}
+
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
GLuint nr,
@@ -869,6 +867,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
eot);
}
+
static void emit_aa( struct brw_wm_compile *c,
struct brw_reg *arg1,
GLuint reg )
@@ -962,7 +961,6 @@ static void emit_fb_write( struct brw_wm_compile *c,
nr += 2;
}
-
if (!c->key.runtime_check_aads_emit) {
if (c->key.aa_dest_stencil_reg)
emit_aa(c, arg1, 2);
@@ -996,8 +994,6 @@ static void emit_fb_write( struct brw_wm_compile *c,
}
-
-
/* Post-fragment-program processing. Send the results to the
* framebuffer.
*/
@@ -1022,6 +1018,7 @@ static void emit_spill( struct brw_wm_compile *c,
slot);
}
+
static void emit_unspill( struct brw_wm_compile *c,
struct brw_reg reg,
GLuint slot )
@@ -1047,7 +1044,6 @@ static void emit_unspill( struct brw_wm_compile *c,
}
-
/**
  * Retrieve up to 4 GEN4 register pairs for the given wm reg:
*/
@@ -1073,6 +1069,7 @@ static void get_argument_regs( struct brw_wm_compile *c,
}
}
+
static void spill_values( struct brw_wm_compile *c,
struct brw_wm_value *values,
GLuint nr )
@@ -1085,7 +1082,6 @@ static void spill_values( struct brw_wm_compile *c,
}
-
/* Emit the fragment program instructions here.
*/
void brw_wm_emit( struct brw_wm_compile *c )
@@ -1176,7 +1172,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
break;
- case OPCODE_DP3: /* */
+ case OPCODE_DP3:
emit_dp3(p, dst, dst_flags, args[0], args[1]);
break;
@@ -1188,7 +1184,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_dph(p, dst, dst_flags, args[0], args[1]);
break;
- case OPCODE_LRP: /* */
+ case OPCODE_LRP:
emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
break;
@@ -1302,8 +1298,10 @@ void brw_wm_emit( struct brw_wm_compile *c )
break;
default:
- _mesa_printf("unsupport opcode %d in fragment program\n",
- inst->opcode);
+ _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
+ inst->opcode, inst->opcode < MAX_OPCODE ?
+ _mesa_opcode_string(inst->opcode) :
+ "unknown");
}
for (i = 0; i < 4; i++)
@@ -1313,8 +1311,3 @@ void brw_wm_emit( struct brw_wm_compile *c )
inst->dst[i]->spill_slot);
}
}
-
-
-
-
-
diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c
index bc933fe..6df2c95 100644
--- a/i965/brw_wm_fp.c
+++ b/i965/brw_wm_fp.c
@@ -30,9 +30,9 @@
*/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "brw_context.h"
#include "brw_wm.h"
#include "brw_util.h"
@@ -122,10 +122,11 @@ static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
reg.File = file;
reg.Index = idx;
reg.WriteMask = WRITEMASK_XYZW;
+ reg.RelAddr = 0;
reg.CondMask = 0;
reg.CondSwizzle = 0;
- reg.pad = 0;
reg.CondSrc = 0;
+ reg.pad = 0;
return reg;
}
@@ -426,10 +427,6 @@ static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
idx = _mesa_add_state_reference( paramList, tokens );
- /* Recalculate state dependency:
- */
- c->fp->param_state = paramList->StateFlags;
-
return src_reg(PROGRAM_STATE_VAR, idx);
}
@@ -814,62 +811,11 @@ static void precalc_txp( struct brw_wm_compile *c,
-
-
-/***********************************************************************
- * Add instructions to perform fog blending
- */
-
-static void fog_blend( struct brw_wm_compile *c,
- struct prog_src_register fog_factor )
-{
- struct prog_dst_register outcolor = dst_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
- struct prog_src_register fogcolor = search_or_add_param5( c, STATE_FOG_COLOR, 0,0,0,0 );
-
- /* color.xyz = LRP fog_factor.xxxx, output_color, fog_color */
-
- emit_op(c,
- OPCODE_LRP,
- dst_mask(outcolor, WRITEMASK_XYZ),
- 0, 0, 0,
- fog_factor,
- src_reg_from_dst(outcolor),
- fogcolor);
-}
-
-
-
-/* This one is simple - just take the interpolated fog coordinate and
- * use it as the fog blend factor.
- */
-static void fog_interpolated( struct brw_wm_compile *c )
-{
- struct prog_src_register fogc = src_reg(PROGRAM_INPUT, FRAG_ATTRIB_FOGC);
-
- if (!(c->fp_interp_emitted & (1<<FRAG_ATTRIB_FOGC)))
- emit_interp(c, FRAG_ATTRIB_FOGC);
-
- fog_blend( c, src_swizzle1(fogc, GET_SWZ(fogc.Swizzle,X)));
-}
-
-static void emit_fog( struct brw_wm_compile *c )
-{
- if (!c->fp->program.FogOption)
- return;
-
- if (1)
- fog_interpolated( c );
- else {
- /* TODO: per-pixel fog */
- assert(0);
- }
-}
-
static void emit_fb_write( struct brw_wm_compile *c )
{
- struct prog_src_register outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR);
+ struct prog_src_register outcolor;
GLuint i;
struct prog_instruction *inst, *last_inst;
@@ -893,7 +839,14 @@ static void emit_fb_write( struct brw_wm_compile *c )
}
}
last_inst->Sampler |= 1; //eot
- }else {
+ }
+ else {
+ /* if gl_FragData[0] is written, use it, else use gl_FragColor */
+ if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
+ outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
+ else
+ outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
+
inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
0, 0, 0, outcolor, payload_r0_depth, outdepth);
inst->Sampler = 1|(0<<1);
@@ -960,7 +913,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
GLuint insn;
if (INTEL_DEBUG & DEBUG_WM) {
- _mesa_printf("\n\n\npre-fp:\n");
+ _mesa_printf("pre-fp:\n");
_mesa_print_program(&fp->program.Base);
_mesa_printf("\n");
}
@@ -1055,7 +1008,6 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
emit_ddy(c, inst);
break;
case OPCODE_END:
- emit_fog(c);
emit_fb_write(c);
break;
case OPCODE_PRINT:
@@ -1068,7 +1020,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
}
if (INTEL_DEBUG & DEBUG_WM) {
- _mesa_printf("\n\n\npass_fp:\n");
+ _mesa_printf("pass_fp:\n");
print_insns( c->prog_instructions, c->nr_fp_insns );
_mesa_printf("\n");
}
diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c
index 8dce40f..8fd776a 100644
--- a/i965/brw_wm_glsl.c
+++ b/i965/brw_wm_glsl.c
@@ -1,9 +1,13 @@
-#include "macros.h"
+#include "main/macros.h"
#include "shader/prog_parameter.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
+enum _subroutine {
+ SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4
+};
+
/* Only a guess; need a flag in gl_fragment_program later */
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
{
@@ -12,13 +16,17 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
case OPCODE_IF:
- case OPCODE_INT:
+ case OPCODE_TRUNC:
case OPCODE_ENDIF:
case OPCODE_CAL:
case OPCODE_BRK:
case OPCODE_RET:
case OPCODE_DDX:
case OPCODE_DDY:
+ case OPCODE_NOISE1:
+ case OPCODE_NOISE2:
+ case OPCODE_NOISE3:
+ case OPCODE_NOISE4:
case OPCODE_BGNLOOP:
return GL_TRUE;
default:
@@ -47,13 +55,26 @@ static int get_scalar_dst_index(struct prog_instruction *inst)
static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
{
struct brw_reg reg;
- reg = brw_vec8_grf(c->tmp_index--, 0);
+ if(c->tmp_index == c->tmp_max)
+ c->tmp_regs[ c->tmp_max++ ] = c->reg_index++;
+
+ reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
return reg;
}
-static void release_tmps(struct brw_wm_compile *c)
+static int mark_tmps(struct brw_wm_compile *c)
+{
+ return c->tmp_index;
+}
+
+static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index )
{
- c->tmp_index = 127;
+ return brw_vec8_grf( c->tmp_regs[ index ], 0 );
+}
+
+static void release_tmps(struct brw_wm_compile *c, int mark)
+{
+ c->tmp_index = mark;
}
static struct brw_reg
@@ -155,6 +176,68 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
src->NegateBase, src->Abs);
}
+/* Subroutines are minimal support for reusable instruction sequences.
+ They are implemented as simply as possible to minimise overhead: there
+ is no explicit support for communication between the caller and callee
+ other than saving the return address in a temporary register, nor is
+ there any automatic local storage. This implies that great care is
+ required before attempting reentrancy or any kind of nested
+ subroutine invocations. */
+static void invoke_subroutine( struct brw_wm_compile *c,
+ enum _subroutine subroutine,
+ void (*emit)( struct brw_wm_compile * ) )
+{
+ struct brw_compile *p = &c->func;
+
+ assert( subroutine < BRW_WM_MAX_SUBROUTINE );
+
+ if( c->subroutines[ subroutine ] ) {
+ /* subroutine previously emitted: reuse existing instructions */
+
+ int mark = mark_tmps( c );
+ struct brw_reg return_address = retype( alloc_tmp( c ),
+ BRW_REGISTER_TYPE_UD );
+ int here = p->nr_insn;
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
+
+ brw_ADD( p, brw_ip_reg(), brw_ip_reg(),
+ brw_imm_d( ( c->subroutines[ subroutine ] -
+ here - 1 ) << 4 ) );
+ brw_pop_insn_state(p);
+
+ release_tmps( c, mark );
+ } else {
+ /* previously unused subroutine: emit, and mark for later reuse */
+
+ int mark = mark_tmps( c );
+ struct brw_reg return_address = retype( alloc_tmp( c ),
+ BRW_REGISTER_TYPE_UD );
+ struct brw_instruction *calc;
+ int base = p->nr_insn;
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ calc = brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 0 ) );
+ brw_pop_insn_state(p);
+
+ c->subroutines[ subroutine ] = p->nr_insn;
+
+ emit( c );
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV( p, brw_ip_reg(), return_address );
+ brw_pop_insn_state(p);
+
+ brw_set_src1( calc, brw_imm_ud( ( p->nr_insn - base ) << 4 ) );
+
+ release_tmps( c, mark );
+ }
+}
+
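For reference, a minimal host-side C sketch of the offset arithmetic behind the call convention above, assuming Gen4 instructions are 16 bytes (hence the shifts by 4) and that the jump ADD sees its own address in the IP register; the first emission instead records a placeholder immediate that is later patched, via brw_set_src1(), to the byte length of the emitted body. Indices below are hypothetical instruction numbers, not driver state:

#include <stdio.h>

void model_call_offsets(void)
{
   int here   = 200;   /* index of the ADD that saves the return address   */
   int target = 57;    /* index of the subroutine's first body instruction */

   /* imm "2 << 4": the return address is two 16-byte instructions past
    * the save ADD, i.e. the instruction right after the jump.            */
   int return_address = here + 2;

   /* the jump ADD sits at here + 1 and adds (target - here - 1)
    * instructions to its own IP, landing on the subroutine's first insn. */
   int landed = (here + 1) + (target - here - 1);

   printf("return to %d, jump lands at %d\n", return_address, landed);
}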
static void emit_abs( struct brw_wm_compile *c,
struct prog_instruction *inst)
{
@@ -172,7 +255,7 @@ static void emit_abs( struct brw_wm_compile *c,
brw_set_saturate(p, 0);
}
-static void emit_int( struct brw_wm_compile *c,
+static void emit_trunc( struct brw_wm_compile *c,
struct prog_instruction *inst)
{
int i;
@@ -184,7 +267,7 @@ static void emit_int( struct brw_wm_compile *c,
struct brw_reg src, dst;
dst = get_dst_reg(c, inst, i, 1) ;
src = get_src_reg(c, &inst->SrcReg[0], i, 1);
- brw_RNDD(p, dst, src);
+ brw_RNDZ(p, dst, src);
}
}
brw_set_saturate(p, 0);
@@ -540,7 +623,7 @@ static void emit_dph(struct brw_wm_compile *c,
brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
brw_MAC(p, dst, src0[2], src1[2]);
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_ADD(p, dst, src0[3], src1[3]);
+ brw_ADD(p, dst, dst, src1[3]);
brw_set_saturate(p, 0);
}
@@ -778,6 +861,7 @@ static void emit_lrp(struct brw_wm_compile *c,
GLuint mask = inst->DstReg.WriteMask;
struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
int i;
+ int mark = mark_tmps(c);
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
dst = get_dst_reg(c, inst, i, 1);
@@ -804,19 +888,23 @@ static void emit_lrp(struct brw_wm_compile *c,
brw_MAC(p, dst, src0, tmp1);
brw_set_saturate(p, 0);
}
- release_tmps(c);
+ release_tmps(c, mark);
}
}
+/**
+ * For GLSL shaders, this KIL will be unconditional.
+ * It may be contained inside an IF/ENDIF structure of course.
+ */
static void emit_kil(struct brw_wm_compile *c)
{
- struct brw_compile *p = &c->func;
- struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
- brw_AND(p, depth, c->emit_mask_reg, depth);
- brw_pop_insn_state(p);
+ struct brw_compile *p = &c->func;
+ struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_AND(p, depth, c->emit_mask_reg, depth);
+ brw_pop_insn_state(p);
}
static void emit_mad(struct brw_wm_compile *c,
@@ -957,6 +1045,1055 @@ static void emit_ddy(struct brw_wm_compile *c,
brw_set_saturate(p, 0);
}
+static __inline struct brw_reg high_words( struct brw_reg reg )
+{
+ return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
+ 0, 8, 2 );
+}
+
+static __inline struct brw_reg low_words( struct brw_reg reg )
+{
+ return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 );
+}
+
+static __inline struct brw_reg even_bytes( struct brw_reg reg )
+{
+ return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 );
+}
+
+static __inline struct brw_reg odd_bytes( struct brw_reg reg )
+{
+ return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ),
+ 0, 16, 2 );
+}
+
+/* One-, two- and three-dimensional Perlin noise, similar to the description
+ in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
+static void noise1_sub( struct brw_wm_compile *c ) {
+
+ struct brw_compile *p = &c->func;
+ struct brw_reg param,
+ x0, x1, /* gradients at each end */
+ t, tmp[ 2 ], /* float temporaries */
+ itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
+ int i;
+ int mark = mark_tmps( c );
+
+ x0 = alloc_tmp( c );
+ x1 = alloc_tmp( c );
+ t = alloc_tmp( c );
+ tmp[ 0 ] = alloc_tmp( c );
+ tmp[ 1 ] = alloc_tmp( c );
+ itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD );
+ itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD );
+ itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD );
+ itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD );
+ itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD );
+
+ param = lookup_tmp( c, mark - 2 );
+
+ brw_set_access_mode( p, BRW_ALIGN_1 );
+
+ brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
+
+ /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
+ be hashed. Also compute the remainder (offset within the unit
+ length), interleaved to reduce register dependency penalties. */
+ brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param );
+ brw_FRC( p, param, param );
+ brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) );
+ brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
+ brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
+
+ /* We're now ready to perform the hashing. The two hashes are
+ interleaved for performance. The hash function used is
+ designed to rapidly achieve avalanche and require only 32x16
+ bit multiplication, and 16-bit swizzles (which we get for
+ free). We can't use immediate operands in the multiplies,
+ because immediates are permitted only in src1 and the 16-bit
+ factor is permitted only in src0. */
+ for( i = 0; i < 2; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] );
+ for( i = 0; i < 2; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 2; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] );
+ for( i = 0; i < 2; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 2; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
+ for( i = 0; i < 2; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+
+ /* Now we want to initialise the two gradients based on the
+ hashes. Format conversion from signed integer to float leaves
+ everything scaled too high by a factor of pow( 2, 31 ), but
+ we correct for that right at the end. */
+ brw_ADD( p, t, param, brw_imm_f( -1.0 ) );
+ brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) );
+ brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) );
+
+ brw_MUL( p, x0, x0, param );
+ brw_MUL( p, x1, x1, t );
+
+ /* We interpolate between the gradients using the polynomial
+ 6t^5 - 15t^4 + 10t^3 (Perlin). */
+ brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+ brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the
+ pipeline */
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+ brw_MUL( p, param, tmp[ 0 ], param );
+ brw_MUL( p, x1, x1, param );
+ brw_ADD( p, x0, x0, x1 );
+ /* scale by pow( 2, -30 ), to compensate for the format conversion
+ above and an extra factor of 2 so that a single gradient covers
+ the [-1,1] range */
+ brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) );
+
+ release_tmps( c, mark );
+}
+
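A minimal host-side C sketch (assumed equivalent, not driver code) of the per-coordinate hash and fade curve that the interleaved instructions above implement; each XOR of low_words with high_words folds the upper half of the register into the lower half, and the final scale constant 0.000000000931322574615478515625 is exactly 2^-30:

#include <stdint.h>

uint32_t noise1_hash(uint32_t v)
{
   v *= 0xBA97u;  v ^= v >> 16;   /* 32x16-bit MUL, then fold high word into low word */
   v *= 0x79D9u;  v ^= v >> 16;
   v *= 0xD5B1u;  v ^= v >> 16;
   return v;
}

/* Perlin's fade curve 6t^5 - 15t^4 + 10t^3, evaluated as
 * ((6t - 15)t + 10) * t^3 to match the MUL/ADD sequence above. */
float fade(float t)
{
   return ((6.0f * t - 15.0f) * t + 10.0f) * t * t * t;
}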
+static void emit_noise1( struct brw_wm_compile *c,
+ struct prog_instruction *inst )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src, param, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ int mark = mark_tmps( c );
+
+ assert( mark == 0 );
+
+ src = get_src_reg( c, inst->SrcReg, 0, 1 );
+
+ param = alloc_tmp( c );
+
+ brw_MOV( p, param, src );
+
+ invoke_subroutine( c, SUB_NOISE1, noise1_sub );
+
+ /* Fill in the result: */
+ brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ brw_MOV( p, dst, param );
+ }
+ }
+ if( inst->SaturateMode == SATURATE_ZERO_ONE )
+ brw_set_saturate( p, 0 );
+
+ release_tmps( c, mark );
+}
+
+static void noise2_sub( struct brw_wm_compile *c ) {
+
+ struct brw_compile *p = &c->func;
+ struct brw_reg param0, param1,
+ x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */
+ t, tmp[ 4 ], /* float temporaries */
+ itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
+ int i;
+ int mark = mark_tmps( c );
+
+ x0y0 = alloc_tmp( c );
+ x0y1 = alloc_tmp( c );
+ x1y0 = alloc_tmp( c );
+ x1y1 = alloc_tmp( c );
+ t = alloc_tmp( c );
+ for( i = 0; i < 4; i++ ) {
+ tmp[ i ] = alloc_tmp( c );
+ itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+ }
+ itmp[ 4 ] = retype( x0y0, BRW_REGISTER_TYPE_UD );
+ itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD );
+ itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD );
+
+ param0 = lookup_tmp( c, mark - 3 );
+ param1 = lookup_tmp( c, mark - 2 );
+
+ brw_set_access_mode( p, BRW_ALIGN_1 );
+
+ /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
+ be hashed. Also compute the remainders (offsets within the unit
+ square), interleaved to reduce register dependency penalties. */
+ brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 );
+ brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 );
+ brw_FRC( p, param0, param0 );
+ brw_FRC( p, param1, param1 );
+ brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
+ brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ),
+ low_words( itmp[ 1 ] ) );
+ brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
+ brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
+ brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) );
+ brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) );
+ brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) );
+
+ /* We're now ready to perform the hashing. The four hashes are
+ interleaved for performance. The hash function used is
+ designed to rapidly achieve avalanche and require only 32x16
+ bit multiplication, and 16-bit swizzles (which we get for
+ free). We can't use immediate operands in the multiplies,
+ because immediates are permitted only in src1 and the 16-bit
+ factor is permitted only in src0. */
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+
+ /* Now we want to initialise the four gradients based on the
+ hashes. Format conversion from signed integer to float leaves
+ everything scaled too high by a factor of pow( 2, 15 ), but
+ we correct for that right at the end. */
+ brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
+ brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
+ brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+ brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) );
+ brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) );
+
+ brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) );
+
+ brw_MUL( p, x1y0, x1y0, t );
+ brw_MUL( p, x1y1, x1y1, t );
+ brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
+ brw_MUL( p, x0y0, x0y0, param0 );
+ brw_MUL( p, x0y1, x0y1, param0 );
+
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 );
+ brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t );
+ brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 0 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 2 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 1 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 3 ] );
+
+ /* We interpolate between the gradients using the polynomial
+ 6t^5 - 15t^4 + 10t^3 (Perlin). */
+ brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) );
+ brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
+ brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+ brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the
+ pipeline */
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
+ brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+ brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the
+ pipeline */
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+ brw_MUL( p, param0, tmp[ 0 ], param0 );
+ brw_MUL( p, param1, tmp[ 1 ], param1 );
+
+ /* Here we interpolate in the y dimension... */
+ brw_MUL( p, x0y1, x0y1, param1 );
+ brw_MUL( p, x1y1, x1y1, param1 );
+ brw_ADD( p, x0y0, x0y0, x0y1 );
+ brw_ADD( p, x1y0, x1y0, x1y1 );
+
+ /* And now in x. There are horrible register dependencies here,
+ but we have nothing else to do. */
+ brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+ brw_MUL( p, x1y0, x1y0, param0 );
+ brw_ADD( p, x0y0, x0y0, x1y0 );
+
+ /* scale by pow( 2, -15 ), as described above */
+ brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) );
+
+ release_tmps( c, mark );
+}
+
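The corner packing above can be read as the following rough C model (the sign bits of x that get added into the high word only perturb the hash and are harmless): x occupies the low 16 bits and y the high 16 bits, so the three neighbouring corners of the unit square are reached by adding 0x1, 0x10000 and 0x10001:

#include <stdint.h>

void pack_corners(int32_t x, int32_t y, uint32_t corner[4])
{
   uint32_t base = (uint32_t)x + ((uint32_t)y << 16);

   corner[0] = base;             /* (x,     y    ) */
   corner[1] = base + 0x10000;   /* (x,     y + 1) */
   corner[2] = base + 0x1;       /* (x + 1, y    ) */
   corner[3] = base + 0x10001;   /* (x + 1, y + 1) */
}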
+static void emit_noise2( struct brw_wm_compile *c,
+ struct prog_instruction *inst )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, param0, param1, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ int mark = mark_tmps( c );
+
+ assert( mark == 0 );
+
+ src0 = get_src_reg( c, inst->SrcReg, 0, 1 );
+ src1 = get_src_reg( c, inst->SrcReg, 1, 1 );
+
+ param0 = alloc_tmp( c );
+ param1 = alloc_tmp( c );
+
+ brw_MOV( p, param0, src0 );
+ brw_MOV( p, param1, src1 );
+
+ invoke_subroutine( c, SUB_NOISE2, noise2_sub );
+
+ /* Fill in the result: */
+ brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ brw_MOV( p, dst, param0 );
+ }
+ }
+ if( inst->SaturateMode == SATURATE_ZERO_ONE )
+ brw_set_saturate( p, 0 );
+
+ release_tmps( c, mark );
+}
+
+/* The three-dimensional case is much like the one- and two- versions above,
+ but since the number of corners is rapidly growing we now pack 16 16-bit
+ hashes into each register to extract more parallelism from the EUs. */
+static void noise3_sub( struct brw_wm_compile *c ) {
+
+ struct brw_compile *p = &c->func;
+ struct brw_reg param0, param1, param2,
+ x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
+ xi, yi, zi, /* interpolation coefficients */
+ t, tmp[ 8 ], /* float temporaries */
+ itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
+ wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
+ int i;
+ int mark = mark_tmps( c );
+
+ x0y0 = alloc_tmp( c );
+ x0y1 = alloc_tmp( c );
+ x1y0 = alloc_tmp( c );
+ x1y1 = alloc_tmp( c );
+ xi = alloc_tmp( c );
+ yi = alloc_tmp( c );
+ zi = alloc_tmp( c );
+ t = alloc_tmp( c );
+ for( i = 0; i < 8; i++ ) {
+ tmp[ i ] = alloc_tmp( c );
+ itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+ wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 );
+ }
+
+ param0 = lookup_tmp( c, mark - 4 );
+ param1 = lookup_tmp( c, mark - 3 );
+ param2 = lookup_tmp( c, mark - 2 );
+
+ brw_set_access_mode( p, BRW_ALIGN_1 );
+
+ /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
+ be hashed. Also compute the remainders (offsets within the unit
+ cube), interleaved to reduce register dependency penalties. */
+ brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 );
+ brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 );
+ brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 );
+ brw_FRC( p, param0, param0 );
+ brw_FRC( p, param1, param1 );
+ brw_FRC( p, param2, param2 );
+ /* Since we now have only 16 bits of precision in the hash, we must
+ be more careful about thorough mixing to maintain entropy as we
+ squash the input vector into a small scalar. */
+ brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) );
+ brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) );
+ brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ),
+ brw_imm_uw( 0x9B93 ) );
+ brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),
+ brw_imm_uw( 0xBC8F ) );
+
+ /* Temporarily disable the execution mask while we work with ExecSize=16
+ channels (the mask is set for ExecSize=8 and is probably incorrect).
+ Although this might cause execution of unwanted channels, the code
+ writes only to temporary registers and has no side effects, so
+ disabling the mask is harmless. */
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) );
+ brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) );
+ brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) );
+
+ /* We're now ready to perform the hashing. The eight hashes are
+ interleaved for performance. The hash function used is
+ designed to rapidly achieve avalanche and require only 16x16
+ bit multiplication, and 8-bit swizzles (which we get for
+ free). */
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+ odd_bytes( wtmp[ i ] ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+ odd_bytes( wtmp[ i ] ) );
+ brw_pop_insn_state( p );
+
+ /* Now we want to initialise the four rear gradients based on the
+ hashes. Format conversion from signed integer to float leaves
+ everything scaled too high by a factor of pow( 2, 15 ), but
+ we correct for that right at the end. */
+ /* x component */
+ brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
+ brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
+ brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+ brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) );
+ brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) );
+ brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, x1y0, x1y0, t );
+ brw_MUL( p, x1y1, x1y1, t );
+ brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
+ brw_MUL( p, x0y0, x0y0, param0 );
+ brw_MUL( p, x0y1, x0y1, param0 );
+
+ /* y component */
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) );
+ brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+ brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 );
+
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+
+ /* z component */
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param2 );
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param2 );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param2 );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param2 );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+ /* We interpolate between the gradients using the polynomial
+ 6t^5 - 15t^4 + 10t^3 (Perlin). */
+ brw_MUL( p, xi, param0, brw_imm_f( 6.0 ) );
+ brw_MUL( p, yi, param1, brw_imm_f( 6.0 ) );
+ brw_MUL( p, zi, param2, brw_imm_f( 6.0 ) );
+ brw_ADD( p, xi, xi, brw_imm_f( -15.0 ) );
+ brw_ADD( p, yi, yi, brw_imm_f( -15.0 ) );
+ brw_ADD( p, zi, zi, brw_imm_f( -15.0 ) );
+ brw_MUL( p, xi, xi, param0 );
+ brw_MUL( p, yi, yi, param1 );
+ brw_MUL( p, zi, zi, param2 );
+ brw_ADD( p, xi, xi, brw_imm_f( 10.0 ) );
+ brw_ADD( p, yi, yi, brw_imm_f( 10.0 ) );
+ brw_ADD( p, zi, zi, brw_imm_f( 10.0 ) );
+ brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work */
+ brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work */
+ brw_MUL( p, xi, xi, param0 );
+ brw_MUL( p, yi, yi, param1 );
+ brw_MUL( p, zi, zi, param2 );
+ brw_MUL( p, xi, xi, param0 );
+ brw_MUL( p, yi, yi, param1 );
+ brw_MUL( p, zi, zi, param2 );
+ brw_MUL( p, xi, xi, param0 );
+ brw_MUL( p, yi, yi, param1 );
+ brw_MUL( p, zi, zi, param2 );
+
+ /* Here we interpolate in the y dimension... */
+ brw_MUL( p, x0y1, x0y1, yi );
+ brw_MUL( p, x1y1, x1y1, yi );
+ brw_ADD( p, x0y0, x0y0, x0y1 );
+ brw_ADD( p, x1y0, x1y0, x1y1 );
+
+ /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
+ brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+ brw_MUL( p, x1y0, x1y0, xi );
+ brw_ADD( p, tmp[ 0 ], x0y0, x1y0 );
+
+ /* Now do the same thing for the front four gradients... */
+ /* x component */
+ brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) );
+ brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) );
+ brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) );
+ brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) );
+ brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, x1y0, x1y0, t );
+ brw_MUL( p, x1y1, x1y1, t );
+ brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
+ brw_MUL( p, x0y0, x0y0, param0 );
+ brw_MUL( p, x0y1, x0y1, param0 );
+
+ /* y component */
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) );
+ brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+ brw_ADD( p, t, param2, brw_imm_f( -1.0 ) );
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 );
+
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+
+ /* z component */
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+ /* The interpolation coefficients are still around from last time, so
+ again interpolate in the y dimension... */
+ brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
+ brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
+ brw_MUL( p, x0y1, x0y1, yi );
+ brw_MUL( p, x1y1, x1y1, yi );
+ brw_ADD( p, x0y0, x0y0, x0y1 );
+ brw_ADD( p, x1y0, x1y0, x1y1 );
+
+ /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
+ time put the front face in tmp[ 1 ] and we're nearly there... */
+ brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+ brw_MUL( p, x1y0, x1y0, xi );
+ brw_ADD( p, tmp[ 1 ], x0y0, x1y0 );
+
+ /* The final interpolation, in the z dimension: */
+ brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], zi );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] );
+
+ /* scale by pow( 2, -15 ), as described above */
+ brw_MUL( p, param0, tmp[ 0 ], brw_imm_f( 0.000030517578125 ) );
+
+ release_tmps( c, mark );
+}
+
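In the 16-bit scheme used here the base corner's scalar is a linear combination of the coordinates (x*0xBC8F + y*0xD0BD + z*0x9B93, mod 2^16), so neighbouring corners are obtained simply by adding the per-axis constants, and the hash folds the odd byte of each word into the even byte. A rough C model of one hashed word, for reference only:

#include <stdint.h>

uint16_t noise3_hash(uint16_t v)
{
   v = (uint16_t)(v * 0x28D9u);  v ^= v >> 8;   /* XOR even (low) bytes with odd (high) bytes */
   v = (uint16_t)(v * 0xC6D5u);  v ^= v >> 8;
   return v;
}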
+static void emit_noise3( struct brw_wm_compile *c,
+ struct prog_instruction *inst )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, src2, param0, param1, param2, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ int mark = mark_tmps( c );
+
+ assert( mark == 0 );
+
+ src0 = get_src_reg( c, inst->SrcReg, 0, 1 );
+ src1 = get_src_reg( c, inst->SrcReg, 1, 1 );
+ src2 = get_src_reg( c, inst->SrcReg, 2, 1 );
+
+ param0 = alloc_tmp( c );
+ param1 = alloc_tmp( c );
+ param2 = alloc_tmp( c );
+
+ brw_MOV( p, param0, src0 );
+ brw_MOV( p, param1, src1 );
+ brw_MOV( p, param2, src2 );
+
+ invoke_subroutine( c, SUB_NOISE3, noise3_sub );
+
+ /* Fill in the result: */
+ brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ brw_MOV( p, dst, param0 );
+ }
+ }
+ if( inst->SaturateMode == SATURATE_ZERO_ONE )
+ brw_set_saturate( p, 0 );
+
+ release_tmps( c, mark );
+}
+
+/* For the four-dimensional case, the little micro-optimisation benefits
+ we obtain by unrolling all the loops aren't worth the massive bloat it
+ now causes. Instead, we loop twice around performing a similar operation
+ to noise3, once for the w=0 cube and once for the w=1, with a bit more
+ code to glue it all together. */
+static void noise4_sub( struct brw_wm_compile *c ) {
+
+ struct brw_compile *p = &c->func;
+ struct brw_reg param[ 4 ],
+ x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
+ w0, /* noise for the w=0 cube */
+ floors[ 2 ], /* integer coordinates of base corner of hypercube */
+ interp[ 4 ], /* interpolation coefficients */
+ t, tmp[ 8 ], /* float temporaries */
+ itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
+ wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
+ int i, j;
+ int mark = mark_tmps( c );
+ GLuint loop, origin;
+
+ x0y0 = alloc_tmp( c );
+ x0y1 = alloc_tmp( c );
+ x1y0 = alloc_tmp( c );
+ x1y1 = alloc_tmp( c );
+ t = alloc_tmp( c );
+ w0 = alloc_tmp( c );
+ floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );
+ floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );
+
+ for( i = 0; i < 4; i++ ) {
+ param[ i ] = lookup_tmp( c, mark - 5 + i );
+ interp[ i ] = alloc_tmp( c );
+ }
+
+ for( i = 0; i < 8; i++ ) {
+ tmp[ i ] = alloc_tmp( c );
+ itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+ wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 );
+ }
+
+ brw_set_access_mode( p, BRW_ALIGN_1 );
+
+ /* We only want 16 bits of precision from the integral part of each
+ co-ordinate, but unfortunately the RNDD semantics would saturate
+ at 16 bits if we performed the operation directly to a 16-bit
+ destination. Therefore, we round to 32-bit temporaries where
+ appropriate, and then store only the lower 16 bits. */
+ brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] );
+ brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] );
+ brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] );
+ brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] );
+ brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) );
+ brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) );
+
+ /* Modify the flag register here, because the side effect is useful
+ later (see below). We know for certain that all flags will be
+ cleared, since the FRC instruction cannot possibly generate
+ negative results. Even for exceptional inputs (infinities, denormals,
+ NaNs), the architecture guarantees that the L conditional is false. */
+ brw_set_conditionalmod( p, BRW_CONDITIONAL_L );
+ brw_FRC( p, param[ 0 ], param[ 0 ] );
+ brw_set_predicate_control( p, BRW_PREDICATE_NONE );
+ for( i = 1; i < 4; i++ )
+ brw_FRC( p, param[ i ], param[ i ] );
+
+ /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
+ of all. */
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) );
+ for( i = 0; i < 4; i++ )
+ brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, interp[ i ], interp[ i ], param[ i ] );
+ for( i = 0; i < 4; i++ )
+ brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) );
+ for( j = 0; j < 3; j++ )
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, interp[ i ], interp[ i ], param[ i ] );
+
+ /* Mark the current address, as it will be a jump destination. The
+ following code will be executed twice: first, with the flag
+ register clear indicating the w=0 case, and second with flags
+ set for w=1. */
+ loop = p->nr_insn;
+
+ /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
+ be hashed. Since we have only 16 bits of precision in the hash, we
+ must be careful about thorough mixing to maintain entropy as we
+ squash the input vector into a small scalar. */
+ brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ),
+ brw_imm_uw( 0xBC8F ) );
+ brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ),
+ brw_imm_uw( 0xD0BD ) );
+ brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ),
+ brw_imm_uw( 0x9B93 ) );
+ brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ),
+ brw_imm_uw( 0xA359 ) );
+ brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),
+ brw_imm_uw( 0xBC8F ) );
+
+ /* Temporarily disable the execution mask while we work with ExecSize=16
+ channels (the mask is set for ExecSize=8 and is probably incorrect).
+ Although this might cause execution of unwanted channels, the code
+ writes only to temporary registers and has no side effects, so
+ disabling the mask is harmless. */
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) );
+ brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) );
+ brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) );
+
+ /* We're now ready to perform the hashing. The eight hashes are
+ interleaved for performance. The hash function used is
+ designed to rapidly achieve avalanche and require only 16x16
+ bit multiplication, and 8-bit swizzles (which we get for
+ free). */
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+ odd_bytes( wtmp[ i ] ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+ odd_bytes( wtmp[ i ] ) );
+ brw_pop_insn_state( p );
+
+ /* Now we want to initialise the four rear gradients based on the
+ hashes. Format conversion from signed integer to float leaves
+ everything scaled too high by a factor of pow( 2, 15 ), but
+ we correct for that right at the end. */
+ /* x component */
+ brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) );
+ brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
+ brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+ brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) );
+ brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
+ brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, x1y0, x1y0, t );
+ brw_MUL( p, x1y1, x1y1, t );
+ brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) );
+ brw_MUL( p, x0y0, x0y0, param[ 0 ] );
+ brw_MUL( p, x0y1, x0y1, param[ 0 ] );
+
+ /* y component */
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
+ brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+ /* prepare t for the w component (used below): w the first time through
+ the loop; w - 1 the second time. */
+ brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
+ brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) );
+ p->current->header.predicate_inverse = 1;
+ brw_MOV( p, t, param[ 3 ] );
+ p->current->header.predicate_inverse = 0;
+ brw_set_predicate_control( p, BRW_PREDICATE_NONE );
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] );
+
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+
+ /* z component */
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
+ brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] );
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+ /* w component */
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+ brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+ /* Here we interpolate in the y dimension... */
+ brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
+ brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
+ brw_MUL( p, x0y1, x0y1, interp[ 1 ] );
+ brw_MUL( p, x1y1, x1y1, interp[ 1 ] );
+ brw_ADD( p, x0y0, x0y0, x0y1 );
+ brw_ADD( p, x1y0, x1y0, x1y1 );
+
+ /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
+ brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+ brw_MUL( p, x1y0, x1y0, interp[ 0 ] );
+ brw_ADD( p, tmp[ 0 ], x0y0, x1y0 );
+
+ /* Now do the same thing for the front four gradients... */
+ /* x component */
+ brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) );
+ brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) );
+ brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) );
+ brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
+ brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, x1y0, x1y0, t );
+ brw_MUL( p, x1y1, x1y1, t );
+ brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) );
+ brw_MUL( p, x0y0, x0y0, param[ 0 ] );
+ brw_MUL( p, x0y1, x0y1, param[ 0 ] );
+
+ /* y component */
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
+ brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+ brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) );
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] );
+
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+
+ /* z component */
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
+ brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+ /* prepare t for the w component (used below): w the first time through
+ the loop; w - 1 the second time. */
+ brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
+ brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) );
+ p->current->header.predicate_inverse = 1;
+ brw_MOV( p, t, param[ 3 ] );
+ p->current->header.predicate_inverse = 0;
+ brw_set_predicate_control( p, BRW_PREDICATE_NONE );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+ /* w component */
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+ /* Interpolate in the y dimension: */
+ brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
+ brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
+ brw_MUL( p, x0y1, x0y1, interp[ 1 ] );
+ brw_MUL( p, x1y1, x1y1, interp[ 1 ] );
+ brw_ADD( p, x0y0, x0y0, x0y1 );
+ brw_ADD( p, x1y0, x1y0, x1y1 );
+
+ /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
+ time put the front face in tmp[ 1 ] and we're nearly there... */
+ brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+ brw_MUL( p, x1y0, x1y0, interp[ 0 ] );
+ brw_ADD( p, tmp[ 1 ], x0y0, x1y0 );
+
+ /* Another interpolation, in the z dimension: */
+ brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] );
+
+ /* Exit the loop if we've computed both cubes... */
+ origin = p->nr_insn;
+ brw_push_insn_state( p );
+ brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
+ brw_pop_insn_state( p );
+
+ /* Save the result for the w=0 case, and increment the w coordinate: */
+ brw_MOV( p, w0, tmp[ 0 ] );
+ brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ),
+ brw_imm_uw( 1 ) );
+
+ /* Loop around for the other cube. Explicitly set the flag register
+ (unfortunately we must spend an extra instruction to do this: we
+ can't rely on a side effect of the previous MOV or ADD because
+ conditional modifiers which are normally true might be false in
+ exceptional circumstances, e.g. given a NaN input; the add to
+ brw_ip_reg() is not suitable because the IP is not an 8-vector). */
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) );
+ brw_ADD( p, brw_ip_reg(), brw_ip_reg(),
+ brw_imm_d( ( loop - p->nr_insn ) << 4 ) );
+ brw_pop_insn_state( p );
+
+ /* Patch the previous conditional branch now that we know the
+ destination address. */
+ brw_set_src1( p->store + origin,
+ brw_imm_d( ( p->nr_insn - origin ) << 4 ) );
+
+ /* The very last interpolation. */
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 );
+
+ /* scale by pow( 2, -15 ), as described above */
+ brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) );
+
+ release_tmps( c, mark );
+}
+
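As in the three-dimensional case, the corner scalar here is a linear combination of the lattice coordinates, which is why the second pass only needs to increment the packed w coordinate and branch back: the corner hashes for the w=1 cube follow from the same per-axis additions. A hypothetical C model of the seed, for reference:

#include <stdint.h>

uint16_t noise4_seed(uint16_t x, uint16_t y, uint16_t z, uint16_t w)
{
   /* constants taken from the MUL/MAC sequence above; arithmetic is mod 2^16 */
   return (uint16_t)(x * 0xBC8Fu + y * 0xD0BDu + z * 0x9B93u + w * 0xA359u);
}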
+static void emit_noise4( struct brw_wm_compile *c,
+ struct prog_instruction *inst )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ int mark = mark_tmps( c );
+
+ assert( mark == 0 );
+
+ src0 = get_src_reg( c, inst->SrcReg, 0, 1 );
+ src1 = get_src_reg( c, inst->SrcReg, 1, 1 );
+ src2 = get_src_reg( c, inst->SrcReg, 2, 1 );
+ src3 = get_src_reg( c, inst->SrcReg, 3, 1 );
+
+ param0 = alloc_tmp( c );
+ param1 = alloc_tmp( c );
+ param2 = alloc_tmp( c );
+ param3 = alloc_tmp( c );
+
+ brw_MOV( p, param0, src0 );
+ brw_MOV( p, param1, src1 );
+ brw_MOV( p, param2, src2 );
+ brw_MOV( p, param3, src3 );
+
+ invoke_subroutine( c, SUB_NOISE4, noise4_sub );
+
+ /* Fill in the result: */
+ brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ brw_MOV( p, dst, param0 );
+ }
+ }
+ if( inst->SaturateMode == SATURATE_ZERO_ONE )
+ brw_set_saturate( p, 0 );
+
+ release_tmps( c, mark );
+}
+
static void emit_wpos_xy(struct brw_wm_compile *c,
struct prog_instruction *inst)
{
@@ -1201,8 +2338,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_LRP:
emit_lrp(c, inst);
break;
- case OPCODE_INT:
- emit_int(c, inst);
+ case OPCODE_TRUNC:
+ emit_trunc(c, inst);
break;
case OPCODE_MOV:
emit_mov(c, inst);
@@ -1276,6 +2413,18 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_MAD:
emit_mad(c, inst);
break;
+ case OPCODE_NOISE1:
+ emit_noise1(c, inst);
+ break;
+ case OPCODE_NOISE2:
+ emit_noise2(c, inst);
+ break;
+ case OPCODE_NOISE3:
+ emit_noise3(c, inst);
+ break;
+ case OPCODE_NOISE4:
+ emit_noise4(c, inst);
+ break;
case OPCODE_TEX:
emit_tex(c, inst);
break;
@@ -1368,7 +2517,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
brw_wm_pass_fp(c);
- c->tmp_index = 127;
brw_wm_emit_glsl(brw, c);
c->prog_data.total_grf = c->reg_index;
c->prog_data.total_scratch = 0;
diff --git a/i965/brw_wm_iz.c b/i965/brw_wm_iz.c
index ec2b976..bd60ac9 100644
--- a/i965/brw_wm_iz.c
+++ b/i965/brw_wm_iz.c
@@ -30,7 +30,7 @@
*/
-#include "mtypes.h"
+#include "main/mtypes.h"
#include "brw_wm.h"
@@ -52,70 +52,6 @@ const struct {
{
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
- { C, 0, 1, 0, 0 },
- { C, 0, 1, 0, 0 },
- { C, 1, 1, 0, 0 },
- { C, 1, 1, 0, 0 },
- { C, 0, 1, 0, 0 },
- { C, 0, 1, 0, 0 },
- { C, 1, 1, 1, 0 },
- { C, 1, 1, 1, 0 },
- { C, 0, 1, 1, 0 },
- { C, 0, 1, 1, 0 },
- { C, 1, 1, 1, 0 },
- { C, 1, 1, 1, 0 },
- { C, 0, 1, 1, 0 },
- { C, 0, 1, 1, 0 },
- { P, 0, 0, 0, 0 },
- { P, 0, 0, 0, 0 },
- { C, 0, 1, 0, 0 },
- { C, 0, 1, 0, 0 },
- { C, 1, 1, 0, 0 },
- { C, 1, 1, 0, 0 },
- { C, 0, 1, 0, 0 },
- { C, 0, 1, 0, 0 },
- { C, 1, 1, 1, 0 },
- { C, 1, 1, 1, 0 },
- { C, 0, 1, 1, 0 },
- { C, 0, 1, 1, 0 },
- { C, 1, 1, 1, 0 },
- { C, 1, 1, 1, 0 },
- { C, 0, 1, 1, 0 },
- { C, 0, 1, 1, 0 },
- { C, 0, 0, 0, 1 },
- { C, 0, 0, 0, 1 },
- { C, 0, 1, 0, 1 },
- { C, 0, 1, 0, 1 },
- { C, 1, 1, 0, 1 },
- { C, 1, 1, 0, 1 },
- { C, 0, 1, 0, 1 },
- { C, 0, 1, 0, 1 },
- { C, 1, 1, 1, 1 },
- { C, 1, 1, 1, 1 },
- { C, 0, 1, 1, 1 },
- { C, 0, 1, 1, 1 },
- { C, 1, 1, 1, 1 },
- { C, 1, 1, 1, 1 },
- { C, 0, 1, 1, 1 },
- { C, 0, 1, 1, 1 },
- { C, 0, 0, 0, 1 },
- { C, 0, 0, 0, 1 },
- { C, 0, 1, 0, 1 },
- { C, 0, 1, 0, 1 },
- { C, 1, 1, 0, 1 },
- { C, 1, 1, 0, 1 },
- { C, 0, 1, 0, 1 },
- { C, 0, 1, 0, 1 },
- { C, 1, 1, 1, 1 },
- { C, 1, 1, 1, 1 },
- { C, 0, 1, 1, 1 },
- { C, 0, 1, 1, 1 },
- { C, 1, 1, 1, 1 },
- { C, 1, 1, 1, 1 },
- { C, 0, 1, 1, 1 },
- { C, 0, 1, 1, 1 },
- { P, 0, 0, 0, 0 },
- { P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
diff --git a/i965/brw_wm_sampler_state.c b/i965/brw_wm_sampler_state.c
index d40332e..8c9cb78 100644
--- a/i965/brw_wm_sampler_state.c
+++ b/i965/brw_wm_sampler_state.c
@@ -34,7 +34,7 @@
#include "brw_state.h"
#include "brw_defines.h"
-#include "macros.h"
+#include "main/macros.h"
@@ -229,6 +229,9 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
struct wm_sampler_entry *entry = &key->sampler[unit];
struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit];
struct gl_texture_object *texObj = texUnit->_Current;
+ struct intel_texture_object *intelObj = intel_texture_object(texObj);
+ struct gl_texture_image *firstImage =
+ texObj->Image[0][intelObj->firstLevel];
entry->wrap_r = texObj->WrapR;
entry->wrap_s = texObj->WrapS;
@@ -241,11 +244,25 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
entry->minfilter = texObj->MinFilter;
entry->magfilter = texObj->MagFilter;
entry->comparemode = texObj->CompareMode;
- entry->comparefunc = texObj->CompareFunc;
+ entry->comparefunc = texObj->CompareFunc;
dri_bo_unreference(brw->wm.sdc_bo[unit]);
- brw->wm.sdc_bo[unit] = upload_default_color(brw, texObj->BorderColor);
-
+ if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+ float bordercolor[4] = {
+ texObj->BorderColor[0],
+ texObj->BorderColor[0],
+ texObj->BorderColor[0],
+ texObj->BorderColor[0]
+ };
+ /* GL specs that border color for depth textures is taken from the
+ * R channel, while the hardware uses A. Spam R into all the
+ * channels for safety.
+ */
+ brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor);
+ } else {
+ brw->wm.sdc_bo[unit] = upload_default_color(brw,
+ texObj->BorderColor);
+ }
key->sampler_count = unit + 1;
}
}
@@ -255,11 +272,10 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
* complicates various things. However, this is still too confusing -
* FIXME: simplify all the different new texture state flags.
*/
-static int upload_wm_samplers( struct brw_context *brw )
+static void upload_wm_samplers( struct brw_context *brw )
{
struct wm_sampler_key key;
int i;
- int ret = 0;
brw_wm_sampler_populate_key(brw, &key);
@@ -271,7 +287,7 @@ static int upload_wm_samplers( struct brw_context *brw )
dri_bo_unreference(brw->wm.sampler_bo);
brw->wm.sampler_bo = NULL;
if (brw->wm.sampler_count == 0)
- return 0;
+ return;
brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER,
&key, sizeof(key),
@@ -304,19 +320,14 @@ static int upload_wm_samplers( struct brw_context *brw )
if (!brw->attribs.Texture->Unit[i]._ReallyEnabled)
continue;
- ret |= dri_bufmgr_check_aperture_space(brw->wm.sdc_bo[i]);
- dri_emit_reloc(brw->wm.sampler_bo,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- 0,
- i * sizeof(struct brw_sampler_state) +
- offsetof(struct brw_sampler_state, ss2),
- brw->wm.sdc_bo[i]);
+ dri_bo_emit_reloc(brw->wm.sampler_bo,
+ I915_GEM_DOMAIN_SAMPLER, 0,
+ 0,
+ i * sizeof(struct brw_sampler_state) +
+ offsetof(struct brw_sampler_state, ss2),
+ brw->wm.sdc_bo[i]);
}
}
-
- ret |= dri_bufmgr_check_aperture_space(brw->wm.sampler_bo);
- return ret;
-
}
const struct brw_tracked_state brw_wm_samplers = {
diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c
index f4da0f2..5302405 100644
--- a/i965/brw_wm_state.c
+++ b/i965/brw_wm_state.c
@@ -34,7 +34,6 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
-#include "dri_bufmgr.h"
#include "brw_wm.h"
/***********************************************************************
@@ -68,8 +67,13 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
key->max_threads = 1;
- else
- key->max_threads = 32;
+ else {
+ /* WM maximum threads is number of EUs times number of threads per EU. */
+ if (BRW_IS_G4X(brw))
+ key->max_threads = 10 * 5;
+ else
+ key->max_threads = 8 * 4;
+ }
/* CACHE_NEW_WM_PROG */
key->total_grf = brw->wm.prog_data->total_grf;
@@ -84,7 +88,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
/* BRW_NEW_CURBE_OFFSETS */
key->curbe_offset = brw->curbe.wm_start;
- /* CACHE_NEW_SURFACE */
+ /* BRW_NEW_NR_SURFACEs */
key->nr_surfaces = brw->wm.nr_surfaces;
/* CACHE_NEW_SAMPLER */
@@ -199,40 +203,39 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
NULL, NULL);
/* Emit WM program relocation */
- dri_emit_reloc(bo,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- wm.thread0.grf_reg_count << 1,
- offsetof(struct brw_wm_unit_state, thread0),
- brw->wm.prog_bo);
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ wm.thread0.grf_reg_count << 1,
+ offsetof(struct brw_wm_unit_state, thread0),
+ brw->wm.prog_bo);
/* Emit scratch space relocation */
if (key->total_scratch != 0) {
- dri_emit_reloc(bo,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
- wm.thread2.per_thread_scratch_space,
- offsetof(struct brw_wm_unit_state, thread2),
- brw->wm.scratch_buffer);
+ dri_bo_emit_reloc(bo,
+ 0, 0,
+ wm.thread2.per_thread_scratch_space,
+ offsetof(struct brw_wm_unit_state, thread2),
+ brw->wm.scratch_buffer);
}
/* Emit sampler state relocation */
if (key->sampler_count != 0) {
- dri_emit_reloc(bo,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
- offsetof(struct brw_wm_unit_state, wm4),
- brw->wm.sampler_bo);
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
+ offsetof(struct brw_wm_unit_state, wm4),
+ brw->wm.sampler_bo);
}
return bo;
}
-static int upload_wm_unit( struct brw_context *brw )
+static void upload_wm_unit( struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
struct brw_wm_unit_key key;
dri_bo *reloc_bufs[3];
- int ret = 0, i;
wm_unit_populate_key(brw, &key);
/* Allocate the necessary scratch space if we haven't already. Don't
@@ -251,7 +254,7 @@ static int upload_wm_unit( struct brw_context *brw )
brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr,
"wm scratch",
total,
- 4096, DRM_BO_FLAG_MEM_TT);
+ 4096);
}
}
@@ -267,12 +270,6 @@ static int upload_wm_unit( struct brw_context *brw )
if (brw->wm.state_bo == NULL) {
brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
}
-
- for (i = 0; i < 3; i++)
- if (reloc_bufs[i])
- ret |= dri_bufmgr_check_aperture_space(reloc_bufs[i]);
- ret |= dri_bufmgr_check_aperture_space(brw->wm.state_bo);
- return ret;
}
const struct brw_tracked_state brw_wm_unit = {
@@ -284,10 +281,9 @@ const struct brw_tracked_state brw_wm_unit = {
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |
- BRW_NEW_LOCK),
+ BRW_NEW_NR_SURFACES),
- .cache = (CACHE_NEW_SURFACE |
- CACHE_NEW_WM_PROG |
+ .cache = (CACHE_NEW_WM_PROG |
CACHE_NEW_SAMPLER)
},
.prepare = upload_wm_unit,
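As a rough sketch of how the dirty masks above take effect (the real loop lives in brw_state_upload.c; the brw_state_flags layout is assumed from the .brw/.cache initializers shown here), a tracked-state atom runs whenever any of its bits matches the accumulated dirty state:

    struct brw_state_flags {
       GLuint mesa;    /* _NEW_* flags from core Mesa */
       GLuint brw;     /* BRW_NEW_* driver flags */
       GLuint cache;   /* CACHE_NEW_* flags for cached state objects */
    };

    static GLboolean
    check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
    {
       return (a->mesa & b->mesa) || (a->brw & b->brw) || (a->cache & b->cache);
    }

    /* In the upload loop, roughly: */
    if (check_state(&brw->state.dirty, &brw_wm_unit.dirty))
       brw_wm_unit.prepare(brw);

This is why prepare_wm_surfaces raising BRW_NEW_NR_SURFACES (further down in this patch) is enough to get the WM unit state rebuilt when the surface count changes.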
diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c
index 37c6b52..06e71e6 100644
--- a/i965/brw_wm_surface_state.c
+++ b/i965/brw_wm_surface_state.c
@@ -30,9 +30,9 @@
*/
-#include "mtypes.h"
-#include "texformat.h"
-#include "texstore.h"
+#include "main/mtypes.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
@@ -154,10 +154,29 @@ struct brw_wm_surface_key {
GLint first_level, last_level;
GLint width, height, depth;
GLint pitch, cpp;
- GLboolean tiled;
+ uint32_t tiling;
GLuint offset;
};
+static void
+brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
+{
+ switch (tiling) {
+ case I915_TILING_NONE:
+ surf->ss3.tiled_surface = 0;
+ surf->ss3.tile_walk = 0;
+ break;
+ case I915_TILING_X:
+ surf->ss3.tiled_surface = 1;
+ surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+ break;
+ case I915_TILING_Y:
+ surf->ss3.tiled_surface = 1;
+ surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR;
+ break;
+ }
+}
+
static dri_bo *
brw_create_texture_surface( struct brw_context *brw,
struct brw_wm_surface_key *key )
@@ -173,28 +192,32 @@ brw_create_texture_surface( struct brw_context *brw,
if (key->bo)
surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode);
else {
- switch(key->depth) {
- case 32: surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break;
- default:
- case 24: surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM; break;
- case 16: surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; break;
- }
+ switch (key->depth) {
+ case 32:
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ break;
+ default:
+ case 24:
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
+ break;
+ case 16:
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+ break;
+ }
}
/* This is ok for all textures with channel width 8bit or less:
*/
/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
if (key->bo)
- surf.ss1.base_addr = key->bo->offset; /* reloc */
+ surf.ss1.base_addr = key->bo->offset; /* reloc */
else
- surf.ss1.base_addr = key->offset;
+ surf.ss1.base_addr = key->offset;
surf.ss2.mip_count = key->last_level - key->first_level;
surf.ss2.width = key->width - 1;
surf.ss2.height = key->height - 1;
-
- surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
- surf.ss3.tiled_surface = key->tiled;
+ brw_set_surface_tiling(&surf, key->tiling);
surf.ss3.pitch = (key->pitch * key->cpp) - 1;
surf.ss3.depth = key->depth - 1;
@@ -214,18 +237,19 @@ brw_create_texture_surface( struct brw_context *brw,
&key->bo, key->bo ? 1 : 0,
&surf, sizeof(surf),
NULL, NULL);
+
if (key->bo) {
/* Emit relocation to surface contents */
- dri_emit_reloc(bo,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- 0,
- offsetof(struct brw_surface_state, ss1),
- key->bo);
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_SAMPLER, 0,
+ 0,
+ offsetof(struct brw_surface_state, ss1),
+ key->bo);
}
return bo;
}
-static int
+static void
brw_update_texture_surface( GLcontext *ctx, GLuint unit )
{
struct brw_context *brw = brw_context(ctx);
@@ -233,7 +257,6 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
struct brw_wm_surface_key key;
- int ret = 0;
memset(&key, 0, sizeof(key));
@@ -248,7 +271,6 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
key.depth = firstImage->Depth;
key.bo = intelObj->mt->region->buffer;
key.offset = 0;
- ret |= dri_bufmgr_check_aperture_space(key.bo);
}
key.target = tObj->Target;
@@ -258,7 +280,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
key.width = firstImage->Width;
key.height = firstImage->Height;
key.cpp = intelObj->mt->cpp;
- key.tiled = intelObj->mt->region->tiled;
+ key.tiling = intelObj->mt->region->tiling;
dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]);
brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
@@ -268,9 +290,6 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) {
brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key);
}
-
- ret |= dri_bufmgr_check_aperture_space(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]);
- return ret;
}
/**
@@ -278,18 +297,18 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
* While it is only used for the front/back buffer currently, it should be
* usable for further buffers when doing ARB_draw_buffer support.
*/
-static int
+static void
brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
unsigned int unit, GLboolean cached)
{
dri_bo *region_bo = NULL;
- int ret = 0;
struct {
unsigned int surface_type;
unsigned int surface_format;
unsigned int width, height, cpp;
GLubyte color_mask[4];
- GLboolean tiled, color_blend;
+ GLboolean color_blend;
+ uint32_t tiling;
} key;
memset(&key, 0, sizeof(key));
@@ -302,16 +321,14 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
else
key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
- key.tiled = region->tiled;
+ key.tiling = region->tiling;
key.width = region->pitch; /* XXX: not really! */
key.height = region->height;
key.cpp = region->cpp;
-
- ret |= dri_bufmgr_check_aperture_space(region->buffer);
} else {
key.surface_type = BRW_SURFACE_NULL;
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
- key.tiled = 0;
+ key.tiling = 0;
key.width = 1;
key.height = 1;
key.cpp = 4;
@@ -341,8 +358,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
surf.ss2.width = key.width - 1;
surf.ss2.height = key.height - 1;
- surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
- surf.ss3.tiled_surface = key.tiled;
+ brw_set_surface_tiling(&surf, key.tiling);
surf.ss3.pitch = (key.width * key.cpp) - 1;
/* _NEW_COLOR */
@@ -359,19 +375,19 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
&surf, sizeof(surf),
NULL, NULL);
if (region_bo != NULL) {
- dri_emit_reloc(brw->wm.surf_bo[unit],
- DRM_BO_FLAG_MEM_TT |
- DRM_BO_FLAG_READ |
- DRM_BO_FLAG_WRITE,
- 0,
- offsetof(struct brw_surface_state, ss1),
- region_bo);
+ /* We might sample from it, and we might render to it, so flag
+ * them both. We might be able to figure out from other state
+ * a more restrictive relocation to emit.
+ */
+ dri_bo_emit_reloc(brw->wm.surf_bo[unit],
+ I915_GEM_DOMAIN_RENDER |
+ I915_GEM_DOMAIN_SAMPLER,
+ I915_GEM_DOMAIN_RENDER,
+ 0,
+ offsetof(struct brw_surface_state, ss1),
+ region_bo);
}
}
-
- ret |= dri_bufmgr_check_aperture_space(brw->wm.surf_bo[unit]);
-
- return ret;
}
@@ -409,13 +425,11 @@ brw_wm_get_binding_table(struct brw_context *brw)
/* Emit binding table relocations to surface state */
for (i = 0; i < BRW_WM_MAX_SURF; i++) {
if (brw->wm.surf_bo[i] != NULL) {
- dri_emit_reloc(bind_bo,
- DRM_BO_FLAG_MEM_TT |
- DRM_BO_FLAG_READ |
- DRM_BO_FLAG_WRITE,
- 0,
- i * sizeof(GLuint),
- brw->wm.surf_bo[i]);
+ dri_bo_emit_reloc(bind_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0,
+ i * sizeof(GLuint),
+ brw->wm.surf_bo[i]);
}
}
@@ -425,25 +439,23 @@ brw_wm_get_binding_table(struct brw_context *brw)
return bind_bo;
}
-static int prepare_wm_surfaces(struct brw_context *brw )
+static void prepare_wm_surfaces(struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = &brw->intel;
- GLuint i, ret;
+ GLuint i;
+ int old_nr_surfaces;
if (brw->state.nr_draw_regions > 1) {
for (i = 0; i < brw->state.nr_draw_regions; i++) {
- ret = brw_update_region_surface(brw, brw->state.draw_regions[i], i,
- GL_FALSE);
- if (ret)
- return ret;
+ brw_update_region_surface(brw, brw->state.draw_regions[i], i,
+ GL_FALSE);
}
}else {
- ret = brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE);
- if (ret)
- return ret;
+ brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE);
}
+ old_nr_surfaces = brw->wm.nr_surfaces;
brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
@@ -457,11 +469,8 @@ static int prepare_wm_surfaces(struct brw_context *brw )
dri_bo_reference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]);
brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1;
} else {
- ret = brw_update_texture_surface(ctx, i);
+ brw_update_texture_surface(ctx, i);
brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1;
-
- if (ret)
- return ret;
}
} else {
dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]);
@@ -473,7 +482,8 @@ static int prepare_wm_surfaces(struct brw_context *brw )
dri_bo_unreference(brw->wm.bind_bo);
brw->wm.bind_bo = brw_wm_get_binding_table(brw);
- return dri_bufmgr_check_aperture_space(brw->wm.bind_bo);
+ if (brw->wm.nr_surfaces != old_nr_surfaces)
+ brw->state.dirty.brw |= BRW_NEW_NR_SURFACES;
}
diff --git a/i965/intel_state.c b/i965/intel_state.c
index 0fba5a7..67ef5f7 100644
--- a/i965/intel_state.c
+++ b/i965/intel_state.c
@@ -26,12 +26,12 @@
**************************************************************************/
-#include "glheader.h"
-#include "context.h"
-#include "macros.h"
-#include "enums.h"
-#include "colormac.h"
-#include "dd.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "main/colormac.h"
+#include "main/dd.h"
#include "intel_screen.h"
#include "intel_context.h"
diff --git a/shared/intel_batchbuffer.c b/shared/intel_batchbuffer.c
index a594fb6..9d99372 100644
--- a/shared/intel_batchbuffer.c
+++ b/shared/intel_batchbuffer.c
@@ -25,10 +25,12 @@
*
**************************************************************************/
+#include "intel_context.h"
#include "intel_batchbuffer.h"
-#include "intel_ioctl.h"
#include "intel_decode.h"
#include "intel_reg.h"
+#include "intel_bufmgr.h"
+#include "intel_buffers.h"
/* Relocations in kernel space:
* - pass dma buffer separately
@@ -78,19 +80,21 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
batch->buf = NULL;
}
+ if (!batch->buffer && intel->ttm == GL_TRUE)
+ batch->buffer = malloc (intel->maxBatchSize);
+
batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer",
- intel->maxBatchSize, 4096,
- DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED);
- dri_bo_map(batch->buf, GL_TRUE);
- batch->map = batch->buf->virtual;
+ intel->maxBatchSize, 4096);
+ if (batch->buffer)
+ batch->map = batch->buffer;
+ else {
+ dri_bo_map(batch->buf, GL_TRUE);
+ batch->map = batch->buf->virtual;
+ }
batch->size = intel->maxBatchSize;
batch->ptr = batch->map;
batch->dirty_state = ~0;
batch->cliprect_mode = IGNORE_CLIPRECTS;
-
- /* account batchbuffer in aperture */
- dri_bufmgr_check_aperture_space(batch->buf);
-
}
struct intel_batchbuffer *
@@ -99,7 +103,6 @@ intel_batchbuffer_alloc(struct intel_context *intel)
struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1);
batch->intel = intel;
- batch->last_fence = NULL;
intel_batchbuffer_reset(batch);
return batch;
@@ -108,14 +111,13 @@ intel_batchbuffer_alloc(struct intel_context *intel)
void
intel_batchbuffer_free(struct intel_batchbuffer *batch)
{
- if (batch->last_fence) {
- dri_fence_wait(batch->last_fence);
- dri_fence_unreference(batch->last_fence);
- batch->last_fence = NULL;
- }
- if (batch->map) {
- dri_bo_unmap(batch->buf);
- batch->map = NULL;
+ if (batch->buffer)
+ free (batch->buffer);
+ else {
+ if (batch->map) {
+ dri_bo_unmap(batch->buf);
+ batch->map = NULL;
+ }
}
dri_bo_unreference(batch->buf);
batch->buf = NULL;
@@ -131,41 +133,34 @@ do_flush_locked(struct intel_batchbuffer *batch,
GLuint used, GLboolean allow_unlock)
{
struct intel_context *intel = batch->intel;
- void *start;
- GLuint count;
-
- dri_bo_unmap(batch->buf);
- start = dri_process_relocs(batch->buf, &count);
+ int ret = 0;
+ unsigned int num_cliprects = 0;
+ struct drm_clip_rect *cliprects = NULL;
+ int x_off = 0, y_off = 0;
+
+ if (batch->buffer)
+ dri_bo_subdata (batch->buf, 0, used, batch->buffer);
+ else
+ dri_bo_unmap(batch->buf);
batch->map = NULL;
batch->ptr = NULL;
- /* Throw away non-effective packets. Won't work once we have
- * hardware contexts which would preserve statechanges beyond a
- * single buffer.
- */
- if (!(intel->numClipRects == 0 &&
- batch->cliprect_mode == LOOP_CLIPRECTS)) {
- if (intel->ttm == GL_TRUE) {
- intel_exec_ioctl(batch->intel,
- used,
- batch->cliprect_mode != LOOP_CLIPRECTS,
- allow_unlock,
- start, count, &batch->last_fence);
- } else {
- intel_batch_ioctl(batch->intel,
- batch->buf->offset,
- used,
- batch->cliprect_mode != LOOP_CLIPRECTS,
- allow_unlock);
- }
+ if (batch->cliprect_mode == LOOP_CLIPRECTS) {
+ intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
+ }
+ /* Dispatch the batchbuffer, if it has some effect (nonzero cliprects).
+ * Can't short-circuit like this once we have hardware contexts, but we
+ * should always be in DRI2 mode by then anyway.
+ */
+ if ((batch->cliprect_mode != LOOP_CLIPRECTS ||
+ num_cliprects != 0) && !intel->no_hw) {
+ ret = dri_bo_exec(batch->buf, used, cliprects, num_cliprects,
+ (x_off & 0xffff) | (y_off << 16));
}
-
- dri_post_submit(batch->buf, &batch->last_fence);
- if (intel->numClipRects == 0 &&
- batch->cliprect_mode == LOOP_CLIPRECTS) {
+ if (batch->cliprect_mode == LOOP_CLIPRECTS && num_cliprects == 0) {
if (allow_unlock) {
/* If we are not doing any actual user-visible rendering,
* do a sched_yield to keep the app from pegging the cpu while
@@ -187,6 +182,10 @@ do_flush_locked(struct intel_batchbuffer *batch,
intel->vtbl.debug_batch(intel);
}
+ if (ret != 0) {
+ UNLOCK_HARDWARE(intel);
+ exit(1);
+ }
intel->vtbl.new_batch(intel);
}
@@ -198,27 +197,35 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
GLuint used = batch->ptr - batch->map;
GLboolean was_locked = intel->locked;
- if (used == 0)
+ if (used == 0) {
+ batch->cliprect_mode = IGNORE_CLIPRECTS;
return;
+ }
if (INTEL_DEBUG & DEBUG_BATCH)
fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
used);
- /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a
- * performance drain that we would like to avoid.
- */
- if (used & 4) {
- ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
- ((int *) batch->ptr)[1] = 0;
- ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END;
- used += 12;
+
+ /* Emit a flush if the bufmgr doesn't do it for us. */
+ if (!intel->ttm) {
+ *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd();
+ batch->ptr += 4;
+ used = batch->ptr - batch->map;
}
- else {
- ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
- ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END;
- used += 8;
+
+ /* Round batchbuffer usage to 2 DWORDs. */
+
+ if ((used & 4) == 0) {
+ *(GLuint *) (batch->ptr) = 0; /* noop */
+ batch->ptr += 4;
+ used = batch->ptr - batch->map;
}
+ /* Mark the end of the buffer. */
+ *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END;
+ batch->ptr += 4;
+ used = batch->ptr - batch->map;
+
/* Workaround for recursive batchbuffer flushing: If the window is
* moved, we can get into a case where we try to flush during a
* flush. What happens is that when we try to grab the lock for
@@ -230,6 +237,9 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
* avoid that in the first place. */
batch->ptr = batch->map;
+ if (intel->vtbl.finish_batch)
+ intel->vtbl.finish_batch(intel);
+
/* TODO: Just pass the relocation list and dma buffer up to the
* kernel.
*/
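The flush and padding logic above produces a batch tail like the following schematic (illustrative only, not code from the patch):

    /* Tail of the batchbuffer as emitted by _intel_batchbuffer_flush():
     *
     *   ... rendering commands ...
     *   MI_FLUSH             - emitted only when !intel->ttm; the GEM kernel
     *                          path handles domain flushing itself
     *   MI_NOOP              - emitted only if needed so the total batch
     *                          length ends up a multiple of 8 bytes
     *   MI_BATCH_BUFFER_END  - terminator; must be the last dword the ring
     *                          parser sees
     */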
@@ -243,8 +253,8 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
if (INTEL_DEBUG & DEBUG_SYNC) {
fprintf(stderr, "waiting for idle\n");
- if (batch->last_fence != NULL)
- dri_fence_wait(batch->last_fence);
+ dri_bo_map(batch->buf, GL_TRUE);
+ dri_bo_unmap(batch->buf);
}
/* Reset the buffer:
@@ -252,25 +262,22 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
intel_batchbuffer_reset(batch);
}
-void
-intel_batchbuffer_finish(struct intel_batchbuffer *batch)
-{
- intel_batchbuffer_flush(batch);
- if (batch->last_fence != NULL)
- dri_fence_wait(batch->last_fence);
-}
-
/* This is the only way buffers get added to the validate list.
*/
GLboolean
intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
dri_bo *buffer,
- GLuint flags, GLuint delta)
+ uint32_t read_domains, uint32_t write_domain,
+ uint32_t delta)
{
int ret;
- ret = dri_emit_reloc(batch->buf, flags, delta, batch->ptr - batch->map, buffer);
+ if (batch->ptr - batch->map > batch->buf->size)
+ _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n",
+ batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
+ ret = dri_bo_emit_reloc(batch->buf, read_domains, write_domain,
+ delta, batch->ptr - batch->map, buffer);
/*
* Using the old buffer offset, write in what the right data would be, in case
diff --git a/shared/intel_batchbuffer.h b/shared/intel_batchbuffer.h
index 0da6020..51579df 100644
--- a/shared/intel_batchbuffer.h
+++ b/shared/intel_batchbuffer.h
@@ -1,11 +1,11 @@
#ifndef INTEL_BATCHBUFFER_H
#define INTEL_BATCHBUFFER_H
-#include "mtypes.h"
+#include "main/mtypes.h"
-#include "dri_bufmgr.h"
-
-struct intel_context;
+#include "intel_context.h"
+#include "intel_bufmgr.h"
+#include "intel_reg.h"
#define BATCH_SZ 16384
#define BATCH_RESERVED 16
@@ -19,6 +19,9 @@ enum cliprect_mode {
/**
* Batchbuffer contents require looping over per cliprect at batch submit
* time.
+ *
+ * This will be upgraded to NO_LOOP_CLIPRECTS when there's a single
+ * constant cliprect, as in DRI2 or FBO rendering.
*/
LOOP_CLIPRECTS,
/**
@@ -29,8 +32,10 @@ enum cliprect_mode {
/**
* Batchbuffer contents contain drawing that already handles cliprects, such
* as 2D drawing to front/back/depth that doesn't respect DRAWING_RECTANGLE.
+ *
* Equivalent behavior to NO_LOOP_CLIPRECTS, but may not persist in batch
- * outside of LOCK/UNLOCK.
+ * outside of LOCK/UNLOCK. This is upgraded to just NO_LOOP_CLIPRECTS when
+ * there's a constant cliprect, as in DRI2 or FBO rendering.
*/
REFERENCES_CLIPRECTS
};
@@ -40,7 +45,8 @@ struct intel_batchbuffer
struct intel_context *intel;
dri_bo *buf;
- dri_fence *last_fence;
+
+ GLubyte *buffer;
GLubyte *map;
GLubyte *ptr;
@@ -49,6 +55,12 @@ struct intel_batchbuffer
GLuint size;
+ /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */
+ struct {
+ GLuint total;
+ GLubyte *start_ptr;
+ } emit;
+
GLuint dirty_state;
};
@@ -58,8 +70,6 @@ struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context
void intel_batchbuffer_free(struct intel_batchbuffer *batch);
-void intel_batchbuffer_finish(struct intel_batchbuffer *batch);
-
void _intel_batchbuffer_flush(struct intel_batchbuffer *batch,
const char *file, int line);
@@ -82,14 +92,16 @@ void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
dri_bo *buffer,
- GLuint flags, GLuint offset);
+ uint32_t read_domains,
+ uint32_t write_domain,
+ uint32_t offset);
/* Inline functions - might actually be better off with these
* non-inlined. Certainly better off switching all command packets to
* be passed as structs rather than dwords, but that's a little bit of
* work...
*/
-static INLINE GLuint
+static INLINE GLint
intel_batchbuffer_space(struct intel_batchbuffer *batch)
{
return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
@@ -114,6 +126,11 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
if (intel_batchbuffer_space(batch) < sz)
intel_batchbuffer_flush(batch);
+ if ((cliprect_mode == LOOP_CLIPRECTS ||
+ cliprect_mode == REFERENCES_CLIPRECTS) &&
+ batch->intel->constant_cliprect)
+ cliprect_mode = NO_LOOP_CLIPRECTS;
+
if (cliprect_mode != IGNORE_CLIPRECTS) {
if (batch->cliprect_mode == IGNORE_CLIPRECTS) {
batch->cliprect_mode = cliprect_mode;
@@ -132,16 +149,36 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
#define BEGIN_BATCH(n, cliprect_mode) do { \
intel_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \
+ assert(intel->batch->emit.start_ptr == NULL); \
+ intel->batch->emit.total = (n) * 4; \
+ intel->batch->emit.start_ptr = intel->batch->ptr; \
} while (0)
-#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d)
+#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d)
-#define OUT_RELOC(buf, cliprect_mode, delta) do { \
+#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \
assert((delta) >= 0); \
- intel_batchbuffer_emit_reloc(intel->batch, buf, cliprect_mode, delta); \
+ intel_batchbuffer_emit_reloc(intel->batch, buf, \
+ read_domains, write_domain, delta); \
} while (0)
-#define ADVANCE_BATCH() do { } while(0)
+#define ADVANCE_BATCH() do { \
+ unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \
+ assert(intel->batch->emit.start_ptr != NULL); \
+ if (_n != intel->batch->emit.total) { \
+ fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", \
+ _n, intel->batch->emit.total); \
+ abort(); \
+ } \
+ intel->batch->emit.start_ptr = NULL; \
+} while(0)
+static INLINE void
+intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
+{
+ intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS);
+ intel_batchbuffer_emit_dword(batch, MI_FLUSH);
+}
+
#endif
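A minimal, hypothetical emit sequence using the updated macros (the command words and some_bo are made up for illustration; the call site is assumed to have declared BATCH_LOCALS and to hold the hardware lock). The stricter ADVANCE_BATCH() now aborts when the number of dwords emitted differs from the BEGIN_BATCH() count, catching miscounted packets at the call site.

    BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
    OUT_BATCH(CMD);                       /* hypothetical command header */
    OUT_BATCH(BR13);                      /* hypothetical control dword */
    OUT_RELOC(some_bo,
              I915_GEM_DOMAIN_RENDER,     /* read_domains */
              I915_GEM_DOMAIN_RENDER,     /* write_domain: we render to it */
              0);                         /* delta */
    OUT_BATCH(0);                         /* trailing dword */
    ADVANCE_BATCH();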
diff --git a/shared/intel_blit.c b/shared/intel_blit.c
index 25ac609..208f90c 100644
--- a/shared/intel_blit.c
+++ b/shared/intel_blit.c
@@ -29,17 +29,18 @@
#include <stdio.h>
#include <errno.h>
-#include "mtypes.h"
-#include "context.h"
-#include "enums.h"
+#include "main/mtypes.h"
+#include "main/context.h"
+#include "main/enums.h"
-#include "intel_batchbuffer.h"
#include "intel_blit.h"
#include "intel_buffers.h"
#include "intel_context.h"
#include "intel_fbo.h"
#include "intel_reg.h"
#include "intel_regions.h"
+#include "intel_batchbuffer.h"
+#include "intel_chipset.h"
#define FILE_DEBUG_FLAG DEBUG_BLIT
@@ -54,7 +55,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
struct intel_context *intel;
const intelScreenPrivate *intelScreen;
- int ret;
DBG("%s\n", __FUNCTION__);
@@ -66,14 +66,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
intelScreen = intel->intelScreen;
- if (intel->last_swap_fence) {
- dri_fence_wait(intel->last_swap_fence);
- dri_fence_unreference(intel->last_swap_fence);
- intel->last_swap_fence = NULL;
- }
- intel->last_swap_fence = intel->first_swap_fence;
- intel->first_swap_fence = NULL;
-
/* The LOCK_HARDWARE is required for the cliprects. Buffer offsets
* should work regardless.
*/
@@ -89,6 +81,7 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
unsigned short src_x, src_y;
int BR13, CMD;
int i;
+ dri_bo *aper_array[3];
src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT);
dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT);
@@ -114,26 +107,28 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
}
#ifndef I915
- if (src->tiled) {
+ if (src->tiling != I915_TILING_NONE) {
CMD |= XY_SRC_TILED;
src_pitch /= 4;
}
- if (dst->tiled) {
+ if (dst->tiling != I915_TILING_NONE) {
CMD |= XY_DST_TILED;
dst_pitch /= 4;
}
#endif
/* do space/cliprects check before going any further */
- intel_batchbuffer_require_space(intel->batch, 8 * 4, REFERENCES_CLIPRECTS);
+ intel_batchbuffer_require_space(intel->batch, 8 * 4,
+ REFERENCES_CLIPRECTS);
again:
- ret = dri_bufmgr_check_aperture_space(dst->buffer);
- ret |= dri_bufmgr_check_aperture_space(src->buffer);
-
- if (ret) {
+ aper_array[0] = intel->batch->buf;
+ aper_array[1] = dst->buffer;
+ aper_array[2] = src->buffer;
+
+ if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
intel_batchbuffer_flush(intel->batch);
goto again;
}
-
+
for (i = 0; i < nbox; i++, pbox++) {
drm_clip_rect_t box = *pbox;
@@ -157,19 +152,22 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
OUT_BATCH((box.y1 << 16) | box.x1);
OUT_BATCH((box.y2 << 16) | box.x2);
- OUT_RELOC(dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0);
+ OUT_RELOC(dst->buffer,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0);
OUT_BATCH((src_y << 16) | src_x);
OUT_BATCH(src_pitch);
- OUT_RELOC(src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+ OUT_RELOC(src->buffer,
+ I915_GEM_DOMAIN_RENDER, 0,
+ 0);
ADVANCE_BATCH();
}
- if (intel->first_swap_fence)
- dri_fence_unreference(intel->first_swap_fence);
+ /* Flush the rendering and the batch so that the results all land on the
+ * screen in a timely fashion.
+ */
+ intel_batchbuffer_emit_mi_flush(intel->batch);
intel_batchbuffer_flush(intel->batch);
- intel->first_swap_fence = intel->batch->last_fence;
- if (intel->first_swap_fence)
- dri_fence_reference(intel->first_swap_fence);
}
UNLOCK_HARDWARE(intel);
@@ -184,7 +182,7 @@ intelEmitFillBlit(struct intel_context *intel,
GLshort dst_pitch,
dri_bo *dst_buffer,
GLuint dst_offset,
- GLboolean dst_tiled,
+ uint32_t dst_tiling,
GLshort x, GLshort y,
GLshort w, GLshort h,
GLuint color)
@@ -209,7 +207,7 @@ intelEmitFillBlit(struct intel_context *intel,
return;
}
#ifndef I915
- if (dst_tiled) {
+ if (dst_tiling != I915_TILING_NONE) {
CMD |= XY_DST_TILED;
dst_pitch /= 4;
}
@@ -226,7 +224,9 @@ intelEmitFillBlit(struct intel_context *intel,
OUT_BATCH(BR13 | dst_pitch);
OUT_BATCH((y << 16) | x);
OUT_BATCH(((y + h) << 16) | (x + w));
- OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
+ OUT_RELOC(dst_buffer,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ dst_offset);
OUT_BATCH(color);
ADVANCE_BATCH();
}
@@ -263,32 +263,63 @@ intelEmitCopyBlit(struct intel_context *intel,
GLshort src_pitch,
dri_bo *src_buffer,
GLuint src_offset,
- GLboolean src_tiled,
+ uint32_t src_tiling,
GLshort dst_pitch,
dri_bo *dst_buffer,
GLuint dst_offset,
- GLboolean dst_tiled,
+ uint32_t dst_tiling,
GLshort src_x, GLshort src_y,
GLshort dst_x, GLshort dst_y,
GLshort w, GLshort h,
GLenum logic_op)
{
- GLuint CMD, BR13;
+ GLuint CMD, BR13, pass = 0;
int dst_y2 = dst_y + h;
int dst_x2 = dst_x + w;
- int ret;
+ dri_bo *aper_array[3];
BATCH_LOCALS;
/* do space/cliprects check before going any further */
- intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS);
- again:
- ret = dri_bufmgr_check_aperture_space(dst_buffer);
- ret |= dri_bufmgr_check_aperture_space(src_buffer);
- if (ret) {
- intel_batchbuffer_flush(intel->batch);
- goto again;
+ do {
+ aper_array[0] = intel->batch->buf;
+ aper_array[1] = dst_buffer;
+ aper_array[2] = src_buffer;
+
+ if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
+ intel_batchbuffer_flush(intel->batch);
+ pass++;
+ } else
+ break;
+ } while (pass < 2);
+
+ if (pass >= 2) {
+ GLboolean locked = GL_FALSE;
+ if (!intel->locked) {
+ LOCK_HARDWARE(intel);
+ locked = GL_TRUE;
+ }
+
+ dri_bo_map(dst_buffer, GL_TRUE);
+ dri_bo_map(src_buffer, GL_FALSE);
+ _mesa_copy_rect((GLubyte *)dst_buffer->virtual + dst_offset,
+ cpp,
+ dst_pitch,
+ dst_x, dst_y,
+ w, h,
+ (GLubyte *)src_buffer->virtual + src_offset,
+ src_pitch,
+ src_x, src_y);
+
+ dri_bo_unmap(src_buffer);
+ dri_bo_unmap(dst_buffer);
+
+ if (locked)
+ UNLOCK_HARDWARE(intel);
+
+ return;
}
+ intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS);
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__FUNCTION__,
src_buffer, src_pitch, src_offset, src_x, src_y,
@@ -315,11 +346,11 @@ intelEmitCopyBlit(struct intel_context *intel,
}
#ifndef I915
- if (dst_tiled) {
+ if (dst_tiling != I915_TILING_NONE) {
CMD |= XY_DST_TILED;
dst_pitch /= 4;
}
- if (src_tiled) {
+ if (src_tiling != I915_TILING_NONE) {
CMD |= XY_SRC_TILED;
src_pitch /= 4;
}
@@ -329,47 +360,25 @@ intelEmitCopyBlit(struct intel_context *intel,
return;
}
- /* Initial y values don't seem to work with negative pitches. If
- * we adjust the offsets manually (below), it seems to work fine.
- *
- * On the other hand, if we always adjust, the hardware doesn't
- * know which blit directions to use, so overlapping copypixels get
- * the wrong result.
- */
- if (dst_pitch > 0 && src_pitch > 0) {
- assert(dst_x < dst_x2);
- assert(dst_y < dst_y2);
-
- BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
- OUT_BATCH(CMD);
- OUT_BATCH(BR13 | dst_pitch);
- OUT_BATCH((dst_y << 16) | dst_x);
- OUT_BATCH((dst_y2 << 16) | dst_x2);
- OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
- dst_offset);
- OUT_BATCH((src_y << 16) | src_x);
- OUT_BATCH(src_pitch);
- OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- src_offset);
- ADVANCE_BATCH();
- }
- else {
- assert(dst_x < dst_x2);
- assert(h > 0);
-
- BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
- OUT_BATCH(CMD);
- OUT_BATCH(BR13 | ((uint16_t)dst_pitch));
- OUT_BATCH((0 << 16) | dst_x);
- OUT_BATCH((h << 16) | dst_x2);
- OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
- dst_offset + dst_y * dst_pitch);
- OUT_BATCH((0 << 16) | src_x);
- OUT_BATCH(src_pitch);
- OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- src_offset + src_y * src_pitch);
- ADVANCE_BATCH();
- }
+ assert(dst_x < dst_x2);
+ assert(dst_y < dst_y2);
+
+ BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
+ OUT_BATCH(CMD);
+ OUT_BATCH(BR13 | (uint16_t)dst_pitch);
+ OUT_BATCH((dst_y << 16) | dst_x);
+ OUT_BATCH((dst_y2 << 16) | dst_x2);
+ OUT_RELOC(dst_buffer,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ dst_offset);
+ OUT_BATCH((src_y << 16) | src_x);
+ OUT_BATCH((uint16_t)src_pitch);
+ OUT_RELOC(src_buffer,
+ I915_GEM_DOMAIN_RENDER, 0,
+ src_offset);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
}
@@ -387,6 +396,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
struct gl_framebuffer *fb = ctx->DrawBuffer;
GLuint clear_depth;
GLbitfield skipBuffers = 0;
+ unsigned int num_cliprects;
+ struct drm_clip_rect *cliprects;
+ int x_off, y_off;
BATCH_LOCALS;
/*
@@ -411,7 +423,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
intelFlush(&intel->ctx);
LOCK_HARDWARE(intel);
- if (intel->numClipRects) {
+ intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
+ if (num_cliprects) {
GLint cx, cy, cw, ch;
drm_clip_rect_t clear;
int i;
@@ -426,15 +439,15 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
/* clearing a window */
/* flip top to bottom */
- clear.x1 = cx + intel->drawX;
+ clear.x1 = cx + x_off;
clear.y1 = intel->driDrawable->y + intel->driDrawable->h - cy - ch;
clear.x2 = clear.x1 + cw;
clear.y2 = clear.y1 + ch;
}
else {
/* clearing FBO */
- assert(intel->numClipRects == 1);
- assert(intel->pClipRects == &intel->fboRect);
+ assert(num_cliprects == 1);
+ assert(cliprects == &intel->fboRect);
clear.x1 = cx;
clear.y1 = cy;
clear.x2 = clear.x1 + cw;
@@ -442,8 +455,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
/* no change to mask */
}
- for (i = 0; i < intel->numClipRects; i++) {
- const drm_clip_rect_t *box = &intel->pClipRects[i];
+ for (i = 0; i < num_cliprects; i++) {
+ const drm_clip_rect_t *box = &cliprects[i];
drm_clip_rect_t b;
GLuint buf;
GLuint clearMask = mask; /* use copy, since we modify it below */
@@ -513,7 +526,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
}
#ifndef I915
- if (irb_region->tiled) {
+ if (irb_region->tiling != I915_TILING_NONE) {
CMD |= XY_DST_TILED;
pitch /= 4;
}
@@ -531,7 +544,6 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
_mesa_debug(ctx, "hardware blit clear buf %d rb id %d\n",
buf, irb->Base.Name);
*/
- intel_wait_flips(intel);
assert(b.x1 < b.x2);
assert(b.y1 < b.y2);
@@ -541,7 +553,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
OUT_BATCH(BR13);
OUT_BATCH((b.y1 << 16) | b.x1);
OUT_BATCH((b.y2 << 16) | b.x2);
- OUT_RELOC(write_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+ OUT_RELOC(write_buffer,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
irb_region->draw_offset);
OUT_BATCH(clearVal);
ADVANCE_BATCH();
@@ -549,7 +562,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
}
}
}
- intel_batchbuffer_flush(intel->batch);
+ intel_batchbuffer_emit_mi_flush(intel->batch);
}
UNLOCK_HARDWARE(intel);
@@ -563,7 +576,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
GLshort dst_pitch,
dri_bo *dst_buffer,
GLuint dst_offset,
- GLboolean dst_tiled,
+ uint32_t dst_tiling,
GLshort x, GLshort y,
GLshort w, GLshort h,
GLenum logic_op)
@@ -586,14 +599,14 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
intel_batchbuffer_require_space( intel->batch,
(8 * 4) +
(3 * 4) +
- dwords,
- NO_LOOP_CLIPRECTS );
+ dwords * 4,
+ REFERENCES_CLIPRECTS );
opcode = XY_SETUP_BLT_CMD;
if (cpp == 4)
opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
#ifndef I915
- if (dst_tiled) {
+ if (dst_tiling != I915_TILING_NONE) {
opcode |= XY_DST_TILED;
dst_pitch /= 4;
}
@@ -606,15 +619,17 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
br13 |= BR13_8888;
blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
- if (dst_tiled)
+ if (dst_tiling != I915_TILING_NONE)
blit_cmd |= XY_DST_TILED;
- BEGIN_BATCH(8 + 3, NO_LOOP_CLIPRECTS);
+ BEGIN_BATCH(8 + 3, REFERENCES_CLIPRECTS);
OUT_BATCH(opcode);
OUT_BATCH(br13);
OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
- OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
+ OUT_RELOC(dst_buffer,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ dst_offset);
OUT_BATCH(0); /* bg */
OUT_BATCH(fg_color); /* fg */
OUT_BATCH(0); /* pattern base addr */
@@ -627,5 +642,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
intel_batchbuffer_data( intel->batch,
src_bits,
dwords * 4,
- NO_LOOP_CLIPRECTS );
+ REFERENCES_CLIPRECTS );
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
}
diff --git a/shared/intel_blit.h b/shared/intel_blit.h
index fc0620c..52065b1 100644
--- a/shared/intel_blit.h
+++ b/shared/intel_blit.h
@@ -29,8 +29,6 @@
#define INTEL_BLIT_H
#include "intel_context.h"
-#include "intel_ioctl.h"
-#include "dri_bufmgr.h"
extern void intelCopyBuffer(const __DRIdrawablePrivate * dpriv,
const drm_clip_rect_t * rect);
@@ -42,11 +40,11 @@ extern void intelEmitCopyBlit(struct intel_context *intel,
GLshort src_pitch,
dri_bo *src_buffer,
GLuint src_offset,
- GLboolean src_tiled,
+ uint32_t src_tiling,
GLshort dst_pitch,
dri_bo *dst_buffer,
GLuint dst_offset,
- GLboolean dst_tiled,
+ uint32_t dst_tiling,
GLshort srcx, GLshort srcy,
GLshort dstx, GLshort dsty,
GLshort w, GLshort h,
@@ -57,7 +55,7 @@ extern void intelEmitFillBlit(struct intel_context *intel,
GLshort dst_pitch,
dri_bo *dst_buffer,
GLuint dst_offset,
- GLboolean dst_tiled,
+ uint32_t dst_tiling,
GLshort x, GLshort y,
GLshort w, GLshort h, GLuint color);
@@ -69,7 +67,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
GLshort dst_pitch,
dri_bo *dst_buffer,
GLuint dst_offset,
- GLboolean dst_tiled,
+ uint32_t dst_tiling,
GLshort x, GLshort y,
GLshort w, GLshort h,
GLenum logic_op);
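A hypothetical call site showing the new uint32_t tiling arguments; the values are made up, and the leading cpp parameter sits before src_pitch in the full prototype, which is partly outside the hunks above.

    /* Copy one region into another; tiling now carries I915_TILING_NONE/_X/_Y
     * instead of a GLboolean, so the blitter only sets XY_SRC/DST_TILED when
     * the buffer really is tiled.
     */
    intelEmitCopyBlit(intel,
                      dst_region->cpp,
                      src_region->pitch, src_region->buffer, 0,
                      src_region->tiling,
                      dst_region->pitch, dst_region->buffer, 0,
                      dst_region->tiling,
                      0, 0,               /* src x, y */
                      0, 0,               /* dst x, y */
                      width, height,
                      GL_COPY);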
diff --git a/shared/intel_buffer_objects.c b/shared/intel_buffer_objects.c
index 951b8cb..60d7bb3 100644
--- a/shared/intel_buffer_objects.c
+++ b/shared/intel_buffer_objects.c
@@ -26,14 +26,14 @@
**************************************************************************/
-#include "imports.h"
-#include "mtypes.h"
-#include "bufferobj.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/bufferobj.h"
#include "intel_context.h"
#include "intel_buffer_objects.h"
+#include "intel_batchbuffer.h"
#include "intel_regions.h"
-#include "dri_bufmgr.h"
static GLboolean intel_bufferobj_unmap(GLcontext * ctx,
GLenum target,
@@ -45,8 +45,7 @@ intel_bufferobj_alloc_buffer(struct intel_context *intel,
struct intel_buffer_object *intel_obj)
{
intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj",
- intel_obj->Base.Size, 64,
- DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED);
+ intel_obj->Base.Size, 64);
}
/**
diff --git a/shared/intel_buffer_objects.h b/shared/intel_buffer_objects.h
index 7cecc32..bf6dbd5 100644
--- a/shared/intel_buffer_objects.h
+++ b/shared/intel_buffer_objects.h
@@ -28,7 +28,7 @@
#ifndef INTEL_BUFFEROBJ_H
#define INTEL_BUFFEROBJ_H
-#include "mtypes.h"
+#include "main/mtypes.h"
struct intel_context;
struct intel_region;
diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c
index 75542a9..0fd2f16 100644
--- a/shared/intel_buffers.c
+++ b/shared/intel_buffers.c
@@ -35,30 +35,14 @@
#include "intel_regions.h"
#include "intel_batchbuffer.h"
#include "intel_reg.h"
-#include "context.h"
+#include "main/context.h"
+#include "main/framebuffer.h"
+#include "swrast/swrast.h"
#include "utils.h"
#include "drirenderbuffer.h"
-#include "framebuffer.h"
-#include "swrast/swrast.h"
#include "vblank.h"
#include "i915_drm.h"
-/* This block can be removed when libdrm >= 2.3.1 is required */
-
-#ifndef DRM_IOCTL_I915_FLIP
-
-#define DRM_VBLANK_FLIP 0x8000000
-
-typedef struct drm_i915_flip {
- int pipes;
-} drm_i915_flip_t;
-
-#undef DRM_IOCTL_I915_FLIP
-#define DRM_IOCTL_I915_FLIP DRM_IOW(DRM_COMMAND_BASE + DRM_I915_FLIP, \
- drm_i915_flip_t)
-
-#endif
-
#define FILE_DEBUG_FLAG DEBUG_BLIT
/**
@@ -123,150 +107,42 @@ intel_readbuf_region(struct intel_context *intel)
return NULL;
}
-
-
-/**
- * Update the following fields for rendering to a user-created FBO:
- * intel->numClipRects
- * intel->pClipRects
- * intel->drawX
- * intel->drawY
- */
-static void
-intelSetRenderbufferClipRects(struct intel_context *intel)
-{
- assert(intel->ctx.DrawBuffer->Width > 0);
- assert(intel->ctx.DrawBuffer->Height > 0);
- intel->fboRect.x1 = 0;
- intel->fboRect.y1 = 0;
- intel->fboRect.x2 = intel->ctx.DrawBuffer->Width;
- intel->fboRect.y2 = intel->ctx.DrawBuffer->Height;
- intel->numClipRects = 1;
- intel->pClipRects = &intel->fboRect;
- intel->drawX = 0;
- intel->drawY = 0;
-}
-
-
-/**
- * As above, but for rendering to front buffer of a window.
- * \sa intelSetRenderbufferClipRects
- */
-static void
-intelSetFrontClipRects(struct intel_context *intel)
-{
- __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
- if (!dPriv)
- return;
-
- intel->numClipRects = dPriv->numClipRects;
- intel->pClipRects = dPriv->pClipRects;
- intel->drawX = dPriv->x;
- intel->drawY = dPriv->y;
-}
-
-
-/**
- * As above, but for rendering to back buffer of a window.
- */
-static void
-intelSetBackClipRects(struct intel_context *intel)
+void
+intel_get_cliprects(struct intel_context *intel,
+ struct drm_clip_rect **cliprects,
+ unsigned int *num_cliprects,
+ int *x_off, int *y_off)
{
__DRIdrawablePrivate *dPriv = intel->driDrawable;
- struct intel_framebuffer *intel_fb;
-
- if (!dPriv)
- return;
-
- intel_fb = dPriv->driverPrivate;
+ struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
- if (intel_fb->pf_active || dPriv->numBackClipRects == 0) {
+ if (intel->constant_cliprect) {
+ /* FBO or DRI2 rendering, which can just use the fb's size. */
+ intel->fboRect.x1 = 0;
+ intel->fboRect.y1 = 0;
+ intel->fboRect.x2 = intel->ctx.DrawBuffer->Width;
+ intel->fboRect.y2 = intel->ctx.DrawBuffer->Height;
+
+ *cliprects = &intel->fboRect;
+ *num_cliprects = 1;
+ *x_off = 0;
+ *y_off = 0;
+ } else if (intel->front_cliprects || dPriv->numBackClipRects == 0) {
/* use the front clip rects */
- intel->numClipRects = dPriv->numClipRects;
- intel->pClipRects = dPriv->pClipRects;
- intel->drawX = dPriv->x;
- intel->drawY = dPriv->y;
+ *cliprects = dPriv->pClipRects;
+ *num_cliprects = dPriv->numClipRects;
+ *x_off = dPriv->x;
+ *y_off = dPriv->y;
}
else {
/* use the back clip rects */
- intel->numClipRects = dPriv->numBackClipRects;
- intel->pClipRects = dPriv->pBackClipRects;
- intel->drawX = dPriv->backX;
- intel->drawY = dPriv->backY;
+ *num_cliprects = dPriv->numBackClipRects;
+ *cliprects = dPriv->pBackClipRects;
+ *x_off = dPriv->backX;
+ *y_off = dPriv->backY;
}
}
-static void
-intelUpdatePageFlipping(struct intel_context *intel,
- GLint areaA, GLint areaB)
-{
- __DRIdrawablePrivate *dPriv = intel->driDrawable;
- struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
- GLboolean pf_active;
- GLint pf_planes;
-
- /* Update page flipping info */
- pf_planes = 0;
-
- if (areaA > 0)
- pf_planes |= 1;
-
- if (areaB > 0)
- pf_planes |= 2;
-
- intel_fb->pf_current_page = (intel->sarea->pf_current_page >>
- (intel_fb->pf_planes & 0x2)) & 0x3;
-
- intel_fb->pf_num_pages = intel->intelScreen->third.handle ? 3 : 2;
-
- pf_active = pf_planes && (pf_planes & intel->sarea->pf_active) == pf_planes;
-
- if (INTEL_DEBUG & DEBUG_LOCK)
- if (pf_active != intel_fb->pf_active)
- _mesa_printf("%s - Page flipping %sactive\n", __progname,
- pf_active ? "" : "in");
-
- if (pf_active) {
- /* Sync pages between planes if flipping on both at the same time */
- if (pf_planes == 0x3 && pf_planes != intel_fb->pf_planes &&
- (intel->sarea->pf_current_page & 0x3) !=
- (((intel->sarea->pf_current_page) >> 2) & 0x3)) {
- drm_i915_flip_t flip;
-
- if (intel_fb->pf_current_page ==
- (intel->sarea->pf_current_page & 0x3)) {
- /* XXX: This is ugly, but emitting two flips 'in a row' can cause
- * lockups for unknown reasons.
- */
- intel->sarea->pf_current_page =
- intel->sarea->pf_current_page & 0x3;
- intel->sarea->pf_current_page |=
- ((intel_fb->pf_current_page + intel_fb->pf_num_pages - 1) %
- intel_fb->pf_num_pages) << 2;
-
- flip.pipes = 0x2;
- } else {
- intel->sarea->pf_current_page =
- intel->sarea->pf_current_page & (0x3 << 2);
- intel->sarea->pf_current_page |=
- (intel_fb->pf_current_page + intel_fb->pf_num_pages - 1) %
- intel_fb->pf_num_pages;
-
- flip.pipes = 0x1;
- }
-
- drmCommandWrite(intel->driFd, DRM_I915_FLIP, &flip, sizeof(flip));
- }
-
- intel_fb->pf_planes = pf_planes;
- }
-
- intel_fb->pf_active = pf_active;
- intel_flip_renderbuffers(intel_fb);
- intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
-}
-
/**
* This will be called whenever the currently bound window is moved/resized.
* XXX: actually, it seems to NOT be called when the window is only moved (BP).
@@ -278,32 +154,9 @@ intelWindowMoved(struct intel_context *intel)
__DRIdrawablePrivate *dPriv = intel->driDrawable;
struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
- if (!intel->ctx.DrawBuffer) {
- /* when would this happen? -BP */
- intelSetFrontClipRects(intel);
- }
- else if (intel->ctx.DrawBuffer->Name != 0) {
- /* drawing to user-created FBO - do nothing */
- /* Cliprects would be set from intelDrawBuffer() */
- }
- else {
- /* drawing to a window */
- switch (intel_fb->Base._ColorDrawBufferIndexes[0]) {
- case BUFFER_FRONT_LEFT:
- intelSetFrontClipRects(intel);
- break;
- case BUFFER_BACK_LEFT:
- intelSetBackClipRects(intel);
- break;
- default:
- intelSetFrontClipRects(intel);
- }
-
- }
-
if (!intel->intelScreen->driScrnPriv->dri2.enabled &&
intel->intelScreen->driScrnPriv->ddx_version.minor >= 7) {
- volatile struct drm_i915_sarea *sarea = intel->sarea;
+ volatile drm_i915_sarea_t *sarea = intel->sarea;
drm_clip_rect_t drw_rect = { .x1 = dPriv->x, .x2 = dPriv->x + dPriv->w,
.y1 = dPriv->y, .y2 = dPriv->y + dPriv->h };
drm_clip_rect_t planeA_rect = { .x1 = sarea->planeA_x, .y1 = sarea->planeA_y,
@@ -316,8 +169,6 @@ intelWindowMoved(struct intel_context *intel)
GLint areaB = driIntersectArea( drw_rect, planeB_rect );
GLuint flags = dPriv->vblFlags;
- intelUpdatePageFlipping(intel, areaA, areaB);
-
/* Update vblank info
*/
if (areaB > areaA || (areaA == areaB && areaB > 0)) {
@@ -342,7 +193,7 @@ intelWindowMoved(struct intel_context *intel)
vbl.request.type |= DRM_VBLANK_SECONDARY;
}
- for (i = 0; i < intel_fb->pf_num_pages; i++) {
+ for (i = 0; i < 2; i++) {
if (!intel_fb->color_rb[i] ||
(intel_fb->vbl_waited - intel_fb->color_rb[i]->vbl_pending) <=
(1<<23))
@@ -366,7 +217,7 @@ intelWindowMoved(struct intel_context *intel)
intel_fb->vbl_waited = dPriv->vblSeq;
- for (i = 0; i < intel_fb->pf_num_pages; i++) {
+ for (i = 0; i < 2; i++) {
if (intel_fb->color_rb[i])
intel_fb->color_rb[i]->vbl_pending = intel_fb->vbl_waited;
}
@@ -605,173 +456,6 @@ intelClear(GLcontext *ctx, GLbitfield mask)
}
}
-
-/* Emit wait for pending flips */
-void
-intel_wait_flips(struct intel_context *intel)
-{
- struct intel_framebuffer *intel_fb =
- (struct intel_framebuffer *) intel->ctx.DrawBuffer;
- struct intel_renderbuffer *intel_rb =
- intel_get_renderbuffer(&intel_fb->Base,
- intel_fb->Base._ColorDrawBufferIndexes[0] ==
- BUFFER_FRONT_LEFT ? BUFFER_FRONT_LEFT :
- BUFFER_BACK_LEFT);
-
- if (intel_fb->Base.Name == 0 && intel_rb &&
- intel_rb->pf_pending == intel_fb->pf_seq) {
- GLint pf_planes = intel_fb->pf_planes;
- BATCH_LOCALS;
-
- /* Wait for pending flips to take effect */
- BEGIN_BATCH(2, NO_LOOP_CLIPRECTS);
- OUT_BATCH(pf_planes & 0x1 ? (MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PLANE_A_FLIP)
- : 0);
- OUT_BATCH(pf_planes & 0x2 ? (MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PLANE_B_FLIP)
- : 0);
- ADVANCE_BATCH();
-
- intel_rb->pf_pending--;
- }
-}
-
-
-/* Flip the front & back buffers
- */
-static GLboolean
-intelPageFlip(const __DRIdrawablePrivate * dPriv)
-{
- struct intel_context *intel;
- int ret;
- struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
-
- if (INTEL_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- assert(dPriv);
- assert(dPriv->driContextPriv);
- assert(dPriv->driContextPriv->driverPrivate);
-
- intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate;
-
- if (intel->intelScreen->drmMinor < 9)
- return GL_FALSE;
-
- intelFlush(&intel->ctx);
-
- ret = 0;
-
- LOCK_HARDWARE(intel);
-
- if (dPriv->numClipRects && intel_fb->pf_active) {
- drm_i915_flip_t flip;
-
- flip.pipes = intel_fb->pf_planes;
-
- ret = drmCommandWrite(intel->driFd, DRM_I915_FLIP, &flip, sizeof(flip));
- }
-
- UNLOCK_HARDWARE(intel);
-
- if (ret || !intel_fb->pf_active)
- return GL_FALSE;
-
- if (!dPriv->numClipRects) {
- usleep(10000); /* throttle invisible client 10ms */
- }
-
- intel_fb->pf_current_page = (intel->sarea->pf_current_page >>
- (intel_fb->pf_planes & 0x2)) & 0x3;
-
- if (dPriv->numClipRects != 0) {
- intel_get_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT)->pf_pending =
- intel_get_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT)->pf_pending =
- ++intel_fb->pf_seq;
- }
-
- intel_flip_renderbuffers(intel_fb);
- intel_draw_buffer(&intel->ctx, &intel_fb->Base);
-
- return GL_TRUE;
-}
-
-static GLboolean
-intelScheduleSwap(__DRIdrawablePrivate * dPriv, GLboolean *missed_target)
-{
- struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
- unsigned int interval;
- struct intel_context *intel =
- intelScreenContext(dPriv->driScreenPriv->private);
- const intelScreenPrivate *intelScreen = intel->intelScreen;
- unsigned int target;
- drm_i915_vblank_swap_t swap;
- GLboolean ret;
-
- if (!dPriv->vblFlags ||
- (dPriv->vblFlags & VBLANK_FLAG_NO_IRQ) ||
- intelScreen->drmMinor < (intel_fb->pf_active ? 9 : 6))
- return GL_FALSE;
-
- interval = driGetVBlankInterval(dPriv);
-
- swap.seqtype = DRM_VBLANK_ABSOLUTE;
-
- if (dPriv->vblFlags & VBLANK_FLAG_SYNC) {
- swap.seqtype |= DRM_VBLANK_NEXTONMISS;
- } else if (interval == 0)
- return GL_FALSE;
-
- swap.drawable = dPriv->hHWDrawable;
- target = swap.sequence = dPriv->vblSeq + interval;
-
- if ( dPriv->vblFlags & VBLANK_FLAG_SECONDARY ) {
- swap.seqtype |= DRM_VBLANK_SECONDARY;
- }
-
- LOCK_HARDWARE(intel);
-
- intel_batchbuffer_flush(intel->batch);
-
- if ( intel_fb->pf_active ) {
- swap.seqtype |= DRM_VBLANK_FLIP;
-
- intel_fb->pf_current_page = (((intel->sarea->pf_current_page >>
- (intel_fb->pf_planes & 0x2)) & 0x3) + 1) %
- intel_fb->pf_num_pages;
- }
-
- if (!drmCommandWriteRead(intel->driFd, DRM_I915_VBLANK_SWAP, &swap,
- sizeof(swap))) {
- dPriv->vblSeq = swap.sequence;
- swap.sequence -= target;
- *missed_target = swap.sequence > 0 && swap.sequence <= (1 << 23);
-
- intel_get_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT)->vbl_pending =
- intel_get_renderbuffer(&intel_fb->Base,
- BUFFER_FRONT_LEFT)->vbl_pending =
- dPriv->vblSeq;
-
- if (swap.seqtype & DRM_VBLANK_FLIP) {
- intel_flip_renderbuffers(intel_fb);
- intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
- }
-
- ret = GL_TRUE;
- } else {
- if (swap.seqtype & DRM_VBLANK_FLIP) {
- intel_fb->pf_current_page = ((intel->sarea->pf_current_page >>
- (intel_fb->pf_planes & 0x2)) & 0x3) %
- intel_fb->pf_num_pages;
- }
-
- ret = GL_FALSE;
- }
-
- UNLOCK_HARDWARE(intel);
-
- return ret;
-}
-
void
intelSwapBuffers(__DRIdrawablePrivate * dPriv)
{
@@ -793,22 +477,22 @@ intelSwapBuffers(__DRIdrawablePrivate * dPriv)
_mesa_notifySwapBuffers(ctx); /* flush pending rendering commands */
- if (!intelScheduleSwap(dPriv, &missed_target)) {
- driWaitForVBlank(dPriv, &missed_target);
+ /*
+ * The old swapping ioctl was incredibly racy; just wait for vblank
+ * and do the swap ourselves.
+ */
+ driWaitForVBlank(dPriv, &missed_target);
- /*
- * Update each buffer's vbl_pending so we don't get too out of
- * sync
- */
- intel_get_renderbuffer(&intel_fb->Base,
- BUFFER_BACK_LEFT)->vbl_pending =
- intel_get_renderbuffer(&intel_fb->Base,
- BUFFER_FRONT_LEFT)->vbl_pending =
- dPriv->vblSeq;
- if (!intelPageFlip(dPriv)) {
- intelCopyBuffer(dPriv, NULL);
- }
- }
+ /*
+ * Update each buffer's vbl_pending so we don't get too out of
+ * sync
+ */
+ intel_get_renderbuffer(&intel_fb->Base,
+ BUFFER_BACK_LEFT)->vbl_pending = dPriv->vblSeq;
+ intel_get_renderbuffer(&intel_fb->Base,
+ BUFFER_FRONT_LEFT)->vbl_pending = dPriv->vblSeq;
+
+ intelCopyBuffer(dPriv, NULL);
intel_fb->swap_count++;
(*psp->systemTime->getUST) (&ust);
@@ -819,6 +503,8 @@ intelSwapBuffers(__DRIdrawablePrivate * dPriv)
intel_fb->swap_ust = ust;
}
+ drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE);
+
}
else {
/* XXX this shouldn't be an error but we can't handle it for now */
@@ -867,7 +553,6 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
struct intel_context *intel = intel_context(ctx);
struct intel_region *colorRegions[MAX_DRAW_BUFFERS], *depthRegion = NULL;
struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL;
- int front = 0; /* drawing to front color buffer? */
if (!fb) {
/* this can happen during the initial context initialization */
@@ -895,63 +580,49 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
if (fb->Name)
intel_validate_paired_depth_stencil(ctx, fb);
- /* If the batch contents require looping over cliprects, flush them before
- * we go changing which cliprects get referenced when that happens.
- */
- if (intel->batch->cliprect_mode == LOOP_CLIPRECTS)
- intel_batchbuffer_flush(intel->batch);
-
/*
* How many color buffers are we drawing into?
*/
if (fb->_NumColorDrawBuffers == 0) {
/* writing to 0 */
- FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE);
colorRegions[0] = NULL;
-
- if (fb->Name != 0)
- intelSetRenderbufferClipRects(intel);
+ intel->constant_cliprect = GL_TRUE;
} else if (fb->_NumColorDrawBuffers > 1) {
int i;
struct intel_renderbuffer *irb;
- FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE);
- if (fb->Name != 0)
- intelSetRenderbufferClipRects(intel);
for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]);
- colorRegions[i] = (irb && irb->region) ? irb->region : NULL;
+ colorRegions[i] = irb ? irb->region : NULL;
}
+ intel->constant_cliprect = GL_TRUE;
}
else {
- /* draw to exactly one color buffer */
- /*_mesa_debug(ctx, "Hardware rendering\n");*/
- FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE);
- if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
- front = 1;
- }
-
- /*
- * Get the intel_renderbuffer for the colorbuffer we're drawing into.
- * And set up cliprects.
+ /* Get the intel_renderbuffer for the single colorbuffer we're drawing
+ * into, and set up cliprects accordingly.
*/
if (fb->Name == 0) {
+ intel->constant_cliprect = intel->driScreen->dri2.enabled;
/* drawing to window system buffer */
- if (front) {
- intelSetFrontClipRects(intel);
+ if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+ if (!intel->constant_cliprect && !intel->front_cliprects)
+ intel_batchbuffer_flush(intel->batch);
+ intel->front_cliprects = GL_TRUE;
colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT);
}
else {
- intelSetBackClipRects(intel);
+ if (!intel->constant_cliprect && intel->front_cliprects)
+ intel_batchbuffer_flush(intel->batch);
+ intel->front_cliprects = GL_FALSE;
colorRegions[0]= intel_get_rb_region(fb, BUFFER_BACK_LEFT);
}
}
else {
/* drawing to user-created FBO */
struct intel_renderbuffer *irb;
- intelSetRenderbufferClipRects(intel);
irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]);
colorRegions[0] = (irb && irb->region) ? irb->region : NULL;
+ intel->constant_cliprect = GL_TRUE;
}
}
diff --git a/shared/intel_buffers.h b/shared/intel_buffers.h
index a669a85..0be1cee 100644
--- a/shared/intel_buffers.h
+++ b/shared/intel_buffers.h
@@ -29,6 +29,8 @@
#ifndef INTEL_BUFFERS_H
#define INTEL_BUFFERS_H
+#include "dri_util.h"
+#include "drm.h"
struct intel_context;
struct intel_framebuffer;
@@ -43,8 +45,6 @@ extern struct intel_region *intel_readbuf_region(struct intel_context *intel);
extern struct intel_region *intel_drawbuf_region(struct intel_context *intel);
-extern void intel_wait_flips(struct intel_context *intel);
-
extern void intelSwapBuffers(__DRIdrawablePrivate * dPriv);
extern void intelWindowMoved(struct intel_context *intel);
@@ -53,4 +53,9 @@ extern void intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb);
extern void intelInitBufferFuncs(struct dd_function_table *functions);
+void intel_get_cliprects(struct intel_context *intel,
+ struct drm_clip_rect **cliprects,
+ unsigned int *num_cliprects,
+ int *x_off, int *y_off);
+
#endif /* INTEL_BUFFERS_H */
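A minimal sketch of a caller of the new helper declared above, mirroring how do_flush_locked() and intelClearWithBlit() use it earlier in this patch; the early-return policy is just one plausible choice.

    struct drm_clip_rect *cliprects;
    unsigned int num_cliprects;
    int x_off, y_off;

    intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
    if (num_cliprects == 0)
       return;   /* nothing visible to draw */

    /* cliprects[0..num_cliprects-1] are in screen coordinates; x_off/y_off
     * translate drawable coordinates to screen coordinates.
     */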
diff --git a/shared/intel_bufmgr_ttm.c b/shared/intel_bufmgr_ttm.c
deleted file mode 100644
index 194814e..0000000
--- a/shared/intel_bufmgr_ttm.c
+++ /dev/null
@@ -1,1122 +0,0 @@
-/**************************************************************************
- *
- * Copyright © 2007 Red Hat Inc.
- * Copyright © 2007 Intel Corporation
- * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- *
- **************************************************************************/
-/*
- * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
- * Keith Whitwell <keithw-at-tungstengraphics-dot-com>
- * Eric Anholt <eric@anholt.net>
- * Dave Airlie <airlied@linux.ie>
- */
-
-#include <xf86drm.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include "errno.h"
-#include "mtypes.h"
-#include "dri_bufmgr.h"
-#include "string.h"
-#include "imports.h"
-
-#include "i915_drm.h"
-
-#include "intel_bufmgr_ttm.h"
-#ifdef TTM_API
-
-#define DBG(...) do { \
- if (bufmgr_ttm->bufmgr.debug) \
- fprintf(stderr, __VA_ARGS__); \
-} while (0)
-
-/*
- * These bits are always specified in each validation
- * request. Other bits are not supported at this point
- * as it would require a bit of investigation to figure
- * out what mask value should be used.
- */
-#define INTEL_BO_MASK (DRM_BO_MASK_MEM | \
- DRM_BO_FLAG_READ | \
- DRM_BO_FLAG_WRITE | \
- DRM_BO_FLAG_EXE)
-
-struct intel_validate_entry {
- dri_bo *bo;
- struct drm_i915_op_arg bo_arg;
-};
-
-struct dri_ttm_bo_bucket_entry {
- drmBO drm_bo;
- struct dri_ttm_bo_bucket_entry *next;
-};
-
-struct dri_ttm_bo_bucket {
- struct dri_ttm_bo_bucket_entry *head;
- struct dri_ttm_bo_bucket_entry **tail;
- /**
- * Limit on the number of entries in this bucket.
- *
- * 0 means that this caching at this bucket size is disabled.
- * -1 means that there is no limit to caching at this size.
- */
- int max_entries;
- int num_entries;
-};
-
-/* Arbitrarily chosen: with 16 buckets, the largest size we'll cache for
- * reuse is 1 << 15 pages, or 128MB.
- */
-#define INTEL_TTM_BO_BUCKETS 16
-typedef struct _dri_bufmgr_ttm {
- dri_bufmgr bufmgr;
-
- int fd;
- unsigned int fence_type;
- unsigned int fence_type_flush;
-
- uint32_t max_relocs;
-
- struct intel_validate_entry *validate_array;
- int validate_array_size;
- int validate_count;
-
- /** Array of lists of cached drmBOs of power-of-two sizes */
- struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS];
-} dri_bufmgr_ttm;
-
-/**
- * Private information associated with a relocation that isn't already stored
- * in the relocation buffer to be passed to the kernel.
- */
-struct dri_ttm_reloc {
- dri_bo *target_buf;
- uint64_t validate_flags;
- /** Offset of target_buf after last execution of this relocation entry. */
- unsigned int last_target_offset;
-};
-
-typedef struct _dri_bo_ttm {
- dri_bo bo;
-
- int refcount;
- unsigned int map_count;
- drmBO drm_bo;
- const char *name;
-
- uint64_t last_flags;
-
- /**
- * Index of the buffer within the validation list while preparing a
- * batchbuffer execution.
- */
- int validate_index;
-
- /** DRM buffer object containing relocation list */
- uint32_t *reloc_buf_data;
- struct dri_ttm_reloc *relocs;
-
- /**
- * Indicates that the buffer may be shared with other processes, so we
- * can't keep it mapped after the caller has unmapped it.
- */
- GLboolean shared;
-
- GLboolean delayed_unmap;
- /* Virtual address from the dri_bo_map whose unmap was delayed. */
- void *saved_virtual;
-} dri_bo_ttm;
-
-typedef struct _dri_fence_ttm
-{
- dri_fence fence;
-
- int refcount;
- const char *name;
- drmFence drm_fence;
-} dri_fence_ttm;
-
-static int
-logbase2(int n)
-{
- GLint i = 1;
- GLint log2 = 0;
-
- while (n > i) {
- i *= 2;
- log2++;
- }
-
- return log2;
-}
-
-static struct dri_ttm_bo_bucket *
-dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size)
-{
- int i;
-
- /* We only do buckets in power of two increments */
- if ((size & (size - 1)) != 0)
- return NULL;
-
- /* We should only see sizes rounded to pages. */
- assert((size % 4096) == 0);
-
- /* We always allocate in units of pages */
- i = ffs(size / 4096) - 1;
- if (i >= INTEL_TTM_BO_BUCKETS)
- return NULL;
-
- return &bufmgr_ttm->cache_bucket[i];
-}
-
-
-static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm)
-{
- int i, j;
-
- for (i = 0; i < bufmgr_ttm->validate_count; i++) {
- dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
- dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
-
- if (bo_ttm->reloc_buf_data != NULL) {
- for (j = 0; j < (bo_ttm->reloc_buf_data[0] & 0xffff); j++) {
- uint32_t *reloc_entry = bo_ttm->reloc_buf_data +
- I915_RELOC_HEADER +
- j * I915_RELOC0_STRIDE;
- dri_bo *target_bo = bo_ttm->relocs[j].target_buf;
- dri_bo_ttm *target_ttm = (dri_bo_ttm *)target_bo;
-
- DBG("%2d: %s@0x%08x -> %s@0x%08lx + 0x%08x\n",
- i,
- bo_ttm->name, reloc_entry[0],
- target_ttm->name, target_bo->offset,
- reloc_entry[1]);
- }
- } else {
- DBG("%2d: %s\n", i, bo_ttm->name);
- }
- }
-}
-
-/**
- * Adds the given buffer to the list of buffers to be validated (moved into the
- * appropriate memory type) with the next batch submission.
- *
- * If a buffer is validated multiple times in a batch submission, it ends up
- * with the intersection of the memory type flags and the union of the
- * access flags.
- */
-static void
-intel_add_validate_buffer(dri_bo *buf,
- uint64_t flags)
-{
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
- dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
-
- /* If we delayed doing an unmap to mitigate map/unmap syscall thrashing,
- * do that now.
- */
- if (ttm_buf->delayed_unmap) {
- drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
- ttm_buf->delayed_unmap = GL_FALSE;
- }
-
- if (ttm_buf->validate_index == -1) {
- struct intel_validate_entry *entry;
- struct drm_i915_op_arg *arg;
- struct drm_bo_op_req *req;
- int index;
-
- /* Extend the array of validation entries as necessary. */
- if (bufmgr_ttm->validate_count == bufmgr_ttm->validate_array_size) {
- int i, new_size = bufmgr_ttm->validate_array_size * 2;
-
- if (new_size == 0)
- new_size = 5;
-
- bufmgr_ttm->validate_array =
- realloc(bufmgr_ttm->validate_array,
- sizeof(struct intel_validate_entry) * new_size);
- bufmgr_ttm->validate_array_size = new_size;
-
- /* Update pointers for realloced mem. */
- for (i = 0; i < bufmgr_ttm->validate_count - 1; i++) {
- bufmgr_ttm->validate_array[i].bo_arg.next = (unsigned long)
- &bufmgr_ttm->validate_array[i + 1].bo_arg;
- }
- }
-
- /* Pick out the new array entry for ourselves */
- index = bufmgr_ttm->validate_count;
- ttm_buf->validate_index = index;
- entry = &bufmgr_ttm->validate_array[index];
- bufmgr_ttm->validate_count++;
-
- /* Fill in array entry */
- entry->bo = buf;
- dri_bo_reference(buf);
-
- /* Fill in kernel arg */
- arg = &entry->bo_arg;
- req = &arg->d.req;
-
- memset(arg, 0, sizeof(*arg));
- req->bo_req.handle = ttm_buf->drm_bo.handle;
- req->op = drm_bo_validate;
- req->bo_req.flags = flags;
- req->bo_req.hint = 0;
-#ifdef DRM_BO_HINT_PRESUMED_OFFSET
- /* PRESUMED_OFFSET indicates that all relocations pointing at this
- * buffer have the correct offset. If any of our relocations don't,
- * this flag will be cleared off the buffer later in the relocation
- * processing.
- */
- req->bo_req.hint |= DRM_BO_HINT_PRESUMED_OFFSET;
- req->bo_req.presumed_offset = buf->offset;
-#endif
- req->bo_req.mask = INTEL_BO_MASK;
- req->bo_req.fence_class = 0; /* Backwards compat. */
-
- if (ttm_buf->reloc_buf_data != NULL)
- arg->reloc_ptr = (unsigned long)(void *)ttm_buf->reloc_buf_data;
- else
- arg->reloc_ptr = 0;
-
- /* Hook up the linked list of args for the kernel */
- arg->next = 0;
- if (index != 0) {
- bufmgr_ttm->validate_array[index - 1].bo_arg.next =
- (unsigned long)arg;
- }
- } else {
- struct intel_validate_entry *entry =
- &bufmgr_ttm->validate_array[ttm_buf->validate_index];
- struct drm_i915_op_arg *arg = &entry->bo_arg;
- struct drm_bo_op_req *req = &arg->d.req;
- uint64_t memFlags = req->bo_req.flags & flags & DRM_BO_MASK_MEM;
- uint64_t modeFlags = (req->bo_req.flags | flags) & ~DRM_BO_MASK_MEM;
-
- /* Buffer was already in the validate list. Extend its flags as
- * necessary.
- */
-
- if (memFlags == 0) {
- fprintf(stderr,
- "%s: No shared memory types between "
- "0x%16llx and 0x%16llx\n",
- __FUNCTION__, req->bo_req.flags, flags);
- abort();
- }
- if (flags & ~INTEL_BO_MASK) {
- fprintf(stderr,
- "%s: Flags bits 0x%16llx are not supposed to be used in a relocation\n",
- __FUNCTION__, flags & ~INTEL_BO_MASK);
- abort();
- }
- req->bo_req.flags = memFlags | modeFlags;
- }
-}
-
-
-#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
- sizeof(uint32_t))
-
-static int
-intel_setup_reloc_list(dri_bo *bo)
-{
- dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr;
-
- bo_ttm->relocs = calloc(bufmgr_ttm->max_relocs,
- sizeof(struct dri_ttm_reloc));
- bo_ttm->reloc_buf_data = calloc(1, RELOC_BUF_SIZE(bufmgr_ttm->max_relocs));
-
- /* Initialize the relocation list with the header:
- * DWORD 0: relocation count
- * DWORD 1: relocation type
- * DWORD 2+3: handle to next relocation list (currently none) 64-bits
- */
- bo_ttm->reloc_buf_data[0] = 0;
- bo_ttm->reloc_buf_data[1] = I915_RELOC_TYPE_0;
- bo_ttm->reloc_buf_data[2] = 0;
- bo_ttm->reloc_buf_data[3] = 0;
-
- return 0;
-}
-
-#if 0
-int
-driFenceSignaled(DriFenceObject * fence, unsigned type)
-{
- int signaled;
- int ret;
-
- if (fence == NULL)
- return GL_TRUE;
-
- ret = drmFenceSignaled(bufmgr_ttm->fd, &fence->fence, type, &signaled);
- BM_CKFATAL(ret);
- return signaled;
-}
-#endif
-
-static dri_bo *
-dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
- unsigned long size, unsigned int alignment,
- uint64_t location_mask)
-{
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
- dri_bo_ttm *ttm_buf;
- unsigned int pageSize = getpagesize();
- int ret;
- uint64_t flags;
- unsigned int hint;
- unsigned long alloc_size;
- struct dri_ttm_bo_bucket *bucket;
- GLboolean alloc_from_cache = GL_FALSE;
-
- ttm_buf = calloc(1, sizeof(*ttm_buf));
- if (!ttm_buf)
- return NULL;
-
- /* The mask argument does nothing for us beyond determining which pool
- * (TTM or local) the buffer is allocated into, so just pass all of the
- * allocation class flags.
- */
- flags = location_mask | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE |
- DRM_BO_FLAG_EXE;
- /* No hints we want to use. */
- hint = 0;
-
- /* Round the allocated size up to a power of two number of pages. */
- alloc_size = 1 << logbase2(size);
- if (alloc_size < pageSize)
- alloc_size = pageSize;
- bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size);
-
- /* If we don't have caching at this size, don't actually round the
- * allocation up.
- */
- if (bucket == NULL || bucket->max_entries == 0)
- alloc_size = size;
-
- /* Get a buffer out of the cache if available */
- if (bucket != NULL && bucket->num_entries > 0) {
- struct dri_ttm_bo_bucket_entry *entry = bucket->head;
- int busy;
-
- /* Check if the buffer is still in flight. If not, reuse it. */
- ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy);
- alloc_from_cache = (ret == 0 && busy == 0);
-
- if (alloc_from_cache) {
- bucket->head = entry->next;
- if (entry->next == NULL)
- bucket->tail = &bucket->head;
- bucket->num_entries--;
-
- ttm_buf->drm_bo = entry->drm_bo;
- free(entry);
- }
- }
-
- if (!alloc_from_cache) {
- ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize,
- NULL, flags, hint, &ttm_buf->drm_bo);
- if (ret != 0) {
- free(ttm_buf);
- return NULL;
- }
- }
-
- ttm_buf->bo.size = size;
- ttm_buf->bo.offset = ttm_buf->drm_bo.offset;
- ttm_buf->bo.virtual = NULL;
- ttm_buf->bo.bufmgr = bufmgr;
- ttm_buf->name = name;
- ttm_buf->refcount = 1;
- ttm_buf->reloc_buf_data = NULL;
- ttm_buf->relocs = NULL;
- ttm_buf->last_flags = ttm_buf->drm_bo.flags;
- ttm_buf->shared = GL_FALSE;
- ttm_buf->delayed_unmap = GL_FALSE;
- ttm_buf->validate_index = -1;
-
- DBG("bo_create: %p (%s) %ldb\n", &ttm_buf->bo, ttm_buf->name, size);
-
- return &ttm_buf->bo;
-}
-
-/* Our TTM backend doesn't allow creation of static buffers, as that requires
- * privilege for the non-fake case, and the lock in the fake case where we were
- * working around the X Server not creating buffers and passing handles to us.
- */
-static dri_bo *
-dri_ttm_alloc_static(dri_bufmgr *bufmgr, const char *name,
- unsigned long offset, unsigned long size, void *virtual,
- uint64_t location_mask)
-{
- return NULL;
-}
-
-/**
- * Returns a dri_bo wrapping the given buffer object handle.
- *
- * This can be used when one application needs to pass a buffer object
- * to another.
- */
-dri_bo *
-intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
- unsigned int handle)
-{
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
- dri_bo_ttm *ttm_buf;
- int ret;
-
- ttm_buf = calloc(1, sizeof(*ttm_buf));
- if (!ttm_buf)
- return NULL;
-
- ret = drmBOReference(bufmgr_ttm->fd, handle, &ttm_buf->drm_bo);
- if (ret != 0) {
- fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
- name, handle, strerror(-ret));
- free(ttm_buf);
- return NULL;
- }
- ttm_buf->bo.size = ttm_buf->drm_bo.size;
- ttm_buf->bo.offset = ttm_buf->drm_bo.offset;
- ttm_buf->bo.virtual = NULL;
- ttm_buf->bo.bufmgr = bufmgr;
- ttm_buf->name = name;
- ttm_buf->refcount = 1;
- ttm_buf->reloc_buf_data = NULL;
- ttm_buf->relocs = NULL;
- ttm_buf->last_flags = ttm_buf->drm_bo.flags;
- ttm_buf->shared = GL_TRUE;
- ttm_buf->delayed_unmap = GL_FALSE;
- ttm_buf->validate_index = -1;
-
- DBG("bo_create_from_handle: %p %08x (%s)\n",
- &ttm_buf->bo, handle, ttm_buf->name);
-
- return &ttm_buf->bo;
-}
-
-static void
-dri_ttm_bo_reference(dri_bo *buf)
-{
- dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
-
- ttm_buf->refcount++;
-}
-
-static void
-dri_ttm_bo_unreference(dri_bo *buf)
-{
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
- dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
-
- if (!buf)
- return;
-
- if (--ttm_buf->refcount == 0) {
- struct dri_ttm_bo_bucket *bucket;
- int ret;
-
- assert(ttm_buf->map_count == 0);
-
- if (ttm_buf->reloc_buf_data) {
- int i;
-
- /* Unreference all the target buffers */
- for (i = 0; i < (ttm_buf->reloc_buf_data[0] & 0xffff); i++)
- dri_bo_unreference(ttm_buf->relocs[i].target_buf);
- free(ttm_buf->relocs);
-
- /* Free the kernel BO containing relocation entries */
- free(ttm_buf->reloc_buf_data);
- ttm_buf->reloc_buf_data = NULL;
- }
-
- if (ttm_buf->delayed_unmap) {
- int ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
-
- if (ret != 0) {
- fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n",
- __FILE__, __LINE__, ttm_buf->name, strerror(-ret));
- }
- }
-
- bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size);
- /* Put the buffer into our internal cache for reuse if we can. */
- if (!ttm_buf->shared &&
- bucket != NULL &&
- (bucket->max_entries == -1 ||
- (bucket->max_entries > 0 &&
- bucket->num_entries < bucket->max_entries)))
- {
- struct dri_ttm_bo_bucket_entry *entry;
-
- entry = calloc(1, sizeof(*entry));
- entry->drm_bo = ttm_buf->drm_bo;
-
- entry->next = NULL;
- *bucket->tail = entry;
- bucket->tail = &entry->next;
- bucket->num_entries++;
- } else {
- /* Decrement the kernel refcount for the buffer. */
- ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo);
- if (ret != 0) {
- fprintf(stderr, "drmBOUnreference failed (%s): %s\n",
- ttm_buf->name, strerror(-ret));
- }
- }
-
- DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
-
- free(buf);
- return;
- }
-}
-
-static int
-dri_ttm_bo_map(dri_bo *buf, GLboolean write_enable)
-{
- dri_bufmgr_ttm *bufmgr_ttm;
- dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
- uint64_t flags;
- int ret;
-
- bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
-
- flags = DRM_BO_FLAG_READ;
- if (write_enable)
- flags |= DRM_BO_FLAG_WRITE;
-
- /* Allow recursive mapping. Mesa may recursively map buffers with
- * nested display loops.
- */
- if (ttm_buf->map_count++ != 0)
- return 0;
-
- assert(buf->virtual == NULL);
-
- DBG("bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
-
- /* XXX: What about if we're upgrading from READ to WRITE? */
- if (ttm_buf->delayed_unmap) {
- buf->virtual = ttm_buf->saved_virtual;
- return 0;
- }
-
- ret = drmBOMap(bufmgr_ttm->fd, &ttm_buf->drm_bo, flags, 0, &buf->virtual);
- if (ret != 0) {
- fprintf(stderr, "%s:%d: Error mapping buffer %s: %s .\n",
- __FILE__, __LINE__, ttm_buf->name, strerror(-ret));
- }
-
- return ret;
-}
-
-static int
-dri_ttm_bo_unmap(dri_bo *buf)
-{
- dri_bufmgr_ttm *bufmgr_ttm;
- dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
- int ret;
-
- if (buf == NULL)
- return 0;
-
- assert(ttm_buf->map_count != 0);
- if (--ttm_buf->map_count != 0)
- return 0;
-
- bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
-
- assert(buf->virtual != NULL);
-
- DBG("bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
-
- if (!ttm_buf->shared) {
- ttm_buf->saved_virtual = buf->virtual;
- ttm_buf->delayed_unmap = GL_TRUE;
- buf->virtual = NULL;
-
- return 0;
- }
-
- buf->virtual = NULL;
-
- ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
- if (ret != 0) {
- fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n",
- __FILE__, __LINE__, ttm_buf->name, strerror(-ret));
- }
-
- return ret;
-}
-
-/**
- * Returns a dri_fence wrapping the given kernel fence argument.
- *
- * This can be used to track completion of a batchbuffer submission that
- * returned a fence from the kernel.
- */
-dri_fence *
-intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
- drm_fence_arg_t *arg)
-{
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
- dri_fence_ttm *ttm_fence;
-
- ttm_fence = malloc(sizeof(*ttm_fence));
- if (!ttm_fence)
- return NULL;
-
- ttm_fence->drm_fence.handle = arg->handle;
- ttm_fence->drm_fence.fence_class = arg->fence_class;
- ttm_fence->drm_fence.type = arg->type;
- ttm_fence->drm_fence.flags = arg->flags;
- ttm_fence->drm_fence.signaled = 0;
- ttm_fence->drm_fence.sequence = arg->sequence;
-
- ttm_fence->fence.bufmgr = bufmgr;
- ttm_fence->name = name;
- ttm_fence->refcount = 1;
-
- DBG("fence_create_from_handle: %p (%s)\n",
- &ttm_fence->fence, ttm_fence->name);
-
- return &ttm_fence->fence;
-}
-
-
-static void
-dri_ttm_fence_reference(dri_fence *fence)
-{
- dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
-
- ++fence_ttm->refcount;
- DBG("fence_reference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
-}
-
-static void
-dri_ttm_fence_unreference(dri_fence *fence)
-{
- dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
-
- if (!fence)
- return;
-
- DBG("fence_unreference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
-
- if (--fence_ttm->refcount == 0) {
- int ret;
-
- ret = drmFenceUnreference(bufmgr_ttm->fd, &fence_ttm->drm_fence);
- if (ret != 0) {
- fprintf(stderr, "drmFenceUnreference failed (%s): %s\n",
- fence_ttm->name, strerror(-ret));
- }
-
- free(fence);
- return;
- }
-}
-
-static void
-dri_ttm_fence_wait(dri_fence *fence)
-{
- dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
- int ret;
-
- ret = drmFenceWait(bufmgr_ttm->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_ttm->drm_fence, 0);
- if (ret != 0) {
- fprintf(stderr, "%s:%d: Error waiting for fence %s: %s.\n",
- __FILE__, __LINE__, fence_ttm->name, strerror(-ret));
- abort();
- }
-
- DBG("fence_wait: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
-}
-
-static void
-dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr)
-{
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
- int i;
-
- free(bufmgr_ttm->validate_array);
-
- /* Free any cached buffer objects we were going to reuse */
- for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
- struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i];
- struct dri_ttm_bo_bucket_entry *entry;
-
- while ((entry = bucket->head) != NULL) {
- int ret;
-
- bucket->head = entry->next;
- if (entry->next == NULL)
- bucket->tail = &bucket->head;
- bucket->num_entries--;
-
- /* Decrement the kernel refcount for the buffer. */
- ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo);
- if (ret != 0) {
- fprintf(stderr, "drmBOUnreference failed: %s\n",
- strerror(-ret));
- }
-
- free(entry);
- }
- }
-
- free(bufmgr);
-}
-
-/**
- * Adds the target buffer to the validation list and adds the relocation
- * to the reloc_buffer's relocation list.
- *
- * The relocation entry at the given offset must already contain the
- * precomputed relocation value, because the kernel will optimize out
- * the relocation entry write when the buffer hasn't moved from the
- * last known offset in target_buf.
- */
-static int
-dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta,
- GLuint offset, dri_bo *target_buf)
-{
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)reloc_buf->bufmgr;
- dri_bo_ttm *reloc_buf_ttm = (dri_bo_ttm *)reloc_buf;
- dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)target_buf;
- int num_relocs;
- uint32_t *this_reloc;
-
- /* Create a new relocation list if needed */
- if (reloc_buf_ttm->reloc_buf_data == NULL)
- intel_setup_reloc_list(reloc_buf);
-
- num_relocs = reloc_buf_ttm->reloc_buf_data[0];
-
- /* Check overflow */
- assert(num_relocs < bufmgr_ttm->max_relocs);
-
- this_reloc = reloc_buf_ttm->reloc_buf_data + I915_RELOC_HEADER +
- num_relocs * I915_RELOC0_STRIDE;
-
- this_reloc[0] = offset;
- this_reloc[1] = delta;
- this_reloc[2] = target_buf_ttm->drm_bo.handle; /* To be filled in at exec time */
- this_reloc[3] = 0;
-
- reloc_buf_ttm->relocs[num_relocs].validate_flags = flags;
- reloc_buf_ttm->relocs[num_relocs].target_buf = target_buf;
- dri_bo_reference(target_buf);
-
- reloc_buf_ttm->reloc_buf_data[0]++; /* Increment relocation count */
- /* Check wraparound */
- assert(reloc_buf_ttm->reloc_buf_data[0] != 0);
- return 0;
-}
-
-/**
- * Walk the tree of relocations rooted at BO and accumulate the list of
- * validations to be performed and update the relocation buffers with
- * index values into the validation list.
- */
-static void
-dri_ttm_bo_process_reloc(dri_bo *bo)
-{
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr;
- dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
- unsigned int nr_relocs;
- int i;
-
- if (bo_ttm->reloc_buf_data == NULL)
- return;
-
- nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff;
-
- for (i = 0; i < nr_relocs; i++) {
- struct dri_ttm_reloc *r = &bo_ttm->relocs[i];
-
- /* Continue walking the tree depth-first. */
- dri_ttm_bo_process_reloc(r->target_buf);
-
- /* Add the target to the validate list */
- intel_add_validate_buffer(r->target_buf, r->validate_flags);
-
- /* Clear the PRESUMED_OFFSET flag from the validate list entry of the
- * target if this buffer has a stale relocated pointer at it.
- */
- if (r->last_target_offset != r->target_buf->offset) {
- dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)r->target_buf;
- struct intel_validate_entry *entry =
- &bufmgr_ttm->validate_array[target_buf_ttm->validate_index];
-
- entry->bo_arg.d.req.bo_req.hint &= ~DRM_BO_HINT_PRESUMED_OFFSET;
- }
- }
-}
-
-static void *
-dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count)
-{
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;
-
- /* Update indices and set up the validate list. */
- dri_ttm_bo_process_reloc(batch_buf);
-
- /* Add the batch buffer to the validation list. There are no relocations
- * pointing to it.
- */
- intel_add_validate_buffer(batch_buf,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE);
-
- *count = bufmgr_ttm->validate_count;
- return &bufmgr_ttm->validate_array[0].bo_arg;
-}
-
-static const char *
-intel_get_flags_mem_type_string(uint64_t flags)
-{
- switch (flags & DRM_BO_MASK_MEM) {
- case DRM_BO_FLAG_MEM_LOCAL: return "local";
- case DRM_BO_FLAG_MEM_TT: return "ttm";
- case DRM_BO_FLAG_MEM_VRAM: return "vram";
- case DRM_BO_FLAG_MEM_PRIV0: return "priv0";
- case DRM_BO_FLAG_MEM_PRIV1: return "priv1";
- case DRM_BO_FLAG_MEM_PRIV2: return "priv2";
- case DRM_BO_FLAG_MEM_PRIV3: return "priv3";
- case DRM_BO_FLAG_MEM_PRIV4: return "priv4";
- default: return NULL;
- }
-}
-
-static const char *
-intel_get_flags_caching_string(uint64_t flags)
-{
- switch (flags & (DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED)) {
- case 0: return "UU";
- case DRM_BO_FLAG_CACHED: return "CU";
- case DRM_BO_FLAG_CACHED_MAPPED: return "UC";
- case DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED: return "CC";
- default: return NULL;
- }
-}
-
-static void
-intel_update_buffer_offsets (dri_bufmgr_ttm *bufmgr_ttm)
-{
- int i;
-
- for (i = 0; i < bufmgr_ttm->validate_count; i++) {
- dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
- dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
- struct drm_i915_op_arg *arg = &bufmgr_ttm->validate_array[i].bo_arg;
- struct drm_bo_arg_rep *rep = &arg->d.rep;
-
- /* Update the flags */
- if (rep->bo_info.flags != bo_ttm->last_flags) {
- DBG("BO %s migrated: %s/%s -> %s/%s\n",
- bo_ttm->name,
- intel_get_flags_mem_type_string(bo_ttm->last_flags),
- intel_get_flags_caching_string(bo_ttm->last_flags),
- intel_get_flags_mem_type_string(rep->bo_info.flags),
- intel_get_flags_caching_string(rep->bo_info.flags));
-
- bo_ttm->last_flags = rep->bo_info.flags;
- }
- /* Update the buffer offset */
- if (rep->bo_info.offset != bo->offset) {
- DBG("BO %s migrated: 0x%08lx -> 0x%08lx\n",
- bo_ttm->name, bo->offset, (unsigned long)rep->bo_info.offset);
- bo->offset = rep->bo_info.offset;
- }
- }
-}
-
-/**
- * Update the last target offset field of relocation entries for PRESUMED_OFFSET
- * computation.
- */
-static void
-dri_ttm_bo_post_submit(dri_bo *bo)
-{
- dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
- unsigned int nr_relocs;
- int i;
-
- if (bo_ttm->reloc_buf_data == NULL)
- return;
-
- nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff;
-
- for (i = 0; i < nr_relocs; i++) {
- struct dri_ttm_reloc *r = &bo_ttm->relocs[i];
-
- /* Continue walking the tree depth-first. */
- dri_ttm_bo_post_submit(r->target_buf);
-
- r->last_target_offset = r->target_buf->offset;
- }
-}
-
-static void
-dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
-{
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;
- int i;
-
- intel_update_buffer_offsets (bufmgr_ttm);
-
- dri_ttm_bo_post_submit(batch_buf);
-
- if (bufmgr_ttm->bufmgr.debug)
- dri_ttm_dump_validation_list(bufmgr_ttm);
-
- for (i = 0; i < bufmgr_ttm->validate_count; i++) {
- dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
- dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
-
- /* Disconnect the buffer from the validate list */
- bo_ttm->validate_index = -1;
- dri_bo_unreference(bo);
- bufmgr_ttm->validate_array[i].bo = NULL;
- }
- bufmgr_ttm->validate_count = 0;
-}
-
-/**
- * Enables unlimited caching of buffer objects for reuse.
- *
- * This is potentially very memory expensive, as the cache at each bucket
- * size is only bounded by how many buffers of that size we've managed to have
- * in flight at once.
- */
-void
-intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr)
-{
- dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
- int i;
-
- for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
- bufmgr_ttm->cache_bucket[i].max_entries = -1;
- }
-}
-
-/*
- * Aperture checking is not implemented for the TTM backend; always report
- * that the buffers fit.
- */
-static int
-dri_ttm_check_aperture_space(dri_bo *bo)
-{
- return 0;
-}
-
-/**
- * Initializes the TTM buffer manager, which uses the kernel to allocate,
- * map, and manage buffer objects.
- *
- * \param fd File descriptor of the opened DRM device.
- * \param fence_type Driver-specific fence type used for fences with no flush.
- * \param fence_type_flush Driver-specific fence type used for fences with a
- * flush.
- */
-dri_bufmgr *
-intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
- unsigned int fence_type_flush, int batch_size)
-{
- dri_bufmgr_ttm *bufmgr_ttm;
- int i;
-
- bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm));
- bufmgr_ttm->fd = fd;
- bufmgr_ttm->fence_type = fence_type;
- bufmgr_ttm->fence_type_flush = fence_type_flush;
-
- /* Let's go with one relocation per every 2 dwords (but round down a bit
- * since a power of two will mean an extra page allocation for the reloc
- * buffer).
- *
- * Every 4 was too few for the blender benchmark.
- */
- bufmgr_ttm->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
-
- bufmgr_ttm->bufmgr.bo_alloc = dri_ttm_alloc;
- bufmgr_ttm->bufmgr.bo_alloc_static = dri_ttm_alloc_static;
- bufmgr_ttm->bufmgr.bo_reference = dri_ttm_bo_reference;
- bufmgr_ttm->bufmgr.bo_unreference = dri_ttm_bo_unreference;
- bufmgr_ttm->bufmgr.bo_map = dri_ttm_bo_map;
- bufmgr_ttm->bufmgr.bo_unmap = dri_ttm_bo_unmap;
- bufmgr_ttm->bufmgr.fence_reference = dri_ttm_fence_reference;
- bufmgr_ttm->bufmgr.fence_unreference = dri_ttm_fence_unreference;
- bufmgr_ttm->bufmgr.fence_wait = dri_ttm_fence_wait;
- bufmgr_ttm->bufmgr.destroy = dri_bufmgr_ttm_destroy;
- bufmgr_ttm->bufmgr.emit_reloc = dri_ttm_emit_reloc;
- bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc;
- bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit;
- bufmgr_ttm->bufmgr.debug = GL_FALSE;
- bufmgr_ttm->bufmgr.check_aperture_space = dri_ttm_check_aperture_space;
- /* Initialize the linked lists for BO reuse cache. */
- for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++)
- bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head;
-
- return &bufmgr_ttm->bufmgr;
-}
-#else
-dri_bufmgr *
-intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
- unsigned int fence_type_flush, int batch_size)
-{
- return NULL;
-}
-
-dri_bo *
-intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
- unsigned int handle)
-{
- return NULL;
-}
-
-void
-intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr)
-{
-}
-#endif
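The file deleted above kept freed buffer objects in INTEL_TTM_BO_BUCKETS lists, one per power-of-two size in pages, so a later allocation of the same rounded size could skip drmBOCreate(). A minimal standalone sketch of that sizing scheme (same page size and bucket count as the deleted code; the rounding helper is a simplified stand-in for logbase2()):

#include <stdio.h>
#include <strings.h>   /* ffs() */

#define PAGE_SIZE   4096
#define NUM_BUCKETS 16

/* Round an allocation up to a power-of-two number of pages. */
static unsigned long round_up_pow2(unsigned long n)
{
    unsigned long p = PAGE_SIZE;

    while (p < n)
        p <<= 1;
    return p;
}

/* Pick the cache bucket for a page-aligned power-of-two size, or -1. */
static int bucket_for_size(unsigned long size)
{
    int i;

    if (size == 0 || (size & (size - 1)) != 0 || size % PAGE_SIZE != 0)
        return -1;

    i = ffs(size / PAGE_SIZE) - 1;   /* bucket 0 holds one-page buffers */
    return i < NUM_BUCKETS ? i : -1;
}

int main(void)
{
    unsigned long sz = round_up_pow2(10000);   /* rounds to 16384 bytes */

    printf("size %lu -> bucket %d\n", sz, bucket_for_size(sz));
    return 0;
}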
diff --git a/shared/intel_bufmgr_ttm.h b/shared/intel_bufmgr_ttm.h
deleted file mode 100644
index f5bd64c..0000000
--- a/shared/intel_bufmgr_ttm.h
+++ /dev/null
@@ -1,28 +0,0 @@
-
-#ifndef INTEL_BUFMGR_TTM_H
-#define INTEL_BUFMGR_TTM_H
-
-#include "dri_bufmgr.h"
-
-extern dri_bo *intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
- unsigned int handle);
-
-#ifdef TTM_API
-dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
- drm_fence_arg_t *arg);
-#endif
-
-
-dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
- unsigned int fence_type_flush, int batch_size);
-
-void
-intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr);
-
-#ifndef TTM_API
-#define DRM_I915_FENCE_CLASS_ACCEL 0
-#define DRM_I915_FENCE_TYPE_RW 2
-#define DRM_I915_FENCE_FLAG_FLUSHED 0x01000000
-#endif
-
-#endif
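Together these two deleted files formed the TTM execution path: each batch or state buffer carried a relocation list whose format is visible in intel_setup_reloc_list() and dri_ttm_emit_reloc() above, with four header dwords (count, type, 64-bit handle of a follow-on list) and four dwords per relocation. A standalone sketch of that layout follows; the *_DWORDS constants are assumptions standing in for I915_RELOC_HEADER and I915_RELOC0_STRIDE.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define RELOC_HEADER_DWORDS 4   /* assumed stand-in for I915_RELOC_HEADER */
#define RELOC_ENTRY_DWORDS  4   /* assumed stand-in for I915_RELOC0_STRIDE */

static uint32_t *reloc_list_create(unsigned max_relocs)
{
    size_t dwords = RELOC_HEADER_DWORDS +
                    (size_t)max_relocs * RELOC_ENTRY_DWORDS;

    /* list[0] = relocation count, list[1] = relocation type,
     * list[2..3] = handle of a follow-on list (none here). */
    return calloc(dwords, sizeof(uint32_t));
}

static void reloc_list_append(uint32_t *list, uint32_t offset,
                              uint32_t delta, uint32_t handle)
{
    uint32_t *entry = list + RELOC_HEADER_DWORDS +
                      (list[0] & 0xffff) * RELOC_ENTRY_DWORDS;

    entry[0] = offset;   /* where in the buffer the pointer lives */
    entry[1] = delta;    /* value added to the target's final address */
    entry[2] = handle;   /* target buffer, resolved by the kernel at exec */
    entry[3] = 0;
    list[0]++;           /* bump the relocation count */
}

int main(void)
{
    uint32_t *list = reloc_list_create(8);

    if (!list)
        return 1;
    reloc_list_append(list, 16, 0, 3);
    printf("relocations: %u\n", list[0] & 0xffff);
    free(list);
    return 0;
}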
diff --git a/shared/intel_chipset.h b/shared/intel_chipset.h
index 170efd0..d1b4941 100644
--- a/shared/intel_chipset.h
+++ b/shared/intel_chipset.h
@@ -68,11 +68,12 @@
devid == PCI_CHIP_I965_GME || \
devid == PCI_CHIP_GM45_GM)
-#define IS_GM45_GM(devid) (devid == PCI_CHIP_GM45_GM)
-#define IS_G4X(devid) (devid == PCI_CHIP_IGD_E_G || \
+#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
devid == PCI_CHIP_Q45_G || \
devid == PCI_CHIP_G45_G || \
devid == PCI_CHIP_G41_G)
+#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
#define IS_915(devid) (devid == PCI_CHIP_I915_G || \
devid == PCI_CHIP_E7221_G || \
@@ -91,7 +92,6 @@
devid == PCI_CHIP_I965_GM || \
devid == PCI_CHIP_I965_GME || \
devid == PCI_CHIP_I946_GZ || \
- IS_GM45_GM(devid) || \
IS_G4X(devid))
#define IS_9XX(devid) (IS_915(devid) || \
diff --git a/shared/intel_context.c b/shared/intel_context.c
index fd2fe59..c4a24d7 100644
--- a/shared/intel_context.c
+++ b/shared/intel_context.c
@@ -26,14 +26,14 @@
**************************************************************************/
-#include "glheader.h"
-#include "context.h"
-#include "matrix.h"
-#include "simple_list.h"
-#include "extensions.h"
-#include "framebuffer.h"
-#include "imports.h"
-#include "points.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/matrix.h"
+#include "main/simple_list.h"
+#include "main/extensions.h"
+#include "main/framebuffer.h"
+#include "main/imports.h"
+#include "main/points.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
@@ -51,7 +51,6 @@
#include "intel_chipset.h"
#include "intel_buffers.h"
#include "intel_tex.h"
-#include "intel_ioctl.h"
#include "intel_batchbuffer.h"
#include "intel_blit.h"
#include "intel_pixel.h"
@@ -59,7 +58,7 @@
#include "intel_buffer_objects.h"
#include "intel_fbo.h"
#include "intel_decode.h"
-#include "intel_bufmgr_ttm.h"
+#include "intel_bufmgr.h"
#include "drirenderbuffer.h"
#include "vblank.h"
@@ -69,14 +68,15 @@
int INTEL_DEBUG = (0);
#endif
-#define need_GL_NV_point_sprite
#define need_GL_ARB_multisample
+#define need_GL_ARB_occlusion_query
#define need_GL_ARB_point_parameters
+#define need_GL_ARB_shader_objects
#define need_GL_ARB_texture_compression
#define need_GL_ARB_vertex_buffer_object
#define need_GL_ARB_vertex_program
+#define need_GL_ARB_vertex_shader
#define need_GL_ARB_window_pos
-#define need_GL_ARB_occlusion_query
#define need_GL_EXT_blend_color
#define need_GL_EXT_blend_equation_separate
#define need_GL_EXT_blend_func_separate
@@ -85,22 +85,23 @@ int INTEL_DEBUG = (0);
#define need_GL_EXT_fog_coord
#define need_GL_EXT_framebuffer_object
#define need_GL_EXT_multi_draw_arrays
+#define need_GL_EXT_point_parameters
#define need_GL_EXT_secondary_color
-#define need_GL_NV_vertex_program
#define need_GL_ATI_separate_stencil
-#define need_GL_EXT_point_parameters
+#define need_GL_NV_point_sprite
+#define need_GL_NV_vertex_program
#define need_GL_VERSION_2_0
#define need_GL_VERSION_2_1
-#define need_GL_ARB_shader_objects
-#define need_GL_ARB_vertex_shader
#include "extension_helper.h"
-#define DRIVER_DATE "20061102"
+#define DRIVER_DATE "20090114"
+#define DRIVER_DATE_GEM "GEM " DRIVER_DATE
static const GLubyte *
intelGetString(GLcontext * ctx, GLenum name)
{
+ const struct intel_context *const intel = intel_context(ctx);
const char *chipset;
static char buffer[128];
@@ -110,7 +111,7 @@ intelGetString(GLcontext * ctx, GLenum name)
break;
case GL_RENDERER:
- switch (intel_context(ctx)->intelScreen->deviceID) {
+ switch (intel->intelScreen->deviceID) {
case PCI_CHIP_845_G:
chipset = "Intel(R) 845G";
break;
@@ -186,7 +187,9 @@ intelGetString(GLcontext * ctx, GLenum name)
break;
}
- (void) driGetRendererString(buffer, chipset, DRIVER_DATE, 0);
+ (void) driGetRendererString(buffer, chipset,
+ (intel->ttm) ? DRIVER_DATE_GEM : DRIVER_DATE,
+ 0);
return (GLubyte *) buffer;
default:
@@ -194,6 +197,149 @@ intelGetString(GLcontext * ctx, GLenum name)
}
}
+void
+intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
+{
+ struct intel_framebuffer *intel_fb = drawable->driverPrivate;
+ struct intel_renderbuffer *rb;
+ struct intel_region *region, *depth_region;
+ struct intel_context *intel = context->driverPrivate;
+ __DRIbuffer *buffers;
+ __DRIscreen *screen;
+ int i, count;
+ unsigned int attachments[10];
+ uint32_t name;
+ const char *region_name;
+
+ if (INTEL_DEBUG & DEBUG_DRI)
+ fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
+
+ screen = intel->intelScreen->driScrnPriv;
+
+ i = 0;
+ if (intel_fb->color_rb[0])
+ attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
+ if (intel_fb->color_rb[1])
+ attachments[i++] = __DRI_BUFFER_BACK_LEFT;
+ if (intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH))
+ attachments[i++] = __DRI_BUFFER_DEPTH;
+ if (intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL))
+ attachments[i++] = __DRI_BUFFER_STENCIL;
+
+ buffers = (*screen->dri2.loader->getBuffers)(drawable,
+ &drawable->w,
+ &drawable->h,
+ attachments, i,
+ &count,
+ drawable->loaderPrivate);
+
+ if (buffers == NULL)
+ return;
+
+ drawable->x = 0;
+ drawable->y = 0;
+ drawable->backX = 0;
+ drawable->backY = 0;
+ drawable->numClipRects = 1;
+ drawable->pClipRects[0].x1 = 0;
+ drawable->pClipRects[0].y1 = 0;
+ drawable->pClipRects[0].x2 = drawable->w;
+ drawable->pClipRects[0].y2 = drawable->h;
+ drawable->numBackClipRects = 1;
+ drawable->pBackClipRects[0].x1 = 0;
+ drawable->pBackClipRects[0].y1 = 0;
+ drawable->pBackClipRects[0].x2 = drawable->w;
+ drawable->pBackClipRects[0].y2 = drawable->h;
+
+ depth_region = NULL;
+ for (i = 0; i < count; i++) {
+ switch (buffers[i].attachment) {
+ case __DRI_BUFFER_FRONT_LEFT:
+ rb = intel_fb->color_rb[0];
+ region_name = "dri2 front buffer";
+ break;
+
+ case __DRI_BUFFER_BACK_LEFT:
+ rb = intel_fb->color_rb[1];
+ region_name = "dri2 back buffer";
+ break;
+
+ case __DRI_BUFFER_DEPTH:
+ rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
+ region_name = "dri2 depth buffer";
+ break;
+
+ case __DRI_BUFFER_STENCIL:
+ rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
+ region_name = "dri2 stencil buffer";
+ break;
+
+ case __DRI_BUFFER_ACCUM:
+ default:
+ fprintf(stderr,
+ "unhandled buffer attach event, attacment type %d\n",
+ buffers[i].attachment);
+ return;
+ }
+
+ if (rb->region) {
+ dri_bo_flink(rb->region->buffer, &name);
+ if (name == buffers[i].name)
+ continue;
+ }
+
+ if (INTEL_DEBUG & DEBUG_DRI)
+ fprintf(stderr,
+ "attaching buffer %d, at %d, cpp %d, pitch %d\n",
+ buffers[i].name, buffers[i].attachment,
+ buffers[i].cpp, buffers[i].pitch);
+
+ if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_region) {
+ if (INTEL_DEBUG & DEBUG_DRI)
+ fprintf(stderr, "(reusing depth buffer as stencil)\n");
+ intel_region_reference(&region, depth_region);
+ }
+ else
+ region = intel_region_alloc_for_handle(intel, buffers[i].cpp,
+ drawable->w,
+ drawable->h,
+ buffers[i].pitch / buffers[i].cpp,
+ buffers[i].name,
+ region_name);
+
+ if (buffers[i].attachment == __DRI_BUFFER_DEPTH)
+ depth_region = region;
+
+ intel_renderbuffer_set_region(rb, region);
+ intel_region_release(&region);
+ }
+
+ driUpdateFramebufferSize(&intel->ctx, drawable);
+}
+
+void
+intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+ struct intel_context *intel = intel_context(ctx);
+ __DRIcontext *driContext = intel->driContext;
+ void (*old_viewport)(GLcontext *ctx, GLint x, GLint y,
+ GLsizei w, GLsizei h);
+
+ if (!driContext->driScreenPriv->dri2.enabled)
+ return;
+
+ intel_update_renderbuffers(driContext, driContext->driDrawablePriv);
+ if (driContext->driDrawablePriv != driContext->driReadablePriv)
+ intel_update_renderbuffers(driContext, driContext->driReadablePriv);
+
+ old_viewport = ctx->Driver.Viewport;
+ ctx->Driver.Viewport = NULL;
+ intel->driDrawable = driContext->driDrawablePriv;
+ intelWindowMoved(intel);
+ intel_draw_buffer(ctx, intel->ctx.DrawBuffer);
+ ctx->Driver.Viewport = old_viewport;
+}
+
/**
* Extension strings exported by the intel driver.
*
@@ -201,86 +347,83 @@ intelGetString(GLcontext * ctx, GLenum name)
* i965_dri.
*/
static const struct dri_extension card_extensions[] = {
- {"GL_ARB_multisample", GL_ARB_multisample_functions},
- {"GL_ARB_multitexture", NULL},
- {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
- {"GL_NV_point_sprite", GL_NV_point_sprite_functions},
- {"GL_ARB_texture_border_clamp", NULL},
- {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions},
- {"GL_ARB_texture_cube_map", NULL},
- {"GL_ARB_texture_env_add", NULL},
- {"GL_ARB_texture_env_combine", NULL},
- {"GL_ARB_texture_env_crossbar", NULL},
- {"GL_ARB_texture_env_dot3", NULL},
- {"GL_ARB_texture_mirrored_repeat", NULL},
- {"GL_ARB_texture_non_power_of_two", NULL },
- {"GL_ARB_texture_rectangle", NULL},
- {"GL_NV_texture_rectangle", NULL},
- {"GL_EXT_texture_rectangle", NULL},
- {"GL_ARB_point_parameters", NULL},
- {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions},
- {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
- {"GL_ARB_window_pos", GL_ARB_window_pos_functions},
- {"GL_EXT_blend_color", GL_EXT_blend_color_functions},
- {"GL_EXT_blend_equation_separate",
- GL_EXT_blend_equation_separate_functions},
- {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
- {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
- {"GL_EXT_blend_logic_op", NULL},
- {"GL_EXT_blend_subtract", NULL},
- {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions},
- {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions},
- {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions},
- {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions},
-#if 1 /* XXX FBO temporary? */
- {"GL_EXT_packed_depth_stencil", NULL},
-#endif
- {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
- {"GL_EXT_stencil_wrap", NULL},
- {"GL_EXT_texture_edge_clamp", NULL},
- {"GL_EXT_texture_env_combine", NULL},
- {"GL_EXT_texture_env_dot3", NULL},
- {"GL_EXT_texture_filter_anisotropic", NULL},
- {"GL_EXT_texture_lod_bias", NULL},
- {"GL_3DFX_texture_compression_FXT1", NULL},
- {"GL_APPLE_client_storage", NULL},
- {"GL_MESA_pack_invert", NULL},
- {"GL_MESA_ycbcr_texture", NULL},
- {"GL_NV_blend_square", NULL},
- {"GL_NV_vertex_program", GL_NV_vertex_program_functions},
- {"GL_NV_vertex_program1_1", NULL},
- { "GL_SGIS_generate_mipmap", NULL },
- {NULL, NULL}
+ { "GL_ARB_multisample", GL_ARB_multisample_functions },
+ { "GL_ARB_multitexture", NULL },
+ { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions },
+ { "GL_ARB_texture_border_clamp", NULL },
+ { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions },
+ { "GL_ARB_texture_cube_map", NULL },
+ { "GL_ARB_texture_env_add", NULL },
+ { "GL_ARB_texture_env_combine", NULL },
+ { "GL_ARB_texture_env_crossbar", NULL },
+ { "GL_ARB_texture_env_dot3", NULL },
+ { "GL_ARB_texture_mirrored_repeat", NULL },
+ { "GL_ARB_texture_rectangle", NULL },
+ { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions },
+ { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions },
+ { "GL_ARB_window_pos", GL_ARB_window_pos_functions },
+ { "GL_EXT_blend_color", GL_EXT_blend_color_functions },
+ { "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions },
+ { "GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions },
+ { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions },
+ { "GL_EXT_blend_logic_op", NULL },
+ { "GL_EXT_blend_subtract", NULL },
+ { "GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions },
+ { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
+ { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions },
+ { "GL_EXT_packed_depth_stencil", NULL },
+ { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions },
+ { "GL_EXT_stencil_wrap", NULL },
+ { "GL_EXT_texture_edge_clamp", NULL },
+ { "GL_EXT_texture_env_combine", NULL },
+ { "GL_EXT_texture_env_dot3", NULL },
+ { "GL_EXT_texture_filter_anisotropic", NULL },
+ { "GL_EXT_texture_lod_bias", NULL },
+ { "GL_3DFX_texture_compression_FXT1", NULL },
+ { "GL_APPLE_client_storage", NULL },
+ { "GL_MESA_pack_invert", NULL },
+ { "GL_MESA_ycbcr_texture", NULL },
+ { "GL_NV_blend_square", NULL },
+ { "GL_NV_point_sprite", GL_NV_point_sprite_functions },
+ { "GL_NV_vertex_program", GL_NV_vertex_program_functions },
+ { "GL_NV_vertex_program1_1", NULL },
+ { "GL_SGIS_generate_mipmap", NULL },
+ { NULL, NULL }
};
static const struct dri_extension brw_extensions[] = {
- { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions},
- { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions},
- { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions},
- { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions},
- { "GL_ARB_point_sprite", NULL},
- { "GL_ARB_fragment_shader", NULL },
- { "GL_ARB_draw_buffers", NULL },
{ "GL_ARB_depth_texture", NULL },
+ { "GL_ARB_draw_buffers", NULL },
{ "GL_ARB_fragment_program", NULL },
+ { "GL_ARB_fragment_program_shadow", NULL },
+ { "GL_ARB_fragment_shader", NULL },
+ { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions },
+ { "GL_ARB_point_sprite", NULL },
+ { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions },
+ { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
+#if 0
+ /* Support for GLSL 1.20 is currently broken in core Mesa.
+ */
+ { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions },
+#endif
{ "GL_ARB_shadow", NULL },
+ { "GL_ARB_texture_non_power_of_two", NULL },
+ { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions },
{ "GL_EXT_shadow_funcs", NULL },
- { "GL_ARB_fragment_program_shadow", NULL },
- /* ARB extn won't work if not enabled */
- { "GL_SGIX_depth_texture", NULL },
- { "GL_EXT_texture_sRGB", NULL},
+ { "GL_EXT_texture_sRGB", NULL },
+ { "GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions },
+ { "GL_ATI_texture_env_combine3", NULL },
{ NULL, NULL }
};
-static const struct dri_extension arb_oc_extensions[] = {
- {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions},
- {NULL, NULL}
+static const struct dri_extension arb_oq_extensions[] = {
+ { NULL, NULL }
};
static const struct dri_extension ttm_extensions[] = {
- {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions},
- {"GL_ARB_pixel_buffer_object", NULL},
- {NULL, NULL}
+ { "GL_ARB_pixel_buffer_object", NULL },
+ { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
+ { NULL, NULL }
};
/**
@@ -300,11 +443,6 @@ void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging)
if (intel == NULL || intel->ttm)
driInitExtensions(ctx, ttm_extensions, GL_FALSE);
- if (intel == NULL ||
- (IS_965(intel->intelScreen->deviceID) &&
- intel->intelScreen->drmMinor >= 8))
- driInitExtensions(ctx, arb_oc_extensions, GL_FALSE);
-
if (intel == NULL || IS_965(intel->intelScreen->deviceID))
driInitExtensions(ctx, brw_extensions, GL_FALSE);
}
@@ -370,146 +508,35 @@ intelFlush(GLcontext * ctx)
if (!IS_965(intel->intelScreen->deviceID))
INTEL_FIREVERTICES(intel);
+ /* Emit a flush so that any frontbuffer rendering that might have occurred
+ * lands onscreen in a timely manner, even if the X Server doesn't trigger
+ * a flush for us.
+ */
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+
if (intel->batch->map != intel->batch->ptr)
intel_batchbuffer_flush(intel->batch);
-
- /* XXX: Need to do an MI_FLUSH here.
- */
}
void
intelFinish(GLcontext * ctx)
{
- struct intel_context *intel = intel_context(ctx);
- intelFlush(ctx);
- if (intel->batch->last_fence) {
- dri_fence_wait(intel->batch->last_fence);
- dri_fence_unreference(intel->batch->last_fence);
- intel->batch->last_fence = NULL;
- }
-}
-
-static void
-intelBeginQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q)
-{
- struct intel_context *intel = intel_context( ctx );
- struct drm_i915_mmio io = {
- .read_write = I915_MMIO_READ,
- .reg = MMIO_REGS_PS_DEPTH_COUNT,
- .data = &q->Result
- };
- intel->stats_wm++;
- intelFinish(&intel->ctx);
- drmCommandWrite(intel->driFd, DRM_I915_MMIO, &io, sizeof(io));
-}
-
-static void
-intelEndQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q)
-{
- struct intel_context *intel = intel_context( ctx );
- GLuint64EXT tmp;
- struct drm_i915_mmio io = {
- .read_write = I915_MMIO_READ,
- .reg = MMIO_REGS_PS_DEPTH_COUNT,
- .data = &tmp
- };
- intelFinish(&intel->ctx);
- drmCommandWrite(intel->driFd, DRM_I915_MMIO, &io, sizeof(io));
- q->Result = tmp - q->Result;
- q->Ready = GL_TRUE;
- intel->stats_wm--;
-}
-
-/** Driver-specific fence emit implementation for the fake memory manager. */
-static unsigned int
-intel_fence_emit(void *private)
-{
- struct intel_context *intel = (struct intel_context *)private;
- unsigned int fence;
-
- /* XXX: Need to emit a flush, if we haven't already (at least with the
- * current batchbuffer implementation, we have).
- */
-
- fence = intelEmitIrqLocked(intel);
-
- return fence;
-}
-
-/** Driver-specific fence wait implementation for the fake memory manager. */
-static int
-intel_fence_wait(void *private, unsigned int cookie)
-{
- struct intel_context *intel = (struct intel_context *)private;
-
- intelWaitIrq(intel, cookie);
-
- return 0;
-}
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+ int i;
-static GLboolean
-intel_init_bufmgr(struct intel_context *intel)
-{
- intelScreenPrivate *intelScreen = intel->intelScreen;
- GLboolean ttm_disable = getenv("INTEL_NO_TTM") != NULL;
- GLboolean ttm_supported;
+ intelFlush(ctx);
- /* If we've got a new enough DDX that's initializing TTM and giving us
- * object handles for the shared buffers, use that.
- */
- intel->ttm = GL_FALSE;
- if (intel->intelScreen->driScrnPriv->dri2.enabled)
- ttm_supported = GL_TRUE;
- else if (intel->intelScreen->driScrnPriv->ddx_version.minor >= 9 &&
- intel->intelScreen->drmMinor >= 11 &&
- intel->intelScreen->front.bo_handle != -1)
- ttm_supported = GL_TRUE;
- else
- ttm_supported = GL_FALSE;
+ for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
+ struct intel_renderbuffer *irb;
- if (!ttm_disable && ttm_supported) {
- int bo_reuse_mode;
- intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd,
- DRM_FENCE_TYPE_EXE,
- DRM_FENCE_TYPE_EXE |
- DRM_I915_FENCE_TYPE_RW,
- BATCH_SZ);
- if (intel->bufmgr != NULL)
- intel->ttm = GL_TRUE;
+ irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]);
- bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse");
- switch (bo_reuse_mode) {
- case DRI_CONF_BO_REUSE_DISABLED:
- break;
- case DRI_CONF_BO_REUSE_ALL:
- intel_ttm_enable_bo_reuse(intel->bufmgr);
- break;
- }
+ if (irb->region)
+ dri_bo_wait_rendering(irb->region->buffer);
}
- /* Otherwise, use the classic buffer manager. */
- if (intel->bufmgr == NULL) {
- if (ttm_disable) {
- fprintf(stderr, "TTM buffer manager disabled. Using classic.\n");
- } else {
- fprintf(stderr, "Failed to initialize TTM buffer manager. "
- "Falling back to classic.\n");
- }
-
- if (intelScreen->tex.size == 0) {
- fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n",
- __func__, __LINE__);
- return GL_FALSE;
- }
-
- intel->bufmgr = dri_bufmgr_fake_init(intelScreen->tex.offset,
- intelScreen->tex.map,
- intelScreen->tex.size,
- intel_fence_emit,
- intel_fence_wait,
- intel);
+ if (fb->_DepthBuffer) {
+ /* XXX: Wait on buffer idle */
}
-
- return GL_TRUE;
}
void
@@ -527,9 +554,6 @@ intelInitDriverFunctions(struct dd_function_table *functions)
functions->CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D;
functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D;
- functions->BeginQuery = intelBeginQuery;
- functions->EndQuery = intelEndQuery;
-
intelInitTextureFuncs(functions);
intelInitStateFuncs(functions);
intelInitBufferFuncs(functions);
@@ -548,8 +572,6 @@ intelInitContext(struct intel_context *intel,
GLcontext *shareCtx = (GLcontext *) sharedContextPrivate;
__DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private;
- volatile struct drm_i915_sarea *saPriv = (struct drm_i915_sarea *)
- (((GLubyte *) sPriv->pSAREA) + intelScreen->sarea_priv_offset);
int fthrottle_mode;
if (!_mesa_initialize_context(&intel->ctx, mesaVis, shareCtx,
@@ -561,16 +583,14 @@ intelInitContext(struct intel_context *intel,
driContextPriv->driverPrivate = intel;
intel->intelScreen = intelScreen;
intel->driScreen = sPriv;
- intel->sarea = saPriv;
+ intel->sarea = intelScreen->sarea;
+ intel->driContext = driContextPriv;
/* Dri stuff */
intel->hHWContext = driContextPriv->hHWContext;
intel->driFd = sPriv->fd;
intel->driHwLock = sPriv->lock;
- intel->width = intelScreen->width;
- intel->height = intelScreen->height;
-
driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
intel->driScreen->myNum,
IS_965(intelScreen->deviceID) ? "i965" : "i915");
@@ -579,8 +599,20 @@ intelInitContext(struct intel_context *intel,
else
intel->maxBatchSize = BATCH_SZ;
- if (!intel_init_bufmgr(intel))
- return GL_FALSE;
+ intel->bufmgr = intelScreen->bufmgr;
+ intel->ttm = intelScreen->ttm;
+ if (intel->ttm) {
+ int bo_reuse_mode;
+
+ bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse");
+ switch (bo_reuse_mode) {
+ case DRI_CONF_BO_REUSE_DISABLED:
+ break;
+ case DRI_CONF_BO_REUSE_ALL:
+ intel_bufmgr_gem_enable_reuse(intel->bufmgr);
+ break;
+ }
+ }
ctx->Const.MaxTextureMaxAnisotropy = 2.0;
@@ -676,8 +708,6 @@ intelInitContext(struct intel_context *intel,
intel_recreate_static_regions(intel);
intel->batch = intel_batchbuffer_alloc(intel);
- intel->last_swap_fence = NULL;
- intel->first_swap_fence = NULL;
intel_bufferobj_init(intel);
intel_fbo_init(intel);
@@ -695,7 +725,6 @@ intelInitContext(struct intel_context *intel,
/* Force all software fallbacks */
if (driQueryOptionb(&intel->optionCache, "no_rast")) {
fprintf(stderr, "disabling 3D rasterization\n");
- FALLBACK(intel, INTEL_FALLBACK_USER, 1);
intel->no_rast = 1;
}
@@ -730,17 +759,12 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
intel->Fallback = 0; /* don't call _swrast_Flush later */
intel_batchbuffer_free(intel->batch);
+ intel->batch = NULL;
- if (intel->last_swap_fence) {
- dri_fence_wait(intel->last_swap_fence);
- dri_fence_unreference(intel->last_swap_fence);
- intel->last_swap_fence = NULL;
- }
- if (intel->first_swap_fence) {
- dri_fence_wait(intel->first_swap_fence);
- dri_fence_unreference(intel->first_swap_fence);
- intel->first_swap_fence = NULL;
- }
+ free(intel->prim.vb);
+ intel->prim.vb = NULL;
+ dri_bo_unreference(intel->prim.vb_bo);
+ intel->prim.vb_bo = NULL;
if (release_texture_heaps) {
/* This share group is about to go away, free our private
@@ -750,10 +774,14 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
fprintf(stderr, "do something to free texture heaps\n");
}
+ intel_region_release(&intel->front_region);
+ intel_region_release(&intel->back_region);
+ intel_region_release(&intel->depth_region);
+
+ driDestroyOptionCache(&intel->optionCache);
+
/* free the Mesa context */
_mesa_free_context_data(&intel->ctx);
-
- dri_bufmgr_destroy(intel->bufmgr);
}
}
@@ -776,11 +804,14 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
struct intel_framebuffer *intel_fb =
(struct intel_framebuffer *) driDrawPriv->driverPrivate;
GLframebuffer *readFb = (GLframebuffer *) driReadPriv->driverPrivate;
-
-
- /* XXX FBO temporary fix-ups! */
- /* if the renderbuffers don't have regions, init them from the context */
- if (!driContextPriv->driScreenPriv->dri2.enabled) {
+
+ if (driContextPriv->driScreenPriv->dri2.enabled) {
+ intel_update_renderbuffers(driContextPriv, driDrawPriv);
+ if (driDrawPriv != driReadPriv)
+ intel_update_renderbuffers(driContextPriv, driReadPriv);
+ } else {
+ /* XXX FBO temporary fix-ups! */
+ /* if the renderbuffers don't have regions, init them from the context */
struct intel_renderbuffer *irbDepth
= intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
struct intel_renderbuffer *irbStencil
@@ -794,12 +825,7 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
intel_renderbuffer_set_region(intel_fb->color_rb[1],
intel->back_region);
}
-#if 0
- if (intel_fb->color_rb[2]) {
- intel_renderbuffer_set_region(intel_fb->color_rb[2],
- intel->third_region);
- }
-#endif
+
if (irbDepth) {
intel_renderbuffer_set_region(irbDepth, intel->depth_region);
}
@@ -836,7 +862,7 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
driDrawableInitVBlank(driDrawPriv);
intel_fb->vbl_waited = driDrawPriv->vblSeq;
- for (i = 0; i < (intel->intelScreen->third.handle ? 3 : 2); i++) {
+ for (i = 0; i < 2; i++) {
if (intel_fb->color_rb[i])
intel_fb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq;
}
@@ -860,7 +886,7 @@ intelContendedLock(struct intel_context *intel, GLuint flags)
{
__DRIdrawablePrivate *dPriv = intel->driDrawable;
__DRIscreenPrivate *sPriv = intel->driScreen;
- volatile struct drm_i915_sarea *sarea = intel->sarea;
+ volatile drm_i915_sarea_t *sarea = intel->sarea;
int me = intel->hHWContext;
drmGetLock(intel->driFd, intel->hHWContext, flags);
@@ -892,7 +918,7 @@ intelContendedLock(struct intel_context *intel, GLuint flags)
*/
if (!intel->ttm && sarea->texAge != intel->hHWContext) {
sarea->texAge = intel->hHWContext;
- dri_bufmgr_fake_contended_lock_take(intel->bufmgr);
+ intel_bufmgr_fake_contended_lock_take(intel->bufmgr);
if (INTEL_DEBUG & DEBUG_BATCH)
intel_decode_context_reset();
if (INTEL_DEBUG & DEBUG_BUFMGR)
@@ -900,38 +926,6 @@ intelContendedLock(struct intel_context *intel, GLuint flags)
sarea->ctxOwner, intel->hHWContext);
}
- if (sarea->width != intel->width || sarea->height != intel->height) {
- int numClipRects = intel->numClipRects;
-
- /*
- * FIXME: Really only need to do this when drawing to a
- * common back- or front buffer.
- */
-
- /*
- * This will essentially drop the outstanding batchbuffer on
- * the floor.
- */
- intel->numClipRects = 0;
-
- if (intel->Fallback)
- _swrast_flush(&intel->ctx);
-
- if (!IS_965(intel->intelScreen->deviceID))
- INTEL_FIREVERTICES(intel);
-
- if (intel->batch->map != intel->batch->ptr)
- intel_batchbuffer_flush(intel->batch);
-
- intel->numClipRects = numClipRects;
-
- /* force window update */
- intel->lastStamp = 0;
-
- intel->width = sarea->width;
- intel->height = sarea->height;
- }
-
/* Drawable changed?
*/
if (dPriv && intel->lastStamp != dPriv->lastStamp) {
@@ -982,18 +976,12 @@ void LOCK_HARDWARE( struct intel_context *intel )
intel_fb->vbl_waited = vbl.reply.sequence;
}
- DRM_CAS(intel->driHwLock, intel->hHWContext,
- (DRM_LOCK_HELD|intel->hHWContext), __ret);
+ if (!sPriv->dri2.enabled) {
+ DRM_CAS(intel->driHwLock, intel->hHWContext,
+ (DRM_LOCK_HELD|intel->hHWContext), __ret);
- if (sPriv->dri2.enabled) {
if (__ret)
- drmGetLock(intel->driFd, intel->hHWContext, 0);
- if (__driParseEvents(dPriv->driContextPriv, dPriv)) {
- intelWindowMoved(intel);
- intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
- }
- } else if (__ret) {
- intelContendedLock( intel, 0 );
+ intelContendedLock( intel, 0 );
}
@@ -1006,10 +994,13 @@ void LOCK_HARDWARE( struct intel_context *intel )
*/
void UNLOCK_HARDWARE( struct intel_context *intel )
{
+ __DRIscreen *sPriv = intel->driScreen;
+
intel->vtbl.note_unlock( intel );
intel->locked = 0;
- DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext);
+ if (!sPriv->dri2.enabled)
+ DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext);
_glthread_UNLOCK_MUTEX(lockMutex);
@@ -1020,6 +1011,7 @@ void UNLOCK_HARDWARE( struct intel_context *intel )
* Nothing should be left in batch outside of LOCK/UNLOCK which references
* cliprects.
*/
- assert(intel->batch->cliprect_mode != REFERENCES_CLIPRECTS);
+ if (intel->batch->cliprect_mode == REFERENCES_CLIPRECTS)
+ intel_batchbuffer_flush(intel->batch);
}
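The intelBeginQuery()/intelEndQuery() pair deleted above implemented occlusion queries by reading the pixel-pipe depth-pass counter through a DRM_I915_MMIO ioctl at Begin and End and reporting the difference. A standalone sketch of that differencing scheme (the counter here is an ordinary variable rather than the MMIO_REGS_PS_DEPTH_COUNT register):

#include <stdint.h>
#include <stdio.h>

static uint64_t fake_ps_depth_count;   /* stands in for the hardware counter */

static uint64_t read_depth_count(void)
{
    return fake_ps_depth_count;
}

struct query { uint64_t result; int ready; };

static void begin_query(struct query *q)
{
    q->result = read_depth_count();    /* stash the starting sample */
    q->ready = 0;
}

static void end_query(struct query *q)
{
    uint64_t now = read_depth_count();

    q->result = now - q->result;       /* samples passed between Begin and End */
    q->ready = 1;
}

int main(void)
{
    struct query q;

    begin_query(&q);
    fake_ps_depth_count += 12345;      /* pretend rendering passed 12345 samples */
    end_query(&q);
    printf("samples passed: %llu\n", (unsigned long long)q.result);
    return 0;
}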
diff --git a/shared/intel_context.h b/shared/intel_context.h
index df79ab8..048286c 100644
--- a/shared/intel_context.h
+++ b/shared/intel_context.h
@@ -30,11 +30,11 @@
-#include "mtypes.h"
-#include "drm.h"
-#include "mm.h"
+#include "main/mtypes.h"
+#include "main/mm.h"
#include "texmem.h"
-#include "dri_bufmgr.h"
+#include "drm.h"
+#include "intel_bufmgr.h"
#include "intel_screen.h"
#include "intel_tex_obj.h"
@@ -85,6 +85,7 @@ struct intel_context
{
void (*destroy) (struct intel_context * intel);
void (*emit_state) (struct intel_context * intel);
+ void (*finish_batch) (struct intel_context * intel);
void (*new_batch) (struct intel_context * intel);
void (*emit_invarient_state) (struct intel_context * intel);
void (*note_fence) (struct intel_context *intel, GLuint fence);
@@ -156,6 +157,19 @@ struct intel_context
void (*debug_batch)(struct intel_context *intel);
} vtbl;
+ struct {
+ struct gl_fragment_program *bitmap_fp;
+ struct gl_vertex_program *passthrough_vp;
+
+ struct gl_fragment_program *saved_fp;
+ GLboolean saved_fp_enable;
+ struct gl_vertex_program *saved_vp;
+ GLboolean saved_vp_enable;
+
+ GLint saved_vp_x, saved_vp_y;
+ GLsizei saved_vp_width, saved_vp_height;
+ } meta;
+
GLint refcount;
GLuint Fallback;
GLuint NewGLState;
@@ -165,7 +179,6 @@ struct intel_context
struct intel_region *front_region;
struct intel_region *back_region;
- struct intel_region *third_region;
struct intel_region *depth_region;
/**
@@ -174,9 +187,6 @@ struct intel_context
*/
GLboolean ttm;
- dri_fence *last_swap_fence;
- dri_fence *first_swap_fence;
-
struct intel_batchbuffer *batch;
GLboolean no_batch_wrap;
unsigned batch_id;
@@ -184,9 +194,14 @@ struct intel_context
struct
{
GLuint id;
- GLuint primitive;
- GLubyte *start_ptr;
+ uint32_t primitive; /**< Current hardware primitive type */
void (*flush) (struct intel_context *);
+ GLubyte *start_ptr; /**< for i8xx */
+ dri_bo *vb_bo;
+ uint8_t *vb;
+ unsigned int start_offset; /**< Byte offset of primitive sequence */
+ unsigned int current_offset; /**< Byte offset of next vertex */
+ unsigned int count; /**< Number of vertices in current primitive */
} prim;
GLuint stats_wm;
@@ -224,7 +239,6 @@ struct intel_context
GLenum reduced_primitive;
GLuint vertex_size;
GLubyte *verts; /* points to tnl->clipspace.vertex_buf */
- struct intel_region *draw_region;
/* Fallback rasterization functions
*/
@@ -234,10 +248,18 @@ struct intel_context
/* These refer to the current drawing buffer:
*/
- int drawX, drawY; /**< origin of drawing area within region */
- GLuint numClipRects; /**< cliprects for drawing */
- drm_clip_rect_t *pClipRects;
struct gl_texture_object *frame_buffer_texobj;
+ /**
+ * Set to true if a single constant cliprect should be used in the
+ * batchbuffer. Otherwise, cliprects must be calculated at batchbuffer
+ * flush time while the lock is held.
+ */
+ GLboolean constant_cliprect;
+ /**
+ * In !constant_cliprect mode, set to true if the front cliprects should be
+ * used instead of back.
+ */
+ GLboolean front_cliprects;
drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */
int perf_boxes;
@@ -254,6 +276,7 @@ struct intel_context
drmLock *driHwLock;
int driFd;
+ __DRIcontextPrivate *driContext;
__DRIdrawablePrivate *driDrawable;
__DRIdrawablePrivate *driReadDrawable;
__DRIscreenPrivate *driScreen;
@@ -269,10 +292,6 @@ struct intel_context
*/
driOptionCache optionCache;
- /* Last seen width/height of the screen */
- int width;
- int height;
-
int64_t swap_ust;
int64_t swap_missed_ust;
@@ -291,6 +310,7 @@ extern char *__progname;
#define SUBPIXEL_X 0.125
#define SUBPIXEL_Y 0.125
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1))
#define INTEL_FIREVERTICES(intel) \
@@ -488,6 +508,11 @@ extern int intel_translate_stencil_op(GLenum op);
extern int intel_translate_blend_factor(GLenum factor);
extern int intel_translate_logic_op(GLenum opcode);
+void intel_viewport(GLcontext * ctx, GLint x, GLint y,
+ GLsizei width, GLsizei height);
+
+void intel_update_renderbuffers(__DRIcontext *context,
+ __DRIdrawable *drawable);
/*======================================================================
* Inline conversion functions.
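
Among the header changes above is a new ARRAY_SIZE() helper. A trivial standalone use, just to show what it evaluates to (the opcode values here are arbitrary):

#include <stdio.h>

#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))

int main(void)
{
    static const unsigned opcodes[] = { 0x7808, 0x7809, 0x780a };

    /* Compile-time element count; only valid for real arrays, not pointers. */
    printf("%zu entries\n", ARRAY_SIZE(opcodes));
    return 0;
}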
diff --git a/shared/intel_decode.c b/shared/intel_decode.c
index a124063..5f90ca2 100644
--- a/shared/intel_decode.c
+++ b/shared/intel_decode.c
@@ -37,6 +37,7 @@
#include <stdio.h>
#include <stdarg.h>
+#include <string.h>
#include <inttypes.h>
#include "intel_decode.h"
@@ -183,9 +184,10 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
switch ((data[0] & 0x1fc00000) >> 22) {
case 0x50:
instr_out(data, hw_offset, 0,
- "XY_COLOR_BLT (rgb %sabled, alpha %sabled)\n",
+ "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n",
(data[0] & (1 << 20)) ? "en" : "dis",
- (data[0] & (1 << 21)) ? "en" : "dis");
+ (data[0] & (1 << 21)) ? "en" : "dis",
+ (data[0] >> 11) & 1);
len = (data[0] & 0x000000ff) + 2;
if (len != 6)
@@ -210,7 +212,8 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
instr_out(data, hw_offset, 1, "format %s, pitch %d, "
"clipping %sabled\n", format,
- data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis");
+ (short)(data[1] & 0xffff),
+ data[1] & (1 << 30) ? "en" : "dis");
instr_out(data, hw_offset, 2, "(%d,%d)\n",
data[2] & 0xffff, data[2] >> 16);
instr_out(data, hw_offset, 3, "(%d,%d)\n",
@@ -220,9 +223,12 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
return len;
case 0x53:
instr_out(data, hw_offset, 0,
- "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled)\n",
+ "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, "
+ "src tile %d, dst tile %d)\n",
(data[0] & (1 << 20)) ? "en" : "dis",
- (data[0] & (1 << 21)) ? "en" : "dis");
+ (data[0] & (1 << 21)) ? "en" : "dis",
+ (data[0] >> 15) & 1,
+ (data[0] >> 11) & 1);
len = (data[0] & 0x000000ff) + 2;
if (len != 8)
@@ -247,16 +253,17 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
instr_out(data, hw_offset, 1, "format %s, dst pitch %d, "
"clipping %sabled\n", format,
- data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis");
+ (short)(data[1] & 0xffff),
+ data[1] & (1 << 30) ? "en" : "dis");
instr_out(data, hw_offset, 2, "dst (%d,%d)\n",
data[2] & 0xffff, data[2] >> 16);
instr_out(data, hw_offset, 3, "dst (%d,%d)\n",
- data[2] & 0xffff, data[2] >> 16);
+ data[3] & 0xffff, data[3] >> 16);
instr_out(data, hw_offset, 4, "dst offset 0x%08x\n", data[4]);
instr_out(data, hw_offset, 5, "src (%d,%d)\n",
data[5] & 0xffff, data[5] >> 16);
instr_out(data, hw_offset, 6, "src pitch %d\n",
- data[6] & 0xffff);
+ (short)(data[6] & 0xffff));
instr_out(data, hw_offset, 7, "src offset 0x%08x\n", data[7]);
return len;
}
@@ -302,6 +309,15 @@ decode_3d_1c(uint32_t *data, int count, uint32_t hw_offset, int *failures)
case 0x10:
instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n");
return 1;
+ case 0x01:
+ instr_out(data, hw_offset, 0, "3DSTATE_MAP_COORD_SET_I830\n");
+ return 1;
+ case 0x0a:
+ instr_out(data, hw_offset, 0, "3DSTATE_MAP_CUBE_I830\n");
+ return 1;
+ case 0x05:
+ instr_out(data, hw_offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n");
+ return 1;
}
instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
@@ -309,34 +325,508 @@ decode_3d_1c(uint32_t *data, int count, uint32_t hw_offset, int *failures)
return 1;
}
+/** Sets the string dstname to describe the destination of the PS instruction */
+static void
+i915_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask)
+{
+ uint32_t a0 = data[i];
+ int dst_nr = (a0 >> 14) & 0xf;
+ char dstmask[8];
+ char *sat;
+
+ if (do_mask) {
+ if (((a0 >> 10) & 0xf) == 0xf) {
+ dstmask[0] = 0;
+ } else {
+ int dstmask_index = 0;
+
+ dstmask[dstmask_index++] = '.';
+ if (a0 & (1 << 10))
+ dstmask[dstmask_index++] = 'x';
+ if (a0 & (1 << 11))
+ dstmask[dstmask_index++] = 'y';
+ if (a0 & (1 << 12))
+ dstmask[dstmask_index++] = 'z';
+ if (a0 & (1 << 13))
+ dstmask[dstmask_index++] = 'w';
+ dstmask[dstmask_index++] = 0;
+ }
+
+ if (a0 & (1 << 22))
+ sat = ".sat";
+ else
+ sat = "";
+ } else {
+ dstmask[0] = 0;
+ sat = "";
+ }
+
+ switch ((a0 >> 19) & 0x7) {
+ case 0:
+ if (dst_nr > 15)
+ fprintf(out, "bad destination reg R%d\n", dst_nr);
+ sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat);
+ break;
+ case 4:
+ if (dst_nr > 0)
+ fprintf(out, "bad destination reg oC%d\n", dst_nr);
+ sprintf(dstname, "oC%s%s", dstmask, sat);
+ break;
+ case 5:
+ if (dst_nr > 0)
+ fprintf(out, "bad destination reg oD%d\n", dst_nr);
+ sprintf(dstname, "oD%s%s", dstmask, sat);
+ break;
+ case 6:
+ if (dst_nr > 2)
+ fprintf(out, "bad destination reg U%d\n", dst_nr);
+ sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat);
+ break;
+ default:
+ sprintf(dstname, "RESERVED");
+ break;
+ }
+}
+
+static char *
+i915_get_channel_swizzle(uint32_t select)
+{
+ switch (select & 0x7) {
+ case 0:
+ return (select & 8) ? "-x" : "x";
+ case 1:
+ return (select & 8) ? "-y" : "y";
+ case 2:
+ return (select & 8) ? "-z" : "z";
+ case 3:
+ return (select & 8) ? "-w" : "w";
+ case 4:
+ return (select & 8) ? "-0" : "0";
+ case 5:
+ return (select & 8) ? "-1" : "1";
+ default:
+ return (select & 8) ? "-bad" : "bad";
+ }
+}
+
+static void
+i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
+{
+ switch (src_type) {
+ case 0:
+ sprintf(name, "R%d", src_nr);
+ if (src_nr > 15)
+ fprintf(out, "bad src reg %s\n", name);
+ break;
+ case 1:
+ if (src_nr < 8)
+ sprintf(name, "T%d", src_nr);
+ else if (src_nr == 8)
+ sprintf(name, "DIFFUSE");
+ else if (src_nr == 9)
+ sprintf(name, "SPECULAR");
+ else if (src_nr == 10)
+ sprintf(name, "FOG");
+ else {
+ fprintf(out, "bad src reg T%d\n", src_nr);
+ sprintf(name, "RESERVED");
+ }
+ break;
+ case 2:
+ sprintf(name, "C%d", src_nr);
+ if (src_nr > 31)
+ fprintf(out, "bad src reg %s\n", name);
+ break;
+ case 4:
+ sprintf(name, "oC");
+ if (src_nr > 0)
+ fprintf(out, "bad src reg oC%d\n", src_nr);
+ break;
+ case 5:
+ sprintf(name, "oD");
+ if (src_nr > 0)
+ fprintf(out, "bad src reg oD%d\n", src_nr);
+ break;
+ case 6:
+ sprintf(name, "U%d", src_nr);
+ if (src_nr > 2)
+ fprintf(out, "bad src reg %s\n", name);
+ break;
+ default:
+ fprintf(out, "bad src reg type %d\n", src_type);
+ sprintf(name, "RESERVED");
+ break;
+ }
+}
+
+static void
+i915_get_instruction_src0(uint32_t *data, int i, char *srcname)
+{
+ uint32_t a0 = data[i];
+ uint32_t a1 = data[i + 1];
+ int src_nr = (a0 >> 2) & 0x1f;
+ char *swizzle_x = i915_get_channel_swizzle((a1 >> 28) & 0xf);
+ char *swizzle_y = i915_get_channel_swizzle((a1 >> 24) & 0xf);
+ char *swizzle_z = i915_get_channel_swizzle((a1 >> 20) & 0xf);
+ char *swizzle_w = i915_get_channel_swizzle((a1 >> 16) & 0xf);
+ char swizzle[100];
+
+ i915_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname);
+ sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+ if (strcmp(swizzle, ".xyzw") != 0)
+ strcat(srcname, swizzle);
+}
+
+static void
+i915_get_instruction_src1(uint32_t *data, int i, char *srcname)
+{
+ uint32_t a1 = data[i + 1];
+ uint32_t a2 = data[i + 2];
+ int src_nr = (a1 >> 8) & 0x1f;
+ char *swizzle_x = i915_get_channel_swizzle((a1 >> 4) & 0xf);
+ char *swizzle_y = i915_get_channel_swizzle((a1 >> 0) & 0xf);
+ char *swizzle_z = i915_get_channel_swizzle((a2 >> 28) & 0xf);
+ char *swizzle_w = i915_get_channel_swizzle((a2 >> 24) & 0xf);
+ char swizzle[100];
+
+ i915_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname);
+ sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+ if (strcmp(swizzle, ".xyzw") != 0)
+ strcat(srcname, swizzle);
+}
+
+static void
+i915_get_instruction_src2(uint32_t *data, int i, char *srcname)
+{
+ uint32_t a2 = data[i + 2];
+ int src_nr = (a2 >> 16) & 0x1f;
+ char *swizzle_x = i915_get_channel_swizzle((a2 >> 12) & 0xf);
+ char *swizzle_y = i915_get_channel_swizzle((a2 >> 8) & 0xf);
+ char *swizzle_z = i915_get_channel_swizzle((a2 >> 4) & 0xf);
+ char *swizzle_w = i915_get_channel_swizzle((a2 >> 0) & 0xf);
+ char swizzle[100];
+
+ i915_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname);
+ sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+ if (strcmp(swizzle, ".xyzw") != 0)
+ strcat(srcname, swizzle);
+}
+
+static void
+i915_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name)
+{
+ switch (src_type) {
+ case 0:
+ sprintf(name, "R%d", src_nr);
+ if (src_nr > 15)
+ fprintf(out, "bad src reg %s\n", name);
+ break;
+ case 1:
+ if (src_nr < 8)
+ sprintf(name, "T%d", src_nr);
+ else if (src_nr == 8)
+ sprintf(name, "DIFFUSE");
+ else if (src_nr == 9)
+ sprintf(name, "SPECULAR");
+ else if (src_nr == 10)
+ sprintf(name, "FOG");
+ else {
+ fprintf(out, "bad src reg T%d\n", src_nr);
+ sprintf(name, "RESERVED");
+ }
+ break;
+ case 4:
+ sprintf(name, "oC");
+ if (src_nr > 0)
+ fprintf(out, "bad src reg oC%d\n", src_nr);
+ break;
+ case 5:
+ sprintf(name, "oD");
+ if (src_nr > 0)
+ fprintf(out, "bad src reg oD%d\n", src_nr);
+ break;
+ default:
+ fprintf(out, "bad src reg type %d\n", src_type);
+ sprintf(name, "RESERVED");
+ break;
+ }
+}
+
+static void
+i915_decode_alu1(uint32_t *data, uint32_t hw_offset,
+ int i, char *instr_prefix, char *op_name)
+{
+ char dst[100], src0[100];
+
+ i915_get_instruction_dst(data, i, dst, 1);
+ i915_get_instruction_src0(data, i, src0);
+
+ instr_out(data, hw_offset, i++, "%s: %s %s, %s\n", instr_prefix,
+ op_name, dst, src0);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+i915_decode_alu2(uint32_t *data, uint32_t hw_offset,
+ int i, char *instr_prefix, char *op_name)
+{
+ char dst[100], src0[100], src1[100];
+
+ i915_get_instruction_dst(data, i, dst, 1);
+ i915_get_instruction_src0(data, i, src0);
+ i915_get_instruction_src1(data, i, src1);
+
+ instr_out(data, hw_offset, i++, "%s: %s %s, %s, %s\n", instr_prefix,
+ op_name, dst, src0, src1);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+i915_decode_alu3(uint32_t *data, uint32_t hw_offset,
+ int i, char *instr_prefix, char *op_name)
+{
+ char dst[100], src0[100], src1[100], src2[100];
+
+ i915_get_instruction_dst(data, i, dst, 1);
+ i915_get_instruction_src0(data, i, src0);
+ i915_get_instruction_src1(data, i, src1);
+ i915_get_instruction_src2(data, i, src2);
+
+ instr_out(data, hw_offset, i++, "%s: %s %s, %s, %s, %s\n", instr_prefix,
+ op_name, dst, src0, src1, src2);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+i915_decode_tex(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix,
+ char *tex_name)
+{
+ uint32_t t0 = data[i];
+ uint32_t t1 = data[i + 1];
+ char dst_name[100];
+ char addr_name[100];
+ int sampler_nr;
+
+ i915_get_instruction_dst(data, i, dst_name, 0);
+ i915_get_instruction_addr((t1 >> 24) & 0x7,
+ (t1 >> 17) & 0xf,
+ addr_name);
+ sampler_nr = t0 & 0xf;
+
+ instr_out(data, hw_offset, i++, "%s: %s %s, S%d, %s\n", instr_prefix,
+ tex_name, dst_name, sampler_nr, addr_name);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+i915_decode_dcl(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix)
+{
+ uint32_t d0 = data[i];
+ char *sampletype;
+ int dcl_nr = (d0 >> 14) & 0xf;
+ char *dcl_x = d0 & (1 << 10) ? "x" : "";
+ char *dcl_y = d0 & (1 << 11) ? "y" : "";
+ char *dcl_z = d0 & (1 << 12) ? "z" : "";
+ char *dcl_w = d0 & (1 << 13) ? "w" : "";
+ char dcl_mask[10];
+
+ switch ((d0 >> 19) & 0x3) {
+ case 1:
+ sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w);
+ if (strcmp(dcl_mask, ".") == 0)
+ fprintf(out, "bad (empty) dcl mask\n");
+
+ if (dcl_nr > 10)
+ fprintf(out, "bad T%d dcl register number\n", dcl_nr);
+ if (dcl_nr < 8) {
+ if (strcmp(dcl_mask, ".x") != 0 &&
+ strcmp(dcl_mask, ".xy") != 0 &&
+ strcmp(dcl_mask, ".xz") != 0 &&
+ strcmp(dcl_mask, ".w") != 0 &&
+ strcmp(dcl_mask, ".xyzw") != 0) {
+ fprintf(out, "bad T%d.%s dcl mask\n", dcl_nr, dcl_mask);
+ }
+ instr_out(data, hw_offset, i++, "%s: DCL T%d%s\n", instr_prefix,
+ dcl_nr, dcl_mask);
+ } else {
+ if (strcmp(dcl_mask, ".xz") == 0)
+ fprintf(out, "errataed bad dcl mask %s\n", dcl_mask);
+ else if (strcmp(dcl_mask, ".xw") == 0)
+ fprintf(out, "errataed bad dcl mask %s\n", dcl_mask);
+ else if (strcmp(dcl_mask, ".xzw") == 0)
+ fprintf(out, "errataed bad dcl mask %s\n", dcl_mask);
+
+ if (dcl_nr == 8) {
+ instr_out(data, hw_offset, i++, "%s: DCL DIFFUSE%s\n", instr_prefix,
+ dcl_mask);
+ } else if (dcl_nr == 9) {
+ instr_out(data, hw_offset, i++, "%s: DCL SPECULAR%s\n", instr_prefix,
+ dcl_mask);
+ } else if (dcl_nr == 10) {
+ instr_out(data, hw_offset, i++, "%s: DCL FOG%s\n", instr_prefix,
+ dcl_mask);
+ }
+ }
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ break;
+ case 3:
+ switch ((d0 >> 22) & 0x3) {
+ case 0:
+ sampletype = "2D";
+ break;
+ case 1:
+ sampletype = "CUBE";
+ break;
+ case 2:
+ sampletype = "3D";
+ break;
+ default:
+ sampletype = "RESERVED";
+ break;
+ }
+ if (dcl_nr > 15)
+ fprintf(out, "bad S%d dcl register number\n", dcl_nr);
+ instr_out(data, hw_offset, i++, "%s: DCL S%d %s\n", instr_prefix,
+ dcl_nr, sampletype);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ break;
+ default:
+ instr_out(data, hw_offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ }
+}
+
+static void
+i915_decode_instruction(uint32_t *data, uint32_t hw_offset,
+ int i, char *instr_prefix)
+{
+ switch ((data[i] >> 24) & 0x1f) {
+ case 0x0:
+ instr_out(data, hw_offset, i++, "%s: NOP\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ break;
+ case 0x01:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "ADD");
+ break;
+ case 0x02:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "MOV");
+ break;
+ case 0x03:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "MUL");
+ break;
+ case 0x04:
+ i915_decode_alu3(data, hw_offset, i, instr_prefix, "MAD");
+ break;
+ case 0x05:
+ i915_decode_alu3(data, hw_offset, i, instr_prefix, "DP2ADD");
+ break;
+ case 0x06:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "DP3");
+ break;
+ case 0x07:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "DP4");
+ break;
+ case 0x08:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "FRC");
+ break;
+ case 0x09:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "RCP");
+ break;
+ case 0x0a:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "RSQ");
+ break;
+ case 0x0b:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "EXP");
+ break;
+ case 0x0c:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "LOG");
+ break;
+ case 0x0d:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "CMP");
+ break;
+ case 0x0e:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "MIN");
+ break;
+ case 0x0f:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "MAX");
+ break;
+ case 0x10:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "FLR");
+ break;
+ case 0x11:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "MOD");
+ break;
+ case 0x12:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "TRC");
+ break;
+ case 0x13:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "SGE");
+ break;
+ case 0x14:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "SLT");
+ break;
+ case 0x15:
+ i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLD");
+ break;
+ case 0x16:
+ i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLDP");
+ break;
+ case 0x17:
+ i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLDB");
+ break;
+ case 0x19:
+ i915_decode_dcl(data, hw_offset, i, instr_prefix);
+ break;
+ default:
+ instr_out(data, hw_offset, i++, "%s: unknown\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ break;
+ }
+}
+
static int
-decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830)
{
unsigned int len, i, c, opcode, word, map, sampler, instr;
struct {
uint32_t opcode;
+ int i830_only;
int min_len;
int max_len;
char *name;
} opcodes_3d_1d[] = {
- { 0x8e, 3, 3, "3DSTATE_BUFFER_INFO" },
- { 0x86, 4, 4, "3DSTATE_CHROMA_KEY" },
- { 0x9c, 1, 1, "3DSTATE_CLEAR_PARAMETERS" },
- { 0x88, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
- { 0x99, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
- { 0x9a, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
- { 0x98, 2, 2, "3DSTATE_DEFAULT_Z" },
- { 0x97, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
- { 0x85, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" },
- { 0x80, 5, 5, "3DSTATE_DRAWING_RECTANGLE" },
- { 0x8e, 3, 3, "3DSTATE_BUFFER_INFO" },
- { 0x9d, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
- { 0x9e, 4, 4, "3DSTATE_MONO_FILTER" },
- { 0x89, 4, 4, "3DSTATE_FOG_MODE" },
- { 0x8f, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" },
- { 0x81, 3, 3, "3DSTATE_SCISSOR_RECTANGLE" },
- { 0x83, 2, 2, "3DSTATE_SPAN_STIPPLE" },
+ { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
+ { 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" },
+ { 0x9c, 0, 1, 1, "3DSTATE_CLEAR_PARAMETERS" },
+ { 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
+ { 0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
+ { 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
+ { 0x98, 0, 2, 2, "3DSTATE_DEFAULT_Z" },
+ { 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
+ { 0x85, 0, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" },
+ { 0x80, 0, 5, 5, "3DSTATE_DRAWING_RECTANGLE" },
+ { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
+ { 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
+ { 0x9e, 0, 4, 4, "3DSTATE_MONO_FILTER" },
+ { 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" },
+ { 0x8f, 0, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" },
+ { 0x81, 0, 3, 3, "3DSTATE_SCISSOR_RECTANGLE" },
+ { 0x83, 0, 2, 2, "3DSTATE_SPAN_STIPPLE" },
+ { 0x8c, 1, 2, 2, "3DSTATE_MAP_COORD_TRANSFORM_I830" },
+ { 0x8b, 1, 2, 2, "3DSTATE_MAP_VERTEX_TRANSFORM_I830" },
+ { 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" },
+ { 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" },
+ { 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830" },
};
switch ((data[0] & 0x00ff0000) >> 16) {
@@ -420,8 +910,9 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
case 0x00:
instr_out(data, hw_offset, 0, "3DSTATE_MAP_STATE\n");
len = (data[0] & 0x0000003f) + 2;
+ instr_out(data, hw_offset, 1, "mask\n");
- i = 1;
+ i = 2;
for (map = 0; map <= 15; map++) {
if (data[1] & (1 << map)) {
if (i + 3 >= count)
@@ -474,17 +965,22 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
}
i = 1;
for (instr = 0; instr < (len - 1) / 3; instr++) {
+ char instr_prefix[10];
+
if (i + 3 >= count)
- BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE");
- instr_out(data, hw_offset, i++, "PS%03x\n", instr);
- instr_out(data, hw_offset, i++, "PS%03x\n", instr);
- instr_out(data, hw_offset, i++, "PS%03x\n", instr);
+ BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_PROGRAM");
+ sprintf(instr_prefix, "PS%03d", instr);
+ i915_decode_instruction(data, hw_offset, i, instr_prefix);
+ i += 3;
}
return len;
case 0x01:
+ if (i830)
+ break;
instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n");
+ instr_out(data, hw_offset, 1, "mask\n");
len = (data[0] & 0x0000003f) + 2;
- i = 1;
+ i = 2;
for (sampler = 0; sampler <= 15; sampler++) {
if (data[1] & (1 << sampler)) {
if (i + 3 >= count)
@@ -507,6 +1003,9 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]);
opcode++)
{
+ if (opcodes_3d_1d[opcode].i830_only && !i830)
+ continue;
+
if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) {
len = 1;
@@ -750,7 +1249,7 @@ decode_3d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
case 0x1f:
return decode_3d_primitive(data, count, hw_offset, failures);
case 0x1d:
- return decode_3d_1d(data, count, hw_offset, failures);
+ return decode_3d_1d(data, count, hw_offset, failures, 0);
case 0x1c:
return decode_3d_1c(data, count, hw_offset, failures);
}
@@ -810,10 +1309,71 @@ get_965_depthformat(unsigned int depthformat)
}
}
+static const char *
+get_965_element_component(uint32_t data, int component)
+{
+ uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7;
+
+ switch (component_control) {
+ case 0:
+ return "nostore";
+ case 1:
+ switch (component) {
+ case 0: return "X";
+ case 1: return "Y";
+ case 2: return "Z";
+ case 3: return "W";
+ default: return "fail";
+ }
+ case 2:
+ return "0.0";
+ case 3:
+ return "1.0";
+ case 4:
+ return "0x1";
+ case 5:
+ return "VID";
+ default:
+ return "fail";
+ }
+}
+
+static const char *
+get_965_prim_type(uint32_t data)
+{
+ uint32_t primtype = (data >> 10) & 0x1f;
+
+ switch (primtype) {
+ case 0x01: return "point list";
+ case 0x02: return "line list";
+ case 0x03: return "line strip";
+ case 0x04: return "tri list";
+ case 0x05: return "tri strip";
+ case 0x06: return "tri fan";
+ case 0x07: return "quad list";
+ case 0x08: return "quad strip";
+ case 0x09: return "line list adj";
+ case 0x0a: return "line strip adj";
+ case 0x0b: return "tri list adj";
+ case 0x0c: return "tri strip adj";
+ case 0x0d: return "tri strip reverse";
+ case 0x0e: return "polygon";
+ case 0x0f: return "rect list";
+ case 0x10: return "line loop";
+ case 0x11: return "point list bf";
+ case 0x12: return "line strip cont";
+ case 0x13: return "line strip bf";
+ case 0x14: return "line strip cont bf";
+ case 0x15: return "tri fan no stipple";
+ default: return "fail";
+ }
+}
+
static int
decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
{
unsigned int opcode, len;
+ int i;
struct {
uint32_t opcode;
@@ -834,8 +1394,7 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
{ 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
{ 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
{ 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
- /* 0x7808: 3DSTATE_VERTEX_BUFFERS */
- /* 0x7809: 3DSTATE_VERTEX_ELEMENTS */
+ { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
{ 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
{ 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
@@ -921,6 +1480,64 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
return len;
+ case 0x7808:
+ len = (data[0] & 0xff) + 2;
+ if ((len - 1) % 4 != 0)
+ fprintf(out, "Bad count in 3DSTATE_VERTEX_BUFFERS\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "3DSTATE_VERTEX_BUFFERS");
+ instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
+
+ for (i = 1; i < len;) {
+ instr_out(data, hw_offset, i, "buffer %d: %s, pitch %db\n",
+ data[i] >> 27,
+ data[i] & (1 << 26) ? "random" : "sequential",
+ data[i] & 0x07ff);
+ i++;
+ instr_out(data, hw_offset, i++, "buffer address\n");
+ instr_out(data, hw_offset, i++, "max index\n");
+ instr_out(data, hw_offset, i++, "mbz\n");
+ }
+ return len;
+
+ case 0x7809:
+ len = (data[0] & 0xff) + 2;
+ if ((len + 1) % 2 != 0)
+ fprintf(out, "Bad count in 3DSTATE_VERTEX_ELEMENTS\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "3DSTATE_VERTEX_ELEMENTS");
+ instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
+
+ for (i = 1; i < len;) {
+ instr_out(data, hw_offset, i, "buffer %d: %svalid, type 0x%04x, "
+ "src offset 0x%04xd bytes\n",
+ data[i] >> 27,
+ data[i] & (1 << 26) ? "" : "in",
+ (data[i] >> 16) & 0x1ff,
+ data[i] & 0x07ff);
+ i++;
+ instr_out(data, hw_offset, i, "(%s, %s, %s, %s), "
+ "dst offset 0x%02x bytes\n",
+ get_965_element_component(data[i], 0),
+ get_965_element_component(data[i], 1),
+ get_965_element_component(data[i], 2),
+ get_965_element_component(data[i], 3),
+ (data[i] & 0xff) * 4);
+ i++;
+ }
+ return len;
+
+ case 0x780a:
+ len = (data[0] & 0xff) + 2;
+ if (len != 3)
+ fprintf(out, "Bad count in 3DSTATE_INDEX_BUFFER\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "3DSTATE_INDEX_BUFFER");
+ instr_out(data, hw_offset, 0, "3DSTATE_INDEX_BUFFER\n");
+ instr_out(data, hw_offset, 1, "beginning buffer address\n");
+ instr_out(data, hw_offset, 2, "ending buffer address\n");
+ return len;
+
case 0x7900:
if (len != 4)
fprintf(out, "Bad count in 3DSTATE_DRAWING_RECTANGLE\n");
@@ -942,9 +1559,9 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
return len;
case 0x7905:
- if (len != 5)
+ if (len != 5 && len != 6)
fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n");
- if (count < 5)
+ if (count < len)
BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER");
instr_out(data, hw_offset, 0,
@@ -959,8 +1576,28 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
((data[3] & 0x0007ffc0) >> 6) + 1,
((data[3] & 0xfff80000) >> 19) + 1);
instr_out(data, hw_offset, 4, "volume depth\n");
+ if (len == 6)
+ instr_out(data, hw_offset, 5, "\n");
return len;
+
+ case 0x7b00:
+ len = (data[0] & 0xff) + 2;
+ if (len != 6)
+ fprintf(out, "Bad count in 3DPRIMITIVE\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "3DPRIMITIVE");
+
+ instr_out(data, hw_offset, 0,
+ "3DPRIMITIVE: %s %s\n",
+ get_965_prim_type(data[0]),
+ (data[0] & (1 << 15)) ? "random" : "sequential");
+ instr_out(data, hw_offset, 1, "primitive count\n");
+ instr_out(data, hw_offset, 2, "start vertex\n");
+ instr_out(data, hw_offset, 3, "instance count\n");
+ instr_out(data, hw_offset, 4, "start instance\n");
+ instr_out(data, hw_offset, 5, "index bias\n");
+ return len;
}
for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
@@ -993,6 +1630,73 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
return 1;
}
+static int
+decode_3d_i830(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+ unsigned int opcode;
+
+ struct {
+ uint32_t opcode;
+ int min_len;
+ int max_len;
+ char *name;
+ } opcodes_3d[] = {
+ { 0x02, 1, 1, "3DSTATE_MODES_3" },
+ { 0x03, 1, 1, "3DSTATE_ENABLES_1"},
+ { 0x04, 1, 1, "3DSTATE_ENABLES_2"},
+ { 0x05, 1, 1, "3DSTATE_VFT0"},
+ { 0x06, 1, 1, "3DSTATE_AA"},
+ { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
+ { 0x08, 1, 1, "3DSTATE_MODES_1" },
+ { 0x09, 1, 1, "3DSTATE_STENCIL_TEST" },
+ { 0x0a, 1, 1, "3DSTATE_VFT1"},
+ { 0x0b, 1, 1, "3DSTATE_INDPT_ALPHA_BLEND" },
+ { 0x0c, 1, 1, "3DSTATE_MODES_5" },
+ { 0x0d, 1, 1, "3DSTATE_MAP_BLEND_OP" },
+ { 0x0e, 1, 1, "3DSTATE_MAP_BLEND_ARG" },
+ { 0x0f, 1, 1, "3DSTATE_MODES_2" },
+ { 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
+ { 0x16, 1, 1, "3DSTATE_MODES_4" },
+ };
+
+ switch ((data[0] & 0x1f000000) >> 24) {
+ case 0x1f:
+ return decode_3d_primitive(data, count, hw_offset, failures);
+ case 0x1d:
+ return decode_3d_1d(data, count, hw_offset, failures, 1);
+ case 0x1c:
+ return decode_3d_1c(data, count, hw_offset, failures);
+ }
+
+ for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
+ opcode++) {
+ if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) {
+ unsigned int len = 1, i;
+
+ instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
+ if (opcodes_3d[opcode].max_len > 1) {
+ len = (data[0] & 0xff) + 2;
+ if (len < opcodes_3d[opcode].min_len ||
+ len > opcodes_3d[opcode].max_len)
+ {
+ fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ }
+ }
+
+ for (i = 1; i < len; i++) {
+ if (i >= count)
+ BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ instr_out(data, hw_offset, i, "dword %d\n", i);
+ }
+ return len;
+ }
+ }
+
+ instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ (*failures)++;
+ return 1;
+}
+
/**
* Decodes an i830-i915 batch buffer, writing the output to stdout.
*
@@ -1022,9 +1726,12 @@ intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid)
if (IS_965(devid)) {
index += decode_3d_965(data + index, count - index,
hw_offset + index * 4, &failures);
- } else {
+ } else if (IS_9XX(devid)) {
index += decode_3d(data + index, count - index,
hw_offset + index * 4, &failures);
+ } else {
+ index += decode_3d_i830(data + index, count - index,
+ hw_offset + index * 4, &failures);
}
break;
default:
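
The i915 shader disassembler added above repeatedly extracts small bit fields; i915_get_channel_swizzle(), for example, maps a 4-bit select (low 3 bits pick the channel, bit 3 negates it) to a printable name. A standalone rendition of just that mapping, with made-up select values for the demo:

#include <stdio.h>
#include <stdint.h>

static const char *swizzle(uint32_t sel)
{
    static const char *pos[] = { "x", "y", "z", "w", "0", "1" };
    static const char *neg[] = { "-x", "-y", "-z", "-w", "-0", "-1" };

    if ((sel & 7) > 5)
        return (sel & 8) ? "-bad" : "bad";
    return (sel & 8) ? neg[sel & 7] : pos[sel & 7];
}

int main(void)
{
    printf("%s %s\n", swizzle(0x2), swizzle(0xb));   /* prints "z -w" */
    return 0;
}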
diff --git a/shared/intel_depthstencil.c b/shared/intel_depthstencil.c
index 90baecd..354b3bf 100644
--- a/shared/intel_depthstencil.c
+++ b/shared/intel_depthstencil.c
@@ -25,21 +25,21 @@
*
**************************************************************************/
-#include "glheader.h"
-#include "imports.h"
-#include "context.h"
-#include "depthstencil.h"
-#include "fbobject.h"
-#include "framebuffer.h"
-#include "hash.h"
-#include "mtypes.h"
-#include "renderbuffer.h"
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/depthstencil.h"
+#include "main/fbobject.h"
+#include "main/framebuffer.h"
+#include "main/hash.h"
+#include "main/mtypes.h"
+#include "main/renderbuffer.h"
#include "intel_context.h"
#include "intel_fbo.h"
#include "intel_depthstencil.h"
#include "intel_regions.h"
-
+#include "intel_span.h"
/**
* The GL_EXT_framebuffer_object allows the user to create their own
@@ -86,68 +86,36 @@
*
*/
-
-
-static void
-map_regions(GLcontext * ctx,
- struct intel_renderbuffer *depthRb,
- struct intel_renderbuffer *stencilRb)
-{
- struct intel_context *intel = intel_context(ctx);
- if (depthRb && depthRb->region) {
- intel_region_map(intel, depthRb->region);
- depthRb->pfMap = depthRb->region->map;
- depthRb->pfPitch = depthRb->region->pitch;
- }
- if (stencilRb && stencilRb->region) {
- intel_region_map(intel, stencilRb->region);
- stencilRb->pfMap = stencilRb->region->map;
- stencilRb->pfPitch = stencilRb->region->pitch;
- }
-}
-
-static void
-unmap_regions(GLcontext * ctx,
- struct intel_renderbuffer *depthRb,
- struct intel_renderbuffer *stencilRb)
-{
- struct intel_context *intel = intel_context(ctx);
- if (depthRb && depthRb->region) {
- intel_region_unmap(intel, depthRb->region);
- depthRb->pfMap = NULL;
- depthRb->pfPitch = 0;
- }
- if (stencilRb && stencilRb->region) {
- intel_region_unmap(intel, stencilRb->region);
- stencilRb->pfMap = NULL;
- stencilRb->pfPitch = 0;
- }
-}
-
-
-
/**
* Undo the pairing/interleaving between depth and stencil buffers.
* irb should be a depth/stencil or stencil renderbuffer.
*/
void
-intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb)
+intel_unpair_depth_stencil(GLcontext *ctx, struct intel_renderbuffer *irb)
{
+ struct intel_context *intel = intel_context(ctx);
+ struct gl_renderbuffer *rb = &irb->Base;
+
if (irb->PairedStencil) {
/* irb is a depth/stencil buffer */
struct gl_renderbuffer *stencilRb;
struct intel_renderbuffer *stencilIrb;
- ASSERT(irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
+ ASSERT(rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT);
stencilRb = _mesa_lookup_renderbuffer(ctx, irb->PairedStencil);
stencilIrb = intel_renderbuffer(stencilRb);
if (stencilIrb) {
/* need to extract stencil values from the depth buffer */
- ASSERT(stencilIrb->PairedDepth == irb->Base.Name);
- map_regions(ctx, irb, stencilIrb);
- _mesa_extract_stencil(ctx, &irb->Base, &stencilIrb->Base);
- unmap_regions(ctx, irb, stencilIrb);
+ ASSERT(stencilIrb->PairedDepth == rb->Name);
+ intel_renderbuffer_map(intel, rb);
+ intel_renderbuffer_map(intel, stencilRb);
+#if 0
+ /* disable for now */
+ _mesa_extract_stencil(ctx, rb, stencilRb);
+#endif
+ intel_renderbuffer_unmap(intel, stencilRb);
+ intel_renderbuffer_unmap(intel, rb);
stencilIrb->PairedDepth = 0;
}
irb->PairedStencil = 0;
@@ -157,17 +125,22 @@ intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb)
struct gl_renderbuffer *depthRb;
struct intel_renderbuffer *depthIrb;
- ASSERT(irb->Base._ActualFormat == GL_STENCIL_INDEX8_EXT ||
- irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
+ ASSERT(rb->_ActualFormat == GL_STENCIL_INDEX8_EXT ||
+ rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT);
depthRb = _mesa_lookup_renderbuffer(ctx, irb->PairedDepth);
depthIrb = intel_renderbuffer(depthRb);
if (depthIrb) {
/* need to extract stencil values from the depth buffer */
- ASSERT(depthIrb->PairedStencil == irb->Base.Name);
- map_regions(ctx, depthIrb, irb);
- _mesa_extract_stencil(ctx, &depthIrb->Base, &irb->Base);
- unmap_regions(ctx, depthIrb, irb);
+ ASSERT(depthIrb->PairedStencil == rb->Name);
+ intel_renderbuffer_map(intel, rb);
+ intel_renderbuffer_map(intel, depthRb);
+#if 0
+ /* disable for now */
+ _mesa_extract_stencil(ctx, depthRb, rb);
+#endif
+ intel_renderbuffer_unmap(intel, depthRb);
+ intel_renderbuffer_unmap(intel, rb);
depthIrb->PairedStencil = 0;
}
irb->PairedDepth = 0;
@@ -194,6 +167,7 @@ void
intel_validate_paired_depth_stencil(GLcontext * ctx,
struct gl_framebuffer *fb)
{
+ struct intel_context *intel = intel_context(ctx);
struct intel_renderbuffer *depthRb, *stencilRb;
depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
@@ -209,8 +183,11 @@ intel_validate_paired_depth_stencil(GLcontext * ctx,
}
else {
/* Separate depth/stencil buffers, need to interleave now */
- ASSERT(depthRb->Base._BaseFormat == GL_DEPTH_COMPONENT);
- ASSERT(stencilRb->Base._BaseFormat == GL_STENCIL_INDEX);
+ ASSERT(depthRb->Base._BaseFormat == GL_DEPTH_COMPONENT ||
+ depthRb->Base._BaseFormat == GL_DEPTH_STENCIL);
+ ASSERT(stencilRb->Base._BaseFormat == GL_STENCIL_INDEX ||
+ stencilRb->Base._BaseFormat == GL_DEPTH_STENCIL);
+
/* may need to interleave depth/stencil now */
if (depthRb->PairedStencil == stencilRb->Base.Name) {
/* OK, the depth and stencil buffers are already interleaved */
@@ -230,9 +207,11 @@ intel_validate_paired_depth_stencil(GLcontext * ctx,
stencilRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
/* establish new pairing: interleave stencil into depth buffer */
- map_regions(ctx, depthRb, stencilRb);
+ intel_renderbuffer_map(intel, &depthRb->Base);
+ intel_renderbuffer_map(intel, &stencilRb->Base);
_mesa_insert_stencil(ctx, &depthRb->Base, &stencilRb->Base);
- unmap_regions(ctx, depthRb, stencilRb);
+ intel_renderbuffer_unmap(intel, &stencilRb->Base);
+ intel_renderbuffer_unmap(intel, &depthRb->Base);
depthRb->PairedStencil = stencilRb->Base.Name;
stencilRb->PairedDepth = depthRb->Base.Name;
}
diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c
index b3f6610..7cf1261 100644
--- a/shared/intel_fbo.c
+++ b/shared/intel_fbo.c
@@ -26,14 +26,14 @@
**************************************************************************/
-#include "imports.h"
-#include "mtypes.h"
-#include "fbobject.h"
-#include "framebuffer.h"
-#include "renderbuffer.h"
-#include "context.h"
-#include "texformat.h"
-#include "texrender.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/fbobject.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/context.h"
+#include "main/texformat.h"
+#include "main/texrender.h"
#include "intel_context.h"
#include "intel_buffers.h"
@@ -77,43 +77,6 @@ intel_get_renderbuffer(struct gl_framebuffer *fb, int attIndex)
return NULL;
}
-
-void
-intel_flip_renderbuffers(struct intel_framebuffer *intel_fb)
-{
- int current_page = intel_fb->pf_current_page;
- int next_page = (current_page + 1) % intel_fb->pf_num_pages;
- struct gl_renderbuffer *tmp_rb;
-
- /* Exchange renderbuffers if necessary but make sure their reference counts
- * are preserved.
- */
- if (intel_fb->color_rb[current_page] &&
- intel_fb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer !=
- &intel_fb->color_rb[current_page]->Base) {
- tmp_rb = NULL;
- _mesa_reference_renderbuffer(&tmp_rb,
- intel_fb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
- tmp_rb = &intel_fb->color_rb[current_page]->Base;
- _mesa_reference_renderbuffer(
- &intel_fb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer, tmp_rb);
- _mesa_reference_renderbuffer(&tmp_rb, NULL);
- }
-
- if (intel_fb->color_rb[next_page] &&
- intel_fb->Base.Attachment[BUFFER_BACK_LEFT].Renderbuffer !=
- &intel_fb->color_rb[next_page]->Base) {
- tmp_rb = NULL;
- _mesa_reference_renderbuffer(&tmp_rb,
- intel_fb->Base.Attachment[BUFFER_BACK_LEFT].Renderbuffer);
- tmp_rb = &intel_fb->color_rb[next_page]->Base;
- _mesa_reference_renderbuffer(
- &intel_fb->Base.Attachment[BUFFER_BACK_LEFT].Renderbuffer, tmp_rb);
- _mesa_reference_renderbuffer(&tmp_rb, NULL);
- }
-}
-
-
struct intel_region *
intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex)
{
@@ -153,6 +116,9 @@ intel_delete_renderbuffer(struct gl_renderbuffer *rb)
intel_unpair_depth_stencil(ctx, irb);
}
+ if (irb->span_cache != NULL)
+ _mesa_free(irb->span_cache);
+
if (intel && irb->region) {
intel_region_release(&irb->region);
}
@@ -209,6 +175,14 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
case GL_RGB10:
case GL_RGB12:
case GL_RGB16:
+ rb->_ActualFormat = GL_RGB8;
+ rb->DataType = GL_UNSIGNED_BYTE;
+ rb->RedBits = 8;
+ rb->GreenBits = 8;
+ rb->BlueBits = 8;
+ rb->AlphaBits = 0;
+ cpp = 4;
+ break;
case GL_RGBA:
case GL_RGBA2:
case GL_RGBA4:
@@ -237,11 +211,18 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
cpp = 4;
break;
case GL_DEPTH_COMPONENT16:
+#if 0
rb->_ActualFormat = GL_DEPTH_COMPONENT16;
rb->DataType = GL_UNSIGNED_SHORT;
rb->DepthBits = 16;
cpp = 2;
break;
+#else
+ /* fall-through.
+ * 16bpp depth renderbuffer can't be paired with a stencil buffer so
+	 * always use the combined depth/stencil format.
+ */
+#endif
case GL_DEPTH_COMPONENT:
case GL_DEPTH_COMPONENT24:
case GL_DEPTH_COMPONENT32:
@@ -285,7 +266,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width,
height, pitch);
- irb->region = intel_region_alloc(intel, cpp, pitch, height);
+ irb->region = intel_region_alloc(intel, cpp, width, height, pitch);
if (!irb->region)
return GL_FALSE; /* out of memory? */
@@ -294,9 +275,6 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
rb->Width = width;
rb->Height = height;
- /* This sets the Get/PutRow/Value functions */
- intel_set_span_functions(&irb->Base);
-
return GL_TRUE;
}
}
@@ -336,7 +314,7 @@ intel_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb,
}
/* Make sure all window system renderbuffers are up to date */
- for (i = 0; i < 3; i++) {
+ for (i = 0; i < 2; i++) {
struct gl_renderbuffer *rb = &intel_fb->color_rb[i]->Base;
/* only resize if size is changing */
@@ -366,7 +344,6 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb,
intel_region_reference(&rb->region, region);
intel_region_release(&old);
- rb->pfMap = region->map;
rb->pfPitch = region->pitch;
}
@@ -446,8 +423,6 @@ intel_create_renderbuffer(GLenum intFormat)
irb->Base.Delete = intel_delete_renderbuffer;
irb->Base.AllocStorage = intel_alloc_window_storage;
irb->Base.GetPointer = intel_get_pointer;
- /* This sets the Get/PutRow/Value functions */
- intel_set_span_functions(&irb->Base);
return irb;
}
@@ -519,25 +494,30 @@ intel_framebuffer_renderbuffer(GLcontext * ctx,
static GLboolean
intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb,
- struct gl_texture_image *texImage)
+ struct gl_texture_image *texImage)
{
if (texImage->TexFormat == &_mesa_texformat_argb8888) {
irb->Base._ActualFormat = GL_RGBA8;
irb->Base._BaseFormat = GL_RGBA;
+ irb->Base.DataType = GL_UNSIGNED_BYTE;
DBG("Render to RGBA8 texture OK\n");
}
else if (texImage->TexFormat == &_mesa_texformat_rgb565) {
irb->Base._ActualFormat = GL_RGB5;
irb->Base._BaseFormat = GL_RGB;
+ irb->Base.DataType = GL_UNSIGNED_SHORT;
DBG("Render to RGB5 texture OK\n");
}
else if (texImage->TexFormat == &_mesa_texformat_z16) {
irb->Base._ActualFormat = GL_DEPTH_COMPONENT16;
irb->Base._BaseFormat = GL_DEPTH_COMPONENT;
+ irb->Base.DataType = GL_UNSIGNED_SHORT;
DBG("Render to DEPTH16 texture OK\n");
- } else if (texImage->TexFormat == &_mesa_texformat_z24_s8) {
+ }
+ else if (texImage->TexFormat == &_mesa_texformat_s8_z24) {
irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
irb->Base._BaseFormat = GL_DEPTH_STENCIL_EXT;
+ irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT;
DBG("Render to DEPTH_STENCIL texture OK\n");
}
else {
@@ -549,7 +529,6 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb,
irb->Base.InternalFormat = irb->Base._ActualFormat;
irb->Base.Width = texImage->Width;
irb->Base.Height = texImage->Height;
- irb->Base.DataType = GL_UNSIGNED_BYTE; /* FBO XXX fix */
irb->Base.RedBits = texImage->TexFormat->RedBits;
irb->Base.GreenBits = texImage->TexFormat->GreenBits;
irb->Base.BlueBits = texImage->TexFormat->BlueBits;
@@ -558,7 +537,6 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb,
irb->Base.Delete = intel_delete_renderbuffer;
irb->Base.AllocStorage = intel_nop_alloc_storage;
- intel_set_span_functions(&irb->Base);
irb->RenderToTexture = GL_TRUE;
@@ -616,7 +594,14 @@ intel_render_texture(GLcontext * ctx,
ASSERT(newImage);
- if (!irb) {
+ if (newImage->Border != 0) {
+ /* Fallback on drawing to a texture with a border, which won't have a
+ * miptree.
+ */
+ _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
+ _mesa_render_texture(ctx, fb, att);
+ return;
+ } else if (!irb) {
irb = intel_wrap_texture(ctx, newImage);
if (irb) {
/* bind the wrapper to the attachment point */
diff --git a/shared/intel_fbo.h b/shared/intel_fbo.h
index c90c84b..b7e9280 100644
--- a/shared/intel_fbo.h
+++ b/shared/intel_fbo.h
@@ -28,9 +28,9 @@
#ifndef INTEL_FBO_H
#define INTEL_FBO_H
+#include "intel_screen.h"
struct intel_context;
-struct intel_region;
/**
* Intel framebuffer, derived from gl_framebuffer.
@@ -39,14 +39,7 @@ struct intel_framebuffer
{
struct gl_framebuffer Base;
- struct intel_renderbuffer *color_rb[3];
-
- /* Drawable page flipping state */
- GLboolean pf_active;
- GLuint pf_seq;
- GLint pf_planes;
- GLint pf_current_page;
- GLint pf_num_pages;
+ struct intel_renderbuffer *color_rb[2];
/* VBI
*/
@@ -70,16 +63,16 @@ struct intel_renderbuffer
{
struct gl_renderbuffer Base;
struct intel_region *region;
- void *pfMap; /* possibly paged flipped map pointer */
GLuint pfPitch; /* possibly paged flipped pitch */
GLboolean RenderToTexture; /* RTT? */
GLuint PairedDepth; /**< only used if this is a depth renderbuffer */
GLuint PairedStencil; /**< only used if this is a stencil renderbuffer */
- GLuint pf_pending; /**< sequence number of pending flip */
-
GLuint vbl_pending; /**< vblank sequence number of pending flip */
+
+ uint8_t *span_cache;
+ unsigned long span_cache_offset;
};
extern struct intel_renderbuffer *intel_renderbuffer(struct gl_renderbuffer
@@ -109,5 +102,22 @@ extern struct intel_region *intel_get_rb_region(struct gl_framebuffer *fb,
+/**
+ * Are we currently rendering into a texture?
+ */
+static INLINE GLboolean
+intel_rendering_to_texture(const GLcontext *ctx)
+{
+ if (ctx->DrawBuffer->Name) {
+ /* User-created FBO */
+ const struct intel_renderbuffer *irb =
+ intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]);
+ return irb && irb->RenderToTexture;
+ }
+ else {
+ return GL_FALSE;
+ }
+}
+
#endif /* INTEL_FBO_H */
diff --git a/shared/intel_ioctl.c b/shared/intel_ioctl.c
deleted file mode 100644
index f4566ba..0000000
--- a/shared/intel_ioctl.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include <stdio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sched.h>
-
-#include "mtypes.h"
-#include "context.h"
-#include "swrast/swrast.h"
-
-#include "intel_context.h"
-#include "intel_ioctl.h"
-#include "intel_batchbuffer.h"
-#include "intel_blit.h"
-#include "intel_regions.h"
-#include "drm.h"
-#include "i915_drm.h"
-
-#include "intel_bufmgr_ttm.h"
-
-#define FILE_DEBUG_FLAG DEBUG_IOCTL
-
-int
-intelEmitIrqLocked(struct intel_context *intel)
-{
- struct drm_i915_irq_emit ie;
- int ret, seq = 1;
-
- if (intel->no_hw)
- return 1;
-
- /*
- assert(((*(int *)intel->driHwLock) & ~DRM_LOCK_CONT) ==
- (DRM_LOCK_HELD|intel->hHWContext));
- */
-
- ie.irq_seq = &seq;
-
- ret = drmCommandWriteRead(intel->driFd, DRM_I915_IRQ_EMIT, &ie, sizeof(ie));
- if (ret) {
- fprintf(stderr, "%s: drm_i915_irq_emit: %d\n", __FUNCTION__, ret);
- exit(1);
- }
-
- DBG("%s --> %d\n", __FUNCTION__, seq);
-
- return seq;
-}
-
-void
-intelWaitIrq(struct intel_context *intel, int seq)
-{
- struct drm_i915_irq_wait iw;
- int ret, lastdispatch;
- volatile struct drm_i915_sarea *sarea = intel->sarea;
-
- if (intel->no_hw)
- return;
-
- DBG("%s %d\n", __FUNCTION__, seq);
-
- iw.irq_seq = seq;
-
- do {
- lastdispatch = sarea->last_dispatch;
- ret = drmCommandWrite(intel->driFd, DRM_I915_IRQ_WAIT, &iw, sizeof(iw));
- } while (ret == -EAGAIN ||
- ret == -EINTR ||
- (ret == -EBUSY && lastdispatch != sarea->last_dispatch) ||
- (ret == 0 && seq > sarea->last_dispatch) ||
- (ret == 0 && sarea->last_dispatch - seq >= (1 << 24)));
-
- if (ret) {
- fprintf(stderr, "%s: drm_i915_irq_wait: %d\n", __FUNCTION__, ret);
- exit(1);
- }
-}
-
-
-void
-intel_batch_ioctl(struct intel_context *intel,
- GLuint start_offset,
- GLuint used,
- GLboolean ignore_cliprects, GLboolean allow_unlock)
-{
- struct drm_i915_batchbuffer batch;
-
- if (intel->no_hw)
- return;
-
- assert(intel->locked);
- assert(used);
-
- DBG("%s used %d offset %x..%x ignore_cliprects %d\n",
- __FUNCTION__,
- used, start_offset, start_offset + used, ignore_cliprects);
-
- /* Throw away non-effective packets. Won't work once we have
- * hardware contexts which would preserve statechanges beyond a
- * single buffer.
- */
- batch.start = start_offset;
- batch.used = used;
- batch.cliprects = intel->pClipRects;
- batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects;
- batch.DR1 = 0;
- batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) |
- (((GLuint) intel->drawY) << 16));
-
- DBG("%s: 0x%x..0x%x DR4: %x cliprects: %d\n",
- __FUNCTION__,
- batch.start,
- batch.start + batch.used * 4, batch.DR4, batch.num_cliprects);
-
- if (drmCommandWrite(intel->driFd, DRM_I915_BATCHBUFFER, &batch,
- sizeof(batch))) {
- fprintf(stderr, "DRM_I915_BATCHBUFFER: %d\n", -errno);
- UNLOCK_HARDWARE(intel);
- exit(1);
- }
-}
-
-#ifdef TTM_API
-void
-intel_exec_ioctl(struct intel_context *intel,
- GLuint used,
- GLboolean ignore_cliprects, GLboolean allow_unlock,
- void *start, GLuint count, dri_fence **fence)
-{
- struct drm_i915_execbuffer execbuf;
- dri_fence *fo;
- int ret;
-
- assert(intel->locked);
- assert(used);
-
- if (intel->no_hw)
- return;
-
- if (*fence) {
- dri_fence_unreference(*fence);
- }
-
- memset(&execbuf, 0, sizeof(execbuf));
-
- execbuf.num_buffers = count;
- execbuf.batch.used = used;
- execbuf.batch.cliprects = intel->pClipRects;
- execbuf.batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects;
- execbuf.batch.DR1 = 0;
- execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) |
- (((GLuint) intel->drawY) << 16));
-
- execbuf.ops_list = (unsigned long)start; // TODO
- execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED;
-
- do {
- ret = drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf,
- sizeof(execbuf));
- } while (ret == -EAGAIN);
-
- if (ret != 0) {
- fprintf(stderr, "DRM_I915_EXECBUFFER: %d\n", -errno);
- UNLOCK_HARDWARE(intel);
- exit(1);
- }
-
- if (execbuf.fence_arg.error != 0) {
-
- /*
- * Fence creation has failed, but the GPU has been
- * idled by the kernel. Safe to continue.
- */
-
- *fence = NULL;
- return;
- }
-
- fo = intel_ttm_fence_create_from_arg(intel->bufmgr, "fence buffers",
- &execbuf.fence_arg);
- if (!fo) {
- fprintf(stderr, "failed to fence handle: %08x\n", execbuf.fence_arg.handle);
- UNLOCK_HARDWARE(intel);
- exit(1);
- }
- *fence = fo;
-}
-#else
-void
-intel_exec_ioctl(struct intel_context *intel,
- GLuint used,
- GLboolean ignore_cliprects, GLboolean allow_unlock,
- void *start, GLuint count, dri_fence **fence)
-{
-}
-#endif
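
The deleted intelWaitIrq() above looped on the wait ioctl while it returned -EAGAIN or -EINTR. The same retry idiom in isolation; fake_wait() is a stand-in for the drmCommandWrite() call and simply fails twice before succeeding:

#include <errno.h>
#include <stdio.h>

static int fake_wait(void)
{
    static int calls;
    return (calls++ < 2) ? -EINTR : 0;   /* interrupted twice, then done */
}

int main(void)
{
    int ret;

    do {
        ret = fake_wait();
    } while (ret == -EAGAIN || ret == -EINTR);

    printf("ret = %d\n", ret);           /* 0 after two retries */
    return 0;
}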
diff --git a/shared/intel_ioctl.h b/shared/intel_ioctl.h
deleted file mode 100644
index 8674aef..0000000
--- a/shared/intel_ioctl.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef INTEL_IOCTL_H
-#define INTEL_IOCTL_H
-
-#include "intel_context.h"
-
-void intelWaitIrq( struct intel_context *intel, int seq );
-int intelEmitIrqLocked( struct intel_context *intel );
-
-void intel_batch_ioctl( struct intel_context *intel,
- GLuint start_offset,
- GLuint used,
- GLboolean ignore_cliprects,
- GLboolean allow_unlock );
-void intel_exec_ioctl(struct intel_context *intel,
- GLuint used,
- GLboolean ignore_cliprects, GLboolean allow_unlock,
- void *start, GLuint count, dri_fence **fence);
-
-#endif
diff --git a/shared/intel_mipmap_tree.c b/shared/intel_mipmap_tree.c
index 9be7e02..bf1c3f0 100644
--- a/shared/intel_mipmap_tree.c
+++ b/shared/intel_mipmap_tree.c
@@ -29,7 +29,7 @@
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_chipset.h"
-#include "enums.h"
+#include "main/enums.h"
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
@@ -111,13 +111,16 @@ intel_miptree_create(struct intel_context *intel,
first_level, last_level, width0,
height0, depth0, cpp, compress_byte);
/*
- * pitch == 0 indicates the null texture
+ * pitch == 0 || height == 0 indicates the null texture
*/
- if (!mt || !mt->pitch)
+ if (!mt || !mt->pitch || !mt->total_height)
return NULL;
mt->region = intel_region_alloc(intel,
- mt->cpp, mt->pitch, mt->total_height);
+ mt->cpp,
+ mt->pitch,
+ mt->total_height,
+ mt->pitch);
if (!mt->region) {
free(mt);
@@ -141,7 +144,7 @@ intel_miptree_create_for_region(struct intel_context *intel,
mt = intel_miptree_create_internal(intel, target, internal_format,
first_level, last_level,
- region->pitch, region->height, depth0,
+ region->width, region->height, 1,
region->cpp, compress_byte);
if (!mt)
return mt;
@@ -160,7 +163,7 @@ intel_miptree_create_for_region(struct intel_context *intel,
mt->pitch = region->pitch;
#endif
- mt->region = region;
+ intel_region_reference(&mt->region, region);
return mt;
}
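
intel_miptree_create_for_region() now takes a reference on the caller's region via intel_region_reference() instead of aliasing the pointer. A toy refcount showing why that matters when two owners release independently; the region struct and helpers here are simplified stand-ins, not the driver's API:

#include <stdlib.h>
#include <stdio.h>

struct region { int refcount; };

static void region_reference(struct region **dst, struct region *src)
{
    src->refcount++;
    *dst = src;
}

static void region_release(struct region **r)
{
    if (--(*r)->refcount == 0)
        free(*r);
    *r = NULL;
}

int main(void)
{
    struct region *screen_rb = calloc(1, sizeof(*screen_rb));
    struct region *miptree = NULL;

    screen_rb->refcount = 1;
    region_reference(&miptree, screen_rb);   /* refcount == 2 */
    region_release(&miptree);                /* still alive for screen_rb */
    region_release(&screen_rb);              /* last owner, freed */
    printf("done\n");
    return 0;
}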
diff --git a/shared/intel_pixel.c b/shared/intel_pixel.c
index 6417866..cf2f32d 100644
--- a/shared/intel_pixel.c
+++ b/shared/intel_pixel.c
@@ -25,9 +25,14 @@
*
**************************************************************************/
-#include "enums.h"
-#include "state.h"
+#include "main/enums.h"
+#include "main/state.h"
+#include "main/context.h"
+#include "main/enable.h"
+#include "main/matrix.h"
#include "swrast/swrast.h"
+#include "shader/arbprogram.h"
+#include "shader/program.h"
#include "intel_context.h"
#include "intel_pixel.h"
@@ -112,12 +117,6 @@ intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one)
return GL_FALSE;
}
- if (ctx->Scissor.Enabled) {
- /* XXX Note: Scissor could be done with the blitter */
- DBG("fallback due to image scissor\n");
- return GL_FALSE;
- }
-
if (ctx->RenderMode != GL_RENDER) {
DBG("fallback due to render mode\n");
return GL_FALSE;
@@ -173,6 +172,159 @@ intel_check_blit_format(struct intel_region * region,
return GL_FALSE;
}
+void
+intel_meta_set_passthrough_transform(struct intel_context *intel)
+{
+ GLcontext *ctx = &intel->ctx;
+
+ intel->meta.saved_vp_x = ctx->Viewport.X;
+ intel->meta.saved_vp_y = ctx->Viewport.Y;
+ intel->meta.saved_vp_width = ctx->Viewport.Width;
+ intel->meta.saved_vp_height = ctx->Viewport.Height;
+
+ _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height);
+
+ _mesa_MatrixMode(GL_PROJECTION);
+ _mesa_PushMatrix();
+ _mesa_LoadIdentity();
+ _mesa_Ortho(0, ctx->DrawBuffer->Width, 0, ctx->DrawBuffer->Height, 1, -1);
+
+ _mesa_MatrixMode(GL_MODELVIEW);
+ _mesa_PushMatrix();
+ _mesa_LoadIdentity();
+}
+
+void
+intel_meta_restore_transform(struct intel_context *intel)
+{
+ _mesa_MatrixMode(GL_PROJECTION);
+ _mesa_PopMatrix();
+ _mesa_MatrixMode(GL_MODELVIEW);
+ _mesa_PopMatrix();
+
+ _mesa_Viewport(intel->meta.saved_vp_x, intel->meta.saved_vp_y,
+ intel->meta.saved_vp_width, intel->meta.saved_vp_height);
+}
+
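
The two helpers above are meant to bracket a draw done in window coordinates; a minimal sketch:

   intel_meta_set_passthrough_transform(intel);
   /* ... emit vertices in window (pixel) coordinates here ... */
   intel_meta_restore_transform(intel);
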
+/**
+ * Set up a vertex program to pass through the position and first texcoord
+ * for pixel path.
+ */
+void
+intel_meta_set_passthrough_vertex_program(struct intel_context *intel)
+{
+ GLcontext *ctx = &intel->ctx;
+ static const char *vp =
+ "!!ARBvp1.0\n"
+ "TEMP vertexClip;\n"
+ "DP4 vertexClip.x, state.matrix.mvp.row[0], vertex.position;\n"
+ "DP4 vertexClip.y, state.matrix.mvp.row[1], vertex.position;\n"
+ "DP4 vertexClip.z, state.matrix.mvp.row[2], vertex.position;\n"
+ "DP4 vertexClip.w, state.matrix.mvp.row[3], vertex.position;\n"
+ "MOV result.position, vertexClip;\n"
+ "MOV result.texcoord[0], vertex.texcoord[0];\n"
+ "MOV result.color, vertex.color;\n"
+ "END\n";
+
+ assert(intel->meta.saved_vp == NULL);
+
+ _mesa_reference_vertprog(ctx, &intel->meta.saved_vp,
+ ctx->VertexProgram.Current);
+ if (intel->meta.passthrough_vp == NULL) {
+ GLuint prog_name;
+ _mesa_GenPrograms(1, &prog_name);
+ _mesa_BindProgram(GL_VERTEX_PROGRAM_ARB, prog_name);
+ _mesa_ProgramStringARB(GL_VERTEX_PROGRAM_ARB,
+ GL_PROGRAM_FORMAT_ASCII_ARB,
+ strlen(vp), (const GLubyte *)vp);
+ _mesa_reference_vertprog(ctx, &intel->meta.passthrough_vp,
+ ctx->VertexProgram.Current);
+ _mesa_DeletePrograms(1, &prog_name);
+ }
+
+ FLUSH_VERTICES(ctx, _NEW_PROGRAM);
+ _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current,
+ intel->meta.passthrough_vp);
+ ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
+ &intel->meta.passthrough_vp->Base);
+
+ intel->meta.saved_vp_enable = ctx->VertexProgram.Enabled;
+ _mesa_Enable(GL_VERTEX_PROGRAM_ARB);
+}
+
+/**
+ * Restores the previous vertex program after
+ * intel_meta_set_passthrough_vertex_program()
+ */
+void
+intel_meta_restore_vertex_program(struct intel_context *intel)
+{
+ GLcontext *ctx = &intel->ctx;
+
+ FLUSH_VERTICES(ctx, _NEW_PROGRAM);
+ _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current,
+ intel->meta.saved_vp);
+ _mesa_reference_vertprog(ctx, &intel->meta.saved_vp, NULL);
+ ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
+ &ctx->VertexProgram.Current->Base);
+
+ if (!intel->meta.saved_vp_enable)
+ _mesa_Disable(GL_VERTEX_PROGRAM_ARB);
+}
+
+/**
+ * Binds the given program string to GL_FRAGMENT_PROGRAM_ARB, caching the
+ * program object.
+ */
+void
+intel_meta_set_fragment_program(struct intel_context *intel,
+ struct gl_fragment_program **prog,
+ const char *prog_string)
+{
+ GLcontext *ctx = &intel->ctx;
+ assert(intel->meta.saved_fp == NULL);
+
+ _mesa_reference_fragprog(ctx, &intel->meta.saved_fp,
+ ctx->FragmentProgram.Current);
+ if (*prog == NULL) {
+ GLuint prog_name;
+ _mesa_GenPrograms(1, &prog_name);
+ _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, prog_name);
+ _mesa_ProgramStringARB(GL_FRAGMENT_PROGRAM_ARB,
+ GL_PROGRAM_FORMAT_ASCII_ARB,
+ strlen(prog_string), (const GLubyte *)prog_string);
+ _mesa_reference_fragprog(ctx, prog, ctx->FragmentProgram.Current);
+ /* Note that DeletePrograms unbinds the program on us */
+ _mesa_DeletePrograms(1, &prog_name);
+ }
+
+ FLUSH_VERTICES(ctx, _NEW_PROGRAM);
+ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, *prog);
+ ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, &((*prog)->Base));
+
+ intel->meta.saved_fp_enable = ctx->FragmentProgram.Enabled;
+ _mesa_Enable(GL_FRAGMENT_PROGRAM_ARB);
+}
+
+/**
+ * Restores the previous fragment program after
+ * intel_meta_set_fragment_program()
+ */
+void
+intel_meta_restore_fragment_program(struct intel_context *intel)
+{
+ GLcontext *ctx = &intel->ctx;
+
+ FLUSH_VERTICES(ctx, _NEW_PROGRAM);
+ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current,
+ intel->meta.saved_fp);
+ _mesa_reference_fragprog(ctx, &intel->meta.saved_fp, NULL);
+ ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB,
+ &ctx->FragmentProgram.Current->Base);
+
+ if (!intel->meta.saved_fp_enable)
+ _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB);
+}
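
Taken together, the meta-state helpers are used in set/restore pairs around a metaops draw, as intel_texture_bitmap() does later in this patch. Roughly (my_fp and fp_source are placeholders for the driver's cached program pointer and an ARB_fp string):

   intel_meta_set_fragment_program(intel, &my_fp, fp_source);
   intel_meta_set_passthrough_vertex_program(intel);
   intel_meta_set_passthrough_transform(intel);

   /* ... draw screen-space geometry here ... */

   intel_meta_restore_transform(intel);
   intel_meta_restore_fragment_program(intel);
   intel_meta_restore_vertex_program(intel);
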
void
intelInitPixelFuncs(struct dd_function_table *functions)
@@ -181,9 +333,19 @@ intelInitPixelFuncs(struct dd_function_table *functions)
if (!getenv("INTEL_NO_BLIT")) {
functions->Bitmap = intelBitmap;
functions->CopyPixels = intelCopyPixels;
+ functions->DrawPixels = intelDrawPixels;
#ifdef I915
functions->ReadPixels = intelReadPixels;
- functions->DrawPixels = intelDrawPixels;
#endif
}
}
+
+void
+intel_free_pixel_state(struct intel_context *intel)
+{
+ GLcontext *ctx = &intel->ctx;
+
+ _mesa_reference_vertprog(ctx, &intel->meta.passthrough_vp, NULL);
+ _mesa_reference_fragprog(ctx, &intel->meta.bitmap_fp, NULL);
+}
+
diff --git a/shared/intel_pixel.h b/shared/intel_pixel.h
index 9c899b9..76b8781 100644
--- a/shared/intel_pixel.h
+++ b/shared/intel_pixel.h
@@ -28,9 +28,18 @@
#ifndef INTEL_PIXEL_H
#define INTEL_PIXEL_H
-#include "mtypes.h"
+#include "main/mtypes.h"
void intelInitPixelFuncs(struct dd_function_table *functions);
+void intel_meta_set_passthrough_transform(struct intel_context *intel);
+void intel_meta_restore_transform(struct intel_context *intel);
+void intel_meta_set_passthrough_vertex_program(struct intel_context *intel);
+void intel_meta_restore_vertex_program(struct intel_context *intel);
+void intel_meta_set_fragment_program(struct intel_context *intel,
+ struct gl_fragment_program **prog,
+ const char *prog_string);
+void intel_meta_restore_fragment_program(struct intel_context *intel);
+void intel_free_pixel_state(struct intel_context *intel);
GLboolean intel_check_blit_fragment_ops(GLcontext * ctx,
GLboolean src_alpha_is_one);
diff --git a/shared/intel_pixel_bitmap.c b/shared/intel_pixel_bitmap.c
index 81238ac..3a01f63 100644
--- a/shared/intel_pixel_bitmap.c
+++ b/shared/intel_pixel_bitmap.c
@@ -25,25 +25,36 @@
*
**************************************************************************/
-#include "glheader.h"
-#include "enums.h"
-#include "image.h"
-#include "colormac.h"
-#include "mtypes.h"
-#include "macros.h"
-#include "bufferobj.h"
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/colormac.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/bufferobj.h"
+#include "main/pixelstore.h"
+#include "main/state.h"
+#include "main/teximage.h"
+#include "main/texenv.h"
+#include "main/texobj.h"
+#include "main/texstate.h"
+#include "main/texparam.h"
+#include "main/varray.h"
+#include "main/attrib.h"
+#include "main/enable.h"
+#include "shader/arbprogram.h"
+#include "glapi/dispatch.h"
#include "swrast/swrast.h"
#include "intel_screen.h"
#include "intel_context.h"
-#include "intel_ioctl.h"
#include "intel_batchbuffer.h"
#include "intel_blit.h"
#include "intel_regions.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_pixel.h"
-
+#include "intel_reg.h"
#define FILE_DEBUG_FLAG DEBUG_PIXEL
@@ -87,6 +98,11 @@ static GLboolean test_bit( const GLubyte *src,
return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0;
}
+static GLboolean test_msb_bit(const GLubyte *src, GLuint bit)
+{
+ return (src[bit/8] & (1<<(7 - (bit % 8)))) ? 1 : 0;
+}
+
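
The two bit-test helpers differ only in bit order within a byte; for example:

   /* With src[0] == 0x01:
    *   test_bit(src, 0)     == 1   (LSB-first)
    *   test_msb_bit(src, 0) == 0   (MSB-first, the default GL bitmap order)
    *   test_msb_bit(src, 7) == 1
    */
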
static void set_bit( GLubyte *dest,
GLuint bit )
{
@@ -150,8 +166,18 @@ static GLuint get_bitmap_rect(GLsizei width, GLsizei height,
return count;
}
-
-
+/**
+ * Returns the low Y value of the vertical range given, flipped according to
+ * whether the framebuffer is a window-system framebuffer (y-flipped) or a
+ * user FBO (not flipped).
+ */
+static inline int
+y_flip(struct gl_framebuffer *fb, int y, int height)
+{
+ if (fb->Name != 0)
+ return y;
+ else
+ return fb->Height - y - height;
+}
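
A worked example of the flip, with values assumed for illustration: a 480-pixel-tall window-system framebuffer (fb->Name == 0) maps the range y == 10, height == 20 to 480 - 10 - 20 == 450, while a user FBO leaves it at 10:

   assert(fb->Name == 0 && fb->Height == 480);   /* assumed values */
   assert(y_flip(fb, 10, 20) == 450);
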
/*
* Render a bitmap.
@@ -165,9 +191,26 @@ do_blit_bitmap( GLcontext *ctx,
{
struct intel_context *intel = intel_context(ctx);
struct intel_region *dst = intel_drawbuf_region(intel);
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
GLfloat tmpColor[4];
GLubyte ubcolor[4];
GLuint color8888, color565;
+ unsigned int num_cliprects;
+ drm_clip_rect_t *cliprects;
+ int x_off, y_off;
+ GLsizei bitmap_width = width;
+ GLsizei bitmap_height = height;
+
+ /* Update draw buffer bounds */
+ _mesa_update_state(ctx);
+
+ if (ctx->Depth.Test) {
+ /* The blit path produces incorrect results when depth testing is on.
+ * It seems the blit Z coord is always 1.0 (the far plane) so fragments
+ * will likely be obscured by other, closer geometry.
+ */
+ return GL_FALSE;
+ }
if (!dst)
return GL_FALSE;
@@ -192,68 +235,43 @@ do_blit_bitmap( GLcontext *ctx,
color8888 = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1], ubcolor[2], ubcolor[3]);
color565 = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]);
- /* Does zoom apply to bitmaps?
- */
- if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F) ||
- ctx->Pixel.ZoomX != 1.0F ||
- ctx->Pixel.ZoomY != 1.0F)
+ if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F))
return GL_FALSE;
LOCK_HARDWARE(intel);
- if (intel->driDrawable->numClipRects) {
- __DRIdrawablePrivate *dPriv = intel->driDrawable;
- drm_clip_rect_t *box = dPriv->pClipRects;
- drm_clip_rect_t dest_rect;
- GLint nbox = dPriv->numClipRects;
- GLint srcx = 0, srcy = 0;
- GLint orig_screen_x1, orig_screen_y2;
+ intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
+ if (num_cliprects != 0) {
GLuint i;
+ GLint orig_dstx = dstx;
+ GLint orig_dsty = dsty;
-
- orig_screen_x1 = dPriv->x + dstx;
- orig_screen_y2 = dPriv->y + (dPriv->h - dsty);
-
- /* Do scissoring in GL coordinates:
- */
- if (ctx->Scissor.Enabled)
- {
- GLint x = ctx->Scissor.X;
- GLint y = ctx->Scissor.Y;
- GLuint w = ctx->Scissor.Width;
- GLuint h = ctx->Scissor.Height;
-
- if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height))
+ /* Clip to buffer bounds and scissor. */
+ if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
+ fb->_Xmax, fb->_Ymax,
+ &dstx, &dsty, &width, &height))
goto out;
- }
-
- /* Convert from GL to hardware coordinates:
- */
- dsty = dPriv->y + (dPriv->h - dsty - height);
- dstx = dPriv->x + dstx;
- dest_rect.x1 = dstx < 0 ? 0 : dstx;
- dest_rect.y1 = dsty < 0 ? 0 : dsty;
- dest_rect.x2 = dstx + width < 0 ? 0 : dstx + width;
- dest_rect.y2 = dsty + height < 0 ? 0 : dsty + height;
+ dstx = x_off + dstx;
+ dsty = y_off + y_flip(fb, dsty, height);
- for (i = 0; i < nbox; i++) {
- drm_clip_rect_t rect;
- int box_w, box_h;
+ for (i = 0; i < num_cliprects; i++) {
+ int box_x, box_y, box_w, box_h;
GLint px, py;
GLuint stipple[32];
- if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
- continue;
-
- /* Now go back to GL coordinates to figure out what subset of
- * the bitmap we are uploading for this cliprect:
- */
- box_w = rect.x2 - rect.x1;
- box_h = rect.y2 - rect.y1;
- srcx = rect.x1 - orig_screen_x1;
- srcy = orig_screen_y2 - rect.y2;
+ box_x = dstx;
+ box_y = dsty;
+ box_w = width;
+ box_h = height;
+ /* Clip to drawable cliprect */
+ if (!_mesa_clip_to_region(cliprects[i].x1,
+ cliprects[i].y1,
+ cliprects[i].x2,
+ cliprects[i].y2,
+ &box_x, &box_y, &box_w, &box_h))
+ continue;
#define DY 32
#define DX 32
@@ -274,13 +292,19 @@ do_blit_bitmap( GLcontext *ctx,
/* May need to adjust this when padding has been introduced in
* sz above:
+ *
+ * Have to translate destination coordinates back into source
+ * coordinates.
*/
- if (get_bitmap_rect(width, height, unpack,
+ if (get_bitmap_rect(bitmap_width, bitmap_height, unpack,
bitmap,
- srcx + px, srcy + py, w, h,
+ -orig_dstx + (box_x + px - x_off),
+ -orig_dsty + y_flip(fb,
+ box_y + py - y_off, h),
+ w, h,
(GLubyte *)stipple,
8,
- GL_TRUE) == 0)
+ fb->Name == 0 ? GL_TRUE : GL_FALSE) == 0)
continue;
/*
@@ -293,19 +317,20 @@ do_blit_bitmap( GLcontext *ctx,
dst->pitch,
dst->buffer,
0,
- dst->tiled,
- rect.x1 + px,
- rect.y2 - (py + h),
+ dst->tiling,
+ box_x + px,
+ box_y + py,
w, h,
logic_op);
}
}
}
- out:
- intel_batchbuffer_flush(intel->batch);
}
+out:
UNLOCK_HARDWARE(intel);
+ if (INTEL_DEBUG & DEBUG_SYNC)
+ intel_batchbuffer_flush(intel->batch);
if (unpack->BufferObj->Name) {
/* done with PBO so unmap it now */
@@ -316,9 +341,178 @@ do_blit_bitmap( GLcontext *ctx,
return GL_TRUE;
}
+static GLboolean
+intel_texture_bitmap(GLcontext * ctx,
+ GLint dst_x, GLint dst_y,
+ GLsizei width, GLsizei height,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLubyte *bitmap)
+{
+ struct intel_context *intel = intel_context(ctx);
+ static const char *fp =
+ "!!ARBfp1.0\n"
+ "TEMP val;\n"
+ "PARAM color=program.local[0];\n"
+ "TEX val, fragment.texcoord[0], texture[0], 2D;\n"
+ "ADD val, val.wwww, {-.5, -.5, -.5, -.5};\n"
+ "KIL val;\n"
+ "MOV result.color, color;\n"
+ "END\n";
+ GLuint texname;
+ GLfloat vertices[4][4];
+ GLfloat texcoords[4][2];
+ GLint old_active_texture;
+ GLubyte *unpacked_bitmap;
+ GLubyte *a8_bitmap;
+ int x, y;
+ GLfloat dst_z;
+
+ /* We need a fragment program for the KIL effect */
+ if (!ctx->Extensions.ARB_fragment_program ||
+ !ctx->Extensions.ARB_vertex_program) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr,
+ "glBitmap fallback: No fragment/vertex program support\n");
+ return GL_FALSE;
+ }
+
+ /* We're going to mess with texturing with no regard to existing texture
+ * state, so if there is some set up we have to bail.
+ */
+ if (ctx->Texture._EnabledUnits != 0) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glBitmap fallback: texturing enabled\n");
+ return GL_FALSE;
+ }
+ /* Can't do textured DrawPixels with a fragment program, unless we were
+ * to generate a new program that sampled our texture and put the results
+ * in the fragment color before the user's program started.
+ */
+ if (ctx->FragmentProgram.Enabled) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glBitmap fallback: fragment program enabled\n");
+ return GL_FALSE;
+ }
+ if (ctx->VertexProgram.Enabled) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glBitmap fallback: vertex program enabled\n");
+ return GL_FALSE;
+ }
+ /* Check that we can load in a texture this big. */
+ if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) ||
+ height > (1 << (ctx->Const.MaxTextureLevels - 1))) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glBitmap fallback: bitmap too large (%dx%d)\n",
+ width, height);
+ return GL_FALSE;
+ }
+
+ /* Convert the A1 bitmap to an A8 format suitable for glTexImage */
+ if (unpack->BufferObj->Name) {
+ bitmap = map_pbo(ctx, width, height, unpack, bitmap);
+ if (bitmap == NULL)
+ return GL_TRUE; /* even though this is an error, we're done */
+ }
+ unpacked_bitmap = _mesa_unpack_bitmap(width, height, bitmap,
+ unpack);
+ a8_bitmap = _mesa_calloc(width * height);
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++) {
+ if (test_msb_bit(unpacked_bitmap, ALIGN(width, 8) * y + x))
+ a8_bitmap[y * width + x] = 0xff;
+ }
+ }
+ _mesa_free(unpacked_bitmap);
+ if (unpack->BufferObj->Name) {
+ /* done with PBO so unmap it now */
+ ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+ unpack->BufferObj);
+ }
+
+ /* Save GL state before we start setting up our drawing */
+ _mesa_PushAttrib(GL_ENABLE_BIT | GL_CURRENT_BIT |
+ GL_VIEWPORT_BIT);
+ _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT |
+ GL_CLIENT_PIXEL_STORE_BIT);
+ old_active_texture = ctx->Texture.CurrentUnit;
+
+ _mesa_Disable(GL_POLYGON_STIPPLE);
+
+ /* Upload our bitmap data to an alpha texture */
+ _mesa_ActiveTextureARB(GL_TEXTURE0_ARB);
+ _mesa_Enable(GL_TEXTURE_2D);
+ _mesa_GenTextures(1, &texname);
+ _mesa_BindTexture(GL_TEXTURE_2D, texname);
+ _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
+ _mesa_PixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
+ _mesa_PixelStorei(GL_UNPACK_LSB_FIRST, GL_FALSE);
+ _mesa_PixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+ _mesa_PixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
+ _mesa_PixelStorei(GL_UNPACK_SKIP_ROWS, 0);
+ _mesa_PixelStorei(GL_UNPACK_ALIGNMENT, 1);
+ _mesa_TexImage2D(GL_TEXTURE_2D, 0, GL_ALPHA, width, height, 0,
+ GL_ALPHA, GL_UNSIGNED_BYTE, a8_bitmap);
+ _mesa_free(a8_bitmap);
+
+ intel_meta_set_fragment_program(intel, &intel->meta.bitmap_fp, fp);
+ _mesa_ProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0,
+ ctx->Current.RasterColor);
+ intel_meta_set_passthrough_vertex_program(intel);
+ intel_meta_set_passthrough_transform(intel);
+
+ /* convert rasterpos Z from [0,1] to NDC coord in [-1,1] */
+ dst_z = -1.0 + 2.0 * ctx->Current.RasterPos[2];
+
+ vertices[0][0] = dst_x;
+ vertices[0][1] = dst_y;
+ vertices[0][2] = dst_z;
+ vertices[0][3] = 1.0;
+ vertices[1][0] = dst_x + width;
+ vertices[1][1] = dst_y;
+ vertices[1][2] = dst_z;
+ vertices[1][3] = 1.0;
+ vertices[2][0] = dst_x + width;
+ vertices[2][1] = dst_y + height;
+ vertices[2][2] = dst_z;
+ vertices[2][3] = 1.0;
+ vertices[3][0] = dst_x;
+ vertices[3][1] = dst_y + height;
+ vertices[3][2] = dst_z;
+ vertices[3][3] = 1.0;
+
+ texcoords[0][0] = 0.0;
+ texcoords[0][1] = 0.0;
+ texcoords[1][0] = 1.0;
+ texcoords[1][1] = 0.0;
+ texcoords[2][0] = 1.0;
+ texcoords[2][1] = 1.0;
+ texcoords[3][0] = 0.0;
+ texcoords[3][1] = 1.0;
+
+ _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices);
+ _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords);
+ _mesa_Enable(GL_VERTEX_ARRAY);
+ _mesa_Enable(GL_TEXTURE_COORD_ARRAY);
+ CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4));
+
+ intel_meta_restore_transform(intel);
+ intel_meta_restore_fragment_program(intel);
+ intel_meta_restore_vertex_program(intel);
+
+ _mesa_PopClientAttrib();
+ _mesa_Disable(GL_TEXTURE_2D); /* asserted that it was disabled at entry */
+ _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
+ _mesa_PopAttrib();
+
+ _mesa_DeleteTextures(1, &texname);
+
+ return GL_TRUE;
+}
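
Both this path and the DrawPixels paths below map the [0,1] raster-position depth to NDC Z with z_ndc = 2 * z - 1, so RasterPos[2] == 0.5 lands at NDC 0.0. A minimal sketch:

   GLfloat raster_z = ctx->Current.RasterPos[2];   /* in [0, 1]  */
   GLfloat ndc_z    = -1.0 + 2.0 * raster_z;       /* in [-1, 1] */
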
/* There are a large number of possible ways to implement bitmap on
* this hardware, most of them have some sort of drawback. Here are a
@@ -351,6 +545,10 @@ intelBitmap(GLcontext * ctx,
unpack, pixels))
return;
+ if (intel_texture_bitmap(ctx, x, y, width, height,
+ unpack, pixels))
+ return;
+
if (INTEL_DEBUG & DEBUG_PIXEL)
_mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
diff --git a/shared/intel_pixel_copy.c b/shared/intel_pixel_copy.c
index 45f72ba..7c7aa60 100644
--- a/shared/intel_pixel_copy.c
+++ b/shared/intel_pixel_copy.c
@@ -25,17 +25,16 @@
*
**************************************************************************/
-#include "glheader.h"
-#include "enums.h"
-#include "image.h"
-#include "state.h"
-#include "mtypes.h"
-#include "macros.h"
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/state.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
#include "swrast/swrast.h"
#include "intel_screen.h"
#include "intel_context.h"
-#include "intel_ioctl.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "intel_blit.h"
@@ -120,6 +119,12 @@ do_texture_copypixels(GLcontext * ctx,
if (!src || !dst || type != GL_COLOR)
return GL_FALSE;
+ if (ctx->_ImageTransferState) {
+ if (INTEL_DEBUG & DEBUG_PIXEL)
+ fprintf(stderr, "%s: check_color failed\n", __FUNCTION__);
+ return GL_FALSE;
+ }
+
/* Can't handle overlapping regions. Don't have sufficient control
* over rasterization to pull it off in-place. Punt on these for
* now.
@@ -136,10 +141,20 @@ do_texture_copypixels(GLcontext * ctx,
srcbox.x2 = srcx + width;
srcbox.y2 = srcy + height;
- dstbox.x1 = dstx;
- dstbox.y1 = dsty;
- dstbox.x2 = dstx + width * ctx->Pixel.ZoomX;
- dstbox.y2 = dsty + height * ctx->Pixel.ZoomY;
+ if (ctx->Pixel.ZoomX > 0) {
+ dstbox.x1 = dstx;
+ dstbox.x2 = dstx + width * ctx->Pixel.ZoomX;
+ } else {
+ dstbox.x1 = dstx + width * ctx->Pixel.ZoomX;
+ dstbox.x2 = dstx;
+ }
+ if (ctx->Pixel.ZoomY > 0) {
+ dstbox.y1 = dsty;
+ dstbox.y2 = dsty + height * ctx->Pixel.ZoomY;
+ } else {
+ dstbox.y1 = dsty + height * ctx->Pixel.ZoomY;
+ dstbox.y2 = dsty;
+ }
DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2);
DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2,
@@ -229,7 +244,7 @@ do_texture_copypixels(GLcontext * ctx,
out:
intel->vtbl.leave_meta_state(intel);
- intel_batchbuffer_flush(intel->batch);
+ intel_batchbuffer_emit_mi_flush(intel->batch);
}
UNLOCK_HARDWARE(intel);
@@ -251,6 +266,14 @@ do_blit_copypixels(GLcontext * ctx,
struct intel_context *intel = intel_context(ctx);
struct intel_region *dst = intel_drawbuf_region(intel);
struct intel_region *src = copypix_src_region(intel, type);
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+ struct gl_framebuffer *read_fb = ctx->ReadBuffer;
+ unsigned int num_cliprects;
+ drm_clip_rect_t *cliprects;
+ int x_off, y_off;
+
+ /* Update draw buffer bounds */
+ _mesa_update_state(ctx);
/* Copypixels can be more than a straight copy. Ensure all the
* extra operations are disabled:
@@ -268,87 +291,88 @@ do_blit_copypixels(GLcontext * ctx,
LOCK_HARDWARE(intel);
- if (intel->driDrawable->numClipRects) {
- __DRIdrawablePrivate *dPriv = intel->driDrawable;
- __DRIdrawablePrivate *dReadPriv = intel->driReadDrawable;
- drm_clip_rect_t *box = dPriv->pClipRects;
- GLint nbox = dPriv->numClipRects;
- GLint delta_x = 0;
- GLint delta_y = 0;
+ intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
+ if (num_cliprects != 0) {
+ GLint delta_x;
+ GLint delta_y;
+ GLint orig_dstx;
+ GLint orig_dsty;
+ GLint orig_srcx;
+ GLint orig_srcy;
GLuint i;
- /* Do scissoring in GL coordinates:
- */
- if (ctx->Scissor.Enabled)
- {
- GLint x = ctx->Scissor.X;
- GLint y = ctx->Scissor.Y;
- GLuint w = ctx->Scissor.Width;
- GLuint h = ctx->Scissor.Height;
- GLint dx = dstx - srcx;
- GLint dy = dsty - srcy;
-
- if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height))
- goto out;
-
- srcx = dstx - dx;
- srcy = dsty - dy;
- }
+ /* XXX: We fail to handle different inversion between read and draw framebuffer. */
+
+ /* Clip to destination buffer. */
+ orig_dstx = dstx;
+ orig_dsty = dsty;
+ if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
+ fb->_Xmax, fb->_Ymax,
+ &dstx, &dsty, &width, &height))
+ goto out;
+ /* Adjust src coords for our post-clipped destination origin */
+ srcx += dstx - orig_dstx;
+ srcy += dsty - orig_dsty;
+
+ /* Clip to source buffer. */
+ orig_srcx = srcx;
+ orig_srcy = srcy;
+ if (!_mesa_clip_to_region(0, 0,
+ read_fb->Width, read_fb->Height,
+ &srcx, &srcy, &width, &height))
+ goto out;
+ /* Adjust dst coords for our post-clipped source origin */
+ dstx += srcx - orig_srcx;
+ dsty += srcy - orig_srcy;
/* Convert from GL to hardware coordinates:
*/
- dsty = dPriv->h - dsty - height;
- srcy = dPriv->h - srcy - height;
- dstx += dPriv->x;
- dsty += dPriv->y;
- srcx += dReadPriv->x;
- srcy += dReadPriv->y;
-
- /* Clip against the source region. This is the only source
- * clipping we do. Dst is clipped with cliprects below.
- */
- {
- delta_x = srcx - dstx;
- delta_y = srcy - dsty;
-
- if (!_mesa_clip_to_region(0, 0, src->pitch, src->height,
- &srcx, &srcy, &width, &height))
- goto out;
+ if (fb->Name == 0) {
+ /* copypixels to a system framebuffer */
+ dstx = x_off + dstx;
+ dsty = y_off + (fb->Height - dsty - height);
+ } else {
+ /* copypixels to a user framebuffer object */
+ dstx = x_off + dstx;
+ dsty = y_off + dsty;
+ }
- dstx = srcx - delta_x;
- dsty = srcy - delta_y;
+ /* Flip source Y if it's a system framebuffer. */
+ if (read_fb->Name == 0) {
+ srcx = intel->driReadDrawable->x + srcx;
+ srcy = intel->driReadDrawable->y + (fb->Height - srcy - height);
}
+ delta_x = srcx - dstx;
+ delta_y = srcy - dsty;
/* Could do slightly more clipping: Eg, take the intersection of
- * the existing set of cliprects and those cliprects translated
- * by delta_x, delta_y:
- *
+ * the destination cliprects and the read drawable cliprects
+ *
* This code will not overwrite other windows, but will
* introduce garbage when copying from obscured window regions.
*/
- for (i = 0; i < nbox; i++) {
+ for (i = 0; i < num_cliprects; i++) {
GLint clip_x = dstx;
GLint clip_y = dsty;
GLint clip_w = width;
GLint clip_h = height;
- if (!_mesa_clip_to_region(box[i].x1, box[i].y1, box[i].x2, box[i].y2,
+ if (!_mesa_clip_to_region(cliprects[i].x1, cliprects[i].y1,
+ cliprects[i].x2, cliprects[i].y2,
&clip_x, &clip_y, &clip_w, &clip_h))
continue;
intelEmitCopyBlit(intel, dst->cpp,
- src->pitch, src->buffer, 0, src->tiled,
- dst->pitch, dst->buffer, 0, dst->tiled,
+ src->pitch, src->buffer, 0, src->tiling,
+ dst->pitch, dst->buffer, 0, dst->tiling,
clip_x + delta_x, clip_y + delta_y, /* srcx, srcy */
clip_x, clip_y, /* dstx, dsty */
clip_w, clip_h,
ctx->Color.ColorLogicOpEnabled ?
ctx->Color.LogicOp : GL_COPY);
}
-
- out:
- intel_batchbuffer_flush(intel->batch);
}
+out:
UNLOCK_HARDWARE(intel);
DBG("%s: success\n", __FUNCTION__);
diff --git a/shared/intel_pixel_draw.c b/shared/intel_pixel_draw.c
index 34813d2..0e83afa 100644
--- a/shared/intel_pixel_draw.c
+++ b/shared/intel_pixel_draw.c
@@ -25,322 +25,351 @@
*
**************************************************************************/
-#include "glheader.h"
-#include "enums.h"
-#include "image.h"
-#include "mtypes.h"
-#include "macros.h"
-#include "bufferobj.h"
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/bufferobj.h"
+#include "main/teximage.h"
+#include "main/texenv.h"
+#include "main/texobj.h"
+#include "main/texstate.h"
+#include "main/texparam.h"
+#include "main/varray.h"
+#include "main/attrib.h"
+#include "main/enable.h"
+#include "main/buffers.h"
+#include "main/fbobject.h"
+#include "main/renderbuffer.h"
+#include "main/depth.h"
+#include "main/hash.h"
+#include "main/blend.h"
+#include "glapi/dispatch.h"
#include "swrast/swrast.h"
#include "intel_screen.h"
#include "intel_context.h"
-#include "intel_ioctl.h"
#include "intel_batchbuffer.h"
#include "intel_blit.h"
#include "intel_buffers.h"
#include "intel_regions.h"
#include "intel_pixel.h"
#include "intel_buffer_objects.h"
-#include "intel_tris.h"
-
-
+#include "intel_fbo.h"
static GLboolean
-do_texture_drawpixels(GLcontext * ctx,
- GLint x, GLint y,
- GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- const struct gl_pixelstore_attrib *unpack,
- const GLvoid * pixels)
+intel_texture_drawpixels(GLcontext * ctx,
+ GLint x, GLint y,
+ GLsizei width, GLsizei height,
+ GLenum format,
+ GLenum type,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLvoid *pixels)
{
struct intel_context *intel = intel_context(ctx);
- struct intel_region *dst = intel_drawbuf_region(intel);
- struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj);
- GLuint rowLength = unpack->RowLength ? unpack->RowLength : width;
- GLuint src_offset;
-
- if (INTEL_DEBUG & DEBUG_PIXEL)
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- intelFlush(&intel->ctx);
-
- if (!dst)
- return GL_FALSE;
-
- intel->vtbl.render_start(intel);
- intel->vtbl.emit_state(intel);
+ GLuint texname;
+ GLfloat vertices[4][4];
+ GLfloat texcoords[4][2];
+ GLfloat z;
- if (src) {
- if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
- format, type, pixels)) {
- _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels");
- return GL_TRUE;
- }
- }
- else {
- /* PBO only for now:
- */
-/* _mesa_printf("%s - not PBO\n", __FUNCTION__); */
+ /* We're going to mess with texturing with no regard to existing texture
+ * state, so if there is some set up we have to bail.
+ */
+ if (ctx->Texture._EnabledUnits != 0) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glDrawPixels() fallback: texturing enabled\n");
return GL_FALSE;
}
- /* There are a couple of things we can't do yet, one of which is
- * set the correct state for pixel operations when GL texturing is
- * enabled. That's a pretty rare state and probably not worth the
- * effort. A completely device-independent version of this may do
- * more.
- *
- * Similarly, we make no attempt to merge metaops processing with
- * an enabled fragment program, though it would certainly be
- * possible.
+ /* Can't do textured DrawPixels with a fragment program, unless we were
+ * to generate a new program that sampled our texture and put the results
+ * in the fragment color before the user's program started.
*/
- if (!intel_check_meta_tex_fragment_ops(ctx)) {
- if (INTEL_DEBUG & DEBUG_PIXEL)
- _mesa_printf("%s - bad GL fragment state for metaops texture\n",
- __FUNCTION__);
+ if (ctx->FragmentProgram.Enabled) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glDrawPixels() fallback: fragment program enabled\n");
return GL_FALSE;
}
- intel->vtbl.install_meta_state(intel);
-
-
- /* Is this true? Also will need to turn depth testing on according
- * to state:
+   /* We don't have a way to generate fragments with stencil values which
+ * will set the resulting stencil value.
*/
- intel->vtbl.meta_no_stencil_write(intel);
- intel->vtbl.meta_no_depth_write(intel);
-
- /* Set the 3d engine to draw into the destination region:
- */
- intel->vtbl.meta_draw_region(intel, dst, intel->depth_region);
-
- intel->vtbl.meta_import_pixel_state(intel);
-
- src_offset = (GLuint) _mesa_image_address(2, unpack, pixels, width, height,
- format, type, 0, 0, 0);
+ if (format == GL_STENCIL_INDEX)
+ return GL_FALSE;
+ /* Check that we can load in a texture this big. */
+ if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) ||
+ height > (1 << (ctx->Const.MaxTextureLevels - 1))) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glDrawPixels() fallback: bitmap too large (%dx%d)\n",
+ width, height);
+ return GL_FALSE;
+ }
- /* Setup the pbo up as a rectangular texture, if possible.
- *
- * TODO: This is almost always possible if the i915 fragment
- * program is adjusted to correctly swizzle the sampled colors.
- * The major exception is any 24bit texture, like RGB888, for which
- * there is no hardware support.
+ /* To do DEPTH_COMPONENT, we would need to change our setup to not draw to
+ * the color buffer, and sample the texture values into the fragment depth
+ * in a program.
*/
- if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, src_offset,
- rowLength, height, format, type)) {
- intel->vtbl.leave_meta_state(intel);
+ if (format == GL_DEPTH_COMPONENT) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr,
+ "glDrawPixels() fallback: format == GL_DEPTH_COMPONENT\n");
return GL_FALSE;
}
- intel->vtbl.meta_texture_blend_replace(intel);
-
-
- LOCK_HARDWARE(intel);
-
- if (intel->driDrawable->numClipRects) {
- __DRIdrawablePrivate *dPriv = intel->driDrawable;
- GLint srcx, srcy;
- GLint dstx, dsty;
-
- dstx = x;
- dsty = dPriv->h - (y + height);
-
- srcx = 0; /* skiprows/pixels already done */
- srcy = 0;
-
- if (0) {
- const GLint orig_x = dstx;
- const GLint orig_y = dsty;
-
- if (!_mesa_clip_to_region(0, 0, dst->pitch, dst->height,
- &dstx, &dsty, &width, &height))
- goto out;
-
- srcx += dstx - orig_x;
- srcy += dsty - orig_y;
- }
-
-
- if (INTEL_DEBUG & DEBUG_PIXEL)
- _mesa_printf("draw %d,%d %dx%d\n", dstx, dsty, width, height);
+ _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT |
+ GL_CURRENT_BIT);
+ _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT);
+
+ /* XXX: pixel store stuff */
+ _mesa_Disable(GL_POLYGON_STIPPLE);
+
+ _mesa_ActiveTextureARB(GL_TEXTURE0_ARB);
+ _mesa_Enable(GL_TEXTURE_2D);
+ _mesa_GenTextures(1, &texname);
+ _mesa_BindTexture(GL_TEXTURE_2D, texname);
+ _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ _mesa_TexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+ /*
+ _mesa_TexEnvf(GL_TEXTURE_ENV, GL_COMBINE_RGB, GL_REPLACE);
+ _mesa_TexEnvf(GL_TEXTURE_ENV, GL_COMBINE_ALPHA, GL_REPLACE);
+ */
+ _mesa_TexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, format,
+ type, pixels);
+
+ intel_meta_set_passthrough_transform(intel);
+
+ /* convert rasterpos Z from [0,1] to NDC coord in [-1,1] */
+ z = -1.0 + 2.0 * ctx->Current.RasterPos[2];
+
+ /* Create the vertex buffer based on the current raster pos. The x and y
+ * we're handed are ctx->Current.RasterPos[0,1] rounded to integers.
+ * We also apply the depth. However, the W component is already multiplied
+ * into ctx->Current.RasterPos[0,1,2] and we can ignore it at this point.
+ */
+ vertices[0][0] = x;
+ vertices[0][1] = y;
+ vertices[0][2] = z;
+ vertices[0][3] = 1.0;
+ vertices[1][0] = x + width * ctx->Pixel.ZoomX;
+ vertices[1][1] = y;
+ vertices[1][2] = z;
+ vertices[1][3] = 1.0;
+ vertices[2][0] = x + width * ctx->Pixel.ZoomX;
+ vertices[2][1] = y + height * ctx->Pixel.ZoomY;
+ vertices[2][2] = z;
+ vertices[2][3] = 1.0;
+ vertices[3][0] = x;
+ vertices[3][1] = y + height * ctx->Pixel.ZoomY;
+ vertices[3][2] = z;
+ vertices[3][3] = 1.0;
+
+ texcoords[0][0] = 0.0;
+ texcoords[0][1] = 0.0;
+ texcoords[1][0] = 1.0;
+ texcoords[1][1] = 0.0;
+ texcoords[2][0] = 1.0;
+ texcoords[2][1] = 1.0;
+ texcoords[3][0] = 0.0;
+ texcoords[3][1] = 1.0;
+
+ _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices);
+ _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords);
+ _mesa_Enable(GL_VERTEX_ARRAY);
+ _mesa_Enable(GL_TEXTURE_COORD_ARRAY);
+ CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4));
+
+ intel_meta_restore_transform(intel);
+ _mesa_PopClientAttrib();
+ _mesa_PopAttrib();
+
+ _mesa_DeleteTextures(1, &texname);
- /* Must use the regular cliprect mechanism in order to get the
- * drawing origin set correctly. Otherwise scissor state is in
- * incorrect coordinate space. Does this even need to hold the
- * lock???
- */
- intel->vtbl.meta_draw_quad(intel,
- dstx, dstx + width * ctx->Pixel.ZoomX,
- dPriv->h - (y + height * ctx->Pixel.ZoomY),
- dPriv->h - (y),
- -ctx->Current.RasterPos[2] * .5,
- 0x00ff00ff,
- srcx, srcx + width, srcy + height, srcy);
- out:
- intel->vtbl.leave_meta_state(intel);
- intel_batchbuffer_flush(intel->batch);
- }
- UNLOCK_HARDWARE(intel);
return GL_TRUE;
}
-
-
-
-
-/* Pros:
- * - no waiting for idle before updating framebuffer.
- *
- * Cons:
- * - if upload is by memcpy, this may actually be slower than fallback path.
- * - uploads the whole image even if destination is clipped
- *
- * Need to benchmark.
- *
- * Given the questions about performance, implement for pbo's only.
- * This path is definitely a win if the pbo is already in agp. If it
- * turns out otherwise, we can add the code necessary to upload client
- * data to agp space before performing the blit. (Though it may turn
- * out to be better/simpler just to use the texture engine).
- */
static GLboolean
-do_blit_drawpixels(GLcontext * ctx,
- GLint x, GLint y,
- GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- const struct gl_pixelstore_attrib *unpack,
- const GLvoid * pixels)
+intel_stencil_drawpixels(GLcontext * ctx,
+ GLint x, GLint y,
+ GLsizei width, GLsizei height,
+ GLenum format,
+ GLenum type,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLvoid *pixels)
{
struct intel_context *intel = intel_context(ctx);
- struct intel_region *dest = intel_drawbuf_region(intel);
- struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj);
- GLuint src_offset;
- GLuint rowLength;
- dri_fence *fence = NULL;
-
- if (INTEL_DEBUG & DEBUG_PIXEL)
- _mesa_printf("%s\n", __FUNCTION__);
-
-
- if (!dest) {
- if (INTEL_DEBUG & DEBUG_PIXEL)
- _mesa_printf("%s - no dest\n", __FUNCTION__);
+ GLuint texname, rb_name, fb_name, old_fb_name;
+ GLfloat vertices[4][2];
+ GLfloat texcoords[4][2];
+ struct intel_renderbuffer *irb;
+ struct intel_renderbuffer *depth_irb;
+ struct gl_renderbuffer *rb;
+ struct gl_pixelstore_attrib old_unpack;
+ GLstencil *stencil_pixels;
+ int row;
+
+ if (format != GL_STENCIL_INDEX)
return GL_FALSE;
- }
- if (src) {
- /* This validation should be done by core mesa:
- */
- if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
- format, type, pixels)) {
- _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels");
- return GL_TRUE;
- }
- }
- else {
- /* PBO only for now:
- */
- if (INTEL_DEBUG & DEBUG_PIXEL)
- _mesa_printf("%s - not PBO\n", __FUNCTION__);
- return GL_FALSE;
- }
+ /* If there's nothing to write, we're done. */
+ if (ctx->Stencil.WriteMask[0] == 0)
+ return GL_TRUE;
- if (!intel_check_blit_format(dest, format, type)) {
- if (INTEL_DEBUG & DEBUG_PIXEL)
- _mesa_printf("%s - bad format for blit\n", __FUNCTION__);
+ /* Can't do a per-bit writemask while treating stencil as rgba data. */
+ if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: "
+ "stencil mask enabled\n");
return GL_FALSE;
}
- if (!intel_check_blit_fragment_ops(ctx, GL_FALSE)) {
- if (INTEL_DEBUG & DEBUG_PIXEL)
- _mesa_printf("%s - bad GL fragment state for blitter\n",
- __FUNCTION__);
+ /* We use FBOs for our wrapping of the depthbuffer into a color
+ * destination.
+ */
+ if (!ctx->Extensions.EXT_framebuffer_object)
return GL_FALSE;
- }
- if (ctx->Pixel.ZoomX != 1.0F) {
- if (INTEL_DEBUG & DEBUG_PIXEL)
- _mesa_printf("%s - bad PixelZoomX for blit\n", __FUNCTION__);
+ /* We're going to mess with texturing with no regard to existing texture
+ * state, so if there is some set up we have to bail.
+ */
+ if (ctx->Texture._EnabledUnits != 0) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: "
+ "texturing enabled\n");
return GL_FALSE;
}
-
- if (unpack->RowLength > 0)
- rowLength = unpack->RowLength;
- else
- rowLength = width;
-
- if (ctx->Pixel.ZoomY == -1.0F) {
- if (INTEL_DEBUG & DEBUG_PIXEL)
- _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__);
- return GL_FALSE; /* later */
- y -= height;
- }
- else if (ctx->Pixel.ZoomY == 1.0F) {
- rowLength = -rowLength;
- }
- else {
- if (INTEL_DEBUG & DEBUG_PIXEL)
- _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__);
+ /* Can't do textured DrawPixels with a fragment program, unless we were
+ * to generate a new program that sampled our texture and put the results
+ * in the fragment color before the user's program started.
+ */
+ if (ctx->FragmentProgram.Enabled) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: "
+ "fragment program enabled\n");
return GL_FALSE;
}
- src_offset = (GLuint) _mesa_image_address(2, unpack, pixels, width, height,
- format, type, 0, 0, 0);
-
- intelFlush(&intel->ctx);
- LOCK_HARDWARE(intel);
-
- if (intel->driDrawable->numClipRects) {
- __DRIdrawablePrivate *dPriv = intel->driDrawable;
- int nbox = dPriv->numClipRects;
- drm_clip_rect_t *box = dPriv->pClipRects;
- drm_clip_rect_t rect;
- drm_clip_rect_t dest_rect;
- dri_bo *src_buffer = intel_bufferobj_buffer(intel, src, INTEL_READ);
- int i;
-
- dest_rect.x1 = dPriv->x + x;
- dest_rect.y1 = dPriv->y + dPriv->h - (y + height);
- dest_rect.x2 = dest_rect.x1 + width;
- dest_rect.y2 = dest_rect.y1 + height;
-
- for (i = 0; i < nbox; i++) {
- if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
- continue;
-
- intelEmitCopyBlit(intel,
- dest->cpp,
- rowLength, src_buffer, src_offset, GL_FALSE,
- dest->pitch, dest->buffer, 0, dest->tiled,
- rect.x1 - dest_rect.x1,
- rect.y2 - dest_rect.y2,
- rect.x1,
- rect.y1, rect.x2 - rect.x1, rect.y2 - rect.y1,
- ctx->Color.ColorLogicOpEnabled ?
- ctx->Color.LogicOp : GL_COPY);
- }
- intel_batchbuffer_flush(intel->batch);
- fence = intel->batch->last_fence;
- dri_fence_reference(fence);
+ /* Check that we can load in a texture this big. */
+ if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) ||
+ height > (1 << (ctx->Const.MaxTextureLevels - 1))) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: "
+ "bitmap too large (%dx%d)\n",
+ width, height);
+ return GL_FALSE;
}
- UNLOCK_HARDWARE(intel);
- if (fence) {
- dri_fence_wait(fence);
- dri_fence_unreference(fence);
+ _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT |
+ GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+ _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT);
+ old_fb_name = ctx->DrawBuffer->Name;
+
+ _mesa_Disable(GL_POLYGON_STIPPLE);
+ _mesa_Disable(GL_DEPTH_TEST);
+ _mesa_Disable(GL_STENCIL_TEST);
+
+ /* Unpack the supplied stencil values into a ubyte buffer. */
+ assert(sizeof(GLstencil) == sizeof(GLubyte));
+ stencil_pixels = _mesa_malloc(width * height * sizeof(GLstencil));
+ for (row = 0; row < height; row++) {
+ GLvoid *source = _mesa_image_address2d(unpack, pixels,
+ width, height,
+ GL_COLOR_INDEX, type,
+ row, 0);
+ _mesa_unpack_stencil_span(ctx, width, GL_UNSIGNED_BYTE,
+ stencil_pixels +
+ row * width * sizeof(GLstencil),
+ type, source, unpack, ctx->_ImageTransferState);
}
- if (INTEL_DEBUG & DEBUG_PIXEL)
- _mesa_printf("%s - DONE\n", __FUNCTION__);
+ /* Take the current depth/stencil renderbuffer, and make a new one wrapping
+ * it which will be treated as GL_RGBA8 so we can render to it as a color
+ * buffer.
+ */
+ depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
+ irb = intel_create_renderbuffer(GL_RGBA8);
+ rb = &irb->Base;
+ irb->Base.Width = depth_irb->Base.Width;
+ irb->Base.Height = depth_irb->Base.Height;
+ intel_renderbuffer_set_region(irb, depth_irb->region);
+
+ /* Create a name for our renderbuffer, which lets us use other mesa
+ * rb functions for convenience.
+ */
+ _mesa_GenRenderbuffersEXT(1, &rb_name);
+ irb->Base.RefCount++;
+ _mesa_HashInsert(ctx->Shared->RenderBuffers, rb_name, &irb->Base);
+
+ /* Bind the new renderbuffer to the color attachment point. */
+ _mesa_GenFramebuffersEXT(1, &fb_name);
+ _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb_name);
+ _mesa_FramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT,
+ GL_COLOR_ATTACHMENT0_EXT,
+ GL_RENDERBUFFER_EXT,
+ rb_name);
+ /* Choose to render to the color attachment. */
+ _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+
+ _mesa_DepthMask(GL_FALSE);
+ _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE);
+
+ _mesa_ActiveTextureARB(GL_TEXTURE0_ARB);
+ _mesa_Enable(GL_TEXTURE_2D);
+ _mesa_GenTextures(1, &texname);
+ _mesa_BindTexture(GL_TEXTURE_2D, texname);
+ _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ _mesa_TexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+ old_unpack = ctx->Unpack;
+ ctx->Unpack = ctx->DefaultPacking;
+ _mesa_TexImage2D(GL_TEXTURE_2D, 0, GL_INTENSITY, width, height, 0,
+ GL_RED, GL_UNSIGNED_BYTE, stencil_pixels);
+ ctx->Unpack = old_unpack;
+ _mesa_free(stencil_pixels);
+
+ intel_meta_set_passthrough_transform(intel);
+
+ vertices[0][0] = x;
+ vertices[0][1] = y;
+ vertices[1][0] = x + width * ctx->Pixel.ZoomX;
+ vertices[1][1] = y;
+ vertices[2][0] = x + width * ctx->Pixel.ZoomX;
+ vertices[2][1] = y + height * ctx->Pixel.ZoomY;
+ vertices[3][0] = x;
+ vertices[3][1] = y + height * ctx->Pixel.ZoomY;
+
+ texcoords[0][0] = 0.0;
+ texcoords[0][1] = 0.0;
+ texcoords[1][0] = 1.0;
+ texcoords[1][1] = 0.0;
+ texcoords[2][0] = 1.0;
+ texcoords[2][1] = 1.0;
+ texcoords[3][0] = 0.0;
+ texcoords[3][1] = 1.0;
+
+ _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices);
+ _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords);
+ _mesa_Enable(GL_VERTEX_ARRAY);
+ _mesa_Enable(GL_TEXTURE_COORD_ARRAY);
+ CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4));
+
+ intel_meta_restore_transform(intel);
+
+ _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, old_fb_name);
+
+ _mesa_PopClientAttrib();
+ _mesa_PopAttrib();
+
+ _mesa_DeleteTextures(1, &texname);
+ _mesa_DeleteFramebuffersEXT(1, &fb_name);
+ _mesa_DeleteRenderbuffersEXT(1, &rb_name);
return GL_TRUE;
}
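
Why masking color writes to alpha updates only stencil: with a shared 24/8 depth/stencil buffer viewed as an ARGB8888 color buffer, the stencil byte and the alpha byte occupy the same bits. The layout is assumed here, not spelled out in the patch:

   /* Assumed S8_Z24-style packing of one 32-bit pixel:
    *   bits 31..24  stencil  (== alpha when read as ARGB8888)
    *   bits 23..0   depth    (== RGB when read as ARGB8888)
    * so _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE) touches only
    * the stencil byte. */
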
-
-
void
intelDrawPixels(GLcontext * ctx,
GLint x, GLint y,
@@ -350,39 +379,17 @@ intelDrawPixels(GLcontext * ctx,
const struct gl_pixelstore_attrib *unpack,
const GLvoid * pixels)
{
- if (do_blit_drawpixels(ctx, x, y, width, height, format, type,
- unpack, pixels))
+ if (intel_texture_drawpixels(ctx, x, y, width, height, format, type,
+ unpack, pixels))
return;
- if (do_texture_drawpixels(ctx, x, y, width, height, format, type,
- unpack, pixels))
+ if (intel_stencil_drawpixels(ctx, x, y, width, height, format, type,
+ unpack, pixels))
return;
-
if (INTEL_DEBUG & DEBUG_PIXEL)
_mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
- if (ctx->FragmentProgram._Current == ctx->FragmentProgram._TexEnvProgram) {
- /*
- * We don't want the i915 texenv program to be applied to DrawPixels.
- * This is really just a performance optimization (mesa will other-
- * wise happily run the fragment program on each pixel in the image).
- */
- struct gl_fragment_program *fpSave = ctx->FragmentProgram._Current;
- /* can't just set current frag prog to 0 here as on buffer resize
- we'll get new state checks which will segfault. Remains a hack. */
- ctx->FragmentProgram._Current = NULL;
- ctx->FragmentProgram._UseTexEnvProgram = GL_FALSE;
- ctx->FragmentProgram._Active = GL_FALSE;
- _swrast_DrawPixels( ctx, x, y, width, height, format, type,
- unpack, pixels );
- ctx->FragmentProgram._Current = fpSave;
- ctx->FragmentProgram._UseTexEnvProgram = GL_TRUE;
- ctx->FragmentProgram._Active = GL_TRUE;
- _swrast_InvalidateState(ctx, _NEW_PROGRAM);
- }
- else {
- _swrast_DrawPixels( ctx, x, y, width, height, format, type,
- unpack, pixels );
- }
+ _swrast_DrawPixels(ctx, x, y, width, height, format, type,
+ unpack, pixels);
}
diff --git a/shared/intel_reg.h b/shared/intel_reg.h
index 37629c0..57ac8f0 100644
--- a/shared/intel_reg.h
+++ b/shared/intel_reg.h
@@ -29,13 +29,166 @@
#define CMD_2D (0x2 << 29)
#define CMD_3D (0x3 << 29)
+#define MI_NOOP (CMD_MI | 0)
+
#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23)
+#define MI_FLUSH (CMD_MI | (4 << 23))
+#define FLUSH_MAP_CACHE (1 << 0)
+#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2)
+
/* Stalls command execution waiting for the given events to have occurred. */
#define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23))
#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6)
#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2)
+/* p189 */
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D | (0x1d<<24) | (0x04<<16))
+#define I1_LOAD_S(n) (1<<(4+n))
+
+#define _3DSTATE_DRAWRECT_INFO (CMD_3D | (0x1d<<24) | (0x80<<16) | 0x3)
+#define _3DSTATE_DRAWRECT_INFO_I965 (CMD_3D | (3 << 27) | (1 << 24) | 0x2)
+
+/** @{
+ *
+ * PIPE_CONTROL operation, a combination MI_FLUSH and register write with
+ * additional flushing control.
+ */
+#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | 2)
+#define PIPE_CONTROL_NO_WRITE (0 << 14)
+#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14)
+#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14)
+#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14)
+#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
+#define PIPE_CONTROL_WRITE_FLUSH (1 << 12)
+#define PIPE_CONTROL_INSTRUCTION_FLUSH (1 << 11)
+#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
+#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
+#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
+
+/** @} */
+
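
The PIPE_CONTROL flag macros combine into the command's first dword; a hypothetical helper, not taken from the driver:

   /* Hypothetical: first dword of a PIPE_CONTROL that stalls on depth and
    * then writes a timestamp. */
   static GLuint
   pipe_control_dw0(void)
   {
      return _3DSTATE_PIPE_CONTROL |
             PIPE_CONTROL_WRITE_TIMESTAMP |
             PIPE_CONTROL_DEPTH_STALL;
   }
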
+/** @{
+ * 915 definitions
+ */
+#define S0_VB_OFFSET_MASK 0xffffffc0
+#define S0_AUTO_CACHE_INV_DISABLE (1<<0)
+/** @} */
+
+/** @{
+ * 830 definitions
+ */
+#define S0_VB_OFFSET_MASK_830 0xffffff80
+#define S0_VB_PITCH_SHIFT_830 1
+#define S0_VB_ENABLE_830 (1<<0)
+/** @} */
+
+#define S1_VERTEX_WIDTH_SHIFT 24
+#define S1_VERTEX_WIDTH_MASK (0x3f<<24)
+#define S1_VERTEX_PITCH_SHIFT 16
+#define S1_VERTEX_PITCH_MASK (0x3f<<16)
+
+#define TEXCOORDFMT_2D 0x0
+#define TEXCOORDFMT_3D 0x1
+#define TEXCOORDFMT_4D 0x2
+#define TEXCOORDFMT_1D 0x3
+#define TEXCOORDFMT_2D_16 0x4
+#define TEXCOORDFMT_4D_16 0x5
+#define TEXCOORDFMT_NOT_PRESENT 0xf
+#define S2_TEXCOORD_FMT0_MASK 0xf
+#define S2_TEXCOORD_FMT1_SHIFT 4
+#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4))
+#define S2_TEXCOORD_NONE (~0)
+#define S2_TEX_COUNT_SHIFT_830 12
+#define S2_VERTEX_1_WIDTH_SHIFT_830 0
+#define S2_VERTEX_0_WIDTH_SHIFT_830 6
+/* S3 not interesting */
+
+#define S4_POINT_WIDTH_SHIFT 23
+#define S4_POINT_WIDTH_MASK (0x1ff<<23)
+#define S4_LINE_WIDTH_SHIFT 19
+#define S4_LINE_WIDTH_ONE (0x2<<19)
+#define S4_LINE_WIDTH_MASK (0xf<<19)
+#define S4_FLATSHADE_ALPHA (1<<18)
+#define S4_FLATSHADE_FOG (1<<17)
+#define S4_FLATSHADE_SPECULAR (1<<16)
+#define S4_FLATSHADE_COLOR (1<<15)
+#define S4_CULLMODE_BOTH (0<<13)
+#define S4_CULLMODE_NONE (1<<13)
+#define S4_CULLMODE_CW (2<<13)
+#define S4_CULLMODE_CCW (3<<13)
+#define S4_CULLMODE_MASK (3<<13)
+#define S4_VFMT_POINT_WIDTH (1<<12)
+#define S4_VFMT_SPEC_FOG (1<<11)
+#define S4_VFMT_COLOR (1<<10)
+#define S4_VFMT_DEPTH_OFFSET (1<<9)
+#define S4_VFMT_XYZ (1<<6)
+#define S4_VFMT_XYZW (2<<6)
+#define S4_VFMT_XY (3<<6)
+#define S4_VFMT_XYW (4<<6)
+#define S4_VFMT_XYZW_MASK (7<<6)
+#define S4_FORCE_DEFAULT_DIFFUSE (1<<5)
+#define S4_FORCE_DEFAULT_SPECULAR (1<<4)
+#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3)
+#define S4_VFMT_FOG_PARAM (1<<2)
+#define S4_SPRITE_POINT_ENABLE (1<<1)
+#define S4_LINE_ANTIALIAS_ENABLE (1<<0)
+
+#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \
+ S4_VFMT_SPEC_FOG | \
+ S4_VFMT_COLOR | \
+ S4_VFMT_DEPTH_OFFSET | \
+ S4_VFMT_XYZW_MASK | \
+ S4_VFMT_FOG_PARAM)
+
+
+#define S5_WRITEDISABLE_ALPHA (1<<31)
+#define S5_WRITEDISABLE_RED (1<<30)
+#define S5_WRITEDISABLE_GREEN (1<<29)
+#define S5_WRITEDISABLE_BLUE (1<<28)
+#define S5_WRITEDISABLE_MASK (0xf<<28)
+#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27)
+#define S5_LAST_PIXEL_ENABLE (1<<26)
+#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25)
+#define S5_FOG_ENABLE (1<<24)
+#define S5_STENCIL_REF_SHIFT 16
+#define S5_STENCIL_REF_MASK (0xff<<16)
+#define S5_STENCIL_TEST_FUNC_SHIFT 13
+#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13)
+#define S5_STENCIL_FAIL_SHIFT 10
+#define S5_STENCIL_FAIL_MASK (0x7<<10)
+#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7
+#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7)
+#define S5_STENCIL_PASS_Z_PASS_SHIFT 4
+#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4)
+#define S5_STENCIL_WRITE_ENABLE (1<<3)
+#define S5_STENCIL_TEST_ENABLE (1<<2)
+#define S5_COLOR_DITHER_ENABLE (1<<1)
+#define S5_LOGICOP_ENABLE (1<<0)
+
+
+#define S6_ALPHA_TEST_ENABLE (1<<31)
+#define S6_ALPHA_TEST_FUNC_SHIFT 28
+#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28)
+#define S6_ALPHA_REF_SHIFT 20
+#define S6_ALPHA_REF_MASK (0xff<<20)
+#define S6_DEPTH_TEST_ENABLE (1<<19)
+#define S6_DEPTH_TEST_FUNC_SHIFT 16
+#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16)
+#define S6_CBUF_BLEND_ENABLE (1<<15)
+#define S6_CBUF_BLEND_FUNC_SHIFT 12
+#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12)
+#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8
+#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8)
+#define S6_CBUF_DST_BLEND_FACT_SHIFT 4
+#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4)
+#define S6_DEPTH_WRITE_ENABLE (1<<3)
+#define S6_COLOR_WRITE_ENABLE (1<<2)
+#define S6_TRISTRIP_PV_SHIFT 0
+#define S6_TRISTRIP_PV_MASK (0x3<<0)
+
+#define S7_DEPTH_OFFSET_CONST_MASK ~0
+
/* Primitive dispatch on 830-945 */
#define _3DPRIMITIVE (CMD_3D | (0x1f << 24))
#define PRIM_INDIRECT (1<<23)
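
An illustrative composition of the S2 texcoord-format helpers above, roughly mirroring how the i915 state code builds the S2 dword:

   /* All texcoord sets absent, then unit 0 as 2D and unit 1 as 3D floats. */
   GLuint s2 = S2_TEXCOORD_NONE;
   s2 &= ~S2_TEXCOORD_FMT(0, S2_TEXCOORD_FMT0_MASK);
   s2 |= S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D);
   s2 &= ~S2_TEXCOORD_FMT(1, S2_TEXCOORD_FMT0_MASK);
   s2 |= S2_TEXCOORD_FMT(1, TEXCOORDFMT_3D);
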
diff --git a/shared/intel_regions.c b/shared/intel_regions.c
index 35ab46a..51ce32a 100644
--- a/shared/intel_regions.c
+++ b/shared/intel_regions.c
@@ -39,13 +39,16 @@
* last moment.
*/
+#include <sys/ioctl.h>
+#include <errno.h>
+
#include "intel_context.h"
#include "intel_regions.h"
#include "intel_blit.h"
#include "intel_buffer_objects.h"
-#include "dri_bufmgr.h"
-#include "intel_bufmgr_ttm.h"
+#include "intel_bufmgr.h"
#include "intel_batchbuffer.h"
+#include "intel_chipset.h"
#define FILE_DEBUG_FLAG DEBUG_REGION
@@ -78,8 +81,9 @@ intel_region_unmap(struct intel_context *intel, struct intel_region *region)
static struct intel_region *
intel_region_alloc_internal(struct intel_context *intel,
- GLuint cpp, GLuint pitch, GLuint height,
- GLuint tiled, dri_bo *buffer)
+ GLuint cpp,
+ GLuint width, GLuint height, GLuint pitch,
+ dri_bo *buffer)
{
struct intel_region *region;
@@ -90,46 +94,66 @@ intel_region_alloc_internal(struct intel_context *intel,
region = calloc(sizeof(*region), 1);
region->cpp = cpp;
+ region->width = width;
+ region->height = height;
region->pitch = pitch;
- region->height = height; /* needed? */
region->refcount = 1;
- region->tiled = tiled;
region->buffer = buffer;
+ /* Default to no tiling */
+ region->tiling = I915_TILING_NONE;
+ region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE;
+
return region;
}
struct intel_region *
intel_region_alloc(struct intel_context *intel,
- GLuint cpp, GLuint pitch, GLuint height)
+ GLuint cpp, GLuint width, GLuint height, GLuint pitch)
{
dri_bo *buffer;
buffer = dri_bo_alloc(intel->bufmgr, "region",
- pitch * cpp * height, 64,
- DRM_BO_FLAG_MEM_LOCAL |
- DRM_BO_FLAG_CACHED |
- DRM_BO_FLAG_CACHED_MAPPED);
+ pitch * cpp * height, 64);
- return intel_region_alloc_internal(intel, cpp, pitch, height, 0, buffer);
+ return intel_region_alloc_internal(intel, cpp, width, height, pitch, buffer);
}
struct intel_region *
intel_region_alloc_for_handle(struct intel_context *intel,
- GLuint cpp, GLuint pitch, GLuint height,
- GLuint tiled, GLuint handle)
+ GLuint cpp,
+ GLuint width, GLuint height, GLuint pitch,
+ GLuint handle, const char *name)
{
+ struct intel_region *region;
dri_bo *buffer;
+ int ret;
+
+ buffer = intel_bo_gem_create_from_name(intel->bufmgr, name, handle);
- buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, "region", handle);
+ region = intel_region_alloc_internal(intel, cpp,
+ width, height, pitch, buffer);
+ if (region == NULL)
+ return region;
- return intel_region_alloc_internal(intel,
- cpp, pitch, height, tiled, buffer);
+ ret = dri_bo_get_tiling(region->buffer, &region->tiling,
+ &region->bit_6_swizzle);
+ if (ret != 0) {
+ fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n",
+ handle, name, strerror(-ret));
+ intel_region_release(&region);
+ return NULL;
+ }
+
+ return region;
}
void
intel_region_reference(struct intel_region **dst, struct intel_region *src)
{
+ if (src)
+ DBG("%s %d\n", __FUNCTION__, src->refcount);
+
assert(*dst == NULL);
if (src) {
src->refcount++;
@@ -138,26 +162,34 @@ intel_region_reference(struct intel_region **dst, struct intel_region *src)
}
void
-intel_region_release(struct intel_region **region)
+intel_region_release(struct intel_region **region_handle)
{
- if (!*region)
+ struct intel_region *region = *region_handle;
+
+ if (region == NULL)
return;
- DBG("%s %d\n", __FUNCTION__, (*region)->refcount - 1);
+ DBG("%s %d\n", __FUNCTION__, region->refcount - 1);
+
+ ASSERT(region->refcount > 0);
+ region->refcount--;
+
+ if (region->refcount == 0) {
+ assert(region->map_refcount == 0);
- ASSERT((*region)->refcount > 0);
- (*region)->refcount--;
+ if (region->pbo)
+ region->pbo->region = NULL;
+ region->pbo = NULL;
+ dri_bo_unreference(region->buffer);
- if ((*region)->refcount == 0) {
- assert((*region)->map_refcount == 0);
+ if (region->classic_map != NULL) {
+ drmUnmap(region->classic_map,
+ region->pitch * region->cpp * region->height);
+ }
- if ((*region)->pbo)
- (*region)->pbo->region = NULL;
- (*region)->pbo = NULL;
- dri_bo_unreference((*region)->buffer);
- free(*region);
+ free(region);
}
- *region = NULL;
+ *region_handle = NULL;
}
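
The reference/release pair is used like this (illustrative; the destination pointer must start out NULL, and release NULLs it again):

   struct intel_region *ref = NULL;
   intel_region_reference(&ref, region);   /* refcount++ */
   /* ... use ref ... */
   intel_region_release(&ref);             /* refcount--; ref becomes NULL */
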
/*
@@ -272,8 +304,8 @@ intel_region_copy(struct intel_context *intel,
intelEmitCopyBlit(intel,
dst->cpp,
- src->pitch, src->buffer, src_offset, src->tiled,
- dst->pitch, dst->buffer, dst_offset, dst->tiled,
+ src->pitch, src->buffer, src_offset, src->tiling,
+ dst->pitch, dst->buffer, dst_offset, dst->tiling,
srcx, srcy, dstx, dsty, width, height,
GL_COPY);
}
@@ -303,7 +335,7 @@ intel_region_fill(struct intel_context *intel,
intelEmitFillBlit(intel,
dst->cpp,
- dst->pitch, dst->buffer, dst_offset, dst->tiled,
+ dst->pitch, dst->buffer, dst_offset, dst->tiling,
dstx, dsty, width, height, color);
}
@@ -355,10 +387,7 @@ intel_region_release_pbo(struct intel_context *intel,
region->buffer = dri_bo_alloc(intel->bufmgr, "region",
region->pitch * region->cpp * region->height,
- 64,
- DRM_BO_FLAG_MEM_LOCAL |
- DRM_BO_FLAG_CACHED |
- DRM_BO_FLAG_CACHED_MAPPED);
+ 64);
}
/* Break the COW tie to the pbo. Both the pbo and the region end up
@@ -382,23 +411,19 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region)
/* Now blit from the texture buffer to the new buffer:
*/
- intel_batchbuffer_flush(intel->batch);
-
was_locked = intel->locked;
- if (intel->locked)
+ if (!was_locked)
LOCK_HARDWARE(intel);
intelEmitCopyBlit(intel,
region->cpp,
- region->pitch, region->buffer, 0, region->tiled,
- region->pitch, pbo->buffer, 0, region->tiled,
+ region->pitch, region->buffer, 0, region->tiling,
+ region->pitch, pbo->buffer, 0, region->tiling,
0, 0, 0, 0,
region->pitch, region->height,
GL_COPY);
- intel_batchbuffer_flush(intel->batch);
-
- if (was_locked)
+ if (!was_locked)
UNLOCK_HARDWARE(intel);
}
@@ -423,6 +448,7 @@ intel_recreate_static(struct intel_context *intel,
intelRegion *region_desc)
{
intelScreenPrivate *intelScreen = intel->intelScreen;
+ int ret;
if (region == NULL) {
region = calloc(sizeof(*region), 1);
@@ -435,21 +461,62 @@ intel_recreate_static(struct intel_context *intel,
region->cpp = intel->ctx.Visual.rgbBits / 8;
region->pitch = intelScreen->pitch;
region->height = intelScreen->height; /* needed? */
- region->tiled = region_desc->tiled;
+
+ if (region->buffer != NULL) {
+ dri_bo_unreference(region->buffer);
+ region->buffer = NULL;
+ }
if (intel->ttm) {
assert(region_desc->bo_handle != -1);
- region->buffer = intel_ttm_bo_create_from_handle(intel->bufmgr,
- name,
- region_desc->bo_handle);
+ region->buffer = intel_bo_gem_create_from_name(intel->bufmgr,
+ name,
+ region_desc->bo_handle);
+
+ ret = dri_bo_get_tiling(region->buffer, &region->tiling,
+ &region->bit_6_swizzle);
+ if (ret != 0) {
+ fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n",
+ region_desc->bo_handle, name, strerror(-ret));
+ intel_region_release(&region);
+ return NULL;
+ }
} else {
- region->buffer = dri_bo_alloc_static(intel->bufmgr,
- name,
- region_desc->offset,
- intelScreen->pitch *
- intelScreen->height,
- region_desc->map,
- DRM_BO_FLAG_MEM_TT);
+ if (region->classic_map != NULL) {
+ drmUnmap(region->classic_map,
+ region->pitch * region->cpp * region->height);
+ region->classic_map = NULL;
+ }
+ ret = drmMap(intel->driFd, region_desc->handle,
+ region->pitch * region->cpp * region->height,
+ &region->classic_map);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to drmMap %s buffer\n", name);
+ free(region);
+ return NULL;
+ }
+
+ region->buffer = intel_bo_fake_alloc_static(intel->bufmgr,
+ name,
+ region_desc->offset,
+ region->pitch * region->cpp *
+ region->height,
+ region->classic_map);
+
+ /* The sarea just gives us a boolean for whether it's tiled or not,
+ * instead of which tiling mode it is. Guess.
+ */
+ if (region_desc->tiled) {
+ if (IS_965(intel->intelScreen->deviceID) &&
+ region_desc == &intelScreen->depth)
+ region->tiling = I915_TILING_Y;
+ else
+ region->tiling = I915_TILING_X;
+ } else {
+ region->tiling = I915_TILING_NONE;
+ }
+
+ region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE;
}
assert(region->buffer != NULL);
@@ -483,15 +550,6 @@ intel_recreate_static_regions(struct intel_context *intel)
intel->back_region,
&intelScreen->back);
-#ifdef I915
- if (intelScreen->third.handle) {
- intel->third_region =
- intel_recreate_static(intel, "third",
- intel->third_region,
- &intelScreen->third);
- }
-#endif /* I915 */
-
/* Still assumes front.cpp == depth.cpp. We can kill this when we move to
* private buffers.
*/
diff --git a/shared/intel_regions.h b/shared/intel_regions.h
index 229f79a..4b120ba 100644
--- a/shared/intel_regions.h
+++ b/shared/intel_regions.h
@@ -28,8 +28,16 @@
#ifndef INTEL_REGIONS_H
#define INTEL_REGIONS_H
-#include "mtypes.h"
-#include "dri_bufmgr.h"
+/** @file intel_regions.h
+ *
+ * Structure definitions and prototypes for intel_region handling, which is
+ * the basic structure for rectangular collections of pixels stored in a dri_bo.
+ */
+
+#include <xf86drm.h>
+
+#include "main/mtypes.h"
+#include "intel_bufmgr.h"
struct intel_context;
struct intel_buffer_object;
@@ -47,14 +55,16 @@ struct intel_region
dri_bo *buffer; /**< buffer manager's buffer */
GLuint refcount; /**< Reference count for region */
GLuint cpp; /**< bytes per pixel */
- GLuint pitch; /**< in pixels */
+ GLuint width; /**< in pixels */
GLuint height; /**< in pixels */
+ GLuint pitch; /**< in pixels */
GLubyte *map; /**< only non-NULL when region is actually mapped */
GLuint map_refcount; /**< Reference count for mapping */
GLuint draw_offset; /**< Offset of drawing address within the region */
- GLboolean tiled; /**< True if the region is X or Y-tiled. Used on 965. */
-
+ uint32_t tiling; /**< Which tiling mode the region is in */
+ uint32_t bit_6_swizzle; /**< GEM flag for address swizzling requirement */
+ drmAddress classic_map; /**< drmMap of the region when not in GEM mode */
struct intel_buffer_object *pbo; /* zero-copy uploads */
};
@@ -63,13 +73,14 @@ struct intel_region
* copied by calling intel_reference_region().
*/
struct intel_region *intel_region_alloc(struct intel_context *intel,
- GLuint cpp,
- GLuint pitch, GLuint height);
+ GLuint cpp, GLuint width,
+ GLuint height, GLuint pitch);
struct intel_region *
intel_region_alloc_for_handle(struct intel_context *intel,
- GLuint cpp, GLuint pitch, GLuint height,
- GLuint tiled, unsigned int handle);
+ GLuint cpp,
+ GLuint width, GLuint height, GLuint pitch,
+ unsigned int handle, const char *name);
void intel_region_reference(struct intel_region **dst,
struct intel_region *src);
diff --git a/shared/intel_screen.c b/shared/intel_screen.c
index 5dded4b..7042c25 100644
--- a/shared/intel_screen.c
+++ b/shared/intel_screen.c
@@ -25,12 +25,12 @@
*
**************************************************************************/
-#include "glheader.h"
-#include "context.h"
-#include "framebuffer.h"
-#include "matrix.h"
-#include "renderbuffer.h"
-#include "simple_list.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/framebuffer.h"
+#include "main/matrix.h"
+#include "main/renderbuffer.h"
+#include "main/simple_list.h"
#include "utils.h"
#include "vblank.h"
#include "xmlpool.h"
@@ -41,7 +41,6 @@
#include "intel_buffers.h"
#include "intel_tex.h"
#include "intel_span.h"
-#include "intel_ioctl.h"
#include "intel_fbo.h"
#include "intel_chipset.h"
@@ -49,7 +48,7 @@
#include "i830_dri.h"
#include "intel_regions.h"
#include "intel_batchbuffer.h"
-#include "intel_bufmgr_ttm.h"
+#include "intel_bufmgr.h"
PUBLIC const char __driConfigOptions[] =
DRI_CONF_BEGIN
@@ -59,7 +58,7 @@ PUBLIC const char __driConfigOptions[] =
/* Options correspond to DRI_CONF_BO_REUSE_DISABLED,
* DRI_CONF_BO_REUSE_ALL
*/
- DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1")
+ DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 1, "0:1")
DRI_CONF_DESC_BEGIN(en, "Buffer object reuse")
DRI_CONF_ENUM(0, "Disable buffer object reuse")
DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
@@ -90,51 +89,6 @@ intelMapScreenRegions(__DRIscreenPrivate * sPriv)
{
intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private;
- if (intelScreen->front.handle) {
- if (drmMap(sPriv->fd,
- intelScreen->front.handle,
- intelScreen->front.size,
- (drmAddress *) & intelScreen->front.map) != 0) {
- _mesa_problem(NULL, "drmMap(frontbuffer) failed!");
- return GL_FALSE;
- }
- }
- else {
- _mesa_warning(NULL, "no front buffer handle in intelMapScreenRegions!");
- }
-
- if (0)
- _mesa_printf("Back 0x%08x ", intelScreen->back.handle);
- if (drmMap(sPriv->fd,
- intelScreen->back.handle,
- intelScreen->back.size,
- (drmAddress *) & intelScreen->back.map) != 0) {
- intelUnmapScreenRegions(intelScreen);
- return GL_FALSE;
- }
-
- if (intelScreen->third.handle) {
- if (0)
- _mesa_printf("Third 0x%08x ", intelScreen->third.handle);
- if (drmMap(sPriv->fd,
- intelScreen->third.handle,
- intelScreen->third.size,
- (drmAddress *) & intelScreen->third.map) != 0) {
- intelUnmapScreenRegions(intelScreen);
- return GL_FALSE;
- }
- }
-
- if (0)
- _mesa_printf("Depth 0x%08x ", intelScreen->depth.handle);
- if (drmMap(sPriv->fd,
- intelScreen->depth.handle,
- intelScreen->depth.size,
- (drmAddress *) & intelScreen->depth.map) != 0) {
- intelUnmapScreenRegions(intelScreen);
- return GL_FALSE;
- }
-
if (0)
_mesa_printf("TEX 0x%08x ", intelScreen->tex.handle);
if (intelScreen->tex.size != 0) {
@@ -147,50 +101,15 @@ intelMapScreenRegions(__DRIscreenPrivate * sPriv)
}
}
- if (0)
- printf("Mappings: front: %p back: %p third: %p depth: %p tex: %p\n",
- intelScreen->front.map,
- intelScreen->back.map, intelScreen->third.map,
- intelScreen->depth.map, intelScreen->tex.map);
return GL_TRUE;
}
void
intelUnmapScreenRegions(intelScreenPrivate * intelScreen)
{
-#define REALLY_UNMAP 1
- if (intelScreen->front.map) {
-#if REALLY_UNMAP
- if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0)
- printf("drmUnmap front failed!\n");
-#endif
- intelScreen->front.map = NULL;
- }
- if (intelScreen->back.map) {
-#if REALLY_UNMAP
- if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0)
- printf("drmUnmap back failed!\n");
-#endif
- intelScreen->back.map = NULL;
- }
- if (intelScreen->third.map) {
-#if REALLY_UNMAP
- if (drmUnmap(intelScreen->third.map, intelScreen->third.size) != 0)
- printf("drmUnmap third failed!\n");
-#endif
- intelScreen->third.map = NULL;
- }
- if (intelScreen->depth.map) {
-#if REALLY_UNMAP
- drmUnmap(intelScreen->depth.map, intelScreen->depth.size);
- intelScreen->depth.map = NULL;
-#endif
- }
if (intelScreen->tex.map) {
-#if REALLY_UNMAP
drmUnmap(intelScreen->tex.map, intelScreen->tex.size);
intelScreen->tex.map = NULL;
-#endif
}
}
@@ -215,22 +134,22 @@ intelPrintDRIInfo(intelScreenPrivate * intelScreen,
static void
-intelPrintSAREA(const struct drm_i915_sarea * sarea)
+intelPrintSAREA(const drm_i915_sarea_t * sarea)
{
fprintf(stderr, "SAREA: sarea width %d height %d\n", sarea->width,
sarea->height);
fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch);
fprintf(stderr,
- "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x\n",
+ "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n",
sarea->front_offset, sarea->front_size,
- (unsigned) sarea->front_handle);
+ (unsigned) sarea->front_handle, sarea->front_tiled);
fprintf(stderr,
- "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x\n",
+ "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n",
sarea->back_offset, sarea->back_size,
- (unsigned) sarea->back_handle);
- fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x\n",
+ (unsigned) sarea->back_handle, sarea->back_tiled);
+ fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n",
sarea->depth_offset, sarea->depth_size,
- (unsigned) sarea->depth_handle);
+ (unsigned) sarea->depth_handle, sarea->depth_tiled);
fprintf(stderr, "SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n",
sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle);
}
@@ -242,7 +161,7 @@ intelPrintSAREA(const struct drm_i915_sarea * sarea)
*/
void
intelUpdateScreenFromSAREA(intelScreenPrivate * intelScreen,
- struct drm_i915_sarea * sarea)
+ drm_i915_sarea_t * sarea)
{
intelScreen->width = sarea->width;
intelScreen->height = sarea->height;
@@ -258,13 +177,6 @@ intelUpdateScreenFromSAREA(intelScreenPrivate * intelScreen,
intelScreen->back.size = sarea->back_size;
intelScreen->back.tiled = sarea->back_tiled;
- if (intelScreen->driScrnPriv->ddx_version.minor >= 8) {
- intelScreen->third.offset = sarea->third_offset;
- intelScreen->third.handle = sarea->third_handle;
- intelScreen->third.size = sarea->third_size;
- intelScreen->third.tiled = sarea->third_tiled;
- }
-
intelScreen->depth.offset = sarea->depth_offset;
intelScreen->depth.handle = sarea->depth_handle;
intelScreen->depth.size = sarea->depth_size;
@@ -273,12 +185,10 @@ intelUpdateScreenFromSAREA(intelScreenPrivate * intelScreen,
if (intelScreen->driScrnPriv->ddx_version.minor >= 9) {
intelScreen->front.bo_handle = sarea->front_bo_handle;
intelScreen->back.bo_handle = sarea->back_bo_handle;
- intelScreen->third.bo_handle = sarea->third_bo_handle;
intelScreen->depth.bo_handle = sarea->depth_bo_handle;
} else {
intelScreen->front.bo_handle = -1;
intelScreen->back.bo_handle = -1;
- intelScreen->third.bo_handle = -1;
intelScreen->depth.bo_handle = -1;
}
@@ -291,106 +201,6 @@ intelUpdateScreenFromSAREA(intelScreenPrivate * intelScreen,
intelPrintSAREA(sarea);
}
-
-/**
- * DRI2 entrypoint
- */
-static void
-intelHandleDrawableConfig(__DRIdrawablePrivate *dPriv,
- __DRIcontextPrivate *pcp,
- __DRIDrawableConfigEvent *event)
-{
- struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
- struct intel_region *region = NULL;
- struct intel_renderbuffer *rb, *depth_rb, *stencil_rb;
- struct intel_context *intel = pcp->driverPrivate;
- int cpp, pitch;
-
- cpp = intel->ctx.Visual.rgbBits / 8;
- pitch = ((cpp * dPriv->w + 63) & ~63) / cpp;
-
- rb = intel_fb->color_rb[1];
- if (rb) {
- region = intel_region_alloc(intel, cpp, pitch, dPriv->h);
- intel_renderbuffer_set_region(rb, region);
- }
-
- rb = intel_fb->color_rb[2];
- if (rb) {
- region = intel_region_alloc(intel, cpp, pitch, dPriv->h);
- intel_renderbuffer_set_region(rb, region);
- }
-
- depth_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
- stencil_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
- if (depth_rb || stencil_rb)
- region = intel_region_alloc(intel, cpp, pitch, dPriv->h);
- if (depth_rb)
- intel_renderbuffer_set_region(depth_rb, region);
- if (stencil_rb)
- intel_renderbuffer_set_region(stencil_rb, region);
-
- /* FIXME: Tell the X server about the regions we just allocated and
- * attached. */
-}
-
-#define BUFFER_FLAG_TILED 0x0100
-
-/**
- * DRI2 entrypoint
- */
-static void
-intelHandleBufferAttach(__DRIdrawablePrivate *dPriv,
- __DRIcontextPrivate *pcp,
- __DRIBufferAttachEvent *ba)
-{
- struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
- struct intel_renderbuffer *rb;
- struct intel_region *region;
- struct intel_context *intel = pcp->driverPrivate;
- GLuint tiled;
-
- switch (ba->buffer.attachment) {
- case DRI_DRAWABLE_BUFFER_FRONT_LEFT:
- rb = intel_fb->color_rb[0];
- break;
-
- case DRI_DRAWABLE_BUFFER_BACK_LEFT:
- rb = intel_fb->color_rb[0];
- break;
-
- case DRI_DRAWABLE_BUFFER_DEPTH:
- rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
- break;
-
- case DRI_DRAWABLE_BUFFER_STENCIL:
- rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
- break;
-
- case DRI_DRAWABLE_BUFFER_ACCUM:
- default:
- fprintf(stderr, "unhandled buffer attach event, attacment type %d\n",
- ba->buffer.attachment);
- return;
- }
-
-#if 0
- /* FIXME: Add this so we can filter out when the X server sends us
- * attachment events for the buffers we just allocated. Need to
- * get the BO handle for a render buffer. */
- if (intel_renderbuffer_get_region_handle(rb) == ba->buffer.handle)
- return;
-#endif
-
- tiled = (ba->buffer.flags & BUFFER_FLAG_TILED) > 0;
- region = intel_region_alloc_for_handle(intel, ba->buffer.cpp,
- ba->buffer.pitch / ba->buffer.cpp,
- dPriv->h, tiled,
- ba->buffer.handle);
-
- intel_renderbuffer_set_region(rb, region);
-}
-
static const __DRItexOffsetExtension intelTexOffsetExtension = {
{ __DRI_TEX_OFFSET },
intelSetTexOffset,
@@ -434,7 +244,7 @@ static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)
{
intelScreenPrivate *intelScreen;
I830DRIPtr gDRIPriv = (I830DRIPtr) sPriv->pDevPriv;
- struct drm_i915_sarea *sarea;
+ drm_i915_sarea_t *sarea;
if (sPriv->devPrivSize != sizeof(I830DRIRec)) {
fprintf(stderr,
@@ -454,9 +264,9 @@ static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)
intelScreen->driScrnPriv = sPriv;
sPriv->private = (void *) intelScreen;
- intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset;
- sarea = (struct drm_i915_sarea *)
- (((GLubyte *) sPriv->pSAREA) + intelScreen->sarea_priv_offset);
+ sarea = (drm_i915_sarea_t *)
+ (((GLubyte *) sPriv->pSAREA) + gDRIPriv->sarea_priv_offset);
+ intelScreen->sarea = sarea;
intelScreen->deviceID = gDRIPriv->deviceID;
@@ -469,8 +279,6 @@ static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)
return GL_FALSE;
}
- intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset;
-
if (0)
intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv);
@@ -481,11 +289,6 @@ static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)
&intelScreen->irq_active))
return GL_FALSE;
- /* Determine if batchbuffers are allowed */
- if (!intel_get_param(sPriv, I915_PARAM_ALLOW_BATCHBUFFER,
- &intelScreen->allow_batchbuffer))
- return GL_FALSE;
-
sPriv->extensions = intelScreenExtensions;
return GL_TRUE;
@@ -497,6 +300,7 @@ intelDestroyScreen(__DRIscreenPrivate * sPriv)
{
intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private;
+ dri_bufmgr_destroy(intelScreen->bufmgr);
intelUnmapScreenRegions(intelScreen);
FREE(intelScreen);
@@ -530,23 +334,16 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
_mesa_initialize_framebuffer(&intel_fb->Base, mesaVis);
/* setup the hardware-based renderbuffers */
- {
- intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat);
- _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT,
- &intel_fb->color_rb[0]->Base);
- }
+ intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat);
+ _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT,
+ &intel_fb->color_rb[0]->Base);
if (mesaVis->doubleBufferMode) {
- intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat);
+ intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat);
+
_mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT,
&intel_fb->color_rb[1]->Base);
- if (screen->third.handle) {
- struct gl_renderbuffer *tmp_rb = NULL;
-
- intel_fb->color_rb[2] = intel_create_renderbuffer(rgbFormat);
- _mesa_reference_renderbuffer(&tmp_rb, &intel_fb->color_rb[2]->Base);
- }
}
if (mesaVis->depthBits == 24) {
@@ -569,7 +366,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
else if (mesaVis->depthBits == 16) {
/* just 16-bit depth buffer, no hw stencil */
struct intel_renderbuffer *depthRb
- = intel_create_renderbuffer(GL_DEPTH_COMPONENT16);
+ = intel_create_renderbuffer(GL_DEPTH_COMPONENT16);
_mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base);
}
@@ -649,6 +446,7 @@ intelCreateContext(const __GLcontextModes * mesaVis,
sharedContextPrivate);
}
} else {
+ intelScreen->no_vbo = GL_TRUE;
return i830CreateContext(mesaVis, driContextPriv, sharedContextPrivate);
}
#else
@@ -680,8 +478,8 @@ intelFillInModes(__DRIscreenPrivate *psp,
GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
};
- u_int8_t depth_bits_array[3];
- u_int8_t stencil_bits_array[3];
+ uint8_t depth_bits_array[3];
+ uint8_t stencil_bits_array[3];
depth_bits_array[0] = 0;
depth_bits_array[1] = depth_bits;
@@ -732,6 +530,69 @@ intelFillInModes(__DRIscreenPrivate *psp,
return configs;
}
+static GLboolean
+intel_init_bufmgr(intelScreenPrivate *intelScreen)
+{
+ GLboolean gem_disable = getenv("INTEL_NO_GEM") != NULL;
+ int gem_kernel = 0;
+ GLboolean gem_supported;
+ struct drm_i915_getparam gp;
+ __DRIscreenPrivate *spriv = intelScreen->driScrnPriv;
+
+ intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
+
+ gp.param = I915_PARAM_HAS_GEM;
+ gp.value = &gem_kernel;
+
+ (void) drmCommandWriteRead(spriv->fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
+
+ /* If we've got a new enough DDX that's initializing GEM and giving us
+ * object handles for the shared buffers, use that.
+ */
+ intelScreen->ttm = GL_FALSE;
+ if (intelScreen->driScrnPriv->dri2.enabled)
+ gem_supported = GL_TRUE;
+ else if (intelScreen->driScrnPriv->ddx_version.minor >= 9 &&
+ gem_kernel &&
+ intelScreen->front.bo_handle != -1)
+ gem_supported = GL_TRUE;
+ else
+ gem_supported = GL_FALSE;
+
+ if (!gem_disable && gem_supported) {
+ intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ);
+ if (intelScreen->bufmgr != NULL)
+ intelScreen->ttm = GL_TRUE;
+ }
+ /* Otherwise, use the classic buffer manager. */
+ if (intelScreen->bufmgr == NULL) {
+ if (gem_disable) {
+ fprintf(stderr, "GEM disabled. Using classic.\n");
+ } else {
+ fprintf(stderr, "Failed to initialize GEM. "
+ "Falling back to classic.\n");
+ }
+
+ if (intelScreen->tex.size == 0) {
+ fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n",
+ __func__, __LINE__);
+ return GL_FALSE;
+ }
+
+ intelScreen->bufmgr =
+ intel_bufmgr_fake_init(spriv->fd,
+ intelScreen->tex.offset,
+ intelScreen->tex.map,
+ intelScreen->tex.size,
+ (unsigned int * volatile)
+ &intelScreen->sarea->last_dispatch);
+ }
+
+ /* XXX bufmgr should be per-screen, not per-context */
+ intelScreen->ttm = intelScreen->ttm;
+
+ return GL_TRUE;
+}
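The GEM probe above reduces to a single DRM_I915_GETPARAM ioctl. A standalone
sketch of that check, assuming only the libdrm and i915_drm.h interfaces
already used in this file:

#include <xf86drm.h>
#include "i915_drm.h"

/* Returns non-zero if the running kernel advertises GEM support; failures
 * are treated as "no GEM", matching the driver's behaviour above.
 */
static int
kernel_has_gem(int fd)
{
   struct drm_i915_getparam gp;
   int has_gem = 0;

   gp.param = I915_PARAM_HAS_GEM;
   gp.value = &has_gem;

   if (drmCommandWriteRead(fd, DRM_I915_GETPARAM, &gp, sizeof(gp)) != 0)
      return 0;

   return has_gem;
}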
/**
* This is the driver specific part of the createNewScreen entry point.
@@ -743,6 +604,7 @@ intelFillInModes(__DRIscreenPrivate *psp,
*/
static const __DRIconfig **intelInitScreen(__DRIscreenPrivate *psp)
{
+ intelScreenPrivate *intelScreen;
#ifdef I915
static const __DRIversion ddx_expected = { 1, 5, 0 };
#else
@@ -776,6 +638,10 @@ static const __DRIconfig **intelInitScreen(__DRIscreenPrivate *psp)
psp->extensions = intelScreenExtensions;
+ intelScreen = psp->private;
+ if (!intel_init_bufmgr(intelScreen))
+ return GL_FALSE;
+
return (const __DRIconfig **)
intelFillInModes(psp, dri_priv->cpp * 8,
(dri_priv->cpp == 2) ? 16 : 24,
@@ -835,26 +701,15 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp)
intelScreen->drmMinor = psp->drm_version.minor;
- /* Determine chipset ID? */
+ /* Determine chipset ID */
if (!intel_get_param(psp, I915_PARAM_CHIPSET_ID,
&intelScreen->deviceID))
return GL_FALSE;
- /* Determine if IRQs are active? */
- if (!intel_get_param(psp, I915_PARAM_IRQ_ACTIVE,
- &intelScreen->irq_active))
- return GL_FALSE;
-
- /* Determine if batchbuffers are allowed */
- if (!intel_get_param(psp, I915_PARAM_ALLOW_BATCHBUFFER,
- &intelScreen->allow_batchbuffer))
- return GL_FALSE;
-
- if (!intelScreen->allow_batchbuffer) {
- fprintf(stderr, "batch buffer not allowed\n");
- return GL_FALSE;
- }
+ if (!intel_init_bufmgr(intelScreen))
+ return GL_FALSE;
+ intelScreen->irq_active = 1;
psp->extensions = intelScreenExtensions;
return driConcatConfigs(intelFillInModes(psp, 16, 16, 0, 1),
@@ -877,6 +732,4 @@ const struct __DriverAPIRec driDriverAPI = {
.CopySubBuffer = intelCopySubBuffer,
.InitScreen2 = intelInitScreen2,
- .HandleDrawableConfig = intelHandleDrawableConfig,
- .HandleBufferAttach = intelHandleBufferAttach,
};
diff --git a/shared/intel_screen.h b/shared/intel_screen.h
index e62b2d7..fcd0d9c 100644
--- a/shared/intel_screen.h
+++ b/shared/intel_screen.h
@@ -30,6 +30,7 @@
#include <sys/time.h>
#include "dri_util.h"
+#include "intel_bufmgr.h"
#include "i915_drm.h"
#include "xmlconfig.h"
@@ -55,7 +56,6 @@ typedef struct
{
intelRegion front;
intelRegion back;
- intelRegion third;
intelRegion depth;
intelRegion tex;
@@ -67,12 +67,18 @@ typedef struct
int logTextureGranularity;
__DRIscreenPrivate *driScrnPriv;
- unsigned int sarea_priv_offset;
+
+ volatile drm_i915_sarea_t *sarea;
int drmMinor;
int irq_active;
- int allow_batchbuffer;
+
+ GLboolean no_hw;
+
+ GLboolean no_vbo;
+ int ttm;
+ dri_bufmgr *bufmgr;
/**
* Configuration cache with default values for all contexts
@@ -88,7 +94,7 @@ extern void intelUnmapScreenRegions(intelScreenPrivate * intelScreen);
extern void
intelUpdateScreenFromSAREA(intelScreenPrivate * intelScreen,
- struct drm_i915_sarea * sarea);
+ drm_i915_sarea_t * sarea);
extern void intelDestroyContext(__DRIcontextPrivate * driContextPriv);
diff --git a/shared/intel_span.c b/shared/intel_span.c
index 742b1b8..d931504 100644
--- a/shared/intel_span.c
+++ b/shared/intel_span.c
@@ -25,20 +25,226 @@
*
**************************************************************************/
-#include "glheader.h"
-#include "macros.h"
-#include "mtypes.h"
-#include "colormac.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_screen.h"
#include "intel_span.h"
#include "intel_regions.h"
-#include "intel_ioctl.h"
#include "intel_tex.h"
#include "swrast/swrast.h"
+static void
+intel_set_span_functions(struct intel_context *intel,
+ struct gl_renderbuffer *rb);
+
+#define SPAN_CACHE_SIZE 4096
+
+static void
+get_span_cache(struct intel_renderbuffer *irb, uint32_t offset)
+{
+ if (irb->span_cache == NULL) {
+ irb->span_cache = _mesa_malloc(SPAN_CACHE_SIZE);
+ irb->span_cache_offset = -1;
+ }
+
+ if ((offset & ~(SPAN_CACHE_SIZE - 1)) != irb->span_cache_offset) {
+ irb->span_cache_offset = offset & ~(SPAN_CACHE_SIZE - 1);
+ dri_bo_get_subdata(irb->region->buffer, irb->span_cache_offset,
+ SPAN_CACHE_SIZE, irb->span_cache);
+ }
+}
+
+static void
+clear_span_cache(struct intel_renderbuffer *irb)
+{
+ irb->span_cache_offset = -1;
+}
+
+static uint32_t
+pread_32(struct intel_renderbuffer *irb, uint32_t offset)
+{
+ get_span_cache(irb, offset);
+
+ return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1)));
+}
+
+static uint32_t
+pread_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset)
+{
+ get_span_cache(irb, offset);
+
+ return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))) |
+ 0xff000000;
+}
+
+static uint16_t
+pread_16(struct intel_renderbuffer *irb, uint32_t offset)
+{
+ get_span_cache(irb, offset);
+
+ return *(uint16_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1)));
+}
+
+static uint8_t
+pread_8(struct intel_renderbuffer *irb, uint32_t offset)
+{
+ get_span_cache(irb, offset);
+
+ return *(uint8_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1)));
+}
+
+static void
+pwrite_32(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val)
+{
+ clear_span_cache(irb);
+
+ dri_bo_subdata(irb->region->buffer, offset, 4, &val);
+}
+
+static void
+pwrite_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val)
+{
+ clear_span_cache(irb);
+
+ dri_bo_subdata(irb->region->buffer, offset, 3, &val);
+}
+
+static void
+pwrite_16(struct intel_renderbuffer *irb, uint32_t offset, uint16_t val)
+{
+ clear_span_cache(irb);
+
+ dri_bo_subdata(irb->region->buffer, offset, 2, &val);
+}
+
+static void
+pwrite_8(struct intel_renderbuffer *irb, uint32_t offset, uint8_t val)
+{
+ clear_span_cache(irb);
+
+ dri_bo_subdata(irb->region->buffer, offset, 1, &val);
+}
+
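All of the pread/pwrite helpers above funnel software-fallback access through
a 4 KB read cache: reads refill it with dri_bo_get_subdata() whenever the
offset leaves the cached window, writes simply invalidate it. A small sketch
of the window arithmetic with assumed offsets:

/* SPAN_CACHE_SIZE is 4096, so the cached window is the 4 KB-aligned block
 * containing the offset.  Offsets 0x1010 and 0x1ffc both map to base 0x1000
 * and share one fill; offset 0x2004 maps to base 0x2000 and forces a refill.
 */
static uint32_t
span_cache_base(uint32_t offset)
{
   return offset & ~(SPAN_CACHE_SIZE - 1);
}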
+static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb,
+ int x, int y)
+{
+ return (y * irb->region->pitch + x) * irb->region->cpp;
+}
+
+/*
+ * Deal with tiled surfaces
+ */
+
+static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb,
+ int x, int y)
+{
+ int tile_stride;
+ int xbyte;
+ int x_tile_off, y_tile_off;
+ int x_tile_number, y_tile_number;
+ int tile_off, tile_base;
+
+ tile_stride = (irb->pfPitch * irb->region->cpp) << 3;
+
+ xbyte = x * irb->region->cpp;
+
+ x_tile_off = xbyte & 0x1ff;
+ y_tile_off = y & 7;
+
+ x_tile_number = xbyte >> 9;
+ y_tile_number = y >> 3;
+
+ tile_off = (y_tile_off << 9) + x_tile_off;
+
+ switch (irb->region->bit_6_swizzle) {
+ case I915_BIT_6_SWIZZLE_NONE:
+ break;
+ case I915_BIT_6_SWIZZLE_9:
+ tile_off ^= ((tile_off >> 3) & 64);
+ break;
+ case I915_BIT_6_SWIZZLE_9_10:
+ tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64);
+ break;
+ case I915_BIT_6_SWIZZLE_9_11:
+ tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64);
+ break;
+ case I915_BIT_6_SWIZZLE_9_10_11:
+ tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^
+ ((tile_off >> 5) & 64);
+ break;
+ default:
+ fprintf(stderr, "Unknown tile swizzling mode %d\n",
+ irb->region->bit_6_swizzle);
+ exit(1);
+ }
+
+ tile_base = (x_tile_number << 12) + y_tile_number * tile_stride;
+
+#if 0
+ printf("(%d,%d) -> %d + %d = %d (pitch = %d, tstride = %d)\n",
+ x, y, tile_off, tile_base,
+ tile_off + tile_base,
+ irb->pfPitch, tile_stride);
+#endif
+
+ return tile_base + tile_off;
+}
+
+static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
+ int x, int y)
+{
+ int tile_stride;
+ int xbyte;
+ int x_tile_off, y_tile_off;
+ int x_tile_number, y_tile_number;
+ int tile_off, tile_base;
+
+ tile_stride = (irb->pfPitch * irb->region->cpp) << 5;
+
+ xbyte = x * irb->region->cpp;
+
+ x_tile_off = xbyte & 0x7f;
+ y_tile_off = y & 0x1f;
+
+ x_tile_number = xbyte >> 7;
+ y_tile_number = y >> 5;
+
+ tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) +
+ (x_tile_off & 0xf);
+
+ switch (irb->region->bit_6_swizzle) {
+ case I915_BIT_6_SWIZZLE_NONE:
+ break;
+ case I915_BIT_6_SWIZZLE_9:
+ tile_off ^= ((tile_off >> 3) & 64);
+ break;
+ case I915_BIT_6_SWIZZLE_9_10:
+ tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64);
+ break;
+ case I915_BIT_6_SWIZZLE_9_11:
+ tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64);
+ break;
+ case I915_BIT_6_SWIZZLE_9_10_11:
+ tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^
+ ((tile_off >> 5) & 64);
+ break;
+ default:
+ fprintf(stderr, "Unknown tile swizzling mode %d\n",
+ irb->region->bit_6_swizzle);
+ exit(1);
+ }
+
+ tile_base = (x_tile_number << 12) + y_tile_number * tile_stride;
+
+ return tile_base + tile_off;
+}
+
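x_tile_swizzle() maps a linear (x, y) pixel address into the X-tile layout:
each tile is 512 bytes wide by 8 rows (4 KB), consecutive tiles along x sit
4 KB apart, and a full row of tiles advances by tile_stride bytes. A worked
sketch of the same arithmetic with assumed numbers (cpp = 4, pitch = 1024
pixels, I915_BIT_6_SWIZZLE_NONE):

/* For pixel (300, 20):
 *   xbyte         = 300 * 4               = 1200
 *   x_tile_off    = 1200 & 0x1ff          = 176
 *   y_tile_off    = 20 & 7                = 4
 *   x_tile_number = 1200 >> 9             = 2
 *   y_tile_number = 20 >> 3               = 2
 *   tile_stride   = (1024 * 4) << 3       = 32768
 *   tile_off      = (4 << 9) + 176        = 2224
 *   tile_base     = (2 << 12) + 2 * 32768 = 73728
 *   byte offset   = 73728 + 2224          = 75952
 */
static uint32_t
x_tile_offset_example(int x, int y, int cpp, int pitch_pixels)
{
   int xbyte = x * cpp;
   int tile_stride = (pitch_pixels * cpp) << 3;
   int tile_off  = ((y & 7) << 9) + (xbyte & 0x1ff);
   int tile_base = ((xbyte >> 9) << 12) + (y >> 3) * tile_stride;

   return tile_base + tile_off;   /* no bit-6 swizzling assumed */
}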
/*
break intelWriteRGBASpan_ARGB8888
*/
@@ -51,11 +257,12 @@
struct intel_renderbuffer *irb = intel_renderbuffer(rb); \
const GLint yScale = irb->RenderToTexture ? 1 : -1; \
const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \
- GLubyte *buf = (GLubyte *) irb->pfMap \
- + (intel->drawY * irb->pfPitch + intel->drawX) * irb->region->cpp;\
+ unsigned int num_cliprects; \
+ struct drm_clip_rect *cliprects; \
+ int x_off, y_off; \
GLuint p; \
- assert(irb->pfMap);\
- (void) p;
+ (void) p; \
+ intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
/* XXX FBO: this is identical to the macro in spantmp2.h except we get
* the cliprect info from the context, not the driDrawable.
@@ -63,22 +270,29 @@
*/
#define HW_CLIPLOOP() \
do { \
- int _nc = intel->numClipRects; \
+ int _nc = num_cliprects; \
while ( _nc-- ) { \
- int minx = intel->pClipRects[_nc].x1 - intel->drawX; \
- int miny = intel->pClipRects[_nc].y1 - intel->drawY; \
- int maxx = intel->pClipRects[_nc].x2 - intel->drawX; \
- int maxy = intel->pClipRects[_nc].y2 - intel->drawY;
-
-
-
+ int minx = cliprects[_nc].x1 - x_off; \
+ int miny = cliprects[_nc].y1 - y_off; \
+ int maxx = cliprects[_nc].x2 - x_off; \
+ int maxy = cliprects[_nc].y2 - y_off;
+
+#if 0
+ }}
+#endif
#define Y_FLIP(_y) ((_y) * yScale + yBias)
+/* XXX with GEM, these need to tell the kernel */
#define HW_LOCK()
#define HW_UNLOCK()
+/* Convenience macros to avoid typing the swizzle argument over and over */
+#define NO_TILE(_X, _Y) no_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off)
+#define X_TILE(_X, _Y) x_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off)
+#define Y_TILE(_X, _Y) y_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off)
+
/* 16 bit, RGB565 color spanline and pixel functions
*/
#define SPANTMP_PIXEL_FMT GL_RGB
@@ -86,7 +300,8 @@
#define TAG(x) intel##x##_RGB565
#define TAG2(x,y) intel##x##_RGB565##y
-#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 2)
+#define GET_VALUE(X, Y) pread_16(irb, NO_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_16(irb, NO_TILE(X, Y), V)
#include "spantmp2.h"
/* 32 bit, ARGB8888 color spanline and pixel functions
@@ -96,17 +311,93 @@
#define TAG(x) intel##x##_ARGB8888
#define TAG2(x,y) intel##x##_ARGB8888##y
-#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 4)
+#define GET_VALUE(X, Y) pread_32(irb, NO_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_32(irb, NO_TILE(X, Y), V)
+#include "spantmp2.h"
+
+/* 32 bit, xRGB8888 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x) intel##x##_xRGB8888
+#define TAG2(x,y) intel##x##_xRGB8888##y
+#define GET_VALUE(X, Y) pread_xrgb8888(irb, NO_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, NO_TILE(X, Y), V)
+#include "spantmp2.h"
+
+/* 16 bit RGB565 color tile spanline and pixel functions
+ */
+
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x) intel_XTile_##x##_RGB565
+#define TAG2(x,y) intel_XTile_##x##_RGB565##y
+#define GET_VALUE(X, Y) pread_16(irb, X_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_16(irb, X_TILE(X, Y), V)
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x) intel_YTile_##x##_RGB565
+#define TAG2(x,y) intel_YTile_##x##_RGB565##y
+#define GET_VALUE(X, Y) pread_16(irb, Y_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_16(irb, Y_TILE(X, Y), V)
+#include "spantmp2.h"
+
+/* 32 bit ARGB888 color tile spanline and pixel functions
+ */
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x) intel_XTile_##x##_ARGB8888
+#define TAG2(x,y) intel_XTile_##x##_ARGB8888##y
+#define GET_VALUE(X, Y) pread_32(irb, X_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_32(irb, X_TILE(X, Y), V)
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x) intel_YTile_##x##_ARGB8888
+#define TAG2(x,y) intel_YTile_##x##_ARGB8888##y
+#define GET_VALUE(X, Y) pread_32(irb, Y_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_32(irb, Y_TILE(X, Y), V)
+#include "spantmp2.h"
+
+/* 32 bit xRGB888 color tile spanline and pixel functions
+ */
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x) intel_XTile_##x##_xRGB8888
+#define TAG2(x,y) intel_XTile_##x##_xRGB8888##y
+#define GET_VALUE(X, Y) pread_xrgb8888(irb, X_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, X_TILE(X, Y), V)
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x) intel_YTile_##x##_xRGB8888
+#define TAG2(x,y) intel_YTile_##x##_xRGB8888##y
+#define GET_VALUE(X, Y) pread_xrgb8888(irb, Y_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, Y_TILE(X, Y), V)
#include "spantmp2.h"
#define LOCAL_DEPTH_VARS \
struct intel_context *intel = intel_context(ctx); \
struct intel_renderbuffer *irb = intel_renderbuffer(rb); \
- const GLuint pitch = irb->pfPitch/***XXX region->pitch*/; /* in pixels */ \
const GLint yScale = irb->RenderToTexture ? 1 : -1; \
- const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \
- char *buf = (char *) irb->pfMap/*XXX use region->map*/ + \
- (intel->drawY * pitch + intel->drawX) * irb->region->cpp;
+ const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \
+ unsigned int num_cliprects; \
+ struct drm_clip_rect *cliprects; \
+ int x_off, y_off; \
+ intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
@@ -115,15 +406,28 @@
** 16-bit depthbuffer functions.
**/
#define VALUE_TYPE GLushort
+#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, NO_TILE(_x, _y), d)
+#define READ_DEPTH(d, _x, _y) d = pread_16(irb, NO_TILE(_x, _y))
+#define TAG(x) intel##x##_z16
+#include "depthtmp.h"
-#define WRITE_DEPTH( _x, _y, d ) \
- ((GLushort *)buf)[(_x) + (_y) * pitch] = d;
-
-#define READ_DEPTH( d, _x, _y ) \
- d = ((GLushort *)buf)[(_x) + (_y) * pitch];
+/**
+ ** 16-bit x tile depthbuffer functions.
+ **/
+#define VALUE_TYPE GLushort
+#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, X_TILE(_x, _y), d)
+#define READ_DEPTH(d, _x, _y) d = pread_16(irb, X_TILE(_x, _y))
+#define TAG(x) intel_XTile_##x##_z16
+#include "depthtmp.h"
-#define TAG(x) intel##x##_z16
+/**
+ ** 16-bit y tile depthbuffer functions.
+ **/
+#define VALUE_TYPE GLushort
+#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, Y_TILE(_x, _y), d)
+#define READ_DEPTH(d, _x, _y) d = pread_16(irb, Y_TILE(_x, _y))
+#define TAG(x) intel_YTile_##x##_z16
#include "depthtmp.h"
@@ -136,14 +440,12 @@
#define VALUE_TYPE GLuint
/* Change ZZZS -> SZZZ */
-#define WRITE_DEPTH( _x, _y, d ) { \
- GLuint tmp = ((d) >> 8) | ((d) << 24); \
- ((GLuint *)buf)[(_x) + (_y) * pitch] = tmp; \
-}
+#define WRITE_DEPTH(_x, _y, d) \
+ pwrite_32(irb, NO_TILE(_x, _y), ((d) >> 8) | ((d) << 24))
/* Change SZZZ -> ZZZS */
#define READ_DEPTH( d, _x, _y ) { \
- GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch]; \
+ GLuint tmp = pread_32(irb, NO_TILE(_x, _y)); \
d = (tmp << 8) | (tmp >> 24); \
}
@@ -152,22 +454,100 @@
/**
- ** 8-bit stencil function (XXX FBO: This is obsolete)
+ ** 24/8-bit x-tile interleaved depth/stencil functions
+ ** Note: we're actually reading back combined depth+stencil values.
+ ** The wrappers in main/depthstencil.c are used to extract the depth
+ ** and stencil values.
+ **/
+#define VALUE_TYPE GLuint
+
+/* Change ZZZS -> SZZZ */
+#define WRITE_DEPTH(_x, _y, d) \
+ pwrite_32(irb, X_TILE(_x, _y), ((d) >> 8) | ((d) << 24))
+
+/* Change SZZZ -> ZZZS */
+#define READ_DEPTH( d, _x, _y ) { \
+ GLuint tmp = pread_32(irb, X_TILE(_x, _y)); \
+ d = (tmp << 8) | (tmp >> 24); \
+}
+
+#define TAG(x) intel_XTile_##x##_z24_s8
+#include "depthtmp.h"
+
+/**
+ ** 24/8-bit y-tile interleaved depth/stencil functions
+ ** Note: we're actually reading back combined depth+stencil values.
+ ** The wrappers in main/depthstencil.c are used to extract the depth
+ ** and stencil values.
**/
-#define WRITE_STENCIL( _x, _y, d ) { \
- GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch]; \
- tmp &= 0xffffff; \
- tmp |= ((d) << 24); \
- ((GLuint *) buf)[(_x) + (_y) * pitch] = tmp; \
+#define VALUE_TYPE GLuint
+
+/* Change ZZZS -> SZZZ */
+#define WRITE_DEPTH(_x, _y, d) \
+ pwrite_32(irb, Y_TILE(_x, _y), ((d) >> 8) | ((d) << 24))
+
+/* Change SZZZ -> ZZZS */
+#define READ_DEPTH( d, _x, _y ) { \
+ GLuint tmp = pread_32(irb, Y_TILE(_x, _y)); \
+ d = (tmp << 8) | (tmp >> 24); \
}
-#define READ_STENCIL( d, _x, _y ) \
- d = ((GLuint *)buf)[(_x) + (_y) * pitch] >> 24;
+#define TAG(x) intel_YTile_##x##_z24_s8
+#include "depthtmp.h"
+
+/**
+ ** 8-bit stencil function (XXX FBO: This is obsolete)
+ **/
+#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, NO_TILE(_x, _y) + 3, d)
+#define READ_STENCIL(d, _x, _y) d = pread_8(irb, NO_TILE(_x, _y) + 3);
#define TAG(x) intel##x##_z24_s8
#include "stenciltmp.h"
+/**
+ ** 8-bit x-tile stencil function (XXX FBO: This is obsolete)
+ **/
+#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, X_TILE(_x, _y) + 3, d)
+#define READ_STENCIL(d, _x, _y) d = pread_8(irb, X_TILE(_x, _y) + 3);
+#define TAG(x) intel_XTile_##x##_z24_s8
+#include "stenciltmp.h"
+
+/**
+ ** 8-bit y-tile stencil function (XXX FBO: This is obsolete)
+ **/
+#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, Y_TILE(_x, _y) + 3, d)
+#define READ_STENCIL(d, _x, _y) d = pread_8(irb, Y_TILE(_x, _y) + 3)
+#define TAG(x) intel_YTile_##x##_z24_s8
+#include "stenciltmp.h"
+
+void
+intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb)
+{
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+ if (irb == NULL || irb->region == NULL)
+ return;
+
+ irb->pfPitch = irb->region->pitch;
+
+ intel_set_span_functions(intel, rb);
+}
+
+void
+intel_renderbuffer_unmap(struct intel_context *intel,
+ struct gl_renderbuffer *rb)
+{
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+ if (irb == NULL || irb->region == NULL)
+ return;
+ clear_span_cache(irb);
+ irb->pfPitch = 0;
+
+ rb->GetRow = NULL;
+ rb->PutRow = NULL;
+}
/**
* Map or unmap all the renderbuffers which we may need during
@@ -186,23 +566,13 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
{
GLcontext *ctx = &intel->ctx;
GLuint i, j;
- struct intel_renderbuffer *irb;
/* color draw buffers */
for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) {
- struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[j];
- irb = intel_renderbuffer(rb);
- if (irb) {
- /* this is a user-created intel_renderbuffer */
- if (irb->region) {
- if (map)
- intel_region_map(intel, irb->region);
- else
- intel_region_unmap(intel, irb->region);
- irb->pfMap = irb->region->map;
- irb->pfPitch = irb->region->pitch;
- }
- }
+ if (map)
+ intel_renderbuffer_map(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]);
+ else
+ intel_renderbuffer_unmap(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]);
}
/* check for render to textures */
@@ -213,89 +583,36 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
if (tex) {
/* render to texture */
ASSERT(att->Renderbuffer);
- if (map) {
- struct gl_texture_image *texImg;
- texImg = tex->Image[att->CubeMapFace][att->TextureLevel];
+ if (map)
intel_tex_map_images(intel, intel_texture_object(tex));
- }
- else {
+ else
intel_tex_unmap_images(intel, intel_texture_object(tex));
- }
}
}
/* color read buffers */
- irb = intel_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
- if (irb && irb->region) {
- if (map)
- intel_region_map(intel, irb->region);
- else
- intel_region_unmap(intel, irb->region);
- irb->pfMap = irb->region->map;
- irb->pfPitch = irb->region->pitch;
- }
-
- /* Account for front/back color page flipping.
- * The span routines use the pfMap and pfPitch fields which will
- * swap the front/back region map/pitch if we're page flipped.
- * Do this after mapping, above, so the map field is valid.
- */
-#if 0
- if (map && ctx->DrawBuffer->Name == 0) {
- struct intel_renderbuffer *irbFront
- = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_FRONT_LEFT);
- struct intel_renderbuffer *irbBack
- = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_BACK_LEFT);
- if (irbBack) {
- /* double buffered */
- if (intel->sarea->pf_current_page == 0) {
- irbFront->pfMap = irbFront->region->map;
- irbFront->pfPitch = irbFront->region->pitch;
- irbBack->pfMap = irbBack->region->map;
- irbBack->pfPitch = irbBack->region->pitch;
- }
- else {
- irbFront->pfMap = irbBack->region->map;
- irbFront->pfPitch = irbBack->region->pitch;
- irbBack->pfMap = irbFront->region->map;
- irbBack->pfPitch = irbFront->region->pitch;
- }
- }
- }
-#endif
+ if (map)
+ intel_renderbuffer_map(intel, ctx->ReadBuffer->_ColorReadBuffer);
+ else
+ intel_renderbuffer_unmap(intel, ctx->ReadBuffer->_ColorReadBuffer);
/* depth buffer (Note wrapper!) */
if (ctx->DrawBuffer->_DepthBuffer) {
- irb = intel_renderbuffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);
- if (irb && irb->region) {
- if (map) {
- intel_region_map(intel, irb->region);
- irb->pfMap = irb->region->map;
- irb->pfPitch = irb->region->pitch;
- }
- else {
- intel_region_unmap(intel, irb->region);
- irb->pfMap = irb->region->map;
- irb->pfPitch = irb->region->pitch;
- }
- }
+ if (map)
+ intel_renderbuffer_map(intel, ctx->DrawBuffer->_DepthBuffer->Wrapped);
+ else
+ intel_renderbuffer_unmap(intel,
+ ctx->DrawBuffer->_DepthBuffer->Wrapped);
}
/* stencil buffer (Note wrapper!) */
if (ctx->DrawBuffer->_StencilBuffer) {
- irb = intel_renderbuffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);
- if (irb && irb->region) {
- if (map) {
- intel_region_map(intel, irb->region);
- irb->pfMap = irb->region->map;
- irb->pfPitch = irb->region->pitch;
- }
- else {
- intel_region_unmap(intel, irb->region);
- irb->pfMap = irb->region->map;
- irb->pfPitch = irb->region->pitch;
- }
- }
+ if (map)
+ intel_renderbuffer_map(intel,
+ ctx->DrawBuffer->_StencilBuffer->Wrapped);
+ else
+ intel_renderbuffer_unmap(intel,
+ ctx->DrawBuffer->_StencilBuffer->Wrapped);
}
}
@@ -313,19 +630,10 @@ intelSpanRenderStart(GLcontext * ctx)
struct intel_context *intel = intel_context(ctx);
GLuint i;
- intelFinish(&intel->ctx);
+ intelFlush(&intel->ctx);
LOCK_HARDWARE(intel);
-#if 0
- /* Just map the framebuffer and all textures. Bufmgr code will
- * take care of waiting on the necessary fences:
- */
- intel_region_map(intel, intel->front_region);
- intel_region_map(intel, intel->back_region);
- intel_region_map(intel, intel->depth_region);
-#endif
-
- for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
if (ctx->Texture.Unit[i]._ReallyEnabled) {
struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
intel_tex_map_images(intel, intel_texture_object(texObj));
@@ -347,15 +655,7 @@ intelSpanRenderFinish(GLcontext * ctx)
_swrast_flush(ctx);
- /* Now unmap the framebuffer:
- */
-#if 0
- intel_region_unmap(intel, intel->front_region);
- intel_region_unmap(intel, intel->back_region);
- intel_region_unmap(intel, intel->depth_region);
-#endif
-
- for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
if (ctx->Texture.Unit[i]._ReallyEnabled) {
struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
intel_tex_unmap_images(intel, intel_texture_object(texObj));
@@ -381,26 +681,108 @@ intelInitSpanFuncs(GLcontext * ctx)
* Plug in appropriate span read/write functions for the given renderbuffer.
* These are used for the software fallbacks.
*/
-void
-intel_set_span_functions(struct gl_renderbuffer *rb)
+static void
+intel_set_span_functions(struct intel_context *intel,
+ struct gl_renderbuffer *rb)
{
+ struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb;
+ uint32_t tiling;
+
+ /* If in GEM mode, we need to do the tile address swizzling ourselves,
+ * instead of the fence registers handling it.
+ */
+ if (intel->ttm)
+ tiling = irb->region->tiling;
+ else
+ tiling = I915_TILING_NONE;
+
if (rb->_ActualFormat == GL_RGB5) {
/* 565 RGB */
- intelInitPointers_RGB565(rb);
+ switch (tiling) {
+ case I915_TILING_NONE:
+ default:
+ intelInitPointers_RGB565(rb);
+ break;
+ case I915_TILING_X:
+ intel_XTile_InitPointers_RGB565(rb);
+ break;
+ case I915_TILING_Y:
+ intel_YTile_InitPointers_RGB565(rb);
+ break;
+ }
+ }
+ else if (rb->_ActualFormat == GL_RGB8) {
+ /* 8888 RGBx */
+ switch (tiling) {
+ case I915_TILING_NONE:
+ default:
+ intelInitPointers_xRGB8888(rb);
+ break;
+ case I915_TILING_X:
+ intel_XTile_InitPointers_xRGB8888(rb);
+ break;
+ case I915_TILING_Y:
+ intel_YTile_InitPointers_xRGB8888(rb);
+ break;
+ }
}
else if (rb->_ActualFormat == GL_RGBA8) {
/* 8888 RGBA */
- intelInitPointers_ARGB8888(rb);
+ switch (tiling) {
+ case I915_TILING_NONE:
+ default:
+ intelInitPointers_ARGB8888(rb);
+ break;
+ case I915_TILING_X:
+ intel_XTile_InitPointers_ARGB8888(rb);
+ break;
+ case I915_TILING_Y:
+ intel_YTile_InitPointers_ARGB8888(rb);
+ break;
+ }
}
else if (rb->_ActualFormat == GL_DEPTH_COMPONENT16) {
- intelInitDepthPointers_z16(rb);
+ switch (tiling) {
+ case I915_TILING_NONE:
+ default:
+ intelInitDepthPointers_z16(rb);
+ break;
+ case I915_TILING_X:
+ intel_XTile_InitDepthPointers_z16(rb);
+ break;
+ case I915_TILING_Y:
+ intel_YTile_InitDepthPointers_z16(rb);
+ break;
+ }
}
else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24 || /* XXX FBO remove */
rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
- intelInitDepthPointers_z24_s8(rb);
+ switch (tiling) {
+ case I915_TILING_NONE:
+ default:
+ intelInitDepthPointers_z24_s8(rb);
+ break;
+ case I915_TILING_X:
+ intel_XTile_InitDepthPointers_z24_s8(rb);
+ break;
+ case I915_TILING_Y:
+ intel_YTile_InitDepthPointers_z24_s8(rb);
+ break;
+ }
}
- else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { /* XXX FBO remove */
- intelInitStencilPointers_z24_s8(rb);
+ else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) {
+ switch (tiling) {
+ case I915_TILING_NONE:
+ default:
+ intelInitStencilPointers_z24_s8(rb);
+ break;
+ case I915_TILING_X:
+ intel_XTile_InitStencilPointers_z24_s8(rb);
+ break;
+ case I915_TILING_Y:
+ intel_YTile_InitStencilPointers_z24_s8(rb);
+ break;
+ }
}
else {
_mesa_problem(NULL,
diff --git a/shared/intel_span.h b/shared/intel_span.h
index 5201f6d..acbeb4a 100644
--- a/shared/intel_span.h
+++ b/shared/intel_span.h
@@ -32,7 +32,9 @@ extern void intelInitSpanFuncs(GLcontext * ctx);
extern void intelSpanRenderFinish(GLcontext * ctx);
extern void intelSpanRenderStart(GLcontext * ctx);
-
-extern void intel_set_span_functions(struct gl_renderbuffer *rb);
+void intel_renderbuffer_map(struct intel_context *intel,
+ struct gl_renderbuffer *rb);
+void intel_renderbuffer_unmap(struct intel_context *intel,
+ struct gl_renderbuffer *rb);
#endif
diff --git a/shared/intel_tex.c b/shared/intel_tex.c
index 4fa18e2..e64d8a1 100644
--- a/shared/intel_tex.c
+++ b/shared/intel_tex.c
@@ -1,7 +1,7 @@
#include "swrast/swrast.h"
-#include "texobj.h"
-#include "teximage.h"
-#include "mipmap.h"
+#include "main/texobj.h"
+#include "main/teximage.h"
+#include "main/mipmap.h"
#include "intel_context.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
@@ -222,22 +222,16 @@ intelInitTextureFuncs(struct dd_function_table *functions)
functions->TexSubImage1D = intelTexSubImage1D;
functions->TexSubImage2D = intelTexSubImage2D;
functions->TexSubImage3D = intelTexSubImage3D;
-#ifdef I915
functions->CopyTexImage1D = intelCopyTexImage1D;
functions->CopyTexImage2D = intelCopyTexImage2D;
functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
-#else
- functions->CopyTexImage1D = _swrast_copy_teximage1d;
- functions->CopyTexImage2D = _swrast_copy_teximage2d;
- functions->CopyTexSubImage1D = _swrast_copy_texsubimage1d;
- functions->CopyTexSubImage2D = _swrast_copy_texsubimage2d;
-#endif
functions->GetTexImage = intelGetTexImage;
functions->GenerateMipmap = intelGenerateMipmap;
/* compressed texture functions */
functions->CompressedTexImage2D = intelCompressedTexImage2D;
+ functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D;
functions->GetCompressedTexImage = intelGetCompressedTexImage;
functions->NewTextureObject = intelNewTextureObject;
diff --git a/shared/intel_tex.h b/shared/intel_tex.h
index fe7a8ba..742ccc0 100644
--- a/shared/intel_tex.h
+++ b/shared/intel_tex.h
@@ -28,7 +28,7 @@
#ifndef INTELTEX_INC
#define INTELTEX_INC
-#include "mtypes.h"
+#include "main/mtypes.h"
#include "intel_context.h"
#include "texmem.h"
@@ -130,6 +130,16 @@ void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage );
+void intelCompressedTexSubImage2D(GLcontext * ctx,
+ GLenum target,
+ GLint level,
+ GLint xoffset, GLint yoffset,
+ GLsizei width, GLsizei height,
+ GLenum format, GLsizei imageSize,
+ const GLvoid * pixels,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage);
+
void intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
GLvoid *pixels,
struct gl_texture_object *texObj,
diff --git a/shared/intel_tex_copy.c b/shared/intel_tex_copy.c
index 1add7c6..08437aa 100644
--- a/shared/intel_tex_copy.c
+++ b/shared/intel_tex_copy.c
@@ -25,11 +25,11 @@
*
**************************************************************************/
-#include "mtypes.h"
-#include "enums.h"
-#include "image.h"
-#include "teximage.h"
-#include "mipmap.h"
+#include "main/mtypes.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/teximage.h"
+#include "main/mipmap.h"
#include "swrast/swrast.h"
#include "intel_screen.h"
@@ -60,7 +60,7 @@ get_teximage_source(struct intel_context *intel, GLenum internalFormat)
switch (internalFormat) {
case GL_DEPTH_COMPONENT:
- case GL_DEPTH_COMPONENT16_ARB:
+ case GL_DEPTH_COMPONENT16:
irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH);
if (irb && irb->region && irb->region->cpp == 2)
return irb->region;
@@ -98,7 +98,9 @@ do_copy_texsubimage(struct intel_context *intel,
get_teximage_source(intel, internalFormat);
if (!intelImage->mt || !src) {
- DBG("%s fail %p %p\n", __FUNCTION__, intelImage->mt, src);
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "%s fail %p %p\n",
+ __FUNCTION__, intelImage->mt, src);
return GL_FALSE;
}
@@ -110,58 +112,53 @@ do_copy_texsubimage(struct intel_context *intel,
intelImage->level);
const GLint orig_x = x;
const GLint orig_y = y;
- const struct gl_framebuffer *fb = ctx->DrawBuffer;
-
- if (_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, fb->_Xmax, fb->_Ymax,
- &x, &y, &width, &height)) {
- /* Update dst for clipped src. Need to also clip the source rect.
- */
- dstx += x - orig_x;
- dsty += y - orig_y;
-
- if (ctx->ReadBuffer->Name == 0) {
- /* reading from a window, adjust x, y */
- __DRIdrawablePrivate *dPriv = intel->driDrawable;
- GLuint window_y;
- /* window_y = position of window on screen if y=0=bottom */
- window_y = intel->intelScreen->height - (dPriv->y + dPriv->h);
- y = window_y + y;
- x += dPriv->x;
- }
- else {
- /* reading from a FBO */
- /* invert Y */
- y = ctx->ReadBuffer->Height - y - 1;
- }
-
-
- /* A bit of fiddling to get the blitter to work with -ve
- * pitches. But we get a nice inverted blit this way, so it's
- * worth it:
- */
- intelEmitCopyBlit(intel,
- intelImage->mt->cpp,
- -src->pitch,
- src->buffer,
- src->height * src->pitch * src->cpp,
- GL_FALSE,
- intelImage->mt->pitch,
- intelImage->mt->region->buffer,
- image_offset,
- intelImage->mt->region->tiled,
- x, y + height, dstx, dsty, width, height,
- GL_COPY); /* ? */
-
- intel_batchbuffer_flush(intel->batch);
+ GLshort src_pitch;
+
+ /* Update dst for clipped src. Need to also clip the source rect. */
+ dstx += x - orig_x;
+ dsty += y - orig_y;
+
+ /* image_offset may be non-page-aligned, but that's illegal for tiling. */
+ assert(intelImage->mt->region->tiling == I915_TILING_NONE);
+
+ if (ctx->ReadBuffer->Name == 0) {
+ /* reading from a window, adjust x, y */
+ __DRIdrawablePrivate *dPriv = intel->driDrawable;
+ y = dPriv->y + (dPriv->h - (y + height));
+ x += dPriv->x;
+
+ /* Invert the data coming from the source rectangle due to GL
+ * and hardware disagreeing on where y=0 is.
+ *
+ * It appears that our offsets and pitches get mangled
+ * appropriately by the hardware, and we don't need to adjust them
+ * on our own.
+ */
+ src_pitch = -src->pitch;
+ } else {
+ /* reading from a FBO, y is already oriented the way we like */
+ src_pitch = src->pitch;
}
- }
+ intelEmitCopyBlit(intel,
+ intelImage->mt->cpp,
+ src_pitch,
+ src->buffer,
+ 0,
+ src->tiling,
+ intelImage->mt->pitch,
+ intelImage->mt->region->buffer,
+ image_offset,
+ intelImage->mt->region->tiling,
+ x, y, dstx, dsty, width, height,
+ GL_COPY);
+ }
UNLOCK_HARDWARE(intel);
/* GL_SGIS_generate_mipmap */
if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) {
- intel_generate_mipmap(ctx, target, texObj);
+ ctx->Driver.GenerateMipmap(ctx, target, texObj);
}
return GL_TRUE;
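When the read buffer is a window, the blit above converts the GL origin
(bottom-left of the drawable) to the screen origin (top-left) and then negates
the source pitch so the copy walks the source rows in the opposite direction.
A worked sketch of the coordinate conversion with assumed numbers (drawable at
screen position (100, 50), 400 pixels tall, copying a 64-row region whose GL
y is 30):

/*   screen_y = dPriv->y + (dPriv->h - (y + height))
 *            = 50 + (400 - (30 + 64))
 *            = 356
 * src_pitch is then passed as -src->pitch so successive destination rows
 * read upward through the source.
 */
static int
window_y_to_screen_y(int drawable_y, int drawable_h, int gl_y, int height)
{
   return drawable_y + (drawable_h - (gl_y + height));
}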
@@ -182,6 +179,7 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
_mesa_select_tex_object(ctx, texUnit, target);
struct gl_texture_image *texImage =
_mesa_select_tex_image(ctx, texObj, target, level);
+ int srcx, srcy, dstx, dsty, height;
if (border)
goto fail;
@@ -193,10 +191,20 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
width, border,
GL_RGBA, CHAN_TYPE, NULL,
&ctx->DefaultPacking, texObj, texImage);
+ srcx = x;
+ srcy = y;
+ dstx = 0;
+ dsty = 0;
+ height = 1;
+ if (!_mesa_clip_copytexsubimage(ctx,
+ &dstx, &dsty,
+ &srcx, &srcy,
+ &width, &height))
+ return;
if (!do_copy_texsubimage(intel_context(ctx), target,
intel_texture_image(texImage),
- internalFormat, 0, 0, x, y, width, 1))
+ internalFormat, 0, 0, x, y, width, height))
goto fail;
return;
@@ -218,10 +226,21 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
_mesa_select_tex_object(ctx, texUnit, target);
struct gl_texture_image *texImage =
_mesa_select_tex_image(ctx, texObj, target, level);
+ int srcx, srcy, dstx, dsty;
if (border)
goto fail;
+ srcx = x;
+ srcy = y;
+ dstx = 0;
+ dsty = 0;
+ if (!_mesa_clip_copytexsubimage(ctx,
+ &dstx, &dsty,
+ &srcx, &srcy,
+ &width, &height))
+ return;
+
/* Setup or redefine the texture object, mipmap tree and texture
* image. Don't populate yet.
*/
diff --git a/shared/intel_tex_format.c b/shared/intel_tex_format.c
index 8ae80e1..5e418ac 100644
--- a/shared/intel_tex_format.c
+++ b/shared/intel_tex_format.c
@@ -1,7 +1,7 @@
#include "intel_context.h"
#include "intel_tex.h"
-#include "texformat.h"
-#include "enums.h"
+#include "main/texformat.h"
+#include "main/enums.h"
/* It works out that this function is fine for all the supported
* hardware. However, there is still a need to map the formats onto
@@ -134,8 +134,14 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
case GL_DEPTH_COMPONENT16:
case GL_DEPTH_COMPONENT24:
case GL_DEPTH_COMPONENT32:
+#if 0
return &_mesa_texformat_z16;
-
+#else
+ /* fall-through.
+ * 16bpp depth texture can't be paired with a stencil buffer so
+ * always use the combined depth/stencil format.
+ */
+#endif
case GL_DEPTH_STENCIL_EXT:
case GL_DEPTH24_STENCIL8_EXT:
return &_mesa_texformat_s8_z24;
@@ -158,7 +164,7 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
- return &_mesa_texformat_srgb_dxt1;
+ return &_mesa_texformat_srgb_dxt1;
#endif
default:
diff --git a/shared/intel_tex_image.c b/shared/intel_tex_image.c
index f261034..2ac7dce 100644
--- a/shared/intel_tex_image.c
+++ b/shared/intel_tex_image.c
@@ -2,26 +2,25 @@
#include <stdlib.h>
#include <stdio.h>
-#include "glheader.h"
-#include "macros.h"
-#include "mtypes.h"
-#include "enums.h"
-#include "colortab.h"
-#include "convolve.h"
-#include "context.h"
-#include "simple_list.h"
-#include "texcompress.h"
-#include "texformat.h"
-#include "texobj.h"
-#include "texstore.h"
-#include "teximage.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/enums.h"
+#include "main/colortab.h"
+#include "main/convolve.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "main/texcompress.h"
+#include "main/texformat.h"
+#include "main/texobj.h"
+#include "main/texstore.h"
+#include "main/teximage.h"
#include "intel_context.h"
#include "intel_mipmap_tree.h"
#include "intel_buffer_objects.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
-#include "intel_ioctl.h"
#include "intel_blit.h"
#include "intel_fbo.h"
@@ -238,8 +237,6 @@ try_pbo_upload(struct intel_context *intel,
dst_stride, dst_buffer, dst_offset, GL_FALSE,
0, 0, 0, 0, width, height,
GL_COPY);
-
- intel_batchbuffer_flush(intel->batch);
}
UNLOCK_HARDWARE(intel);
@@ -400,10 +397,25 @@ intelTexImage(GLcontext * ctx,
intel_miptree_reference(&intelImage->mt, intelObj->mt);
assert(intelImage->mt);
- }
+ } else if (intelImage->base.Border == 0) {
+ int comp_byte = 0;
+
+ if (intelImage->base.IsCompressed) {
+ comp_byte =
+ intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat);
+ }
+
+ /* Didn't fit in the object miptree, but it's suitable for inclusion in
+ * a miptree, so create one just for our level and store it in the image.
+ * It'll get moved into the object miptree at validate time.
+ */
+ intelImage->mt = intel_miptree_create(intel, target, internalFormat,
+ level, level,
+ width, height, depth,
+ intelImage->base.TexFormat->TexelBytes,
+ comp_byte);
- if (!intelImage->mt)
- DBG("XXX: Image did not fit into tree - storing in local memory!\n");
+ }
/* PBO fastpaths:
*/
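
The intelTexImage change above replaces the old "store in local memory" fallback: when a level does not fit the object's miptree but has no border, the driver now allocates a one-level miptree for just that image and lets validation fold it into the object tree later. A minimal sketch of that placement decision, with made-up types standing in for the intel_texture_object/intel_texture_image structures:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for the driver structures; fields reduced to what the
 * decision needs. */
struct miptree    { int first_level, last_level; };
struct tex_object { struct miptree *mt; };
struct tex_image  { struct miptree *mt; int level; int border; };

static struct miptree *
miptree_create(int first_level, int last_level)
{
   struct miptree *mt = malloc(sizeof(*mt));
   mt->first_level = first_level;
   mt->last_level  = last_level;
   return mt;
}

static bool
fits_in_tree(const struct miptree *mt, const struct tex_image *img)
{
   return mt && img->level >= mt->first_level && img->level <= mt->last_level;
}

/* Decide where an incoming level lives: share the object's tree when it
 * fits, otherwise (and only for border-free images) give the image its
 * own single-level tree to be merged in at validate time. */
static void
place_image(struct tex_object *obj, struct tex_image *img)
{
   if (fits_in_tree(obj->mt, img))
      img->mt = obj->mt;                 /* reference the object's tree */
   else if (img->border == 0)
      img->mt = miptree_create(img->level, img->level);
   /* else: leave img->mt NULL and take the software fallback path */
}

int main(void)
{
   struct tex_object obj = { miptree_create(0, 0) };
   struct tex_image  img = { NULL, 3, 0 };   /* level 3, no border */

   place_image(&obj, &img);
   printf("image uses %s tree\n", img.mt == obj.mt ? "the object's" : "its own");
   return 0;
}
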
@@ -718,9 +730,15 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
if (!intelObj)
return;
- __driParseEvents(pDRICtx, dPriv);
+ intel_update_renderbuffers(pDRICtx, dPriv);
rb = intel_fb->color_rb[0];
+ /* If the region isn't set, then intel_update_renderbuffers was unable
+ * to get the buffers for the drawable.
+ */
+ if (rb->region == NULL)
+ return;
+
type = GL_BGRA;
format = GL_UNSIGNED_BYTE;
internalFormat = (rb->region->cpp == 3 ? 3 : 4);
@@ -739,7 +757,7 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
intelObj->mt = mt;
texImage = _mesa_get_tex_image(&intel->ctx, texObj, target, level);
_mesa_init_teximage_fields(&intel->ctx, target, texImage,
- rb->region->pitch, rb->region->height, 1,
+ rb->region->width, rb->region->height, 1,
0, internalFormat);
intelImage = intel_texture_image(texImage);
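
The last intelSetTexBuffer hunk also corrects the texture width: region->pitch is the allocated row length (padded for alignment and tiling), while region->width is the logical size of the drawable, so initializing the teximage with the pitch made the texture wider than the window. A quick illustration with made-up numbers and a generic row-alignment rule:

#include <stdio.h>

/* Hypothetical numbers: a 1000-pixel-wide, 32bpp drawable whose rows
 * are padded to a 64-byte boundary. */
int main(void)
{
   int width_px = 1000;           /* logical drawable width */
   int cpp = 4;                   /* bytes per pixel */
   int align = 64;                /* row alignment in bytes */

   int pitch_bytes = ((width_px * cpp + align - 1) / align) * align;
   int pitch_px = pitch_bytes / cpp;

   /* Using pitch_px (1008 here) as the teximage width would sample the
    * padding at the right edge; width_px (1000) is what GL should see. */
   printf("width = %d px, pitch = %d px\n", width_px, pitch_px);
   return 0;
}
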
diff --git a/shared/intel_tex_layout.c b/shared/intel_tex_layout.c
index edc3a2e..e6f9a41 100644
--- a/shared/intel_tex_layout.c
+++ b/shared/intel_tex_layout.c
@@ -33,7 +33,7 @@
#include "intel_mipmap_tree.h"
#include "intel_tex_layout.h"
#include "intel_context.h"
-#include "macros.h"
+#include "main/macros.h"
GLuint intel_compressed_alignment(GLenum internalFormat)
{
diff --git a/shared/intel_tex_layout.h b/shared/intel_tex_layout.h
index 193699d..dbc90e6 100644
--- a/shared/intel_tex_layout.h
+++ b/shared/intel_tex_layout.h
@@ -30,7 +30,7 @@
* Michel Dänzer <michel@tungstengraphics.com>
*/
-#include "macros.h"
+#include "main/macros.h"
static GLuint minify( GLuint d )
diff --git a/shared/intel_tex_subimage.c b/shared/intel_tex_subimage.c
index 5428a1d..f86de56 100644
--- a/shared/intel_tex_subimage.c
+++ b/shared/intel_tex_subimage.c
@@ -26,11 +26,11 @@
*
**************************************************************************/
-#include "mtypes.h"
-#include "texobj.h"
-#include "texstore.h"
-#include "texcompress.h"
-#include "enums.h"
+#include "main/mtypes.h"
+#include "main/texobj.h"
+#include "main/texstore.h"
+#include "main/texcompress.h"
+#include "main/enums.h"
#include "intel_context.h"
#include "intel_tex.h"
@@ -184,3 +184,18 @@ intelTexSubImage1D(GLcontext * ctx,
format, type, pixels, packing, texObj, texImage);
}
+
+void
+intelCompressedTexSubImage2D(GLcontext * ctx,
+ GLenum target,
+ GLint level,
+ GLint xoffset, GLint yoffset,
+ GLsizei width, GLsizei height,
+ GLenum format, GLsizei imageSize,
+ const GLvoid * pixels,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage)
+{
+ fprintf(stderr, "stubbed CompressedTexSubImage2D: %dx%d@%dx%d\n",
+ width, height, xoffset, yoffset);
+}
diff --git a/shared/intel_tex_validate.c b/shared/intel_tex_validate.c
index 1b3aa89..820683d 100644
--- a/shared/intel_tex_validate.c
+++ b/shared/intel_tex_validate.c
@@ -1,5 +1,5 @@
-#include "mtypes.h"
-#include "macros.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
#include "intel_context.h"
#include "intel_batchbuffer.h"
@@ -125,13 +125,10 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
struct intel_texture_object *intelObj = intel_texture_object(tObj);
int comp_byte = 0;
int cpp;
-
GLuint face, i;
GLuint nr_faces = 0;
struct intel_texture_image *firstImage;
- GLboolean need_flush = GL_FALSE;
-
/* We know/require this is true by now:
*/
assert(intelObj->base._Complete);
@@ -144,10 +141,7 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
/* Fallback case:
*/
- if (firstImage->base.Border ||
- ((firstImage->base._BaseFormat == GL_DEPTH_COMPONENT) &&
- ((tObj->WrapS == GL_CLAMP_TO_BORDER) ||
- (tObj->WrapT == GL_CLAMP_TO_BORDER)))) {
+ if (firstImage->base.Border) {
if (intelObj->mt) {
intel_miptree_release(intel, &intelObj->mt);
}
@@ -227,21 +221,10 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
*/
if (intelObj->mt != intelImage->mt) {
copy_image_data_to_tree(intel, intelObj, intelImage);
- need_flush = GL_TRUE;
}
}
}
-#ifdef I915
- /* XXX: what is this flush about?
- * On 965, it causes a batch flush in the middle of the state relocation
- * emits, which means that the eventual rendering doesn't have all of the
- * required relocations in place.
- */
- if (need_flush)
- intel_batchbuffer_flush(intel->batch);
-#endif
-
return GL_TRUE;
}
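
The removed #ifdef I915 block is the flush that the deleted XXX comment complains about: on 965, flushing while state is still being emitted submits a batch early, so the rendering that follows goes out without the relocation entries it depends on. A toy model (nothing like the real intel_batchbuffer API; names and counters are invented) of why a mid-emit flush is harmful:

#include <stdio.h>

/* Toy batch: counts commands and the relocation entries that go with
 * them; a flush submits and resets both. */
struct batch { int n_cmds; int n_relocs; };

static void emit_state(struct batch *b) { b->n_cmds++; b->n_relocs++; }
static void emit_draw(struct batch *b)  { b->n_cmds++; }

static void flush(struct batch *b)
{
   printf("submitted %d cmds with %d relocs\n", b->n_cmds, b->n_relocs);
   b->n_cmds = b->n_relocs = 0;
}

int main(void)
{
   struct batch b = { 0, 0 };

   emit_state(&b);   /* surface state pointing at a texture buffer */
   flush(&b);        /* premature flush: that relocation is submitted now */
   emit_draw(&b);    /* the draw that needed the state above */
   flush(&b);        /* ...goes out in a batch with no relocations */
   return 0;
}
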