summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Keith Whitwell <keithw@vmware.com>, 2009-12-22 09:40:39 +0000
committer: Keith Whitwell <keithw@vmware.com>, 2009-12-22 09:40:39 +0000
commit: aa02683e45f1eaf61bba2ba7eeda7686efeed2ca (patch)
tree: 63e0ef2fa85e5d7ebd6ffc6ae9043ce0819251a2
parent: ebbc73d1aed283c9bc4aa2b37bed4374bbaec5b5 (diff)
parent: 0fc4dd3819af252c028ed43bbd668b4f34104e32 (diff)
Merge branch 'i965g-restart'
Conflicts: configure.ac
-rw-r--r--Makefile1
-rw-r--r--SConstruct8
-rw-r--r--configs/default2
-rw-r--r--configs/linux-dri2
-rw-r--r--configs/linux-i9658
-rw-r--r--configure.ac6
-rw-r--r--progs/fp/fp-tri.c11
-rw-r--r--progs/fp/mov-imm.txt3
-rw-r--r--progs/fp/mov-param.txt4
-rw-r--r--progs/trivial/.gitignore1
-rw-r--r--progs/trivial/Makefile1
-rw-r--r--progs/trivial/tri-orig.c10
-rw-r--r--progs/trivial/vbo-tri.c131
-rw-r--r--progs/vp/add-param-imm.txt7
-rw-r--r--src/gallium/auxiliary/util/u_debug.c27
-rw-r--r--src/gallium/auxiliary/util/u_math.h13
-rw-r--r--src/gallium/auxiliary/util/u_prim.h2
-rw-r--r--src/gallium/auxiliary/util/u_upload_mgr.h2
-rw-r--r--src/gallium/drivers/i965/Makefile74
-rw-r--r--src/gallium/drivers/i965/SConscript77
-rw-r--r--src/gallium/drivers/i965/brw_batchbuffer.c202
-rw-r--r--src/gallium/drivers/i965/brw_batchbuffer.h148
-rw-r--r--src/gallium/drivers/i965/brw_cc.c111
-rw-r--r--src/gallium/drivers/i965/brw_clip.c224
-rw-r--r--src/gallium/drivers/i965/brw_clip.h199
-rw-r--r--src/gallium/drivers/i965/brw_clip_line.c271
-rw-r--r--src/gallium/drivers/i965/brw_clip_point.c48
-rw-r--r--src/gallium/drivers/i965/brw_clip_state.c209
-rw-r--r--src/gallium/drivers/i965/brw_clip_tri.c595
-rw-r--r--src/gallium/drivers/i965/brw_clip_unfilled.c497
-rw-r--r--src/gallium/drivers/i965/brw_clip_util.c388
-rw-r--r--src/gallium/drivers/i965/brw_context.c154
-rw-r--r--src/gallium/drivers/i965/brw_context.h853
-rw-r--r--src/gallium/drivers/i965/brw_curbe.c390
-rw-r--r--src/gallium/drivers/i965/brw_debug.h43
-rw-r--r--src/gallium/drivers/i965/brw_defines.h847
-rw-r--r--src/gallium/drivers/i965/brw_disasm.c922
-rw-r--r--src/gallium/drivers/i965/brw_disasm.h34
-rw-r--r--src/gallium/drivers/i965/brw_draw.c291
-rw-r--r--src/gallium/drivers/i965/brw_draw.h39
-rw-r--r--src/gallium/drivers/i965/brw_draw_upload.c542
-rw-r--r--src/gallium/drivers/i965/brw_eu.c262
-rw-r--r--src/gallium/drivers/i965/brw_eu.h992
-rw-r--r--src/gallium/drivers/i965/brw_eu_debug.c94
-rw-r--r--src/gallium/drivers/i965/brw_eu_emit.c1433
-rw-r--r--src/gallium/drivers/i965/brw_eu_util.c126
-rw-r--r--src/gallium/drivers/i965/brw_gs.c216
-rw-r--r--src/gallium/drivers/i965/brw_gs.h76
-rw-r--r--src/gallium/drivers/i965/brw_gs_emit.c181
-rw-r--r--src/gallium/drivers/i965/brw_gs_state.c169
-rw-r--r--src/gallium/drivers/i965/brw_misc_state.c513
-rw-r--r--src/gallium/drivers/i965/brw_pipe_blend.c208
-rw-r--r--src/gallium/drivers/i965/brw_pipe_clear.c218
-rw-r--r--src/gallium/drivers/i965/brw_pipe_depth.c172
-rw-r--r--src/gallium/drivers/i965/brw_pipe_fb.c77
-rw-r--r--src/gallium/drivers/i965/brw_pipe_flush.c83
-rw-r--r--src/gallium/drivers/i965/brw_pipe_misc.c54
-rw-r--r--src/gallium/drivers/i965/brw_pipe_query.c263
-rw-r--r--src/gallium/drivers/i965/brw_pipe_rast.c161
-rw-r--r--src/gallium/drivers/i965/brw_pipe_rast.h16
-rw-r--r--src/gallium/drivers/i965/brw_pipe_sampler.c233
-rw-r--r--src/gallium/drivers/i965/brw_pipe_shader.c299
-rw-r--r--src/gallium/drivers/i965/brw_pipe_vertex.c78
-rw-r--r--src/gallium/drivers/i965/brw_reg.h115
-rw-r--r--src/gallium/drivers/i965/brw_screen.c403
-rw-r--r--src/gallium/drivers/i965/brw_screen.h199
-rw-r--r--src/gallium/drivers/i965/brw_screen_buffers.c202
-rw-r--r--src/gallium/drivers/i965/brw_screen_surface.c262
-rw-r--r--src/gallium/drivers/i965/brw_screen_tex_layout.c414
-rw-r--r--src/gallium/drivers/i965/brw_screen_texture.c572
-rw-r--r--src/gallium/drivers/i965/brw_sf.c216
-rw-r--r--src/gallium/drivers/i965/brw_sf.h122
-rw-r--r--src/gallium/drivers/i965/brw_sf_emit.c765
-rw-r--r--src/gallium/drivers/i965/brw_sf_state.c333
-rw-r--r--src/gallium/drivers/i965/brw_state.h174
-rw-r--r--src/gallium/drivers/i965/brw_state_batch.c98
-rw-r--r--src/gallium/drivers/i965/brw_state_cache.c617
-rw-r--r--src/gallium/drivers/i965/brw_state_debug.c153
-rw-r--r--src/gallium/drivers/i965/brw_state_upload.c270
-rw-r--r--src/gallium/drivers/i965/brw_structs.h1576
-rw-r--r--src/gallium/drivers/i965/brw_structs_dump.c1247
-rw-r--r--src/gallium/drivers/i965/brw_structs_dump.h276
-rwxr-xr-xsrc/gallium/drivers/i965/brw_structs_dump.py291
-rw-r--r--src/gallium/drivers/i965/brw_swtnl.c95
-rw-r--r--src/gallium/drivers/i965/brw_types.h21
-rw-r--r--src/gallium/drivers/i965/brw_urb.c263
-rw-r--r--src/gallium/drivers/i965/brw_util.c38
-rw-r--r--src/gallium/drivers/i965/brw_util.h44
-rw-r--r--src/gallium/drivers/i965/brw_vs.c145
-rw-r--r--src/gallium/drivers/i965/brw_vs.h109
-rw-r--r--src/gallium/drivers/i965/brw_vs_emit.c1673
-rw-r--r--src/gallium/drivers/i965/brw_vs_state.c201
-rw-r--r--src/gallium/drivers/i965/brw_vs_surface_state.c232
-rw-r--r--src/gallium/drivers/i965/brw_winsys.h307
-rw-r--r--src/gallium/drivers/i965/brw_winsys_debug.c87
-rw-r--r--src/gallium/drivers/i965/brw_wm.c319
-rw-r--r--src/gallium/drivers/i965/brw_wm.h344
-rw-r--r--src/gallium/drivers/i965/brw_wm_constant_buffer.c165
-rw-r--r--src/gallium/drivers/i965/brw_wm_debug.c256
-rw-r--r--src/gallium/drivers/i965/brw_wm_emit.c1521
-rw-r--r--src/gallium/drivers/i965/brw_wm_fp.c1224
-rw-r--r--src/gallium/drivers/i965/brw_wm_glsl.c2032
-rw-r--r--src/gallium/drivers/i965/brw_wm_iz.c156
-rw-r--r--src/gallium/drivers/i965/brw_wm_pass0.c366
-rw-r--r--src/gallium/drivers/i965/brw_wm_pass1.c292
-rw-r--r--src/gallium/drivers/i965/brw_wm_pass2.c334
-rw-r--r--src/gallium/drivers/i965/brw_wm_sampler_state.c228
-rw-r--r--src/gallium/drivers/i965/brw_wm_state.c339
-rw-r--r--src/gallium/drivers/i965/brw_wm_surface_state.c294
-rw-r--r--src/gallium/drivers/i965/intel_decode.c1790
-rw-r--r--src/gallium/drivers/i965/intel_decode.h29
-rw-r--r--src/gallium/drivers/i965/intel_structs.h132
-rw-r--r--src/gallium/winsys/drm/SConscript5
-rw-r--r--src/gallium/winsys/drm/i965/Makefile12
-rw-r--r--src/gallium/winsys/drm/i965/SConscript7
-rw-r--r--src/gallium/winsys/drm/i965/dri/Makefile26
-rw-r--r--src/gallium/winsys/drm/i965/dri/SConscript19
-rw-r--r--src/gallium/winsys/drm/i965/egl/Makefile29
-rw-r--r--src/gallium/winsys/drm/i965/gem/Makefile14
-rw-r--r--src/gallium/winsys/drm/i965/gem/SConscript15
-rw-r--r--src/gallium/winsys/drm/i965/gem/i965_drm_api.c243
-rw-r--r--src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c427
-rw-r--r--src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h64
-rw-r--r--src/gallium/winsys/drm/i965/xlib/Makefile97
-rw-r--r--src/gallium/winsys/drm/i965/xlib/xlib_i965.c522
-rw-r--r--src/gallium/winsys/drm/i965/xorg/Makefile57
-rw-r--r--src/gallium/winsys/drm/i965/xorg/intel_xorg.c147
-rw-r--r--src/gallium/winsys/drm/intel/dri/Makefile1
-rw-r--r--src/mesa/state_tracker/st_draw.c32
129 files changed, 36360 insertions, 19 deletions
diff --git a/Makefile b/Makefile
index ace4006d891..eb8dcc1235a 100644
--- a/Makefile
+++ b/Makefile
@@ -105,6 +105,7 @@ irix6-n32-static \
irix6-o32 \
irix6-o32-static \
linux \
+linux-i965 \
linux-alpha \
linux-alpha-static \
linux-cell \
diff --git a/SConstruct b/SConstruct
index 122b8cf916f..8e063e28078 100644
--- a/SConstruct
+++ b/SConstruct
@@ -32,10 +32,10 @@ import common
default_statetrackers = 'mesa'
if common.default_platform in ('linux', 'freebsd', 'darwin'):
- default_drivers = 'softpipe,failover,svga,i915,trace,identity,llvmpipe'
+ default_drivers = 'softpipe,failover,svga,i915,i965,trace,identity,llvmpipe'
default_winsys = 'xlib'
elif common.default_platform in ('winddk',):
- default_drivers = 'softpipe,svga,i915,trace,identity'
+ default_drivers = 'softpipe,svga,i915,i965,trace,identity'
default_winsys = 'all'
else:
default_drivers = 'all'
@@ -46,9 +46,9 @@ common.AddOptions(opts)
opts.Add(ListVariable('statetrackers', 'state trackers to build', default_statetrackers,
['mesa', 'python', 'xorg']))
opts.Add(ListVariable('drivers', 'pipe drivers to build', default_drivers,
- ['softpipe', 'failover', 'svga', 'i915', 'cell', 'trace', 'r300', 'identity', 'llvmpipe']))
+ ['softpipe', 'failover', 'svga', 'i915', 'i965', 'cell', 'trace', 'r300', 'identity', 'llvmpipe']))
opts.Add(ListVariable('winsys', 'winsys drivers to build', default_winsys,
- ['xlib', 'vmware', 'intel', 'gdi', 'radeon']))
+ ['xlib', 'vmware', 'intel', 'i965', 'gdi', 'radeon']))
opts.Add(EnumVariable('MSVS_VERSION', 'MS Visual C++ version', None, allowed_values=('7.1', '8.0', '9.0')))
diff --git a/configs/default b/configs/default
index eb6123d1e0a..8a975982a8b 100644
--- a/configs/default
+++ b/configs/default
@@ -96,7 +96,7 @@ EGL_DRIVERS_DIRS = demo
GALLIUM_DIRS = auxiliary drivers state_trackers
GALLIUM_AUXILIARY_DIRS = rbug draw translate cso_cache pipebuffer tgsi sct rtasm util indices vl
GALLIUM_AUXILIARIES = $(foreach DIR,$(GALLIUM_AUXILIARY_DIRS),$(TOP)/src/gallium/auxiliary/$(DIR)/lib$(DIR).a)
-GALLIUM_DRIVERS_DIRS = softpipe failover svga i915 trace identity
+GALLIUM_DRIVERS_DIRS = softpipe failover svga i915 i965 trace identity
GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVERS_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a)
GALLIUM_WINSYS_DIRS = xlib egl_xlib
GALLIUM_WINSYS_DRM_DIRS =
diff --git a/configs/linux-dri b/configs/linux-dri
index 0802543347a..cf1f4e19833 100644
--- a/configs/linux-dri
+++ b/configs/linux-dri
@@ -60,7 +60,7 @@ EGL_DRIVERS_DIRS = demo glx
DRIVER_DIRS = dri
WINDOW_SYSTEM = dri
GALLIUM_WINSYS_DIRS = drm
-GALLIUM_WINSYS_DRM_DIRS = vmware intel
+GALLIUM_WINSYS_DRM_DIRS = vmware intel i965
GALLIUM_STATE_TRACKERS_DIRS = egl
DRI_DIRS = i810 i915 i965 mach64 mga r128 r200 r300 radeon \
diff --git a/configs/linux-i965 b/configs/linux-i965
new file mode 100644
index 00000000000..e66abc347bb
--- /dev/null
+++ b/configs/linux-i965
@@ -0,0 +1,8 @@
+# Configuration for standalone mode i965 debug
+
+include $(TOP)/configs/linux-debug
+
+CONFIG_NAME = linux-i965
+
+# NOTE(review): configs/default spells the driver list GALLIUM_DRIVERS_DIRS
+# (with an "S"). Confirm that GALLIUM_DRIVER_DIRS below is the intended name;
+# otherwise this assignment may have no effect on which drivers are built.
+GALLIUM_DRIVER_DIRS = i965
+GALLIUM_WINSYS_DIRS = drm/i965/xlib
diff --git a/configure.ac b/configure.ac
index dd0c78547c4..d83dd4315b7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1221,10 +1221,10 @@ AC_ARG_ENABLE([gallium-intel],
[enable_gallium_intel="$enableval"],
[enable_gallium_intel=auto])
if test "x$enable_gallium_intel" = xyes; then
- GALLIUM_WINSYS_DRM_DIRS="$GALLIUM_WINSYS_DRM_DIRS intel"
- GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i915"
+ GALLIUM_WINSYS_DRM_DIRS="$GALLIUM_WINSYS_DRM_DIRS intel i965"
+ GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i915 i965"
elif test "x$enable_gallium_intel" = xauto; then
- GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i915"
+ GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i915 i965"
fi
dnl
diff --git a/progs/fp/fp-tri.c b/progs/fp/fp-tri.c
index 26af66ad84e..ed29a2d683d 100644
--- a/progs/fp/fp-tri.c
+++ b/progs/fp/fp-tri.c
@@ -176,6 +176,17 @@ static void Init( void )
}
+ {
+ const float Ambient[4] = { 0.0, 1.0, 0.0, 0.0 };
+ const float Diffuse[4] = { 1.0, 0.0, 0.0, 0.0 };
+ const float Specular[4] = { 0.0, 0.0, 1.0, 0.0 };
+ const float Emission[4] = { 0.0, 0.0, 0.0, 1.0 };
+ glMaterialfv(GL_FRONT_AND_BACK, GL_AMBIENT, Ambient);
+ glMaterialfv(GL_FRONT_AND_BACK, GL_DIFFUSE, Diffuse);
+ glMaterialfv(GL_FRONT_AND_BACK, GL_SPECULAR, Specular);
+ glMaterialfv(GL_FRONT_AND_BACK, GL_EMISSION, Emission);
+ }
+
glClearColor(.1, .3, .5, 0);
}
diff --git a/progs/fp/mov-imm.txt b/progs/fp/mov-imm.txt
new file mode 100644
index 00000000000..38e48079d09
--- /dev/null
+++ b/progs/fp/mov-imm.txt
@@ -0,0 +1,3 @@
+!!ARBfp1.0
+MOV result.color, {0.5, 0.8, 0.3, 1.0};
+END
diff --git a/progs/fp/mov-param.txt b/progs/fp/mov-param.txt
new file mode 100644
index 00000000000..13d82fe00b8
--- /dev/null
+++ b/progs/fp/mov-param.txt
@@ -0,0 +1,4 @@
+!!ARBfp1.0
+PARAM Diffuse = state.material.diffuse;
+MOV result.color, Diffuse;
+END
diff --git a/progs/trivial/.gitignore b/progs/trivial/.gitignore
index 4d6e405c500..4317eb607fe 100644
--- a/progs/trivial/.gitignore
+++ b/progs/trivial/.gitignore
@@ -147,6 +147,7 @@ vbo-drawarrays
vbo-drawelements
vbo-drawrange
vbo-noninterleaved
+vbo-tri
vp-array
vp-array-int
vp-clip
diff --git a/progs/trivial/Makefile b/progs/trivial/Makefile
index 70728616d28..e15ec33ab59 100644
--- a/progs/trivial/Makefile
+++ b/progs/trivial/Makefile
@@ -153,6 +153,7 @@ SOURCES = \
tristrip-clip.c \
tristrip-flat.c \
tristrip.c \
+ vbo-tri.c \
vbo-drawarrays.c \
vbo-noninterleaved.c \
vbo-drawelements.c \
diff --git a/progs/trivial/tri-orig.c b/progs/trivial/tri-orig.c
index d86d34c39de..f86ac52a026 100644
--- a/progs/trivial/tri-orig.c
+++ b/progs/trivial/tri-orig.c
@@ -51,7 +51,7 @@ static void Reshape(int width, int height)
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
-/* glOrtho(-1.0, 1.0, -1.0, 1.0, -0.5, 1000.0); */
+ glOrtho(-1.0, 1.0, -1.0, 1.0, -0.5, 1000.0);
glMatrixMode(GL_MODELVIEW);
}
@@ -74,11 +74,11 @@ static void Draw(void)
glBegin(GL_TRIANGLES);
glColor3f(0,0,.7);
- glVertex3f( 0.9, -0.9, -0.0);
+ glVertex3f( 0.9, -0.9, -30.0);
glColor3f(.8,0,0);
- glVertex3f( 0.9, 0.9, -0.0);
+ glVertex3f( 0.9, 0.9, -30.0);
glColor3f(0,.9,0);
- glVertex3f(-0.9, 0.0, -0.0);
+ glVertex3f(-0.9, 0.0, -30.0);
glEnd();
glFlush();
@@ -119,7 +119,7 @@ int main(int argc, char **argv)
glutInitWindowPosition(0, 0); glutInitWindowSize( 250, 250);
- type = GLUT_RGB | GLUT_ALPHA;
+ type = GLUT_RGB;
type |= (doubleBuffer) ? GLUT_DOUBLE : GLUT_SINGLE;
glutInitDisplayMode(type);
diff --git a/progs/trivial/vbo-tri.c b/progs/trivial/vbo-tri.c
new file mode 100644
index 00000000000..d4cba14414c
--- /dev/null
+++ b/progs/trivial/vbo-tri.c
@@ -0,0 +1,131 @@
+/* Even simpler for many drivers than trivial/tri -- pass-through
+ * vertex shader and vertex data in a VBO.
+ */
+
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <GL/glew.h>
+#include <GL/glut.h>
+
+
+/* One triangle: each vertex is a 4-float position followed by a 4-float
+ * colour, interleaved so the whole array can be uploaded as a single VBO.
+ */
+struct {
+   GLfloat pos[4];
+   GLfloat color[4];
+} verts[] =
+{
+   { { -0.9, -0.9, 0.0, 1.0 }, { .8, 0, 0, 1 } },
+   { {  0.9, -0.9, 0.0, 1.0 }, { 0, .9, 0, 1 } },
+   { {  0,    0.9, 0.0, 1.0 }, { 0, 0, .7, 1 } },
+};
+
+GLuint arrayObj;
+
+static void Init( void )
+{
+ GLint errno;
+ GLuint prognum;
+
+ static const char *prog1 =
+ "!!ARBvp1.0\n"
+ "MOV result.color, vertex.color;\n"
+ "MOV result.position, vertex.position;\n"
+ "END\n";
+
+
+ glGenProgramsARB(1, &prognum);
+
+ glBindProgramARB(GL_VERTEX_PROGRAM_ARB, prognum);
+ glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
+ strlen(prog1), (const GLubyte *) prog1);
+
+ assert(glIsProgramARB(prognum));
+ errno = glGetError();
+ printf("glGetError = %d\n", errno);
+ if (errno != GL_NO_ERROR)
+ {
+ GLint errorpos;
+
+ glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errorpos);
+ printf("errorpos: %d\n", errorpos);
+ printf("%s\n", (char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB));
+ }
+
+
+ glEnableClientState( GL_VERTEX_ARRAY );
+ glEnableClientState( GL_COLOR_ARRAY );
+
+ glGenBuffersARB(1, &arrayObj);
+ glBindBufferARB(GL_ARRAY_BUFFER_ARB, arrayObj);
+ glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), verts, GL_STATIC_DRAW_ARB);
+
+ glVertexPointer( 4, GL_FLOAT, sizeof(verts[0]), 0 );
+ glColorPointer( 4, GL_FLOAT, sizeof(verts[0]), (void *)(4*sizeof(float)) );
+}
+
+
+
+/* GLUT display callback: clear to grey, draw the triangle through the
+ * vertex program, and present the back buffer.
+ */
+static void Display( void )
+{
+   glClearColor(0.3, 0.3, 0.3, 1);
+   glClear( GL_COLOR_BUFFER_BIT );
+
+   /* Init() loads the program on the ARB target, so enable the ARB target
+    * here as well.  GL_VERTEX_PROGRAM_NV has the same enum value, so this
+    * is a naming consistency fix rather than a behaviour change.
+    */
+   glEnable(GL_VERTEX_PROGRAM_ARB);
+   glDrawArrays( GL_TRIANGLES, 0, 3 );
+
+   glutSwapBuffers();
+}
+
+
+/* GLUT reshape callback: make the viewport cover the whole window and
+ * reload both matrix stacks.
+ */
+static void Reshape( int width, int height )
+{
+ glViewport( 0, 0, width, height );
+ glMatrixMode( GL_PROJECTION );
+ glLoadIdentity();
+ /* Orthographic volume spanning [-1,1] in x and y. */
+ glOrtho(-1.0, 1.0, -1.0, 1.0, -0.5, 1000.0);
+ glMatrixMode( GL_MODELVIEW );
+ glLoadIdentity();
+ /*glTranslatef( 0.0, 0.0, -15.0 );*/
+}
+
+
+/* GLUT keyboard callback: Escape quits, any other key just triggers a
+ * redraw.  The pointer position is not used.
+ */
+static void Key( unsigned char key, int x, int y )
+{
+   (void) x;
+   (void) y;
+
+   /* 27 is the ASCII code for Escape. */
+   if (key == 27)
+      exit(0);
+
+   glutPostRedisplay();
+}
+
+
+
+
+/* Create a 250x250 double-buffered RGB window, register the GLUT callbacks,
+ * set up GL state and enter the GLUT main loop.
+ */
+int main( int argc, char *argv[] )
+{
+ glutInit( &argc, argv );
+ glutInitWindowPosition( 0, 0 );
+ glutInitWindowSize( 250, 250 );
+ glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
+ glutCreateWindow(argv[0]);
+ glewInit();
+ glutReshapeFunc( Reshape );
+ glutKeyboardFunc( Key );
+ glutDisplayFunc( Display );
+ /* Init() calls the ARB program/buffer entry points, so it runs only after
+ * the window has been created and glewInit() has been called.
+ */
+ Init();
+ glutMainLoop();
+ return 0;
+}
diff --git a/progs/vp/add-param-imm.txt b/progs/vp/add-param-imm.txt
new file mode 100644
index 00000000000..90bcf96528f
--- /dev/null
+++ b/progs/vp/add-param-imm.txt
@@ -0,0 +1,7 @@
+!!ARBvp1.0
+TEMP R0;
+PARAM Emission = state.material.emission;
+ADD R0, vertex.color, {-0.5}.x;
+ADD result.color, R0, Emission.w;
+MOV result.position, vertex.position;
+END
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 27e0b0d1595..4e01123fff1 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -70,6 +70,7 @@
#include "util/u_stream.h"
#include "util/u_math.h"
#include "util/u_tile.h"
+#include "util/u_prim.h"
#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY
@@ -601,6 +602,32 @@ const char *pf_name( enum pipe_format format )
}
+
+/* Name table for the PIPE_PRIM_* values, consumed by u_prim_name().
+ * Without DEBUG defined the table holds only the terminator entry.
+ */
+static const struct debug_named_value pipe_prim_names[] = {
+#ifdef DEBUG
+ DEBUG_NAMED_VALUE(PIPE_PRIM_POINTS),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_LINES),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_LOOP),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_STRIP),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLES),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_STRIP),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_FAN),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_QUADS),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_QUAD_STRIP),
+ DEBUG_NAMED_VALUE(PIPE_PRIM_POLYGON),
+#endif
+ DEBUG_NAMED_VALUE_END
+};
+
+
+/**
+ * Return a printable name for a PIPE_PRIM_* value by looking it up in
+ * pipe_prim_names via debug_dump_enum().
+ */
+const char *u_prim_name( unsigned prim )
+{
+   const char *name = debug_dump_enum(pipe_prim_names, prim);
+
+   return name;
+}
+
+
+
+
#ifdef DEBUG
void debug_dump_image(const char *prefix,
unsigned format, unsigned cpp,
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index b76592d1ec6..81aeb83cbb5 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -583,6 +583,19 @@ do { \
#endif
+/**
+ * Convert a float to unsigned fixed point with \p frac_bits fractional bits.
+ *
+ * Negative inputs clamp to zero.  The scaled value is truncated toward zero
+ * by the float-to-integer conversion; values too large for uint32_t are not
+ * range-checked.
+ */
+static INLINE uint32_t util_unsigned_fixed(float value, unsigned frac_bits)
+{
+   /* Shift an unsigned 1 so that frac_bits == 31 does not overflow int. */
+   value *= (float)(1u << frac_bits);
+   return value < 0 ? 0 : (uint32_t)value;
+}
+
+/**
+ * Convert a float to signed fixed point with \p frac_bits fractional bits.
+ *
+ * The scaled value is truncated toward zero by the float-to-integer
+ * conversion; no range checking is performed.
+ */
+static INLINE int32_t util_signed_fixed(float value, unsigned frac_bits)
+{
+   /* Shift an unsigned 1 so that frac_bits == 31 does not overflow int. */
+   return (int32_t)(value * (float)(1u << frac_bits));
+}
+
+
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h
index a9b533eea70..74343299623 100644
--- a/src/gallium/auxiliary/util/u_prim.h
+++ b/src/gallium/auxiliary/util/u_prim.h
@@ -135,4 +135,6 @@ static INLINE unsigned u_reduced_prim( unsigned pipe_prim )
}
}
+const char *u_prim_name( unsigned pipe_prim );
+
#endif
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h
index 745b5834af6..e158bed9d04 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -32,6 +32,8 @@
#ifndef U_UPLOAD_MGR_H
#define U_UPLOAD_MGR_H
+#include "pipe/p_defines.h"
+
struct pipe_screen;
struct pipe_buffer;
struct u_upload_mgr;
diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
new file mode 100644
index 00000000000..95fd3cd69bd
--- /dev/null
+++ b/src/gallium/drivers/i965/Makefile
@@ -0,0 +1,74 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = i965
+
+C_SOURCES = \
+ brw_cc.c \
+ brw_clip.c \
+ brw_clip_line.c \
+ brw_clip_point.c \
+ brw_clip_state.c \
+ brw_clip_tri.c \
+ brw_clip_unfilled.c \
+ brw_clip_util.c \
+ brw_context.c \
+ brw_curbe.c \
+ brw_disasm.c \
+ brw_draw.c \
+ brw_draw_upload.c \
+ brw_eu.c \
+ brw_eu_debug.c \
+ brw_eu_emit.c \
+ brw_eu_util.c \
+ brw_gs.c \
+ brw_gs_emit.c \
+ brw_gs_state.c \
+ brw_misc_state.c \
+ brw_pipe_blend.c \
+ brw_pipe_depth.c \
+ brw_pipe_fb.c \
+ brw_pipe_query.c \
+ brw_pipe_shader.c \
+ brw_pipe_flush.c \
+ brw_pipe_misc.c \
+ brw_pipe_sampler.c \
+ brw_pipe_vertex.c \
+ brw_pipe_clear.c \
+ brw_pipe_rast.c \
+ brw_sf.c \
+ brw_sf_emit.c \
+ brw_sf_state.c \
+ brw_state_batch.c \
+ brw_state_debug.c \
+ brw_state_cache.c \
+ brw_state_upload.c \
+ brw_structs_dump.c \
+ brw_swtnl.c \
+ brw_urb.c \
+ brw_util.c \
+ brw_vs.c \
+ brw_vs_emit.c \
+ brw_vs_state.c \
+ brw_vs_surface_state.c \
+ brw_wm.c \
+ brw_wm_debug.c \
+ brw_wm_emit.c \
+ brw_wm_fp.c \
+ brw_wm_iz.c \
+ brw_wm_pass0.c \
+ brw_wm_pass1.c \
+ brw_wm_pass2.c \
+ brw_wm_sampler_state.c \
+ brw_wm_state.c \
+ brw_wm_surface_state.c \
+ brw_screen.c \
+ brw_screen_buffers.c \
+ brw_screen_tex_layout.c \
+ brw_screen_texture.c \
+ brw_screen_surface.c \
+ brw_batchbuffer.c \
+ brw_winsys_debug.c \
+ intel_decode.c
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript
new file mode 100644
index 00000000000..9c2faaf4b49
--- /dev/null
+++ b/src/gallium/drivers/i965/SConscript
@@ -0,0 +1,77 @@
+Import('*')
+
+env = env.Clone()
+
+i965 = env.ConvenienceLibrary(
+ target = 'i965',
+ source = [
+ 'brw_batchbuffer.c',
+ 'brw_cc.c',
+ 'brw_clip.c',
+ 'brw_clip_line.c',
+ 'brw_clip_point.c',
+ 'brw_clip_state.c',
+ 'brw_clip_tri.c',
+ 'brw_clip_unfilled.c',
+ 'brw_clip_util.c',
+ 'brw_context.c',
+ 'brw_curbe.c',
+ 'brw_disasm.c',
+ 'brw_draw.c',
+ 'brw_draw_upload.c',
+ 'brw_eu.c',
+ 'brw_eu_debug.c',
+ 'brw_eu_emit.c',
+ 'brw_eu_util.c',
+ 'brw_gs.c',
+ 'brw_gs_emit.c',
+ 'brw_gs_state.c',
+ 'brw_misc_state.c',
+ 'brw_pipe_blend.c',
+ 'brw_pipe_clear.c',
+ 'brw_pipe_depth.c',
+ 'brw_pipe_fb.c',
+ 'brw_pipe_flush.c',
+ 'brw_pipe_misc.c',
+ 'brw_pipe_query.c',
+ 'brw_pipe_rast.c',
+ 'brw_pipe_sampler.c',
+ 'brw_pipe_shader.c',
+ 'brw_pipe_vertex.c',
+ 'brw_screen_buffers.c',
+ 'brw_screen.c',
+ 'brw_screen_surface.c',
+ 'brw_screen_tex_layout.c',
+ 'brw_screen_texture.c',
+ 'brw_structs_dump.c',
+ 'brw_sf.c',
+ 'brw_sf_emit.c',
+ 'brw_sf_state.c',
+ 'brw_state_batch.c',
+ 'brw_state_cache.c',
+# 'brw_state_debug.c',
+ 'brw_state_upload.c',
+ 'brw_swtnl.c',
+ 'brw_urb.c',
+ 'brw_util.c',
+ 'brw_vs.c',
+ 'brw_vs_emit.c',
+ 'brw_vs_state.c',
+ 'brw_vs_surface_state.c',
+ 'brw_wm.c',
+# 'brw_wm_constant_buffer.c',
+ 'brw_wm_debug.c',
+ 'brw_wm_emit.c',
+ 'brw_wm_fp.c',
+# 'brw_wm_glsl.c',
+ 'brw_wm_iz.c',
+ 'brw_wm_pass0.c',
+ 'brw_wm_pass1.c',
+ 'brw_wm_pass2.c',
+ 'brw_wm_sampler_state.c',
+ 'brw_wm_state.c',
+ 'brw_wm_surface_state.c',
+ 'intel_decode.c',
+ ])
+
+Export('i965')
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
new file mode 100644
index 00000000000..22607dc6083
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -0,0 +1,202 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_memory.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_reg.h"
+#include "brw_winsys.h"
+#include "brw_debug.h"
+#include "brw_structs.h"
+
+#define ALWAYS_EMIT_MI_FLUSH 1
+
+/**
+ * Allocate a fresh batch buffer object from the winsys, map it, and point
+ * the write cursor at the start of the mapping.
+ *
+ * \return PIPE_OK on success, the bo_alloc() error if allocation fails, or
+ *         PIPE_ERROR_OUT_OF_MEMORY if the new buffer could not be mapped.
+ */
+enum pipe_error
+brw_batchbuffer_reset(struct brw_batchbuffer *batch)
+{
+   enum pipe_error ret;
+
+   ret = batch->sws->bo_alloc( batch->sws,
+                               BRW_BUFFER_TYPE_BATCH,
+                               BRW_BATCH_SIZE, 4096,
+                               &batch->buf );
+   if (ret)
+      return ret;
+
+   batch->size = BRW_BATCH_SIZE;
+
+   /* With map_range semantics, the winsys can decide whether to
+    * inject a malloc'ed bounce buffer instead of mapping directly.
+    */
+   batch->map = batch->sws->bo_map(batch->buf,
+                                   BRW_DATA_BATCH_BUFFER,
+                                   0, batch->size,
+                                   GL_TRUE,
+                                   GL_TRUE,
+                                   GL_TRUE);
+
+   batch->ptr = batch->map;
+
+   /* Report an unmapped buffer instead of claiming success: every emit
+    * helper writes through batch->ptr.  (Assumes bo_map signals failure
+    * by returning NULL, the same "unmapped" state the rest of this file
+    * tests for.)  The buffer stays referenced in batch->buf and is
+    * released by brw_batchbuffer_free().
+    */
+   if (batch->map == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   return PIPE_OK;
+}
+
+/**
+ * Create a batchbuffer bound to the given winsys screen and chipset, with
+ * its first buffer already allocated and mapped.
+ *
+ * \return the new batchbuffer, or NULL if the structure or its first
+ *         buffer could not be set up.
+ */
+struct brw_batchbuffer *
+brw_batchbuffer_alloc(struct brw_winsys_screen *sws,
+                      struct brw_chipset chipset)
+{
+   struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer);
+
+   /* CALLOC_STRUCT can fail; do not write through a NULL pointer. */
+   if (batch == NULL)
+      return NULL;
+
+   batch->sws = sws;
+   batch->chipset = chipset;
+
+   if (brw_batchbuffer_reset(batch) != PIPE_OK) {
+      /* Drop any buffer the failed reset left behind, then the struct. */
+      if (batch->buf)
+         bo_reference(&batch->buf, NULL);
+      FREE(batch);
+      return NULL;
+   }
+
+   return batch;
+}
+
+/**
+ * Destroy a batchbuffer: unmap the current buffer if it is still mapped,
+ * drop the reference held in batch->buf and free the structure itself.
+ * The batch pointer must not be used afterwards.
+ */
+void
+brw_batchbuffer_free(struct brw_batchbuffer *batch)
+{
+ if (batch->map) {
+ batch->sws->bo_unmap(batch->buf);
+ batch->map = NULL;
+ }
+
+ bo_reference(&batch->buf, NULL);
+ FREE(batch);
+}
+
+
+/**
+ * Terminate the current batch, hand it to the winsys for execution and
+ * start a new one.  Does nothing if no bytes have been emitted.
+ *
+ * \param file  caller's source file, printed only with DEBUG_BATCH set
+ * \param line  caller's source line, printed only with DEBUG_BATCH set
+ *
+ * Up to three extra dwords (flush, padding no-op, end marker) are written
+ * here without a space check; presumably the BATCH_RESERVED bytes that
+ * brw_batchbuffer_space() holds back exist to cover them.
+ */
+void
+_brw_batchbuffer_flush(struct brw_batchbuffer *batch,
+ const char *file,
+ int line)
+{
+ GLuint used = batch->ptr - batch->map;
+
+ if (used == 0)
+ return;
+
+ /* Post-swap throttling done by the state tracker.
+ */
+
+ if (BRW_DEBUG & DEBUG_BATCH)
+ debug_printf("%s:%d: Batchbuffer flush with %db used\n",
+ file, line, used);
+
+ if (ALWAYS_EMIT_MI_FLUSH) {
+ *(GLuint *) (batch->ptr) = MI_FLUSH | BRW_FLUSH_STATE_CACHE;
+ batch->ptr += 4;
+ used = batch->ptr - batch->map;
+ }
+
+ /* Round batchbuffer usage to 2 DWORDs: if bit 2 of the byte count is
+ * clear, add a no-op now so that the total is a multiple of 8 bytes once
+ * the 4-byte end marker below has been appended (assumes 'used' is
+ * already a multiple of 4).
+ */
+ if ((used & 4) == 0) {
+ *(GLuint *) (batch->ptr) = 0; /* noop */
+ batch->ptr += 4;
+ used = batch->ptr - batch->map;
+ }
+
+ /* Mark the end of the buffer.
+ */
+ *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END;
+ batch->ptr += 4;
+ used = batch->ptr - batch->map;
+
+ /* The buffer is flushed and unmapped before it is executed; map and ptr
+ * stay NULL until brw_batchbuffer_reset() below maps a new buffer.
+ */
+ batch->sws->bo_flush_range(batch->buf, 0, used);
+ batch->sws->bo_unmap(batch->buf);
+ batch->map = NULL;
+ batch->ptr = NULL;
+
+ batch->sws->bo_exec(batch->buf, used );
+
+ if (BRW_DEBUG & DEBUG_SYNC) {
+ /* Abuse map/unmap to achieve wait-for-fence.
+ *
+ * XXX: hide this inside the winsys and export a fence
+ * interface.
+ *
+ * NOTE(review): the code below calls bo_wait_idle() rather than
+ * map/unmap, so the first sentence looks stale -- confirm.
+ */
+ debug_printf("waiting for idle\n");
+ batch->sws->bo_wait_idle(batch->buf);
+ }
+
+ /* Reset the buffer.  The return value is ignored, so a failed
+ * allocation is not reported to the caller.
+ */
+ brw_batchbuffer_reset(batch);
+}
+
+
+/* The OUT_RELOC() macro ends up here, generating a relocation within
+ * the batch buffer.
+ *
+ * \param buffer  buffer the relocation refers to
+ * \param usage   how the batch uses that buffer; typed to match the
+ *                prototype in brw_batchbuffer.h
+ * \param delta   value passed to the winsys together with the current
+ *                batch offset
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if the write
+ *         cursor lies beyond the buffer, or the winsys error otherwise.
+ */
+enum pipe_error
+brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
+                           struct brw_winsys_buffer *buffer,
+                           enum brw_buffer_usage usage,
+                           uint32_t delta)
+{
+   int ret;
+
+   if (batch->ptr - batch->map > batch->buf->size) {
+      /* Cast the arguments to what the conversions expect: %p takes a
+       * void pointer, and a pointer difference is not an int on 64-bit.
+       */
+      debug_printf("bad relocation ptr %p map %p offset %d size %d\n",
+                   (void *) batch->ptr, (void *) batch->map,
+                   (int) (batch->ptr - batch->map),
+                   (int) batch->buf->size);
+
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   ret = batch->sws->bo_emit_reloc(batch->buf,
+                                   usage,
+                                   delta,
+                                   batch->ptr - batch->map,
+                                   buffer);
+   if (ret != 0)
+      return ret;
+
+   /* bo_emit_reloc was responsible for writing a zero into the
+    * batchbuffer if necessary.  Just need to update our pointer.
+    */
+   batch->ptr += 4;
+
+   return PIPE_OK;
+}
+
+/**
+ * Append a block of pre-built command data to the batch.
+ *
+ * \param data           bytes to copy into the batch
+ * \param bytes          size of \p data; must be a multiple of 4
+ * \param cliprect_mode  accepted but not used by this function
+ * \return 0 on success, or the error from
+ *         brw_batchbuffer_require_space() if the data does not fit.
+ */
+enum pipe_error
+brw_batchbuffer_data(struct brw_batchbuffer *batch,
+                     const void *data, GLuint bytes,
+                     enum cliprect_mode cliprect_mode)
+{
+   enum pipe_error err;
+
+   /* Only whole dwords may be emitted. */
+   assert((bytes & 3) == 0);
+
+   err = brw_batchbuffer_require_space(batch, bytes);
+   if (err)
+      return err;
+
+   memcpy(batch->ptr, data, bytes);
+   batch->ptr += bytes;
+
+   return 0;
+}
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
new file mode 100644
index 00000000000..7473f5bea4d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -0,0 +1,148 @@
+#ifndef BRW_BATCHBUFFER_H
+#define BRW_BATCHBUFFER_H
+
+#include "util/u_debug.h"
+
+#include "brw_types.h"
+#include "brw_winsys.h"
+#include "brw_reg.h"
+
+#define BATCH_SZ 16384
+#define BATCH_RESERVED 16
+
+/* All ignored: the batchbuffer code in this file and brw_batchbuffer.c
+ * accepts a cliprect_mode argument (BEGIN_BATCH, brw_batchbuffer_data) but
+ * never reads it.
+ */
+enum cliprect_mode {
+ IGNORE_CLIPRECTS,
+ LOOP_CLIPRECTS,
+ NO_LOOP_CLIPRECTS,
+ REFERENCES_CLIPRECTS
+};
+
+
+
+
+/* State of the command batch currently being built: the winsys buffer that
+ * backs it plus the CPU-side write cursor.
+ */
+struct brw_batchbuffer {
+
+ struct brw_winsys_screen *sws;
+ struct brw_winsys_buffer *buf;
+ struct brw_chipset chipset;
+
+ /**
+ * Values exported to speed up writing the batchbuffer,
+ * instead of having to go through an accessor function for
+ * each dword written.
+ */
+ /*{@*/
+ uint8_t *map; /* start of the CPU mapping; NULL while unmapped */
+ uint8_t *ptr; /* next byte to be written */
+ size_t size; /* buffer size in bytes */
+ struct {
+ /* Expected value of ptr once the current packet is complete; set by
+ * brw_batchbuffer_require_space() and checked by ADVANCE_BATCH() in
+ * DEBUG builds only.
+ */
+ uint8_t *end_ptr;
+ } emit;
+
+
+ /* NOTE(review): relocs/max_relocs are not referenced by the batchbuffer
+ * code visible here -- confirm whether they are still used.
+ */
+ size_t relocs;
+ size_t max_relocs;
+ /*@}*/
+};
+
+struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws,
+ struct brw_chipset chipset );
+
+void brw_batchbuffer_free(struct brw_batchbuffer *batch);
+
+void _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
+ const char *file, int line);
+
+
+enum pipe_error
+brw_batchbuffer_reset(struct brw_batchbuffer *batch);
+
+
+/* Unlike bmBufferData, this currently requires the buffer be mapped.
+ * Consider it a convenience function wrapping multiple
+ * intel_buffer_dword() calls.
+ *
+ * Returns enum pipe_error, matching the definition in brw_batchbuffer.c
+ * (zero on success).
+ */
+enum pipe_error
+brw_batchbuffer_data(struct brw_batchbuffer *batch,
+                     const void *data, GLuint bytes,
+                     enum cliprect_mode cliprect_mode);
+
+
+/* Emit a relocation for 'buffer' at the current batch position and advance
+ * the cursor by one dword.  Returns enum pipe_error, matching the
+ * definition in brw_batchbuffer.c (zero on success).
+ */
+enum pipe_error
+brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
+                           struct brw_winsys_buffer *buffer,
+                           enum brw_buffer_usage usage,
+                           uint32_t delta);
+
+/* Inline functions - might actually be better off with these
+ * non-inlined. Certainly better off switching all command packets to
+ * be passed as structs rather than dwords, but that's a little bit of
+ * work...
+ */
+/* Bytes still available for commands: the buffer size minus the
+ * BATCH_RESERVED tail, minus what has already been written.
+ */
+static INLINE GLint
+brw_batchbuffer_space(struct brw_batchbuffer *batch)
+{
+   const size_t usable = batch->size - BATCH_RESERVED;
+   const size_t written = batch->ptr - batch->map;
+
+   return usable - written;
+}
+
+
+/* Write one dword at the cursor and advance it by 4 bytes.  The batch must
+ * be mapped and have room; both conditions are only asserted.
+ */
+static INLINE void
+brw_batchbuffer_emit_dword(struct brw_batchbuffer *batch, GLuint dword)
+{
+   GLuint *out;
+
+   assert(batch->map);
+   assert(brw_batchbuffer_space(batch) >= 4);
+
+   out = (GLuint *) batch->ptr;
+   *out = dword;
+   batch->ptr += 4;
+}
+
+/**
+ * Check that at least \p sz bytes can still be emitted into the batch.
+ *
+ * In DEBUG builds this also records where the write cursor should end up,
+ * which ADVANCE_BATCH() verifies afterwards.
+ *
+ * \return PIPE_OK if the space is available, PIPE_ERROR_OUT_OF_MEMORY
+ *         otherwise (after tripping an assert in debug builds).
+ */
+static INLINE enum pipe_error
+brw_batchbuffer_require_space(struct brw_batchbuffer *batch,
+                              GLuint sz)
+{
+   const GLint space = brw_batchbuffer_space(batch);
+
+   assert(sz < batch->size - 8);
+
+   /* Test the sign first: comparing a negative GLint against the unsigned
+    * sz would convert it to a huge positive value and wrongly report that
+    * there is room.
+    */
+   if (space < 0 || (GLuint) space < sz) {
+      assert(0);
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+#ifdef DEBUG
+   batch->emit.end_ptr = batch->ptr + sz;
+#endif
+   return PIPE_OK;
+}
+
+/* Here are the crusty old macros, to be removed.
+ *
+ * They all expect a variable named 'brw' with a 'batch' member in the
+ * caller's scope.  Arguments are parenthesized so that expressions such as
+ * 'cond ? a : b' or '*p' can be passed safely.  BEGIN_BATCH ignores its
+ * cliprect_mode argument.
+ */
+#define BEGIN_BATCH(n, cliprect_mode) do { \
+      brw_batchbuffer_require_space(brw->batch, (n)*4); \
+   } while (0)
+
+#define OUT_BATCH(d) brw_batchbuffer_emit_dword(brw->batch, (d))
+
+#define OUT_RELOC(buf, usage, delta) do { \
+      assert((unsigned) (delta) < (buf)->size); \
+      brw_batchbuffer_emit_reloc(brw->batch, (buf), \
+                                 (usage), (delta)); \
+   } while (0)
+
+/* ADVANCE_BATCH() closes a packet opened with BEGIN_BATCH().  In DEBUG
+ * builds it aborts unless the write cursor sits exactly where
+ * brw_batchbuffer_require_space() said it would; otherwise it expands to
+ * nothing.
+ *
+ * NOTE(review): the check fires for any mismatch, including too few bytes
+ * emitted, although the message only mentions "too many"; _n is unsigned
+ * but printed with %d.
+ */
+#ifdef DEBUG
+#define ADVANCE_BATCH() do { \
+ unsigned int _n = brw->batch->ptr - brw->batch->emit.end_ptr; \
+ if (_n != 0) { \
+ debug_printf("%s: %d too many bytes emitted to batch\n", \
+ __FUNCTION__, _n); \
+ abort(); \
+ } \
+ brw->batch->emit.end_ptr = NULL; \
+ } while(0)
+#else
+#define ADVANCE_BATCH()
+#endif
+
+/* Emit a single MI_FLUSH dword into the batch.
+ *
+ * NOTE(review): the result of brw_batchbuffer_require_space() is ignored,
+ * so the dword is emitted even when that call reports an error.
+ */
+static INLINE void
+brw_batchbuffer_emit_mi_flush(struct brw_batchbuffer *batch)
+{
+ brw_batchbuffer_require_space(batch, 4);
+ brw_batchbuffer_emit_dword(batch, MI_FLUSH);
+}
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
new file mode 100644
index 00000000000..3e070f5591a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -0,0 +1,111 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+/* Prepare hook of the brw_cc_vp atom: hand brw->curr.ccv to
+ * brw_cache_data() under the BRW_CC_VP id, with no relocations, and
+ * let it store the resulting buffer in the CC_RELOC_VP relocation
+ * slot.  The cache's return code is passed back unchanged.
+ */
+static enum pipe_error prepare_cc_vp( struct brw_context *brw )
+{
+   enum pipe_error ret;
+
+   ret = brw_cache_data( &brw->cache,
+                         BRW_CC_VP,
+                         &brw->curr.ccv,
+                         NULL, 0,
+                         &brw->cc.reloc[CC_RELOC_VP].bo );
+   return ret;
+}
+
+const struct brw_tracked_state brw_cc_vp = { /* tracked-state atom for the BRW_CC_VP cache item */
+ .dirty = {
+ .mesa = PIPE_NEW_VIEWPORT,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .prepare = prepare_cc_vp /* uploads brw->curr.ccv */
+};
+
+
+/* OR two brw_cc3 values together bit-for-bit, by viewing each of them
+ * through a union with an unsigned integer.
+ */
+static INLINE struct brw_cc3
+combine_cc3( struct brw_cc3 a, struct brw_cc3 b )
+{
+   union { struct brw_cc3 cc3; unsigned i; } lhs, rhs;
+
+   lhs.cc3 = a;
+   rhs.cc3 = b;
+   lhs.i = lhs.i | rhs.i;
+
+   return lhs.cc3;
+}
+
+
+/* Prepare hook of the brw_cc_unit atom: assemble brw->cc.cc from the
+ * currently bound depth/stencil and blend state objects and upload it
+ * through brw_cache_data_sz() together with the first entry of
+ * brw->cc.reloc.  cc4 is not written here.
+ */
+static int prepare_cc_unit( struct brw_context *brw )
+{
+   /* Dwords owned by the depth/stencil state: */
+   brw->cc.cc.cc0 = brw->curr.zstencil->cc0;
+   brw->cc.cc.cc1 = brw->curr.zstencil->cc1;
+   brw->cc.cc.cc2 = brw->curr.zstencil->cc2;
+   brw->cc.cc.cc7 = brw->curr.zstencil->cc7;
+
+   /* Dwords owned by the blend state: */
+   brw->cc.cc.cc5 = brw->curr.blend->cc5;
+   brw->cc.cc.cc6 = brw->curr.blend->cc6;
+
+   /* cc3 carries bits from both objects: */
+   brw->cc.cc.cc3 = combine_cc3( brw->curr.zstencil->cc3,
+                                 brw->curr.blend->cc3 );
+
+   return brw_cache_data_sz(&brw->cache, BRW_CC_UNIT,
+                            &brw->cc.cc, sizeof(brw->cc.cc),
+                            brw->cc.reloc, 1,
+                            &brw->cc.state_bo);
+}
+
+const struct brw_tracked_state brw_cc_unit = { /* tracked-state atom for the BRW_CC_UNIT cache item */
+ .dirty = {
+ .mesa = PIPE_NEW_DEPTH_STENCIL_ALPHA | PIPE_NEW_BLEND, /* the zstencil and blend objects read by prepare_cc_unit */
+ .brw = 0,
+ .cache = CACHE_NEW_CC_VP /* NOTE(review): presumably because the unit's relocation points at the CC_VP buffer -- confirm */
+ },
+ .prepare = prepare_cc_unit,
+};
+
+
+/* One-time setup of the CC unit relocation: the CC_RELOC_VP slot is
+ * described as patching the cc4 dword of struct brw_cc_unit_state,
+ * with no buffer attached yet.  The slot is addressed by name, the
+ * same way prepare_cc_vp() addresses it when it stores the buffer.
+ */
+void brw_hw_cc_init( struct brw_context *brw )
+{
+   make_reloc(&brw->cc.reloc[CC_RELOC_VP],
+              BRW_USAGE_STATE,
+              0,
+              offsetof(struct brw_cc_unit_state, cc4),
+              NULL);
+}
+
+
+/* Drop the buffer references held by the CC state: the unit state
+ * buffer and the buffer attached to the CC_RELOC_VP relocation slot
+ * (addressed by name, as in prepare_cc_vp()).
+ */
+void brw_hw_cc_cleanup( struct brw_context *brw )
+{
+   bo_reference(&brw->cc.state_bo, NULL);
+   bo_reference(&brw->cc.reloc[CC_RELOC_VP].bo, NULL);
+}
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
new file mode 100644
index 00000000000..58d9e56df27
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -0,0 +1,224 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_state.h"
+
+#include "util/u_math.h"
+
+#include "brw_screen.h"
+#include "brw_batchbuffer.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_pipe_rast.h"
+#include "brw_clip.h"
+
+
+#define FRONT_UNFILLED_BIT 0x1
+#define BACK_UNFILLED_BIT 0x2
+
+
+/* Build a clip-stage program for *key and upload it to the state cache.
+ *
+ * The compile state is initialised from the key and brw->chipset, the
+ * emitter matching key->primitive generates the instructions, and the
+ * result is uploaded under BRW_CLIP_PROG with bo_out passed through to
+ * brw_upload_cache().
+ *
+ * Returns PIPE_ERROR_BAD_INPUT for an unhandled primitive; otherwise
+ * the result of brw_get_program() / brw_upload_cache().
+ */
+static enum pipe_error
+compile_clip_prog( struct brw_context *brw,
+                   struct brw_clip_prog_key *key,
+                   struct brw_winsys_buffer **bo_out )
+{
+   struct brw_clip_compile c;
+   const GLuint *program;
+   GLuint program_size;
+   GLuint hdr_bytes;
+   GLuint hdr_regs;
+   enum pipe_error ret;
+
+   memset(&c, 0, sizeof(c));
+
+   /* Begin the compilation:
+    */
+   brw_init_compile(brw, &c.func);
+   c.func.single_program_flow = 1;
+
+   c.chipset = brw->chipset;
+   c.key = *key;
+   c.need_ff_sync = c.chipset.is_igdng;
+
+   /* The two positions present in vertex + header are currently
+    * hardcoded.  The vertex header is three registers on IGDNG and
+    * one elsewhere.
+    */
+   c.header_position_offset = ATTR_SIZE;
+   hdr_bytes = c.chipset.is_igdng ? 3 * REG_SIZE : REG_SIZE;
+
+   c.offset_hpos = hdr_bytes + c.key.output_hpos * ATTR_SIZE;
+
+   /* A zero slot means "not present"; the offset then stays zero. */
+   if (c.key.output_color0)
+      c.offset_color0 = hdr_bytes + c.key.output_color0 * ATTR_SIZE;
+   if (c.key.output_color1)
+      c.offset_color1 = hdr_bytes + c.key.output_color1 * ATTR_SIZE;
+   if (c.key.output_bfc0)
+      c.offset_bfc0 = hdr_bytes + c.key.output_bfc0 * ATTR_SIZE;
+   if (c.key.output_bfc1)
+      c.offset_bfc1 = hdr_bytes + c.key.output_bfc1 * ATTR_SIZE;
+   if (c.key.output_edgeflag)
+      c.offset_edgeflag = hdr_bytes + c.key.output_edgeflag * ATTR_SIZE;
+
+   /* Two attributes per register plus the header registers.
+    * (are vertices packed, or reg-aligned?)
+    */
+   hdr_regs = BRW_IS_IGDNG(brw) ? 3 : 1;
+   c.nr_regs = (c.key.nr_attrs + 1) / 2 + hdr_regs;
+   c.nr_bytes = c.nr_regs * REG_SIZE;
+
+   c.prog_data.clip_mode = c.key.clip_mode; /* XXX */
+
+   /* For some reason the thread is spawned with only 4 channels
+    * unmasked.
+    */
+   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+   /* Would ideally have the option of producing a program which could
+    * do all three:
+    */
+   switch (key->primitive) {
+   case PIPE_PRIM_POINTS:
+      brw_emit_point_clip( &c );
+      break;
+
+   case PIPE_PRIM_LINES:
+      brw_emit_line_clip( &c );
+      break;
+
+   case PIPE_PRIM_TRIANGLES:
+      if (key->do_unfilled)
+         brw_emit_unfilled_clip( &c );
+      else
+         brw_emit_tri_clip( &c );
+      break;
+
+   default:
+      assert(0);
+      return PIPE_ERROR_BAD_INPUT;
+   }
+
+   /* Fetch the generated program:
+    */
+   ret = brw_get_program(&c.func, &program, &program_size);
+   if (ret != PIPE_OK)
+      return ret;
+
+   /* Upload; the cache's result is the function's result.
+    */
+   return brw_upload_cache( &brw->cache,
+                            BRW_CLIP_PROG,
+                            &c.key, sizeof(c.key),
+                            NULL, 0,
+                            program, program_size,
+                            &c.prog_data,
+                            &brw->clip.prog_data,
+                            bo_out );
+}
+
+/* Prepare hook of the brw_clip_prog atom: build the clip program key
+ * from the current state, look it up in the cache and compile a new
+ * program only when the lookup misses.  brw->clip.prog_bo and
+ * brw->clip.prog_data are the outputs of both paths.
+ */
+static enum pipe_error
+upload_clip_prog(struct brw_context *brw)
+{
+   const struct brw_vertex_shader *vs = brw->curr.vertex_shader;
+   struct brw_clip_prog_key key;
+
+   /* PIPE_NEW_RAST
+    *
+    * Start from the almost-complete key kept in the rast state.
+    */
+   key = brw->curr.rast->clip_key;
+
+   /* BRW_NEW_REDUCED_PRIMITIVE */
+   key.primitive = brw->reduced_primitive;
+
+   /* CACHE_NEW_VS_PROG
+    *
+    * XXX: if edgeflag is moved to a proper TGSI vs output, can remove
+    * the dependency on CACHE_NEW_VS_PROG.
+    */
+   key.nr_attrs = brw->vs.prog_data->nr_outputs;
+   key.output_edgeflag = brw->vs.prog_data->output_edgeflag;
+
+   /* PIPE_NEW_VS */
+   key.output_hpos = vs->output_hpos;
+   key.output_color0 = vs->output_color0;
+   key.output_color1 = vs->output_color1;
+   key.output_bfc0 = vs->output_bfc0;
+   key.output_bfc1 = vs->output_bfc1;
+
+   /* PIPE_NEW_CLIP */
+   key.nr_userclip = brw->curr.ucp.nr;
+
+   /* Cache hit: nothing more to do.
+    */
+   if (brw_search_cache(&brw->cache, BRW_CLIP_PROG,
+                        &key, sizeof(key),
+                        NULL, 0,
+                        &brw->clip.prog_data,
+                        &brw->clip.prog_bo))
+      return PIPE_OK;
+
+   /* Cache miss: compile, and report the compiler's result.
+    */
+   return compile_clip_prog( brw, &key, &brw->clip.prog_bo );
+}
+
+
+const struct brw_tracked_state brw_clip_prog = { /* tracked-state atom for the clip program */
+ .dirty = {
+ .mesa = (PIPE_NEW_RAST |
+ PIPE_NEW_CLIP),
+ .brw = (BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .prepare = upload_clip_prog /* NOTE(review): upload_clip_prog also reads
+                              * vertex shader outputs under a "PIPE_NEW_VS"
+                              * tag, and no such flag is listed in .mesa
+                              * above -- confirm CACHE_NEW_VS_PROG covers it.
+                              */
+};
diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
new file mode 100644
index 00000000000..80e3a11a370
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -0,0 +1,199 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef BRW_CLIP_H
+#define BRW_CLIP_H
+
+#include "pipe/p_state.h"
+#include "brw_reg.h"
+#include "brw_eu.h"
+
+#define MAX_VERTS (3+6+6)
+
+/* Note that if unfilled primitives are being emitted, we have to fix
+ * up polygon offset and flatshading at this point:
+ *
+ * This is the key under which clip programs are cached.  In
+ * upload_clip_prog() it starts as a copy of the rasterizer state's
+ * clip_key, after which primitive, nr_attrs, nr_userclip and the
+ * output_* slots are overwritten from the current state.
+ *
+ * The output_* fields are vertex output slot numbers;
+ * compile_clip_prog() multiplies them by ATTR_SIZE to get byte
+ * offsets, and treats zero in the color/bfc/edgeflag slots as "absent".
+ */
+struct brw_clip_prog_key {
+ GLuint nr_attrs:6; /* taken from the VS prog_data's nr_outputs */
+ GLuint primitive:4; /* reduced primitive; selects the emitter in compile_clip_prog() */
+ GLuint nr_userclip:3; /* taken from brw->curr.ucp.nr */
+ GLuint do_flat_shading:1;
+ GLuint do_unfilled:1; /* triangles go through brw_emit_unfilled_clip() */
+ GLuint fill_cw:2; /* includes cull information */
+ GLuint fill_ccw:2; /* includes cull information */
+ GLuint offset_cw:1;
+ GLuint offset_ccw:1;
+ GLuint copy_bfc_cw:1;
+ GLuint copy_bfc_ccw:1;
+ GLuint clip_mode:3; /* copied into prog_data.clip_mode */
+ GLuint output_hpos:6; /* not always zero? */
+
+ GLuint output_color0:6;
+ GLuint output_color1:6;
+ GLuint output_bfc0:6;
+ GLuint output_bfc1:6;
+ GLuint output_edgeflag:6;
+ GLuint pad1:2; /* fills the second group of bitfields up to 32 bits */
+
+ GLfloat offset_factor;
+ GLfloat offset_units;
+};
+
+struct brw_clip_prog_data { /* per-program data stored in the cache next to the clip program */
+ GLuint curb_read_length; /* user planes? */
+ GLuint clip_mode; /* copied from the program key */
+ GLuint urb_read_length; /* the line clipper sets this to nr_regs */
+ GLuint total_grf; /* number of GRF registers allocated by the alloc_regs pass */
+};
+
+#define CLIP_LINE 0
+#define CLIP_POINT 1
+#define CLIP_FILL 2
+#define CLIP_CULL 3
+
+
+#define PRIM_MASK (0x1f)
+
+struct brw_clip_compile { /* working state for compiling one clip program */
+ struct brw_compile func; /* instruction emitter state, set up by brw_init_compile() */
+ struct brw_clip_prog_key key; /* copy of the key being compiled */
+ struct brw_clip_prog_data prog_data; /* handed to brw_upload_cache() with the program */
+
+ struct { /* register assignments, filled in by the *_alloc_regs functions */
+ struct brw_reg R0;
+ struct brw_reg vertex[MAX_VERTS];
+
+ struct brw_reg t;
+ struct brw_reg t0, t1;
+ struct brw_reg dp0, dp1;
+
+ struct brw_reg dpPrev;
+ struct brw_reg dp;
+ struct brw_reg loopcount;
+ struct brw_reg nr_verts;
+ struct brw_reg planemask;
+
+ struct brw_reg inlist;
+ struct brw_reg outlist;
+ struct brw_reg freelist;
+
+ struct brw_reg dir;
+ struct brw_reg tmp0, tmp1;
+ struct brw_reg offset;
+
+ struct brw_reg fixed_planes;
+ struct brw_reg plane_equation;
+
+ struct brw_reg ff_sync; /* only assigned when need_ff_sync is set */
+ } reg;
+
+ /* 3 different ways of expressing vertex size, including
+ * key.nr_attrs.
+ */
+ GLuint nr_regs; /* (key.nr_attrs + 1) / 2 plus the header registers */
+ GLuint nr_bytes; /* nr_regs * REG_SIZE */
+
+ GLuint first_tmp; /* first_tmp/last_tmp: GRF numbers, both set to the first free register by alloc_regs */
+ GLuint last_tmp;
+
+ GLboolean need_direction;
+ struct brw_chipset chipset; /* copy of brw->chipset */
+
+ GLuint last_mrf;
+
+ GLuint header_position_offset; /* set to ATTR_SIZE by compile_clip_prog() */
+ GLboolean need_ff_sync; /* set from chipset.is_igdng */
+
+ GLuint nr_color_attrs;
+ GLuint offset_color0; /* offset_*: byte offset of the attribute within a vertex, */
+ GLuint offset_color1; /* header size + output slot * ATTR_SIZE; the color, bfc */
+ GLuint offset_bfc0; /* and edgeflag offsets stay zero when their key slot */
+ GLuint offset_bfc1; /* is zero */
+
+ GLuint offset_hpos;
+ GLuint offset_edgeflag;
+};
+
+#define ATTR_SIZE (4*4)
+
+/* Points are only culled, so no need for a clip routine, however it
+ * works out easier to have a dummy one.
+ */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c );
+void brw_emit_tri_clip( struct brw_clip_compile *c );
+void brw_emit_line_clip( struct brw_clip_compile *c );
+void brw_emit_point_clip( struct brw_clip_compile *c );
+
+/* brw_clip_tri.c, for use by the unfilled clip routine:
+ */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c );
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c );
+void brw_clip_tri( struct brw_clip_compile *c );
+void brw_clip_tri_emit_polygon( struct brw_clip_compile *c );
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+ GLuint nr_verts );
+
+
+/* Utils:
+ */
+
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+ struct brw_indirect dest_ptr,
+ struct brw_indirect v0_ptr, /* from */
+ struct brw_indirect v1_ptr, /* to */
+ struct brw_reg t0,
+ GLboolean force_edgeflag );
+
+void brw_clip_init_planes( struct brw_clip_compile *c );
+
+void brw_clip_emit_vue(struct brw_clip_compile *c,
+ struct brw_indirect vert,
+ GLboolean allocate,
+ GLboolean eot,
+ GLuint header);
+
+void brw_clip_kill_thread(struct brw_clip_compile *c);
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c );
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c );
+
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+ GLuint to, GLuint from );
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c );
+
+struct brw_reg get_tmp( struct brw_clip_compile *c );
+
+void brw_clip_project_position(struct brw_clip_compile *c,
+ struct brw_reg pos );
+void brw_clip_ff_sync(struct brw_clip_compile *c);
+void brw_clip_init_ff_sync(struct brw_clip_compile *c);
+#endif
diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c
new file mode 100644
index 00000000000..54282d975ed
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_line.c
@@ -0,0 +1,271 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_debug.h"
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+
+/* Assign GRF registers for the line clipper.  Register usage is
+ * static, so everything is laid out up front: R0, the user clip planes
+ * (when present), four vertices, the scalar temporaries, and finally
+ * the optional fixed-planes and ff_sync registers.  The number of
+ * registers used is recorded in prog_data.total_grf.
+ */
+static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
+{
+   GLuint grf = 0;
+   GLuint v;
+
+   c->reg.R0 = retype(brw_vec8_grf(grf, 0), BRW_REGISTER_TYPE_UD);
+   grf++;
+
+   if (c->key.nr_userclip) {
+      const GLuint plane_regs = (6 + c->key.nr_userclip + 1) / 2;
+
+      c->reg.fixed_planes = brw_vec4_grf(grf, 0);
+      grf += plane_regs;
+
+      c->prog_data.curb_read_length = plane_regs;
+   }
+   else {
+      c->prog_data.curb_read_length = 0;
+   }
+
+   /* Payload vertices plus space for more generated vertices:
+    */
+   for (v = 0; v < 4; v++) {
+      c->reg.vertex[v] = brw_vec4_grf(grf, 0);
+      grf += c->nr_regs;
+   }
+
+   /* t, t0, t1, planemask and the plane equation share one register:
+    */
+   c->reg.t = brw_vec1_grf(grf, 0);
+   c->reg.t0 = brw_vec1_grf(grf, 1);
+   c->reg.t1 = brw_vec1_grf(grf, 2);
+   c->reg.planemask = retype(brw_vec1_grf(grf, 3), BRW_REGISTER_TYPE_UD);
+   c->reg.plane_equation = brw_vec4_grf(grf, 4);
+   grf++;
+
+   /* fixme - dp4 will clobber r.1,2,3 */
+   c->reg.dp0 = brw_vec1_grf(grf, 0);
+   c->reg.dp1 = brw_vec1_grf(grf, 4);
+   grf++;
+
+   if (!c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec8_grf(grf, 0);
+      grf++;
+   }
+
+   if (c->need_ff_sync) {
+      c->reg.ff_sync = retype(brw_vec1_grf(grf, 0), BRW_REGISTER_TYPE_UD);
+      grf++;
+   }
+
+   c->first_tmp = grf;
+   c->last_tmp = grf;
+
+   c->prog_data.urb_read_length = c->nr_regs; /* ? */
+   c->prog_data.total_grf = grf;
+}
+
+
+
+/* Line clipping, more or less following this algorithm:
+ *
+ * for (p=0;p<MAX_PLANES;p++) {
+ * if (clipmask & (1 << p)) {
+ * GLfloat dp0 = DOTPROD( vtx0, plane[p] );
+ * GLfloat dp1 = DOTPROD( vtx1, plane[p] );
+ *
+ * if (IS_NEGATIVE(dp1)) {
+ * GLfloat t = dp1 / (dp1 - dp0);
+ * if (t > t1) t1 = t;
+ * } else {
+ * GLfloat t = dp0 / (dp0 - dp1);
+ * if (t > t0) t0 = t;
+ * }
+ *
+ * if (t0 + t1 >= 1.0)
+ * return;
+ * }
+ * }
+ *
+ * interp( ctx, newvtx0, vtx0, vtx1, t0 );
+ * interp( ctx, newvtx1, vtx1, vtx0, t1 );
+ *
+ * Note that this function does not execute the algorithm itself: it
+ * emits instructions into c->func which perform it when the program
+ * runs.  Address registers 0-3 are pointed at c->reg.vertex[0..3]
+ * (inputs in 0 and 1, interpolated outputs in 2 and 3) and address
+ * register 4 walks the plane list.
+ *
+ * Unlike the pseudo-code, the t0 + t1 test is emitted once, after the
+ * plane loop: the two new vertices are output as a line strip only
+ * when t0 + t1 < 1.0, and the thread is killed in either case.
+ *
+ * When c->chipset.is_965 is set, extra instructions are emitted for
+ * the negative-rhw workaround: all six fixed planes are added to the
+ * plane mask if bit 20 of R0.2 is set, and a line with both dot
+ * products negative kills the thread.
+ */
+static void clip_and_emit_line( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_indirect vtx0 = brw_indirect(0, 0);
+ struct brw_indirect vtx1 = brw_indirect(1, 0);
+ struct brw_indirect newvtx0 = brw_indirect(2, 0);
+ struct brw_indirect newvtx1 = brw_indirect(3, 0);
+ struct brw_indirect plane_ptr = brw_indirect(4, 0);
+ struct brw_instruction *plane_loop;
+ struct brw_instruction *plane_active;
+ struct brw_instruction *is_negative;
+ struct brw_instruction *is_neg2 = NULL;
+ struct brw_instruction *not_culled;
+ struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
+
+ brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0]));
+ brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1]));
+ brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2]));
+ brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3]));
+ brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+
+ /* Note: init t0, t1 together:
+ */
+ brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));
+
+ brw_clip_init_planes(c);
+ brw_clip_init_clipmask(c);
+
+ /* -ve rhw workaround */
+ if (c->chipset.is_965) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+ brw_imm_ud(1<<20));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
+ }
+
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ plane_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* if (planemask & 1)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
+
+ plane_active = brw_IF(p, BRW_EXECUTE_1);
+ {
+ if (c->key.nr_userclip)
+ brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+ else
+ brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+ /* dp = DP4(vtx->position, plane)
+ */
+ brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset_hpos), c->reg.plane_equation);
+
+ /* if (IS_NEGATIVE(dp1))
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset_hpos), c->reg.plane_equation);
+ is_negative = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /*
+ * Both can be negative on GM965/G965 due to RHW workaround
+ * if so, this object should be rejected.
+ */
+ if (c->chipset.is_965) {
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
+ is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_neg2);
+ }
+
+ brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
+ brw_MOV(p, c->reg.t1, c->reg.t);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ is_negative = brw_ELSE(p, is_negative);
+ {
+ /* Coming back in. We know that both cannot be negative
+ * because the line would have been culled in that case.
+ */
+
+ /* If both are positive, do nothing */
+ /* Only on GM965/G965 */
+ if (c->chipset.is_965) {
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
+ is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+ }
+
+ {
+ brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
+ brw_MOV(p, c->reg.t0, c->reg.t);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+ if (c->chipset.is_965) {
+ brw_ENDIF(p, is_neg2);
+ }
+ }
+ brw_ENDIF(p, is_negative);
+ }
+ brw_ENDIF(p, plane_active);
+
+ /* plane_ptr++;
+ */
+ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+ /* while (planemask>>=1) != 0
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+ }
+ brw_WHILE(p, plane_loop);
+
+ brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
+ not_culled = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE);
+ brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE);
+
+ brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+ brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+ }
+ brw_ENDIF(p, not_culled);
+ brw_clip_kill_thread(c);
+}
+
+
+
+void brw_emit_line_clip( struct brw_clip_compile *c )
+{ /* Entry point for generating the line clip program: allocate
+   * registers, run brw_clip_init_ff_sync(), copy colors when the key
+   * asks for flat shading, then emit the clip-and-output code.
+   */
+ brw_clip_line_alloc_regs(c);
+ brw_clip_init_ff_sync(c);
+
+ if (c->key.do_flat_shading)
+ brw_clip_copy_colors(c, 0, 1);
+
+ clip_and_emit_line(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c
new file mode 100644
index 00000000000..e0a5330556d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_point.c
@@ -0,0 +1,48 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+/* Point clipping, nothing to do?
+ *
+ * The generated program allocates registers for zero vertices, runs
+ * brw_clip_init_ff_sync() and then kills the thread; no vertices are
+ * emitted by it.
+ */
+void brw_emit_point_clip( struct brw_clip_compile *c )
+{
+ /* Send an empty message to kill the thread:
+ */
+ brw_clip_tri_alloc_regs(c, 0);
+ brw_clip_init_ff_sync(c);
+
+ brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
new file mode 100644
index 00000000000..5c3ccfd8d0d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -0,0 +1,209 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+
+#include "brw_context.h"
+#include "brw_clip.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+struct brw_clip_unit_key { /* cache key for the clip unit state; zeroed and filled by clip_unit_populate_key() */
+ unsigned int total_grf; /* these four come from brw->clip.prog_data */
+ unsigned int urb_entry_read_length;
+ unsigned int curb_entry_read_length;
+ unsigned int clip_mode;
+
+ unsigned int curbe_offset; /* from brw->curbe.clip_start */
+
+ unsigned int nr_urb_entries, urb_size; /* from brw->urb.nr_clip_entries and brw->urb.vsize */
+
+ GLboolean depth_clamp; /* currently always 0 */
+};
+
+/* Fill *key with everything the clip unit state depends on.  The key
+ * is zeroed first so that it can be compared bytewise by the cache.
+ */
+static void
+clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
+{
+   const struct brw_clip_prog_data *prog = brw->clip.prog_data;
+
+   memset(key, 0, sizeof(*key));
+
+   /* CACHE_NEW_CLIP_PROG */
+   key->total_grf = prog->total_grf;
+   key->urb_entry_read_length = prog->urb_read_length;
+   key->curb_entry_read_length = prog->curb_read_length;
+   key->clip_mode = prog->clip_mode;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   key->curbe_offset = brw->curbe.clip_start;
+
+   /* BRW_NEW_URB_FENCE */
+   key->nr_urb_entries = brw->urb.nr_clip_entries;
+   key->urb_size = brw->urb.vsize;
+
+   /* XXX: add this to gallium: ctx->Transform.DepthClamp; */
+   key->depth_clamp = 0;
+}
+
+/* Build a clip unit state packet from *key and upload it to the cache
+ * under BRW_CLIP_UNIT together with the single relocation in reloc[].
+ * bo_out is passed through to brw_upload_cache(), whose result is
+ * returned.
+ */
+static enum pipe_error
+clip_unit_create_from_key(struct brw_context *brw,
+                          struct brw_clip_unit_key *key,
+                          struct brw_winsys_reloc *reloc,
+                          struct brw_winsys_buffer **bo_out)
+{
+   struct brw_clip_unit_state clip;
+   unsigned max_threads;
+
+   memset(&clip, 0, sizeof(clip));
+
+   /* thread0: the kernel start pointer is left zero here (reloc).
+    */
+   clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+   clip.thread0.kernel_start_pointer = 0;
+
+   clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   clip.thread1.single_program_flow = 1;
+
+   clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   clip.thread3.urb_entry_read_offset = 0;
+   clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+   clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+   clip.thread3.dispatch_grf_start_reg = 1;
+
+   clip.thread4.nr_urb_entries = key->nr_urb_entries;
+   clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
+
+   /* Work out how many threads to run.  With fewer than 10 clip URB
+    * entries only a single thread is used.  Otherwise half of the
+    * entries go to each thread, so the count has to be even.
+    *
+    * Although up to 16 concurrent Clip threads are allowed on IGDNG,
+    * only 2 threads can output VUEs at a time.
+    */
+   if (key->nr_urb_entries < 10) {
+      assert(key->nr_urb_entries >= 5);
+      max_threads = 1;
+   }
+   else {
+      assert(key->nr_urb_entries % 2 == 0);
+      max_threads = BRW_IS_IGDNG(brw) ? 16 : 2;
+   }
+
+   if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+      max_threads = 1;
+
+   /* The hardware field holds the thread count minus one. */
+   clip.thread4.max_threads = max_threads - 1;
+
+   if (BRW_DEBUG & DEBUG_STATS)
+      clip.thread4.stats_enable = 1;
+
+   clip.clip5.userclip_enable_flags = 0x7f;
+   clip.clip5.userclip_must_clip = 1;
+   clip.clip5.guard_band_enable = 0;
+   clip.clip5.viewport_z_clip_enable = key->depth_clamp ? 0 : 1;
+   clip.clip5.viewport_xy_clip_enable = 1;
+   clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+   clip.clip5.api_mode = BRW_CLIP_API_OGL;
+   clip.clip5.clip_mode = key->clip_mode;
+
+   if (BRW_IS_G4X(brw))
+      clip.clip5.negative_w_clip_test = 1;
+
+   clip.clip6.clipper_viewport_state_ptr = 0;
+
+   clip.viewport_xmin = -1;
+   clip.viewport_xmax = 1;
+   clip.viewport_ymin = -1;
+   clip.viewport_ymax = 1;
+
+   return brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
+                           key, sizeof(*key),
+                           reloc, 1,
+                           &clip, sizeof(clip),
+                           NULL, NULL,
+                           bo_out);
+}
+
+/* Prepare hook of the brw_clip_unit atom: build the key and the clip
+ * program relocation, then either find a matching unit state in the
+ * cache or create a new one.  brw->clip.state_bo is the output.
+ */
+static int upload_clip_unit( struct brw_context *brw )
+{
+   struct brw_clip_unit_key key;
+   struct brw_winsys_reloc reloc[1];
+   unsigned grf_blocks;
+
+   clip_unit_populate_key(brw, &key);
+
+   /* Same value that ends up in thread0.grf_reg_count. */
+   grf_blocks = align(key.total_grf, 16) / 16 - 1;
+
+   /* clip program relocation
+    *
+    * XXX: these reloc structs are long lived and only need to be
+    * updated when the bound BO changes. Hopefully the stuff mixed
+    * in the delta's is non-orthogonal.
+    */
+   assert(brw->clip.prog_bo);
+   make_reloc(&reloc[0],
+              BRW_USAGE_STATE,
+              grf_blocks << 1,
+              offsetof(struct brw_clip_unit_state, thread0),
+              brw->clip.prog_bo);
+
+   /* Reuse a cached unit state when one matches:
+    */
+   if (brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
+                        &key, sizeof(key),
+                        reloc, 1,
+                        NULL,
+                        &brw->clip.state_bo))
+      return PIPE_OK;
+
+   /* Otherwise create a new one and report the outcome:
+    */
+   return clip_unit_create_from_key(brw, &key,
+                                    reloc,
+                                    &brw->clip.state_bo);
+}
+
+const struct brw_tracked_state brw_clip_unit = { /* tracked-state atom for the clip unit state */
+ .dirty = {
+ .mesa = 0,
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_URB_FENCE), /* the curbe and urb values read by clip_unit_populate_key() */
+ .cache = CACHE_NEW_CLIP_PROG /* the program's prog_data and buffer */
+ },
+ .prepare = upload_clip_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c
new file mode 100644
index 00000000000..4cde7294ea0
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_tri.c
@@ -0,0 +1,595 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+static void release_tmps( struct brw_clip_compile *c ) /* drop every temporary at once */
+{
+ c->last_tmp = c->first_tmp; /* rewind the allocation cursor used by get_tmp() */
+}
+
+/* Assign GRF numbers to every c->reg.* field for nr_verts vertex slots and record the totals in c->prog_data. */
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+ GLuint nr_verts )
+{
+ GLuint i = 0,j; /* i: next unassigned GRF number */
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ if (c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec4_grf(i, 0);
+ i += (6 + c->key.nr_userclip + 1) / 2; /* half of (6 + nr_userclip), rounded up */
+
+ c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; /* same count as the registers skipped above */
+ }
+ else
+ c->prog_data.curb_read_length = 0;
+
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < nr_verts; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs; /* each vertex slot spans nr_regs registers */
+ }
+
+ if (c->key.nr_attrs & 1) { /* odd attribute count */
+ for (j = 0; j < 3; j++) { /* only the first three vertex slots */
+ GLuint delta = c->key.nr_attrs*16 + 32;
+
+ if (c->chipset.is_igdng)
+ delta = c->key.nr_attrs * 16 + 32 * 3;
+
+ brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); /* 0.0f at byte offset delta of vertex j */
+ }
+ }
+
+ c->reg.t = brw_vec1_grf(i, 0); /* t, loopcount, nr_verts, planemask and plane_equation share GRF i */
+ c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
+ c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
+ c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+ c->reg.plane_equation = brw_vec4_grf(i, 4);
+ i++;
+
+ c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+ c->reg.dp = brw_vec1_grf(i, 4);
+ i++;
+
+ c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ if (!c->key.nr_userclip) { /* no user planes: fixed_planes gets its register here instead of above */
+ c->reg.fixed_planes = brw_vec8_grf(i, 0);
+ i++;
+ }
+
+ if (c->key.do_unfilled) { /* dir/offset share one register, tmp0/tmp1 the next */
+ c->reg.dir = brw_vec4_grf(i, 0);
+ c->reg.offset = brw_vec4_grf(i, 4);
+ i++;
+ c->reg.tmp0 = brw_vec4_grf(i, 0);
+ c->reg.tmp1 = brw_vec4_grf(i, 4);
+ i++;
+ }
+
+ if (c->need_ff_sync) {
+ c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+ i++;
+ }
+
+ c->first_tmp = i; /* get_tmp() temporaries start after the fixed registers */
+ c->last_tmp = i;
+
+ c->prog_data.urb_read_length = c->nr_regs; /* ? */
+ c->prog_data.total_grf = i;
+}
+
+
+/* Seed inlist[0..2] with the payload vertex addresses (0 and 1 swapped for _3DPRIM_TRISTRIP_REVERSE), zero outlist, set nr_verts = 3. */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+ struct brw_instruction *is_rev;
+
+ /* Initial list of indices for incoming vertexes:
+ */
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); /* tmp0 = R0.2 & PRIM_MASK */
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
+
+ /* XXX: Is there an easier way to do this? Need to reverse every
+ * second tristrip element: Can ignore sometimes?
+ */
+ is_rev = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) ); /* reversed: entries 0 and 1 swapped */
+ brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) );
+ if (c->need_direction)
+ brw_MOV(p, c->reg.dir, brw_imm_f(-1)); /* dir starts at -1 for the reversed case */
+ }
+ is_rev = brw_ELSE(p, is_rev);
+ {
+ brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) );
+ brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) );
+ if (c->need_direction)
+ brw_MOV(p, c->reg.dir, brw_imm_f(1)); /* dir starts at +1 otherwise */
+ }
+ brw_ENDIF(p, is_rev);
+
+ brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) );
+ brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0)); /* write 0 over the register holding outlist */
+ brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
+}
+
+
+/* Flat shading: brw_clip_copy_colors on pairs (1,0),(2,0) for _3DPRIM_POLYGON, else (0,2),(1,2) - presumably (dst, src); confirm. */
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_poly;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); /* tmp0 = R0.2 & PRIM_MASK */
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_POLYGON));
+
+ is_poly = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_copy_colors(c, 1, 0); /* polygon: vertex 0 is the common second argument */
+ brw_clip_copy_colors(c, 2, 0);
+ }
+ is_poly = brw_ELSE(p, is_poly);
+ {
+ brw_clip_copy_colors(c, 0, 2); /* everything else: vertex 2 is the common second argument */
+ brw_clip_copy_colors(c, 1, 2);
+ }
+ brw_ENDIF(p, is_poly);
+}
+
+
+/* Emits the clip loop: for each set bit of planemask, clip the polygon in inlist against one plane into outlist, then copy outlist back. */
+/* Use mesa's clipping algorithms, translated to GEN4 assembly.
+ */
+void brw_clip_tri( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_indirect vtx = brw_indirect(0, 0);
+ struct brw_indirect vtxPrev = brw_indirect(1, 0);
+ struct brw_indirect vtxOut = brw_indirect(2, 0);
+ struct brw_indirect plane_ptr = brw_indirect(3, 0);
+ struct brw_indirect inlist_ptr = brw_indirect(4, 0);
+ struct brw_indirect outlist_ptr = brw_indirect(5, 0);
+ struct brw_indirect freelist_ptr = brw_indirect(6, 0);
+ struct brw_instruction *plane_loop;
+ struct brw_instruction *plane_active;
+ struct brw_instruction *vertex_loop;
+ struct brw_instruction *next_test;
+ struct brw_instruction *prev_test;
+
+ brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); /* vtxPrev starts at payload vertex 2 */
+ brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+ brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+
+ brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); /* unused vertex slots begin at vertex[3] */
+
+ plane_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* if (planemask & 1)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
+
+ plane_active = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* vtxOut = freelist_ptr++
+ */
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) );
+ brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); /* advance by one vertex slot */
+
+ if (c->key.nr_userclip)
+ brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+ else
+ brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); /* no user planes: read through deref_4b instead of deref_4f */
+
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); /* loop over the current vertex count, then restart counting */
+ brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));
+
+ vertex_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* vtx = *input_ptr;
+ */
+ brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));
+
+ /* IS_NEGATIVE(prev) */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset_hpos), c->reg.plane_equation);
+ prev_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* IS_POSITIVE(next)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
+ brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation);
+ next_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+
+ /* Coming back in.
+ */
+ brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); /* t = dpPrev / (dpPrev - dp), built in three steps */
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);
+
+ /* If (vtxOut == 0) vtxOut = vtxPrev
+ */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) ); /* NOTE(review): presumably predicated by the CMP above */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE);
+
+ /* *outlist_ptr++ = vtxOut;
+ * nr_verts++;
+ * vtxOut = 0;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+ brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ }
+ brw_ENDIF(p, next_test);
+
+ }
+ prev_test = brw_ELSE(p, prev_test);
+ {
+ /* *outlist_ptr++ = vtxPrev;
+ * nr_verts++;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+
+ /* IS_NEGATIVE(next)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation);
+ next_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* Going out of bounds. Avoid division by zero as we
+ * know dp != dpPrev from DIFFERENT_SIGNS, above.
+ */
+ brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); /* t = dp / (dp - dpPrev), built in three steps */
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);
+
+ /* If (vtxOut == 0) vtxOut = vtx
+ */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) ); /* NOTE(review): presumably predicated by the CMP above */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE);
+
+ /* *outlist_ptr++ = vtxOut;
+ * nr_verts++;
+ * vtxOut = 0;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+ brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ }
+ brw_ENDIF(p, next_test);
+ }
+ brw_ENDIF(p, prev_test);
+
+ /* vtxPrev = vtx;
+ * inlist_ptr++;
+ */
+ brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
+ brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));
+
+ /* while (--loopcount != 0)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, vertex_loop);
+
+ /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1]
+ * inlist = outlist
+ * inlist_ptr = &inlist[0]
+ * outlist_ptr = &outlist[0]
+ */
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); /* back up 2 bytes: one list entry, assuming sizeof(short) == 2 */
+ brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
+ brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); /* copy the outlist register onto the inlist register */
+ brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+ }
+ brw_ENDIF(p, plane_active);
+
+ /* plane_ptr++;
+ */
+ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+ /* nr_verts >= 3
+ */
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ c->reg.nr_verts,
+ brw_imm_ud(3));
+
+ /* && (planemask>>=1) != 0
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+ }
+ brw_WHILE(p, plane_loop);
+}
+
+
+/* Emit the vertices listed in inlist as one _3DPRIM_TRIFAN; nothing is emitted unless nr_verts - 2 > 0. */
+void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop, *if_insn;
+
+ /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+ brw_ADD(p,
+ c->reg.loopcount,
+ c->reg.nr_verts,
+ brw_imm_d(-2));
+
+ if_insn = brw_IF(p, BRW_EXECUTE_1);
+ {
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect vptr = brw_indirect(1, 0);
+
+ brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist)); /* vptr walks inlist, v0 holds the current entry */
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START)); /* first vertex carries PRIM_START */
+
+ brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); /* next 2-byte list entry */
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2)); /* middle vertices: no start/end flag */
+
+ brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+
+ brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END)); /* last vertex: PRIM_END, and arguments 3/4 flip from (1,0) to (0,1) */
+ }
+ brw_ENDIF(p, if_insn);
+}
+
+static void do_clip_tri( struct brw_clip_compile *c ) /* unconditional clip: plane setup, then the clip loop */
+{
+ brw_clip_init_planes(c);
+
+ brw_clip_tri(c);
+}
+
+/* Same as do_clip_tri(), but the emitted clip code sits inside a test for planemask != 0. */
+static void maybe_do_clip_tri( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *do_clip;
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+ do_clip = brw_IF(p, BRW_EXECUTE_1);
+ {
+ do_clip_tri(c);
+ }
+ brw_ENDIF(p, do_clip);
+}
+
+static void brw_clip_test( struct brw_clip_compile *c ) /* rebuild planemask bits 0-5 from the three vertex positions; kill the thread when one plane rejects all three */
+{
+ struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+
+ struct brw_reg v0 = get_tmp(c);
+ struct brw_reg v1 = get_tmp(c);
+ struct brw_reg v2 = get_tmp(c);
+
+ struct brw_indirect vt0 = brw_indirect(0, 0);
+ struct brw_indirect vt1 = brw_indirect(1, 0);
+ struct brw_indirect vt2 = brw_indirect(2, 0);
+
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_outside;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+ brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
+ brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
+ brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
+ brw_MOV(p, v0, deref_4f(vt0, c->offset_hpos)); /* v0..v2 = the three positions at offset_hpos */
+ brw_MOV(p, v1, deref_4f(vt1, c->offset_hpos));
+ brw_MOV(p, v2, deref_4f(vt2, c->offset_hpos));
+ brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); /* clear planemask bits 0-5 */
+
+ /* test nearz, xmin, ymin plane */
+ /* clip.xyz < -clip.w */
+ brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* All vertices are outside of a plane, rejected */
+ brw_AND(p, t, t1, t2);
+ brw_AND(p, t, t, t3);
+ brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); /* fold elements 0, 1 and 2 of t into tmp0 */
+ brw_OR(p, tmp0, tmp0, get_element(t, 2));
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+ is_outside = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_outside);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* some vertices are inside a plane, some are outside,need to clip */
+ brw_XOR(p, t, t1, t2);
+ brw_XOR(p, t1, t2, t3);
+ brw_OR(p, t, t, t1);
+ brw_AND(p, t, t, brw_imm_ud(0x1));
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 0), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); /* element 0 -> planemask bit 5 */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 1), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); /* element 1 -> planemask bit 3 */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 2), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); /* element 2 -> planemask bit 1 */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* test farz, xmax, ymax plane */
+ /* clip.xyz > clip.w */
+ brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* All vertices are outside of a plane, rejected */
+ brw_AND(p, t, t1, t2);
+ brw_AND(p, t, t, t3);
+ brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); /* same fold as in the first half */
+ brw_OR(p, tmp0, tmp0, get_element(t, 2));
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+ is_outside = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_outside);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* some vertices are inside a plane, some are outside,need to clip */
+ brw_XOR(p, t, t1, t2);
+ brw_XOR(p, t1, t2, t3);
+ brw_OR(p, t, t, t1);
+ brw_AND(p, t, t, brw_imm_ud(0x1));
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 0), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); /* element 0 -> planemask bit 4 */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 1), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); /* element 1 -> planemask bit 2 */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 2), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); /* element 2 -> planemask bit 0 */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ release_tmps(c); /* hand back all seven temporaries taken above */
+}
+
+/* Build the whole triangle clip program: register setup, optional clip test and flat shading, clipping, trifan emit, thread kill. */
+void brw_emit_tri_clip( struct brw_clip_compile *c )
+{
+ struct brw_instruction *neg_rhw;
+ struct brw_compile *p = &c->func;
+ brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); /* vertex slots requested: 3 + nr_userclip + 6 */
+ brw_clip_tri_init_vertices(c);
+ brw_clip_init_clipmask(c);
+ brw_clip_init_ff_sync(c);
+
+ /* if -ve rhw workaround bit is set,
+ do cliptest */
+ if (c->chipset.is_965) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+ brw_imm_ud(1<<20)); /* the workaround bit tested is R0.2 bit 20 */
+ neg_rhw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_test(c);
+ }
+ brw_ENDIF(p, neg_rhw);
+ }
+ /* Can't push into do_clip_tri because with polygon (or quad)
+ * flatshading, need to apply the flatshade here because we don't
+ * respect the PV when converting to trifan for emit:
+ */
+ if (c->key.do_flat_shading)
+ brw_clip_tri_flat_shade(c);
+
+ if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) ||
+ (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP))
+ do_clip_tri(c); /* these two modes always emit the clip code */
+ else
+ maybe_do_clip_tri(c); /* other modes guard it with a planemask != 0 test */
+
+ brw_clip_tri_emit_polygon(c);
+
+ /* Send an empty message to kill the thread:
+ */
+ brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c
new file mode 100644
index 00000000000..aec835b8cec
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_unfilled.c
@@ -0,0 +1,497 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+/* This is performed against the original triangles, so no indirection
+ * required:
+ * BZZZT! NOTE(review): presumably marks the claim above as wrong; brw_emit_unfilled_clip says the inlist indirection is needed. Confirm.
+ */
+static void compute_tri_direction( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg e = c->reg.tmp0;
+ struct brw_reg f = c->reg.tmp1;
+ struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset_hpos); /* positions read directly from reg.vertex[], not via inlist */
+ struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset_hpos);
+ struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset_hpos);
+
+
+ struct brw_reg v0n = get_tmp(c);
+ struct brw_reg v1n = get_tmp(c);
+ struct brw_reg v2n = get_tmp(c);
+
+ /* Convert to NDC.
+ * NOTE: We can't modify the original vertex coordinates,
+ * as it may impact further operations.
+ * So, we have to keep normalized coordinates in temp registers.
+ *
+ * TBD-KC
+ * Try to optimize unnecessary MOV's.
+ */
+ brw_MOV(p, v0n, v0);
+ brw_MOV(p, v1n, v1);
+ brw_MOV(p, v2n, v2);
+
+ brw_clip_project_position(c, v0n);
+ brw_clip_project_position(c, v1n);
+ brw_clip_project_position(c, v2n);
+
+ /* Calculate the vectors of two edges of the triangle:
+ */
+ brw_ADD(p, e, v0n, negate(v2n)); /* e = v0n - v2n */
+ brw_ADD(p, f, v1n, negate(v2n)); /* f = v1n - v2n */
+
+ /* Take their crossproduct:
+ */
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3)); /* e.yzx * f.zxy; destination is the null register */
+ brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3)); /* -(e.zxy * f.yzx), result written to e */
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); /* scale by the +1/-1 stored in dir by brw_clip_tri_init_vertices */
+}
+
+/* Kill the thread when the sign of dir element 2 selects the face whose fill mode is CLIP_CULL. */
+static void cull_direction( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+ GLuint conditional;
+
+ assert (!(c->key.fill_ccw == CLIP_CULL &&
+ c->key.fill_cw == CLIP_CULL)); /* both-culled is handled by the caller before this point */
+
+ if (c->key.fill_ccw == CLIP_CULL)
+ conditional = BRW_CONDITIONAL_GE; /* cull when dir[2] >= 0 */
+ else
+ conditional = BRW_CONDITIONAL_L; /* cull when dir[2] < 0 */
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ conditional,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, ccw);
+}
+
+
+/* Overwrite color0/color1 of the three payload vertices with bfc0/bfc1 when the dir[2] test chosen by key.copy_bfc_ccw passes. */
+static void copy_bfc( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+ GLuint conditional;
+
+ /* Do we have any colors to copy?
+ */
+ if ((c->offset_color0 == 0 || c->offset_bfc0 == 0) &&
+ (c->offset_color1 == 0 || c->offset_bfc1 == 0))
+ return;
+
+ /* In some weird degenerate cases we can end up testing the
+ * direction twice, once for culling and once for bfc copying. Oh
+ * well, that's what you get for setting weird GL state.
+ */
+ if (c->key.copy_bfc_ccw)
+ conditional = BRW_CONDITIONAL_GE; /* copy when dir[2] >= 0 */
+ else
+ conditional = BRW_CONDITIONAL_L; /* copy when dir[2] < 0 */
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ conditional,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ GLuint i;
+
+ for (i = 0; i < 3; i++) {
+ if (c->offset_color0 && c->offset_bfc0) /* primary pair: bfc0 -> color0 */
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[i], c->offset_color0),
+ byte_offset(c->reg.vertex[i], c->offset_bfc0));
+
+ if (c->offset_color1 && c->offset_bfc1) /* secondary pair: bfc1 -> color1 (previously repeated the color0 copy) */
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[i], c->offset_color1),
+ byte_offset(c->reg.vertex[i], c->offset_bfc1));
+ }
+ }
+ brw_ENDIF(p, ccw);
+}
+
+
+
+
+/*
+ GLfloat iz = 1.0 / dir.z;
+ GLfloat ac = dir.x * iz;
+ GLfloat bc = dir.y * iz;
+ offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE;
+ offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor;
+ offset *= MRD;
+*/
+static void compute_offset( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg off = c->reg.offset;
+ struct brw_reg dir = c->reg.dir;
+
+ brw_math_invert(p, get_element(off, 2), get_element(dir, 2)); /* off[2] = 1 / dir[2] (iz above) */
+ brw_MUL(p, vec2(off), dir, get_element(off, 2)); /* off[0], off[1] = dir[0] * iz, dir[1] * iz (ac, bc above) */
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ brw_abs(get_element(off, 0)),
+ brw_abs(get_element(off, 1)));
+
+ brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); /* presumably picks the larger magnitude via the CMP above (MAX2) */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor)); /* off[0] *= key.offset_factor */
+ brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units)); /* off[0] += key.offset_units; no DEPTH_SCALE/MRD factor is applied here */
+}
+
+/* For _3DPRIM_POLYGON only: store 0.0f into the edgeflag of vertex 0 / vertex 2 according to tests on R0.2 bits 8 / 9. */
+static void merge_edgeflags( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_poly;
+ struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);
+
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); /* tmp0 = R0.2 & PRIM_MASK */
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_POLYGON));
+
+ /* Get away with using reg.vertex because we know that this is not
+ * a _3DPRIM_TRISTRIP_REVERSE:
+ */
+ is_poly = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+ brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); /* test R0.2 bit 8 */
+ brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset_edgeflag), brw_imm_f(0)); /* presumably runs only when the bit is clear (EQ on the AND result) */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+ brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); /* test R0.2 bit 9 */
+ brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset_edgeflag), brw_imm_f(0)); /* same pattern, for vertex 2 */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ brw_ENDIF(p, is_poly);
+}
+
+
+/* Add the first element of c->reg.offset to the float two floats past header_position_offset in the vertex addressed by vert. */
+static void apply_one_offset( struct brw_clip_compile *c,
+ struct brw_indirect vert )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg z = deref_1f(vert, c->header_position_offset +
+ 2 * type_sz(BRW_REGISTER_TYPE_F)); /* third float of that position - presumably z */
+
+ brw_ADD(p, z, z, vec1(c->reg.offset));
+}
+
+
+
+/***********************************************************************
+ * Output clipped polygon as an unfilled primitive:
+ */
+static void emit_lines(struct brw_clip_compile *c,
+ GLboolean do_offset)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop;
+ struct brw_instruction *draw_edge;
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect v1 = brw_indirect(1, 0);
+ struct brw_indirect v0ptr = brw_indirect(2, 0);
+ struct brw_indirect v1ptr = brw_indirect(3, 0);
+
+ /* Need a separate loop for offset:
+ */
+ if (do_offset) {
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); /* next 2-byte list entry */
+
+ apply_one_offset(c, v0);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_G); /* note: this loop tests > 0, the edge loop below tests != 0 */
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+ }
+
+ /* v1ptr = &inlist[nr_verts]
+ * *v1ptr = v0
+ */
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+ brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); /* nr_verts is added twice: 2 bytes per entry */
+ brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+ brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); /* copy the first list entry to the slot after the last one */
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); /* v1 = the following list entry */
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ /* draw edge if edgeflag != 0 */
+ brw_CMP(p,
+ vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
+ deref_1f(v0, c->offset_edgeflag),
+ brw_imm_f(0));
+ draw_edge = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); /* each edge is its own two-vertex line strip */
+ brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+ }
+ brw_ENDIF(p, draw_edge);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+}
+
+
+/* Emit every inlist vertex with a non-zero edgeflag as a single-vertex _3DPRIM_POINTLIST, applying the offset first if do_offset. */
+static void emit_points(struct brw_clip_compile *c,
+ GLboolean do_offset )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop;
+ struct brw_instruction *draw_point;
+
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect v0ptr = brw_indirect(2, 0);
+
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); /* next 2-byte list entry */
+
+ /* draw if edgeflag != 0
+ */
+ brw_CMP(p,
+ vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
+ deref_1f(v0, c->offset_edgeflag),
+ brw_imm_f(0));
+ draw_point = brw_IF(p, BRW_EXECUTE_1);
+ {
+ if (do_offset)
+ apply_one_offset(c, v0); /* unlike emit_lines, only vertices that are drawn get offset */
+
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END);
+ }
+ brw_ENDIF(p, draw_point);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+}
+
+
+
+
+
+
+/* Dispatch on the fill mode; do_offset is ignored for CLIP_FILL, and CLIP_CULL must never get here (asserted). */
+static void emit_primitives( struct brw_clip_compile *c,
+ GLuint mode,
+ GLboolean do_offset )
+{
+ switch (mode) {
+ case CLIP_FILL:
+ brw_clip_tri_emit_polygon(c);
+ break;
+
+ case CLIP_LINE:
+ emit_lines(c, do_offset);
+ break;
+
+ case CLIP_POINT:
+ emit_points(c, do_offset);
+ break;
+
+ case CLIP_CULL:
+ assert(0); /* callers test for CLIP_CULL before calling */
+ break;
+ }
+}
+
+
+/* Emit with the ccw or cw fill mode; a test on dir element 2 is generated only when the modes differ and neither is CLIP_CULL. */
+static void emit_unfilled_primitives( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+
+ /* Direction culling has already been done.
+ */
+ if (c->key.fill_ccw != c->key.fill_cw &&
+ c->key.fill_ccw != CLIP_CULL &&
+ c->key.fill_cw != CLIP_CULL)
+ {
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); /* dir[2] >= 0: ccw settings */
+ }
+ ccw = brw_ELSE(p, ccw);
+ {
+ emit_primitives(c, c->key.fill_cw, c->key.offset_cw); /* dir[2] < 0: cw settings */
+ }
+ brw_ENDIF(p, ccw);
+ }
+ else if (c->key.fill_cw != CLIP_CULL) { /* single mode, no test: cw settings take precedence */
+ emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+ }
+ else if (c->key.fill_ccw != CLIP_CULL) {
+ emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+ }
+}
+
+
+
+/* Kill the thread when nr_verts < 3. */
+static void check_nr_verts( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *if_insn;
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3));
+ if_insn = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, if_insn);
+}
+
+/* Build the clip program for triangles that need culling, offset, back-face color copy or per-face fill modes. */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *do_clip;
+
+
+ c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) || /* any feature that reads c->reg.dir */
+ (c->key.fill_ccw != c->key.fill_cw) ||
+ c->key.fill_ccw == CLIP_CULL ||
+ c->key.fill_cw == CLIP_CULL ||
+ c->key.copy_bfc_cw ||
+ c->key.copy_bfc_ccw);
+
+ brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); /* vertex slots requested: 3 + nr_userclip + 6 */
+ brw_clip_tri_init_vertices(c);
+ brw_clip_init_ff_sync(c);
+
+ assert(c->offset_edgeflag);
+
+ if (c->key.fill_ccw == CLIP_CULL &&
+ c->key.fill_cw == CLIP_CULL) { /* both faces culled: the program just kills the thread */
+ brw_clip_kill_thread(c);
+ return;
+ }
+
+ merge_edgeflags(c);
+
+ /* Need to use the inlist indirection here:
+ */
+ if (c->need_direction)
+ compute_tri_direction(c);
+
+ if (c->key.fill_ccw == CLIP_CULL ||
+ c->key.fill_cw == CLIP_CULL)
+ cull_direction(c);
+
+ if (c->key.offset_ccw ||
+ c->key.offset_cw)
+ compute_offset(c);
+
+ if (c->key.copy_bfc_ccw ||
+ c->key.copy_bfc_cw)
+ copy_bfc(c);
+
+ /* Need to do this whether we clip or not:
+ */
+ if (c->key.do_flat_shading)
+ brw_clip_tri_flat_shade(c);
+
+ brw_clip_init_clipmask(c);
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); /* clip code is guarded by planemask != 0 */
+ do_clip = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_init_planes(c);
+ brw_clip_tri(c);
+ check_nr_verts(c); /* kills the thread if fewer than 3 vertices remain */
+ }
+ brw_ENDIF(p, do_clip);
+
+ emit_unfilled_primitives(c);
+ brw_clip_kill_thread(c);
+}
+
+
+
diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c
new file mode 100644
index 00000000000..97a57103105
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_util.c
@@ -0,0 +1,388 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+
+/* Hand out the register numbered c->last_tmp (built with brw_vec4_grf)
+ * as a temporary, advance last_tmp, and raise prog_data.total_grf if
+ * the new last_tmp exceeds it.
+ */
+struct brw_reg get_tmp( struct brw_clip_compile *c )
+{
+   struct brw_reg reg = brw_vec4_grf(c->last_tmp, 0);
+
+   c->last_tmp++;
+   if (c->last_tmp > c->prog_data.total_grf)
+      c->prog_data.total_grf = c->last_tmp;
+
+   return reg;
+}
+
+/* Give back a temporary obtained from get_tmp(). Only the most recently
+ * allocated register (nr == last_tmp - 1) is actually reclaimed; any
+ * other register is silently left allocated.
+ */
+static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp )
+{
+   if (tmp.nr != c->last_tmp-1)
+      return;
+
+   c->last_tmp--;
+}
+
+
+/* Pack four values into a single UD immediate: x in bits 0-7, y in
+ * bits 8-15, z in bits 16-23 and w in bits 24-31. The arguments are
+ * not masked, so each is expected to fit in eight bits.
+ */
+static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w)
+{
+   GLuint packed = x;
+
+   packed |= y << 8;
+   packed |= z << 16;
+   packed |= w << 24;
+
+   return brw_imm_ud(packed);
+}
+
+
+/* When the key has no user clip planes, load the six packed plane
+ * immediates into elements 0..5 of c->reg.fixed_planes. Nothing is
+ * emitted when user clip planes are present.
+ */
+void brw_clip_init_planes( struct brw_clip_compile *c )
+{
+   /* { x, y, z, w } arguments for make_plane_ud, in fixed_planes order. */
+   static const GLuint plane[6][4] = {
+      {    0,    0, 0xff, 1 },
+      {    0,    0,    1, 1 },
+      {    0, 0xff,    0, 1 },
+      {    0,    1,    0, 1 },
+      { 0xff,    0,    0, 1 },
+      {    1,    0,    0, 1 },
+   };
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   if (c->key.nr_userclip)
+      return;
+
+   for (i = 0; i < 6; i++) {
+      brw_MOV(p,
+              get_element_ud(c->reg.fixed_planes, i),
+              make_plane_ud(plane[i][0], plane[i][1], plane[i][2], plane[i][3]));
+   }
+}
+
+
+
+#define W 3
+
+/* Project 'pos' to screen space (or back again) in place: the W
+ * element is replaced by its inverse (rhw), then X, Y and Z are
+ * multiplied by that value.
+ */
+void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
+{
+   struct brw_compile *func = &c->func;
+   struct brw_reg w_elem = get_element(pos, W);
+
+   /* pos.w = rhw */
+   brw_math_invert(func, w_elem, w_elem);
+
+   /* pos.xyz *= pos.w; the access mode is switched to ALIGN_16 around
+    * the multiply and put back to ALIGN_1 afterwards.
+    */
+   brw_set_access_mode(func, BRW_ALIGN_16);
+   brw_MUL(func,
+           brw_writemask(pos, BRW_WRITEMASK_XYZ),
+           pos,
+           brw_swizzle1(pos, W));
+   brw_set_access_mode(func, BRW_ALIGN_1);
+}
+
+
+/* Rebuild the projected position of the vertex at 'vert_addr': read
+ * the value stored at c->offset_hpos, project it with
+ * brw_clip_project_position(), and store the result at
+ * c->header_position_offset in the same vertex.
+ */
+static void brw_clip_project_vertex( struct brw_clip_compile *c,
+                                     struct brw_indirect vert_addr )
+{
+   struct brw_compile *func = &c->func;
+   struct brw_reg pos = get_tmp(c);
+
+   brw_MOV(func, pos, deref_4f(vert_addr, c->offset_hpos));
+   brw_clip_project_position(c, pos);
+   brw_MOV(func, deref_4f(vert_addr, c->header_position_offset), pos);
+
+   release_tmp(c, pos);
+}
+
+
+
+
+/* Byte offset of attribute slot 'i' inside a vertex: 16 bytes per
+ * slot, starting after 32 bytes, or after 32 * 3 bytes when
+ * chipset.is_igdng is set.
+ */
+static GLuint interp_attr_offset( const struct brw_clip_compile *c, GLuint i )
+{
+   if (c->chipset.is_igdng)
+      return i * 16 + 32 * 3;
+
+   return i * 16 + 32;
+}
+
+
+/* Build a new vertex at 'dest_ptr' by interpolating between the
+ * vertices at 'v0_ptr' (from) and 'v1_ptr' (to) with weight 't0'.
+ *
+ * The vertex header is copied from v0. Every attribute slot is
+ * interpolated as attr0 + t*attr1 - t*attr0, except the slot at
+ * c->offset_edgeflag, which is either forced to 1.0 (force_edgeflag)
+ * or copied from v0. Finally the projected position in the new vertex
+ * is recreated with brw_clip_project_vertex().
+ */
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+                             struct brw_indirect dest_ptr,
+                             struct brw_indirect v0_ptr, /* from */
+                             struct brw_indirect v1_ptr, /* to */
+                             struct brw_reg t0,
+                             GLboolean force_edgeflag)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg scaled = get_tmp(c);
+   GLuint attr;
+
+   /* Header is copied verbatim from v0. After the CLIP stage only the
+    * first 256 bits of the VUE are read back on IGDNG, so the copy
+    * length does not need to change there.
+    */
+   brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
+
+   /* One attribute slot per iteration (could be done in pairs?) */
+   for (attr = 0; attr < c->key.nr_attrs; attr++) {
+      const GLuint delta = interp_attr_offset(c, attr);
+
+      if (delta != c->offset_edgeflag) {
+         /* New = attr0 + t*attr1 - t*attr0 */
+         brw_MUL(p,
+                 vec4(brw_null_reg()),
+                 deref_4f(v1_ptr, delta),
+                 t0);
+
+         brw_MAC(p,
+                 scaled,
+                 negate(deref_4f(v0_ptr, delta)),
+                 t0);
+
+         brw_ADD(p,
+                 deref_4f(dest_ptr, delta),
+                 deref_4f(v0_ptr, delta),
+                 scaled);
+         continue;
+      }
+
+      /* Edgeflag slot: never interpolated. */
+      if (force_edgeflag)
+         brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
+      else
+         brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
+   }
+
+   /* With an odd attribute count, zero the slot that follows the last
+    * attribute.
+    */
+   if (attr & 1)
+      brw_MOV(p, deref_4f(dest_ptr, interp_attr_offset(c, attr)), brw_imm_f(0));
+
+   release_tmp(c, scaled);
+
+   /* Recreate the projected (NDC) coordinate in the new vertex header. */
+   brw_clip_project_vertex(c, dest_ptr );
+}
+
+
+
+
+#define MAX_MRF 16
+
+/* Emit the instructions that write one vertex to the URB.
+ *
+ * vert: indirect address of the vertex copied into the message regs
+ * allocate: forwarded to brw_urb_WRITE; when set, R0 is the destination
+ * and a response length of 1 is requested
+ * eot: forwarded as the eot argument; must not be combined with
+ * allocate (asserted)
+ * header: value written to dword 2 of R0 before the write
+ */
+void brw_clip_emit_vue(struct brw_clip_compile *c,
+ struct brw_indirect vert,
+ GLboolean allocate,
+ GLboolean eot,
+ GLuint header)
+{
+ struct brw_compile *p = &c->func;
+ GLuint start = c->last_mrf;
+
+ brw_clip_ff_sync(c);
+
+ assert(!(allocate && eot));
+
+ /* Cycle through mrf regs - probably futile as we have to wait for
+ * the allocation response anyway. Also, the order this function
+ * is invoked doesn't correspond to the order the instructions will
+ * be executed, so it won't have any effect in many cases.
+ */
+#if 0
+ if (start + c->nr_regs + 1 >= MAX_MRF)
+ start = 0;
+
+ c->last_mrf = start + c->nr_regs + 1;
+#endif
+
+ /* Copy the vertex from vertn into m1..mN+1:
+ */
+ brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs);
+
+ /* Overwrite PrimType and PrimStart in the message header, for
+ * each vertex in turn:
+ */
+ brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+
+ /* Send each vertex as a separate write to the urb. This
+ * is different to the concept in brw_sf_emit.c, where
+ * subsequent writes are used to build up a single urb
+ * entry. Each of these writes instantiates a separate
+ * urb entry - (I think... what about 'allocate'?)
+ */
+ brw_urb_WRITE(p,
+ allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ start,
+ c->reg.R0,
+ allocate,
+ 1, /* used */
+ c->nr_regs + 1, /* msg length */
+ allocate ? 1 : 0, /* response_length */
+ eot, /* eot */
+ 1, /* writes_complete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+/* Emit the end-of-thread sequence: an ff_sync check followed by an
+ * empty URB write with eot set, which kills the thread and releases
+ * any allocated urb entry.
+ */
+void brw_clip_kill_thread(struct brw_clip_compile *c)
+{
+   struct brw_compile *func = &c->func;
+   struct brw_reg no_dest;
+
+   brw_clip_ff_sync(c);
+
+   no_dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UD);
+
+   brw_urb_WRITE(func,
+                 no_dest,
+                 0,
+                 c->reg.R0,
+                 0, /* allocate */
+                 0, /* used */
+                 1, /* msg len */
+                 0, /* response len */
+                 1, /* eot */
+                 1, /* writes complete */
+                 0,
+                 BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+
+/* Return brw_address() of c->reg.fixed_planes, i.e. the address of the
+ * first clip plane.
+ */
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
+{
+   struct brw_reg planes = c->reg.fixed_planes;
+
+   return brw_address(planes);
+}
+
+
+/* Return the plane stride as a UW immediate: 16 when user clip planes
+ * are in use, otherwise 4 (one packed dword per plane, as written by
+ * brw_clip_init_planes).
+ */
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c )
+{
+   return c->key.nr_userclip ? brw_imm_uw(16) : brw_imm_uw(4);
+}
+
+
+/* Copy the attribute at byte 'offset' from vertex 'from' to vertex
+ * 'to'. An offset of zero means the attribute is absent and nothing is
+ * emitted.
+ */
+static void clip_copy_attr_at( struct brw_clip_compile *c,
+                               GLuint to, GLuint from, GLuint offset )
+{
+   if (!offset)
+      return;
+
+   brw_MOV(&c->func,
+           byte_offset(c->reg.vertex[to], offset),
+           byte_offset(c->reg.vertex[from], offset));
+}
+
+
+/* Flat-shading helper: distribute the colours of the provoking vertex
+ * prior to clipping by copying color0, color1, bfc0 and bfc1 (each
+ * only if its offset is non-zero) from vertex 'from' to vertex 'to'.
+ */
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+                           GLuint to, GLuint from )
+{
+   clip_copy_attr_at(c, to, from, c->offset_color0);
+   clip_copy_attr_at(c, to, from, c->offset_color1);
+   clip_copy_attr_at(c, to, from, c->offset_bfc0);
+   clip_copy_attr_at(c, to, from, c->offset_bfc1);
+}
+
+
+
+/* Build c->reg.planemask from dword 2 of R0.
+ *
+ * The dword is shifted right by 26 so that the lowest outcode bit is
+ * rightmost. With user clip planes, bits 14..19 of the same dword are
+ * additionally moved down by 8 and OR'ed in, so that the userclip
+ * outcodes come directly after the fixed plane bits.
+ */
+void brw_clip_init_clipmask( struct brw_clip_compile *c )
+{
+   struct brw_compile *func = &c->func;
+   struct brw_reg r0_dw2 = get_element_ud(c->reg.R0, 2);
+   struct brw_reg userbits;
+
+   brw_SHR(func, c->reg.planemask, r0_dw2, brw_imm_ud(26));
+
+   if (!c->key.nr_userclip)
+      return;
+
+   userbits = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
+
+   brw_AND(func, userbits, r0_dw2, brw_imm_ud(0x3f<<14));
+   brw_SHR(func, userbits, userbits, brw_imm_ud(8));
+   brw_OR(func, c->reg.planemask, c->reg.planemask, userbits);
+
+   release_tmp(c, userbits);
+}
+
+/* Emit a guarded FF_SYNC message; does nothing unless c->need_ff_sync.
+ *
+ * The emitted sequence ANDs c->reg.ff_sync with 1 under a Z
+ * conditional modifier, then opens an IF containing an OR that sets
+ * bit 0 of c->reg.ff_sync followed by the ff_sync send itself.
+ * Predicate control is reset to NONE after the ENDIF.
+ */
+void brw_clip_ff_sync(struct brw_clip_compile *c)
+{
+   struct brw_compile *func = &c->func;
+   struct brw_instruction *not_yet_synced;
+
+   if (!c->need_ff_sync)
+      return;
+
+   brw_set_conditionalmod(func, BRW_CONDITIONAL_Z);
+   brw_AND(func, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
+
+   not_yet_synced = brw_IF(func, BRW_EXECUTE_1);
+
+   brw_OR(func, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
+   brw_ff_sync(func,
+               c->reg.R0,
+               0,
+               c->reg.R0,
+               1,
+               1, /* used */
+               1, /* msg length */
+               1, /* response length */
+               0, /* eot */
+               1, /* write complete */
+               0, /* urb offset */
+               BRW_URB_SWIZZLE_NONE);
+
+   brw_ENDIF(func, not_yet_synced);
+   brw_set_predicate_control(func, BRW_PREDICATE_NONE);
+}
+
+/* Emit a MOV that clears c->reg.ff_sync to zero; does nothing unless
+ * c->need_ff_sync is set.
+ */
+void brw_clip_init_ff_sync(struct brw_clip_compile *c)
+{
+   if (!c->need_ff_sync)
+      return;
+
+   brw_MOV(&c->func, c->reg.ff_sync, brw_imm_ud(0));
+}
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
new file mode 100644
index 00000000000..e67551882dc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -0,0 +1,154 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_context.h"
+#include "util/u_simple_list.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_draw.h"
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+#include "brw_winsys.h"
+#include "brw_screen.h"
+
+
+/* pipe_context::destroy hook.
+ *
+ * Flushes the context, frees the batchbuffer, tears down the state and
+ * draw modules and every brw_pipe_* / brw_hw_* sub-module, drops all
+ * surface and buffer-object references held directly in the context,
+ * and finally frees the context structure itself.
+ */
+static void brw_destroy_context( struct pipe_context *pipe )
+{
+   struct brw_context *brw = brw_context(pipe);
+   unsigned i;
+
+   brw_context_flush( brw );
+   brw_batchbuffer_free( brw->batch );
+   brw_destroy_state(brw);
+
+   brw_draw_cleanup( brw );
+
+   brw_pipe_blend_cleanup( brw );
+   brw_pipe_depth_stencil_cleanup( brw );
+   brw_pipe_framebuffer_cleanup( brw );
+   brw_pipe_flush_cleanup( brw );
+   brw_pipe_misc_cleanup( brw );
+   brw_pipe_query_cleanup( brw );
+   brw_pipe_rast_cleanup( brw );
+   brw_pipe_sampler_cleanup( brw );
+   brw_pipe_shader_cleanup( brw );
+   brw_pipe_vertex_cleanup( brw );
+   brw_pipe_clear_cleanup( brw );
+
+   brw_hw_cc_cleanup( brw );
+
+
+   FREE(brw->wm.compile_data);
+
+   /* Release the currently bound framebuffer surfaces. */
+   for (i = 0; i < brw->curr.fb.nr_cbufs; i++)
+      pipe_surface_reference(&brw->curr.fb.cbufs[i], NULL);
+   brw->curr.fb.nr_cbufs = 0;
+   pipe_surface_reference(&brw->curr.fb.zsbuf, NULL);
+
+   /* Drop every buffer-object reference held by the per-unit state. */
+   bo_reference(&brw->curbe.curbe_bo, NULL);
+   bo_reference(&brw->vs.prog_bo, NULL);
+   bo_reference(&brw->vs.state_bo, NULL);
+   bo_reference(&brw->vs.bind_bo, NULL);
+   bo_reference(&brw->gs.prog_bo, NULL);
+   bo_reference(&brw->gs.state_bo, NULL);
+   bo_reference(&brw->clip.prog_bo, NULL);
+   bo_reference(&brw->clip.state_bo, NULL);
+   bo_reference(&brw->clip.vp_bo, NULL);
+   bo_reference(&brw->sf.prog_bo, NULL);
+   bo_reference(&brw->sf.state_bo, NULL);
+   bo_reference(&brw->sf.vp_bo, NULL);
+
+   for (i = 0; i < Elements(brw->wm.sdc_bo); i++)
+      bo_reference(&brw->wm.sdc_bo[i], NULL);
+
+   bo_reference(&brw->wm.bind_bo, NULL);
+
+   for (i = 0; i < Elements(brw->wm.surf_bo); i++)
+      bo_reference(&brw->wm.surf_bo[i], NULL);
+
+   bo_reference(&brw->wm.sampler_bo, NULL);
+   bo_reference(&brw->wm.prog_bo, NULL);
+   bo_reference(&brw->wm.state_bo, NULL);
+
+   /* The context was obtained with CALLOC_STRUCT in brw_create_context
+    * and is not released by any of the calls above.
+    */
+   FREE(brw);
+}
+
+
+/* Create a brw_context for 'screen'.
+ *
+ * Returns the embedded pipe_context on success, or NULL if either the
+ * context or its batchbuffer cannot be allocated.
+ *
+ * The batchbuffer is allocated before any sub-module init hook runs
+ * (it only needs sws and chipset), so the failure path has nothing to
+ * unwind except the context structure itself.
+ */
+struct pipe_context *brw_create_context(struct pipe_screen *screen)
+{
+   struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
+
+   if (!brw) {
+      debug_printf("%s: failed to alloc context\n", __FUNCTION__);
+      return NULL;
+   }
+
+   brw->base.screen = screen;
+   brw->base.destroy = brw_destroy_context;
+   brw->sws = brw_screen(screen)->sws;
+   brw->chipset = brw_screen(screen)->chipset;
+
+   brw->batch = brw_batchbuffer_alloc( brw->sws, brw->chipset );
+   if (brw->batch == NULL) {
+      debug_printf("%s: failed to alloc batchbuffer\n", __FUNCTION__);
+      FREE(brw);
+      return NULL;
+   }
+
+   brw_pipe_blend_init( brw );
+   brw_pipe_depth_stencil_init( brw );
+   brw_pipe_framebuffer_init( brw );
+   brw_pipe_flush_init( brw );
+   brw_pipe_misc_init( brw );
+   brw_pipe_query_init( brw );
+   brw_pipe_rast_init( brw );
+   brw_pipe_sampler_init( brw );
+   brw_pipe_shader_init( brw );
+   brw_pipe_vertex_init( brw );
+   brw_pipe_clear_init( brw );
+
+   brw_hw_cc_init( brw );
+
+   brw_init_state( brw );
+   brw_draw_init( brw );
+
+   /* Start with every state flag dirty. */
+   brw->state.dirty.mesa = ~0;
+   brw->state.dirty.brw = ~0;
+
+   brw->flags.always_emit_state = 0;
+
+   make_empty_list(&brw->query.active_head);
+
+   return &brw->base;
+}
+
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
new file mode 100644
index 00000000000..56e78074000
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -0,0 +1,853 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRWCONTEXT_INC
+#define BRWCONTEXT_INC
+
+#include "brw_structs.h"
+#include "brw_winsys.h"
+#include "brw_reg.h"
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "tgsi/tgsi_scan.h"
+
+
+/* Glossary:
+ *
+ * URB - uniform resource buffer. A mid-sized buffer which is
+ * partitioned between the fixed function units and used for passing
+ * values (vertices, primitives, constants) between them.
+ *
+ * CURBE - constant URB entry. An urb region (entry) used to hold
+ * constant values which the fixed function units can be instructed to
+ * preload into the GRF when spawning a thread.
+ *
+ * VUE - vertex URB entry. An urb entry holding a vertex and usually
+ * a vertex header. The header contains control information and
+ * things like primitive type, Begin/end flags and clip codes.
+ *
+ * PUE - primitive URB entry. An urb entry produced by the setup (SF)
+ * unit holding rasterization and interpolation parameters.
+ *
+ * GRF - general register file. One of several register files
+ * addressable by programmed threads. The inputs (r0, payload, curbe,
+ * urb) of the thread are preloaded to this area before the thread is
+ * spawned. The registers are individually 8 dwords wide and suitable
+ * for general usage. Registers holding thread input values are not
+ * special and may be overwritten.
+ *
+ * MRF - message register file. Threads communicate (and terminate)
+ * by sending messages. Message parameters are placed in contiguous
+ * MRF registers. All program output is via these messages. URB
+ * entries are populated by sending a message to the shared URB
+ * function containing the new data, together with a control word,
+ * often an unmodified copy of R0.
+ *
+ * R0 - GRF register 0. Typically holds control information used when
+ * sending messages to other threads.
+ *
+ * EU or GEN4 EU: The name of the programmable subsystem of the
+ * i965 hardware. Threads are executed by the EU, the registers
+ * described above are part of the EU architecture.
+ *
+ * Fixed function units:
+ *
+ * CS - Command streamer. Notional first unit, little software
+ * interaction. Holds the URB entries used for constant data, ie the
+ * CURBEs.
+ *
+ * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
+ * this unit is responsible for pulling vertices out of vertex buffers
+ * in vram and injecting them into the processing pipe as VUEs. If
+ * enabled, it first passes them to a VS thread which is a good place
+ * for the driver to implement any active vertex shader.
+ *
+ * GS - Geometry Shader. This corresponds to a new DX10 concept. If
+ * enabled, incoming strips etc are passed to GS threads in individual
+ * line/triangle/point units. The GS thread may perform arbitrary
+ * computation and emit whatever primitives with whatever vertices it
+ * chooses. This makes GS an excellent place to implement GL's
+ * unfilled polygon modes, though of course it is capable of much
+ * more. Additionally, GS is used to translate away primitives not
+ * handled by later units, including Quads and Lineloops.
+ *
+ * CLIP - Clipper. Mesa's clipping algorithms are imported to run on
+ * this unit. The fixed function part performs cliptesting against
+ * the 6 fixed clipplanes and makes decisions on whether or not the
+ * incoming primitive needs to be passed to a thread for clipping.
+ * User clip planes are handled via cooperation with the VS thread.
+ *
+ * SF - Strips Fans or Setup: Triangles are prepared for
+ * rasterization. Interpolation coefficients are calculated.
+ * Flatshading and two-side lighting usually performed here.
+ *
+ * WM - Windower. Interpolation of vertex attributes performed here.
+ * Fragment shader implemented here. SIMD aspects of EU taken full
+ * advantage of, as pixels are processed in blocks of 16.
+ *
+ * CC - Color Calculator. No EU threads associated with this unit.
+ * Handles blending and (presumably) depth and stencil testing.
+ */
+
+#define BRW_MAX_CURBE (32*16)
+
+struct brw_context;
+
+/* Depth/stencil state object: carries the brw_cc0..cc3 and cc7 words
+ * precalculated for this state, plus an iz_lookup value.
+ * NOTE(review): iz_lookup is presumably combined with the field of the
+ * same name in brw_fragment_shader -- confirm against the WM code.
+ */
+struct brw_depth_stencil_state {
+ /* Precalculated hardware state:
+ */
+ struct brw_cc0 cc0;
+ struct brw_cc1 cc1;
+ struct brw_cc2 cc2;
+ struct brw_cc3 cc3;
+ struct brw_cc7 cc7;
+
+ unsigned iz_lookup;
+};
+
+
+/* Blend state object: carries the brw_cc2, cc3, cc5 and cc6 words and
+ * a brw_surf_ss0 word precalculated for this state.
+ * NOTE(review): cc2 and cc3 also appear in brw_depth_stencil_state;
+ * presumably each object fills only its own bits and the two are
+ * merged when the CC unit state is built -- confirm.
+ */
+struct brw_blend_state {
+ /* Precalculated hardware state:
+ */
+ struct brw_cc2 cc2;
+ struct brw_cc3 cc3;
+ struct brw_cc5 cc5;
+ struct brw_cc6 cc6;
+
+ struct brw_surf_ss0 ss0;
+};
+
+
+struct brw_rasterizer_state;
+
+/* Immediate values of a shader, stored as an array of 4-float vectors.
+ * NOTE(review): nr is presumably the number of vectors in data --
+ * confirm against the code that fills it.
+ */
+struct brw_immediate_data {
+ unsigned nr;
+ float (*data)[4];
+};
+
+/* Driver-side vertex shader object: the TGSI tokens and scan info,
+ * the shader's immediates, an optional constant buffer, and the
+ * 6-bit offsets of the outputs the clipper needs.
+ */
+struct brw_vertex_shader {
+ const struct tgsi_token *tokens;
+ struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */
+
+ struct tgsi_shader_info info;
+ struct brw_immediate_data immediates;
+
+ GLuint has_flow_control:1;
+ GLuint use_const_buffer:1;
+
+ /* Offsets of special vertex shader outputs required for clipping.
+ * Each field is six bits wide, so values are limited to 0..63.
+ */
+ GLuint output_hpos:6; /* not always zero? */
+ GLuint output_color0:6;
+ GLuint output_color1:6;
+ GLuint output_bfc0:6;
+ GLuint output_bfc1:6;
+ GLuint output_edgeflag:6;
+
+ unsigned id;
+};
+
+/* Description of a fragment shader's inputs: a count followed by one
+ * packed 32-bit record (3 + 5 + 24 bits) per input.
+ */
+struct brw_fs_signature {
+ GLuint nr_inputs;
+ struct {
+ GLuint interp:3; /* TGSI_INTERPOLATE_x */
+ GLuint semantic:5; /* TGSI_SEMANTIC_x */
+ GLuint semantic_index:24;
+ } input[PIPE_MAX_SHADER_INPUTS];
+};
+
+/* Number of bytes of signature 's' actually in use: everything up to
+ * the input array plus the first nr_inputs entries of it.
+ */
+#define brw_fs_signature_size(s) (offsetof(struct brw_fs_signature, input) + \
+ ((s)->nr_inputs * sizeof (s)->input[0]))
+
+
+/* Driver-side fragment shader object: the TGSI tokens and scan info,
+ * the input signature, the shader's immediates, a few flags and an
+ * optional constant buffer.
+ */
+struct brw_fragment_shader {
+ const struct tgsi_token *tokens;
+ struct tgsi_shader_info info;
+
+ struct brw_fs_signature signature;
+ struct brw_immediate_data immediates;
+
+ unsigned iz_lookup;
+ /*unsigned wm_lookup;*/
+
+ unsigned uses_depth:1;
+ unsigned has_flow_control:1;
+
+ unsigned id;
+ struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */
+ GLboolean use_const_buffer;
+};
+
+
+/* Sampler state object: the brw_ss0, ss1 and ss3 words together with
+ * a four-component float border colour.
+ * NOTE(review): there is no ss2 member here; presumably that word is
+ * produced from border_color elsewhere -- confirm.
+ */
+struct brw_sampler {
+ struct brw_ss0 ss0;
+ struct brw_ss1 ss1;
+ float border_color[4];
+ struct brw_ss3 ss3;
+};
+
+
+
+#define PIPE_NEW_DEPTH_STENCIL_ALPHA 0x1
+#define PIPE_NEW_RAST 0x2
+#define PIPE_NEW_BLEND 0x4
+#define PIPE_NEW_VIEWPORT 0x8
+#define PIPE_NEW_SAMPLERS 0x10
+#define PIPE_NEW_VERTEX_BUFFER 0x20
+#define PIPE_NEW_VERTEX_ELEMENT 0x40
+#define PIPE_NEW_FRAGMENT_SHADER 0x80
+#define PIPE_NEW_VERTEX_SHADER 0x100
+#define PIPE_NEW_FRAGMENT_CONSTANTS 0x200
+#define PIPE_NEW_VERTEX_CONSTANTS 0x400
+#define PIPE_NEW_CLIP 0x800
+#define PIPE_NEW_INDEX_BUFFER 0x1000
+#define PIPE_NEW_INDEX_RANGE 0x2000
+#define PIPE_NEW_BLEND_COLOR 0x4000
+#define PIPE_NEW_POLYGON_STIPPLE 0x8000
+#define PIPE_NEW_FRAMEBUFFER_DIMENSIONS 0x10000
+#define PIPE_NEW_DEPTH_BUFFER 0x20000
+#define PIPE_NEW_COLOR_BUFFERS 0x40000
+#define PIPE_NEW_QUERY 0x80000
+#define PIPE_NEW_SCISSOR 0x100000
+#define PIPE_NEW_BOUND_TEXTURES 0x200000
+#define PIPE_NEW_NR_CBUFS 0x400000
+#define PIPE_NEW_FRAGMENT_SIGNATURE 0x800000
+
+
+
+#define BRW_NEW_URB_FENCE 0x1
+#define BRW_NEW_FRAGMENT_PROGRAM 0x2
+#define BRW_NEW_VERTEX_PROGRAM 0x4
+#define BRW_NEW_INPUT_DIMENSIONS 0x8
+#define BRW_NEW_CURBE_OFFSETS 0x10
+#define BRW_NEW_REDUCED_PRIMITIVE 0x20
+#define BRW_NEW_PRIMITIVE 0x40
+#define BRW_NEW_CONTEXT 0x80
+#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100
+#define BRW_NEW_PSP 0x800
+#define BRW_NEW_WM_SURFACES 0x1000
+#define BRW_NEW_xxx 0x2000 /* was FENCE */
+#define BRW_NEW_INDICES 0x4000
+
+/**
+ * Used for any batch entry with a relocated pointer that will be used
+ * by any 3D rendering. Need to re-emit these fresh in each
+ * batchbuffer as the referenced buffers may be relocated in the
+ * meantime.
+ */
+#define BRW_NEW_BATCH 0x10000
+#define BRW_NEW_NR_WM_SURFACES 0x40000
+#define BRW_NEW_NR_VS_SURFACES 0x80000
+#define BRW_NEW_INDEX_BUFFER 0x100000
+
+/* Three independent sets of dirty bits. brw_create_context starts a
+ * new context with the mesa and brw sets fully dirty (~0).
+ */
+struct brw_state_flags {
+ /** State update flags signalled by mesa internals */
+ GLuint mesa;
+ /**
+ * State update flags signalled as the result of brw_tracked_state updates
+ */
+ GLuint brw;
+ /** State update flags signalled by brw_state_cache.c searches */
+ GLuint cache;
+};
+
+
+
+/* Data about a particular attempt to compile a program. Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+struct brw_wm_prog_data {
+ GLuint curb_read_length;
+ GLuint urb_read_length;
+
+ GLuint first_curbe_grf;
+ GLuint total_grf;
+ GLuint total_scratch;
+
+ GLuint nr_params; /**< number of float params/constants */
+ GLboolean error;
+
+ /* Pointer to tracked values (only valid once
+ * _mesa_load_state_parameters has been called at runtime).
+ * The array has room for BRW_MAX_CURBE entries.
+ */
+ const GLfloat *param[BRW_MAX_CURBE];
+};
+
+/* Data recorded for a compiled SF program. */
+struct brw_sf_prog_data {
+ GLuint urb_read_length;
+ GLuint total_grf;
+
+ /* Each vertex may have up to 12 attributes, 4 components each,
+ * except WPOS which requires only 2. (11*4 + 2) == 46, which
+ * needs 12 rows of 4 components.
+ *
+ * We use 4 components for every attribute anyway, so call it 12
+ * rows.
+ */
+ GLuint urb_entry_size;
+};
+
+
+struct brw_clip_prog_data;
+
+/* Data recorded for a compiled GS program: the same urb_read_length
+ * and total_grf fields that brw_sf_prog_data carries.
+ */
+struct brw_gs_prog_data {
+ GLuint urb_read_length;
+ GLuint total_grf;
+};
+
+/* Data recorded for a compiled VS program. */
+struct brw_vs_prog_data {
+ GLuint curb_read_length;
+ GLuint urb_read_length;
+ GLuint total_grf;
+
+ GLuint nr_outputs;
+ GLuint nr_inputs;
+
+ GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */
+
+ /* NOTE(review): presumably mirrors brw_vertex_shader.output_edgeflag
+ * -- confirm.
+ */
+ GLuint output_edgeflag;
+
+ GLboolean writes_psiz;
+
+ /* Used for calculating urb partitions:
+ */
+ GLuint urb_entry_size;
+};
+
+
+/* Per-output size table, one byte for each of PIPE_MAX_SHADER_OUTPUTS
+ * outputs.
+ *
+ * Size == 0 if output either not written, or always [0,0,0,1]
+ */
+struct brw_vs_ouput_sizes {
+ GLubyte output_size[PIPE_MAX_SHADER_OUTPUTS];
+};
+
+
+/** Number of texture sampler units */
+#define BRW_MAX_TEX_UNIT 16
+
+/** Max number of render targets in a shader */
+#define BRW_MAX_DRAW_BUFFERS 4
+
+/**
+ * Size of our surface binding table for the WM.
+ * This contains pointers to the drawing surfaces, the fragment shader
+ * constant buffer (+1) and the current texture objects.
+ */
+#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
+
+/**
+ * Helpers to convert drawing buffers, textures and constant buffers
+ * to surface binding table indexes, for WM.
+ */
+#define BTI_COLOR_BUF(d) (d)
+#define BTI_FRAGMENT_CONSTANTS (BRW_MAX_DRAW_BUFFERS)
+#define BTI_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t))
+
+/**
+ * Size of surface binding table for the VS.
+ * Only one constant buffer for now.
+ */
+#define BRW_VS_MAX_SURF 1
+
+/**
+ * Only a VS constant buffer
+ */
+#define SURF_INDEX_VERT_CONST_BUFFER 0
+
+
+/* Bit of a hack to align these with the winsys buffer_data_type enum.
+ *
+ * Each value is also used as a bit position by the CACHE_NEW_* macros
+ * (1 << id), and BRW_MAX_CACHE sizes the per-id arrays in struct
+ * brw_cache.
+ */
+enum brw_cache_id {
+ BRW_CC_VP = BRW_DATA_GS_CC_VP,
+ BRW_CC_UNIT = BRW_DATA_GS_CC_UNIT,
+ BRW_WM_PROG = BRW_DATA_GS_WM_PROG,
+ BRW_SAMPLER_DEFAULT_COLOR = BRW_DATA_GS_SAMPLER_DEFAULT_COLOR,
+ BRW_SAMPLER = BRW_DATA_GS_SAMPLER,
+ BRW_WM_UNIT = BRW_DATA_GS_WM_UNIT,
+ BRW_SF_PROG = BRW_DATA_GS_SF_PROG,
+ BRW_SF_VP = BRW_DATA_GS_SF_VP,
+ BRW_SF_UNIT = BRW_DATA_GS_SF_UNIT,
+ BRW_VS_UNIT = BRW_DATA_GS_VS_UNIT,
+ BRW_VS_PROG = BRW_DATA_GS_VS_PROG,
+ BRW_GS_UNIT = BRW_DATA_GS_GS_UNIT,
+ BRW_GS_PROG = BRW_DATA_GS_GS_PROG,
+ BRW_CLIP_VP = BRW_DATA_GS_CLIP_VP,
+ BRW_CLIP_UNIT = BRW_DATA_GS_CLIP_UNIT,
+ BRW_CLIP_PROG = BRW_DATA_GS_CLIP_PROG,
+ BRW_SS_SURFACE = BRW_DATA_SS_SURFACE,
+ BRW_SS_SURF_BIND = BRW_DATA_SS_SURF_BIND,
+
+ BRW_MAX_CACHE
+};
+
+/* One entry of a brw_cache: the key (type, hash, key bytes and
+ * relocations) and the buffer object holding the cached data. Entries
+ * are chained through 'next'.
+ */
+struct brw_cache_item {
+ /**
+ * Effectively part of the key, cache_id identifies what kind of state
+ * buffer is involved, and also which brw->state.dirty.cache flag should
+ * be set when this cache item is chosen.
+ */
+ enum brw_cache_id cache_id;
+ /** 32-bit hash of the key data */
+ GLuint hash;
+ GLuint key_size; /* for variable-sized keys */
+ const void *key;
+ struct brw_winsys_reloc *relocs;
+ GLuint nr_relocs;
+
+ struct brw_winsys_buffer *bo;
+ GLuint data_size;
+
+ struct brw_cache_item *next;
+};
+
+
+
+/* A cache of brw_cache_item entries plus per-cache_id bookkeeping
+ * arrays indexed by enum brw_cache_id.
+ * NOTE(review): 'items' looks like a hash-bucket array of 'size'
+ * chains holding 'n_items' entries in total -- confirm against
+ * brw_state_cache.c.
+ */
+struct brw_cache {
+ struct brw_context *brw;
+ struct brw_winsys_screen *sws;
+
+ struct brw_cache_item **items;
+ GLuint size, n_items;
+
+ enum brw_buffer_type buffer_type;
+
+ GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */
+ GLuint aux_size[BRW_MAX_CACHE];
+ char *name[BRW_MAX_CACHE];
+
+
+ /* Record of the last BOs chosen for each cache_id. Used to set
+ * brw->state.dirty.cache when a new cache item is chosen.
+ */
+ struct brw_winsys_buffer *last_bo[BRW_MAX_CACHE];
+};
+
+
+/* One unit of tracked state: a set of dirty flags and two optional
+ * callbacks, each taking the context and returning an int.
+ * NOTE(review): presumably the callbacks run when any bit in 'dirty'
+ * is also set in brw->state.dirty, and a non-zero return signals
+ * failure -- confirm against the state validation code.
+ */
+struct brw_tracked_state {
+ struct brw_state_flags dirty;
+ int (*prepare)( struct brw_context *brw );
+ int (*emit)( struct brw_context *brw );
+};
+
+/* Flags for brw->state.dirty.cache.
+ */
+#define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
+#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
+#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
+#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR)
+#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
+#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
+#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
+#define CACHE_NEW_SF_VP (1<<BRW_SF_VP)
+#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT)
+#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT)
+#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG)
+#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT)
+#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG)
+#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
+#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
+#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
+#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE)
+#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND)
+
+/* Node of a singly linked list (see brw_context.cached_batch_items)
+ * recording a header pointer and a size.
+ * NOTE(review): 'sz' is presumably the byte size of the packet that
+ * 'header' points at -- confirm.
+ */
+struct brw_cached_batch_item {
+ struct header *header;
+ GLuint sz;
+ struct brw_cached_batch_item *next;
+};
+
+
+
+/* Protect against a future where PIPE_MAX_SHADER_INPUTS > 32. Wouldn't
+ * life be easier if C allowed arrays of packed elements?
+ */
+#define VS_INPUT_BITMASK_DWORDS ((PIPE_MAX_SHADER_INPUTS+31)/32)
+
+
+
+
+/* Packed per-input size information: two bits per vertex shader
+ * input, hence twice as many dwords as a one-bit-per-input mask.
+ */
+struct brw_vertex_info {
+ GLuint sizes[VS_INPUT_BITMASK_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
+};
+
+
+/* State of one query: its links in the context's active list, the
+ * buffer object and index range currently holding its data, and the
+ * running result.
+ */
+struct brw_query_object {
+ /** Doubly linked list of active query objects in the context. */
+ struct brw_query_object *prev, *next;
+
+ /** Last query BO associated with this query. */
+ struct brw_winsys_buffer *bo;
+ /** First index in bo with query data for this object. */
+ int first_index;
+ /** Last index in bo with query data for this object. */
+ int last_index;
+
+ /* Total count of pixels from previous BOs */
+ uint64_t result;
+};
+
+#define CC_RELOC_VP 0
+
+
+/**
+ * brw_context is derived from pipe_context: `base` is the first member,
+ * so a pipe_context pointer can be cast to a brw_context pointer (see
+ * the brw_context() inline helper).
+ *
+ * The struct gathers the state handed over by the state tracker (curr),
+ * the dirty flags and validated buffer list (state), the state caches,
+ * vertex/index upload bookkeeping, URB and CURBE layout, and the
+ * program/state buffers of each unit (vs, gs, clip, sf, wm, cc).
+ */
+struct brw_context
+{
+ struct pipe_context base;
+ struct brw_chipset chipset;
+
+ struct brw_winsys_screen *sws;
+
+ struct brw_batchbuffer *batch;
+
+ GLuint primitive;
+ GLuint reduced_primitive;
+
+ /* Active state from the state tracker:
+ */
+ struct {
+ struct brw_vertex_shader *vertex_shader;
+ struct brw_fragment_shader *fragment_shader;
+ const struct brw_blend_state *blend;
+ const struct brw_rasterizer_state *rast;
+ const struct brw_depth_stencil_state *zstencil;
+
+ const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS];
+ unsigned num_samplers;
+
+ struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+ unsigned num_vertex_elements;
+ unsigned num_textures;
+ unsigned num_vertex_buffers;
+
+ struct pipe_scissor_state scissor;
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state fb;
+ struct pipe_clip_state ucp;
+ struct pipe_buffer *vertex_constants;
+ struct pipe_buffer *fragment_constants;
+
+ struct brw_blend_constant_color bcc;
+ struct brw_polygon_stipple bps;
+ struct brw_cc_viewport ccv;
+
+ /**
+ * Index buffer for this draw_prims call.
+ *
+ * Updates are signaled by PIPE_NEW_INDEX_BUFFER.
+ */
+ struct pipe_buffer *index_buffer;
+ unsigned index_size;
+
+ /* Updates are signaled by PIPE_NEW_INDEX_RANGE:
+ */
+ unsigned min_index;
+ unsigned max_index;
+
+ } curr;
+
+ struct {
+ struct brw_state_flags dirty;
+
+ /**
+ * List of buffers accumulated in brw_validate_state to receive
+ * dri_bo_check_aperture treatment before exec, so we can know if we
+ * should flush the batch and try again before emitting primitives.
+ *
+ * This can be a fixed number as we only have a limited number of
+ * objects referenced from the batchbuffer in a primitive emit,
+ * consisting of the vertex buffers, pipelined state pointers,
+ * the CURBE, the depth buffer, and a query BO.
+ */
+ struct brw_winsys_buffer *validated_bos[PIPE_MAX_SHADER_INPUTS + 16];
+ int validated_bo_count;
+ } state;
+
+ struct brw_cache cache; /**< non-surface items */
+ struct brw_cache surface_cache; /**< surface items */
+ struct brw_cached_batch_item *cached_batch_items;
+
+ struct {
+ struct u_upload_mgr *upload_vertex;
+ struct u_upload_mgr *upload_index;
+
+ /* Information on uploaded vertex buffers:
+ */
+ struct {
+ unsigned stride; /* in bytes between successive vertices */
+ unsigned offset; /* in bytes, of first vertex in bo */
+ unsigned vertex_count; /* count of valid vertices which may be accessed */
+ struct brw_winsys_buffer *bo;
+ } vb[PIPE_MAX_ATTRIBS];
+
+ unsigned nr_vb; /* currently the same as curr.num_vertex_buffers */
+ } vb;
+
+ struct {
+ /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
+ struct brw_winsys_buffer *bo;
+ unsigned int offset;
+ unsigned int size;
+ /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
+ * avoid re-uploading the IB packet over and over if we're actually
+ * referencing the same index buffer.
+ */
+ unsigned int start_vertex_offset;
+ } ib;
+
+
+ /* BRW_NEW_URB_ALLOCATIONS:
+ */
+ struct {
+ GLuint vsize; /* vertex size plus header in urb registers */
+ GLuint csize; /* constant buffer size in urb registers */
+ GLuint sfsize; /* setup data size in urb registers */
+
+ GLboolean constrained;
+
+ GLuint nr_vs_entries;
+ GLuint nr_gs_entries;
+ GLuint nr_clip_entries;
+ GLuint nr_sf_entries;
+ GLuint nr_cs_entries;
+
+ GLuint vs_start;
+ GLuint gs_start;
+ GLuint clip_start;
+ GLuint sf_start;
+ GLuint cs_start;
+ } urb;
+
+
+ /* BRW_NEW_CURBE_OFFSETS:
+ */
+ struct {
+ GLuint wm_start; /**< pos of first wm const in CURBE buffer */
+ GLuint wm_size; /**< number of float[4] consts, multiple of 16 */
+ GLuint clip_start;
+ GLuint clip_size;
+ GLuint vs_start;
+ GLuint vs_size;
+ GLuint total_size; /**< wm_size + clip_size + vs_size; the upload buffer holds total_size * 16 floats */
+
+ struct brw_winsys_buffer *curbe_bo;
+ /** Offset within curbe_bo of space for current curbe entry */
+ GLuint curbe_offset;
+ /** Offset within curbe_bo of space for next curbe entry */
+ GLuint curbe_next_offset;
+
+ GLfloat *last_buf; /**< CPU-side copy of the most recently uploaded constants, or NULL */
+ GLuint last_bufsz; /**< size of last_buf in bytes */
+ /**
+ * Whether we should create a new bo instead of reusing the old one
+ * (if we just dispatched the batch pointing at the old one).
+ */
+ GLboolean need_new_bo;
+ } curbe;
+
+ struct {
+ struct brw_vs_prog_data *prog_data;
+
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+
+ /** Binding table of pointers to surf_bo entries */
+ struct brw_winsys_buffer *bind_bo;
+ struct brw_winsys_buffer *surf_bo[BRW_VS_MAX_SURF];
+ GLuint nr_surfaces;
+ } vs;
+
+ struct {
+ struct brw_gs_prog_data *prog_data;
+
+ GLboolean prog_active;
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+ } gs;
+
+ struct {
+ struct brw_clip_prog_data *prog_data;
+
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+ struct brw_winsys_buffer *vp_bo;
+ } clip;
+
+
+ struct {
+ struct brw_sf_prog_data *prog_data;
+
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+ struct brw_winsys_buffer *vp_bo;
+ } sf;
+
+ struct {
+ struct brw_wm_prog_data *prog_data;
+ struct brw_wm_compile *compile_data;
+
+ /** Input sizes, calculated from active vertex program.
+ * One bit per fragment program input attribute.
+ * (The field itself is currently commented out.)
+ */
+ /*GLbitfield input_size_masks[4];*/
+
+ /** Array of surface default colors (texture border color) */
+ struct brw_winsys_buffer *sdc_bo[BRW_MAX_TEX_UNIT];
+
+ GLuint render_surf;
+ GLuint nr_surfaces;
+
+ GLuint max_threads;
+ struct brw_winsys_buffer *scratch_bo;
+
+ GLuint sampler_count;
+ struct brw_winsys_buffer *sampler_bo;
+
+ /** Binding table of pointers to surf_bo entries */
+ struct brw_winsys_buffer *bind_bo;
+ struct brw_winsys_buffer *surf_bo[BRW_WM_MAX_SURF];
+
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+ } wm;
+
+
+ struct {
+ struct brw_winsys_buffer *state_bo;
+
+ struct brw_cc_unit_state cc;
+ struct brw_winsys_reloc reloc[1];
+ } cc;
+
+ struct {
+ struct brw_query_object active_head;
+ struct brw_winsys_buffer *bo;
+ int index;
+ GLboolean active;
+ int stats_wm;
+ } query;
+
+ struct {
+ unsigned always_emit_state:1;
+ unsigned always_flush_batch:1;
+ unsigned force_swtnl:1;
+ unsigned no_swtnl:1;
+ } flags;
+
+ /* Used to give every program string a unique id
+ */
+ GLuint program_id;
+};
+
+
+
+/*======================================================================
+ * brw_queryobj.c
+ */
+void brw_init_query(struct brw_context *brw);
+enum pipe_error brw_prepare_query_begin(struct brw_context *brw);
+void brw_emit_query_begin(struct brw_context *brw);
+void brw_emit_query_end(struct brw_context *brw);
+
+/*======================================================================
+ * brw_state_dump.c
+ */
+void brw_debug_batch(struct brw_context *intel);
+
+
+/*======================================================================
+ * brw_pipe_*.c
+ *
+ * One init/cleanup pair per pipe state group; each takes the context
+ * it operates on.
+ */
+void brw_pipe_blend_init( struct brw_context *brw );
+void brw_pipe_depth_stencil_init( struct brw_context *brw );
+void brw_pipe_framebuffer_init( struct brw_context *brw );
+void brw_pipe_flush_init( struct brw_context *brw );
+void brw_pipe_misc_init( struct brw_context *brw );
+void brw_pipe_query_init( struct brw_context *brw );
+void brw_pipe_rast_init( struct brw_context *brw );
+void brw_pipe_sampler_init( struct brw_context *brw );
+void brw_pipe_shader_init( struct brw_context *brw );
+void brw_pipe_vertex_init( struct brw_context *brw );
+void brw_pipe_clear_init( struct brw_context *brw );
+
+
+void brw_pipe_blend_cleanup( struct brw_context *brw );
+void brw_pipe_depth_stencil_cleanup( struct brw_context *brw );
+void brw_pipe_framebuffer_cleanup( struct brw_context *brw );
+void brw_pipe_flush_cleanup( struct brw_context *brw );
+void brw_pipe_misc_cleanup( struct brw_context *brw );
+void brw_pipe_query_cleanup( struct brw_context *brw );
+void brw_pipe_rast_cleanup( struct brw_context *brw );
+void brw_pipe_sampler_cleanup( struct brw_context *brw );
+void brw_pipe_shader_cleanup( struct brw_context *brw );
+void brw_pipe_vertex_cleanup( struct brw_context *brw );
+void brw_pipe_clear_cleanup( struct brw_context *brw );
+
+void brw_hw_cc_init( struct brw_context *brw );
+void brw_hw_cc_cleanup( struct brw_context *brw );
+
+
+
+void brw_context_flush( struct brw_context *brw );
+
+
+/*======================================================================
+ * brw_urb.c
+ */
+int brw_upload_urb_fence(struct brw_context *brw);
+
+/*======================================================================
+ * brw_curbe.c
+ *
+ * brw_upload_cs_urb_state() emits the CS_URB_STATE packet built from
+ * brw->urb.nr_cs_entries and brw->urb.csize; it returns 0.
+ */
+int brw_upload_cs_urb_state(struct brw_context *brw);
+
+
+/*======================================================================
+ * Inline conversion functions. These are better-typed than the
+ * macros used previously:
+ */
+/**
+ * Downcast a pipe_context pointer to the brw_context that contains it.
+ *
+ * This is a plain pointer cast; it relies on `base` being the first
+ * member of struct brw_context.
+ *
+ * \param ctx  pipe context embedded in a brw_context
+ * \return the same pointer, typed as struct brw_context
+ */
+static INLINE struct brw_context *
+brw_context( struct pipe_context *ctx )
+{
+   struct brw_context *brw = (struct brw_context *)ctx;
+
+   return brw;
+}
+
+
+#define BRW_IS_965(brw) ((brw)->chipset.is_965) /* chipset.is_965 flag of the context `brw` points to */
+#define BRW_IS_IGDNG(brw) ((brw)->chipset.is_igdng) /* chipset.is_igdng flag of the context `brw` points to */
+#define BRW_IS_G4X(brw) ((brw)->chipset.is_g4x) /* chipset.is_g4x flag of the context `brw` points to */
+
+
+#endif
+
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
new file mode 100644
index 00000000000..3f031577d5a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -0,0 +1,390 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+#include "brw_util.h"
+#include "brw_debug.h"
+#include "brw_screen.h"
+
+
+/**
+ * Partition the CURBE between the various users of constant values.
+ *
+ * Note that vertex and fragment shaders can now fetch constants out
+ * of constant buffers.  We no longer allocate a block of the GRF for
+ * constants, which greatly reduces the demand for space in the CURBE.
+ * Some of the comments within are dated...
+ *
+ * The layout is recomputed lazily: only when the fragment or vertex
+ * section has to grow, the clip section changes size, or the total
+ * has shrunk to less than a quarter of a layout larger than 16.
+ * The sections are laid out back to back in the order wm, clip, vs,
+ * and BRW_NEW_CURBE_OFFSETS is raised whenever the layout changes.
+ *
+ * \return always 0
+ */
+static int calculate_curbe_offsets( struct brw_context *brw )
+{
+   /* CACHE_NEW_WM_PROG */
+   const GLuint wm_regs = brw->wm.prog_data->curb_read_length;
+
+   /* BRW_NEW_VERTEX_PROGRAM */
+   const GLuint vs_regs = brw->vs.prog_data->curb_read_length;
+
+   /* PIPE_NEW_CLIP: with user clip planes enabled, the six fixed
+    * planes travel along with them; four floats per plane, rounded up
+    * to whole 16-float registers.
+    */
+   const GLuint clip_regs = brw->curr.ucp.nr
+      ? ((6 + brw->curr.ucp.nr) * 4 + 15) / 16
+      : 0;
+
+   const GLuint total_regs = wm_regs + vs_regs + clip_regs;
+
+   /* When this is > 32, want to use a true constant buffer to hold
+    * the extra constants.
+    */
+   assert(total_regs <= 32);
+
+   /* Lazy resize: keep the current layout while it still fits. */
+   if (wm_regs <= brw->curbe.wm_size &&
+       vs_regs <= brw->curbe.vs_size &&
+       clip_regs == brw->curbe.clip_size &&
+       !(total_regs < brw->curbe.total_size / 4 &&
+         brw->curbe.total_size > 16)) {
+      return 0;
+   }
+
+   /* Calculate a new layout: wm first, then clip, then vs. */
+   brw->curbe.wm_start = 0;
+   brw->curbe.wm_size = wm_regs;
+
+   brw->curbe.clip_start = wm_regs;
+   brw->curbe.clip_size = clip_regs;
+
+   brw->curbe.vs_start = wm_regs + clip_regs;
+   brw->curbe.vs_size = vs_regs;
+
+   brw->curbe.total_size = wm_regs + clip_regs + vs_regs;
+
+   if (BRW_DEBUG & DEBUG_CURBE) {
+      debug_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
+                   brw->curbe.wm_start,
+                   brw->curbe.wm_size,
+                   brw->curbe.clip_start,
+                   brw->curbe.clip_size,
+                   brw->curbe.vs_start,
+                   brw->curbe.vs_size );
+   }
+
+   brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
+
+   return 0;
+}
+
+
+const struct brw_tracked_state brw_curbe_offsets = { /* state atom that recomputes the CURBE layout; prepare hook only, no emit hook */
+ .dirty = {
+ .mesa = PIPE_NEW_CLIP, /* calculate_curbe_offsets reads curr.ucp.nr */
+ .brw = BRW_NEW_VERTEX_PROGRAM, /* ... and vs.prog_data->curb_read_length */
+ .cache = CACHE_NEW_WM_PROG /* ... and wm.prog_data->curb_read_length */
+ },
+ .prepare = calculate_curbe_offsets
+};
+
+
+
+
+/**
+ * Emit the CS_URB_STATE packet.
+ *
+ * The packet defines how many curbes live within the CS unit's URB
+ * allocation and how large each one is.  Multiple URB entries mean
+ * multiple curbes, which the fixed-function hardware uses in a
+ * double-buffering scheme to avoid a pipeline stall each time the
+ * contents of the curbe change.
+ *
+ * \return always 0
+ */
+int brw_upload_cs_urb_state(struct brw_context *brw)
+{
+   struct brw_cs_urb_state packet;
+
+   assert(brw->urb.nr_cs_entries);
+
+   /* Clear the whole struct, not just the named fields, before it is
+    * handed to BRW_CACHED_BATCH_STRUCT.
+    */
+   memset(&packet, 0, sizeof(packet));
+
+   /* It appears that this is the state packet for the CS unit, ie. the
+    * urb entries detailed here are housed in the CS range from the
+    * URB_FENCE command.
+    */
+   packet.header.opcode = CMD_CS_URB_STATE;
+   packet.header.length = sizeof(packet)/4 - 2;
+
+   /* BRW_NEW_URB_FENCE */
+   packet.bits0.urb_entry_size = brw->urb.csize - 1;
+   packet.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
+
+   BRW_CACHED_BATCH_STRUCT(brw, &packet);
+
+   return 0;
+}
+
+/**
+ * The six fixed planes, four coefficients each.
+ *
+ * prepare_curbe_buffer() copies them into the clip section of the
+ * CURBE ahead of the user clip planes.  The table is never written,
+ * so it is const.
+ */
+static const GLfloat fixed_plane[6][4] = {
+   {  0,  0, -1, 1 },
+   {  0,  0,  1, 1 },
+   {  0, -1,  0, 1 },
+   {  0,  1,  0, 1 },
+   { -1,  0,  0, 1 },
+   {  1,  0,  0, 1 }
+};
+
+/**
+ * Build a new set of CURBE constants and upload it if it changed.
+ *
+ * Too much variability to go into the cache mechanism, so instead the
+ * freshly built buffer is compared against a CPU-side copy of the last
+ * upload (brw->curbe.last_buf).
+ *
+ * A zeroed scratch buffer of total_size * 16 floats is filled, at the
+ * offsets recorded in brw->curbe, with:
+ *   - fragment shader immediates followed by the fragment constants,
+ *   - the six fixed planes followed by the user clip planes,
+ *   - vertex shader immediates followed by the vertex constants.
+ *
+ * If a curbe bo exists and the contents are byte-identical to the last
+ * upload, the scratch buffer is discarded.  Otherwise it becomes the
+ * new last_buf and is written to the next 64-byte aligned slot of the
+ * curbe bo; a new 4096-byte bo is allocated when there is none, when
+ * need_new_bo is set, or when the data no longer fits.
+ *
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if the scratch
+ *         buffer cannot be allocated, or the error code returned by the
+ *         winsys bo_alloc hook.
+ */
+static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
+{
+   struct pipe_screen *screen = brw->base.screen;
+   const GLuint sz = brw->curbe.total_size;
+   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
+   enum pipe_error ret;
+   GLfloat *buf;
+   GLuint i;
+
+   if (sz == 0) {
+      /* Nothing to upload; drop the copy of the previous upload.
+       * last_buf comes from CALLOC(), so it must go back through FREE()
+       * rather than the C library free().
+       */
+      if (brw->curbe.last_buf) {
+         FREE(brw->curbe.last_buf);
+         brw->curbe.last_buf = NULL;
+         brw->curbe.last_bufsz = 0;
+      }
+      return PIPE_OK;
+   }
+
+   buf = (GLfloat *) CALLOC(bufsz, 1);
+   if (buf == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* fragment shader constants */
+   if (brw->curbe.wm_size) {
+      const struct brw_fragment_shader *fs = brw->curr.fragment_shader;
+      GLuint offset = brw->curbe.wm_start * 16;
+      GLuint nr_immediate, nr_const;
+
+      /* Immediates come first, four floats each. */
+      nr_immediate = fs->immediates.nr;
+      if (nr_immediate) {
+         memcpy(&buf[offset],
+                fs->immediates.data,
+                nr_immediate * 4 * sizeof(float));
+
+         offset += nr_immediate * 4;
+      }
+
+      nr_const = fs->info.file_max[TGSI_FILE_CONSTANT] + 1;
+/*    nr_const = brw->wm.prog_data->nr_params; */
+      if (nr_const) {
+         /* XXX: the mapping is not checked, and neither is the size of
+          * the user's constant buffer.
+          */
+         const GLfloat *value = screen->buffer_map( screen,
+                                                    brw->curr.fragment_constants,
+                                                    PIPE_BUFFER_USAGE_CPU_READ);
+
+         memcpy(&buf[offset], value,
+                nr_const * 4 * sizeof(float));
+
+         screen->buffer_unmap( screen,
+                               brw->curr.fragment_constants );
+      }
+   }
+
+
+   /* The clipplanes are actually delivered to both CLIP and VS units.
+    * VS uses them to calculate the outcode bitmasks.
+    */
+   if (brw->curbe.clip_size) {
+      GLuint offset = brw->curbe.clip_start * 16;
+      GLuint j;
+
+      /* If any planes are going this way, send them all this way:
+       */
+      for (i = 0; i < 6; i++) {
+         buf[offset + i * 4 + 0] = fixed_plane[i][0];
+         buf[offset + i * 4 + 1] = fixed_plane[i][1];
+         buf[offset + i * 4 + 2] = fixed_plane[i][2];
+         buf[offset + i * 4 + 3] = fixed_plane[i][3];
+      }
+
+      /* Clip planes: `i` keeps counting from 6, so the user planes
+       * land directly after the fixed ones.
+       */
+      assert(brw->curr.ucp.nr <= 6);
+      for (j = 0; j < brw->curr.ucp.nr; j++) {
+         buf[offset + i * 4 + 0] = brw->curr.ucp.ucp[j][0];
+         buf[offset + i * 4 + 1] = brw->curr.ucp.ucp[j][1];
+         buf[offset + i * 4 + 2] = brw->curr.ucp.ucp[j][2];
+         buf[offset + i * 4 + 3] = brw->curr.ucp.ucp[j][3];
+         i++;
+      }
+   }
+
+   /* vertex shader constants */
+   if (brw->curbe.vs_size) {
+      GLuint offset = brw->curbe.vs_start * 16;
+      const struct brw_vertex_shader *vs = brw->curr.vertex_shader;
+      GLuint nr_immediate, nr_const;
+
+      nr_immediate = vs->immediates.nr;
+      if (nr_immediate) {
+         memcpy(&buf[offset],
+                vs->immediates.data,
+                nr_immediate * 4 * sizeof(float));
+
+         offset += nr_immediate * 4;
+      }
+
+      nr_const = vs->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      if (nr_const) {
+         /* XXX: note that constant buffers are currently *already* in
+          * buffer objects.  If we want to keep on putting them into the
+          * curbe, makes sense to treat constbuf's specially with malloc.
+          */
+         const GLfloat *value = screen->buffer_map( screen,
+                                                    brw->curr.vertex_constants,
+                                                    PIPE_BUFFER_USAGE_CPU_READ);
+
+         /* XXX: what if user's constant buffer is too small?
+          */
+         memcpy(&buf[offset], value, nr_const * 4 * sizeof(float));
+
+         screen->buffer_unmap( screen, brw->curr.vertex_constants );
+      }
+   }
+
+   if (BRW_DEBUG & DEBUG_CURBE) {
+      for (i = 0; i < sz*16; i+=4)
+         debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+                      buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
+
+      debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+                   (void *)brw->curbe.last_buf, (void *)buf,
+                   bufsz, brw->curbe.last_bufsz,
+                   brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
+   }
+
+   if (brw->curbe.curbe_bo != NULL &&
+       brw->curbe.last_buf &&
+       bufsz == brw->curbe.last_bufsz &&
+       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
+      /* constants have not changed */
+      FREE(buf);
+   }
+   else {
+      /* constants have changed: the scratch buffer becomes the new
+       * reference copy (ownership moves to brw->curbe.last_buf).
+       */
+      FREE(brw->curbe.last_buf);
+
+      brw->curbe.last_buf = buf;
+      brw->curbe.last_bufsz = bufsz;
+
+      /* Drop the current bo if a fresh one was requested or the new
+       * entry would not fit behind the previous ones.
+       */
+      if (brw->curbe.curbe_bo != NULL &&
+          (brw->curbe.need_new_bo ||
+           brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size))
+      {
+         bo_reference(&brw->curbe.curbe_bo, NULL);
+      }
+
+      if (brw->curbe.curbe_bo == NULL) {
+         /* Allocate a single page for CURBE entries for this
+          * batchbuffer.  They're generally around 64b.  We will
+          * discard the curbe buffer after the batch is flushed to
+          * avoid synchronous updates.
+          */
+         ret = brw->sws->bo_alloc(brw->sws,
+                                  BRW_BUFFER_TYPE_CURBE,
+                                  4096, 1 << 6,
+                                  &brw->curbe.curbe_bo);
+         if (ret)
+            return ret;
+
+         brw->curbe.curbe_next_offset = 0;
+      }
+
+      /* Claim the next slot and keep the following one 64-byte aligned. */
+      brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
+      brw->curbe.curbe_next_offset += bufsz;
+      brw->curbe.curbe_next_offset = align(brw->curbe.curbe_next_offset, 64);
+
+      /* Copy data to the buffer:
+       */
+      brw->sws->bo_subdata(brw->curbe.curbe_bo,
+                           BRW_DATA_CONSTANT_BUFFER,
+                           brw->curbe.curbe_offset,
+                           bufsz,
+                           buf,
+                           NULL, 0);
+   }
+
+   brw_add_validated_bo(brw, brw->curbe.curbe_bo);
+
+   /* Because this provokes an action (ie copy the constants into the
+    * URB), it shouldn't be shortcircuited if identical to the
+    * previous time - because eg. the urb destination may have
+    * changed, or the urb contents different to last time.
+    *
+    * Note that the data referred to is actually copied internally,
+    * not just used in place according to passed pointer.
+    *
+    * It appears that the CS unit takes care of using each available
+    * URB entry (Const URB Entry == CURBE) in turn, and issuing
+    * flushes as necessary when doublebuffering of CURBEs isn't
+    * possible.
+    */
+
+   return PIPE_OK;
+}
+
+/**
+ * Emit the two-dword CMD_CONST_BUFFER packet for the current CURBE.
+ *
+ * With an empty CURBE (total_size == 0) the packet carries a zero
+ * second dword.  Otherwise bit 8 is set in the header and the second
+ * dword is a relocation to curbe_bo whose delta is curbe_offset plus
+ * (total_size - 1).
+ *
+ * NOTE(review): bit 8 presumably marks the buffer as valid and the low
+ * bits of the delta presumably encode the buffer length - confirm
+ * against the hardware documentation.
+ *
+ * \return always 0
+ */
+static enum pipe_error emit_curbe_buffer(struct brw_context *brw)
+{
+   const GLuint nr_regs = brw->curbe.total_size;
+   const GLuint header = (CMD_CONST_BUFFER << 16) | (2 - 2);
+
+   BEGIN_BATCH(2, IGNORE_CLIPRECTS);
+   if (nr_regs != 0) {
+      OUT_BATCH(header | (1 << 8));
+      OUT_RELOC(brw->curbe.curbe_bo,
+                BRW_USAGE_STATE,
+                (nr_regs - 1) + brw->curbe.curbe_offset);
+   }
+   else {
+      OUT_BATCH(header);
+      OUT_BATCH(0);
+   }
+   ADVANCE_BATCH();
+
+   return 0;
+}
+
+const struct brw_tracked_state brw_curbe_buffer = { /* state atom: prepare builds/uploads the CURBE contents, emit writes CMD_CONST_BUFFER */
+ .dirty = {
+ .mesa = (PIPE_NEW_FRAGMENT_CONSTANTS |
+ PIPE_NEW_VERTEX_CONSTANTS |
+ PIPE_NEW_CLIP),
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM |
+ BRW_NEW_VERTEX_PROGRAM |
+ BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
+ BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
+ BRW_NEW_CURBE_OFFSETS | /* layout fields (wm/clip/vs start and size, total_size) are read by both hooks */
+ BRW_NEW_BATCH),
+ .cache = (CACHE_NEW_WM_PROG)
+ },
+ .prepare = prepare_curbe_buffer,
+ .emit = emit_curbe_buffer,
+};
+
diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h
new file mode 100644
index 00000000000..ae8e9254a68
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_debug.h
@@ -0,0 +1,43 @@
+#ifndef BRW_DEBUG_H
+#define BRW_DEBUG_H
+
+/* ================================================================
+ * Debugging:
+ *
+ * Bit flags to be tested against BRW_DEBUG, for example
+ * (BRW_DEBUG & DEBUG_CURBE).  Every flag occupies its own bit.
+ * The DEBUG_unusedN names appear to be placeholders that keep the
+ * remaining bit positions stable.
+ */
+
+#define DEBUG_TEXTURE 0x1
+#define DEBUG_STATE 0x2
+#define DEBUG_IOCTL 0x4
+#define DEBUG_BLIT 0x8
+#define DEBUG_CURBE 0x10
+#define DEBUG_FALLBACKS 0x20
+#define DEBUG_VERBOSE 0x40
+#define DEBUG_BATCH 0x80
+#define DEBUG_PIXEL 0x100
+#define DEBUG_WINSYS 0x200
+#define DEBUG_MIN_URB 0x400
+#define DEBUG_DISASSEM 0x800
+#define DEBUG_unused3 0x1000
+#define DEBUG_SYNC 0x2000
+#define DEBUG_PRIMS 0x4000
+#define DEBUG_VERTS 0x8000
+#define DEBUG_unused4 0x10000
+#define DEBUG_DMA 0x20000
+#define DEBUG_SANITY 0x40000
+#define DEBUG_SLEEP 0x80000
+#define DEBUG_STATS 0x100000
+#define DEBUG_unused5 0x200000
+#define DEBUG_SINGLE_THREAD 0x400000
+#define DEBUG_WM 0x800000
+#define DEBUG_URB 0x1000000
+#define DEBUG_VS 0x2000000
+
+#ifdef DEBUG
+extern int BRW_DEBUG; /* DEBUG builds: a variable defined elsewhere holds the active flags */
+#else
+#define BRW_DEBUG 0 /* non-DEBUG builds: every (BRW_DEBUG & DEBUG_x) test is a constant false */
+#endif
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
new file mode 100644
index 00000000000..e201ce4d7ce
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -0,0 +1,847 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_DEFINES_H
+#define BRW_DEFINES_H
+
+/* 3D state:
+ */
+#define _3DOP_3DSTATE_PIPELINED 0x0
+#define _3DOP_3DSTATE_NONPIPELINED 0x1
+#define _3DOP_3DCONTROL 0x2
+#define _3DOP_3DPRIMITIVE 0x3
+
+#define _3DSTATE_PIPELINED_POINTERS 0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
+#define _3DSTATE_VERTEX_BUFFERS 0x08
+#define _3DSTATE_VERTEX_ELEMENTS 0x09
+#define _3DSTATE_INDEX_BUFFER 0x0A
+#define _3DSTATE_VF_STATISTICS 0x0B
+#define _3DSTATE_DRAWING_RECTANGLE 0x00
+#define _3DSTATE_CONSTANT_COLOR 0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
+#define _3DSTATE_CHROMA_KEY 0x04
+#define _3DSTATE_DEPTH_BUFFER 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
+#define _3DSTATE_LINE_STIPPLE 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
+#define _3DCONTROL 0x00
+
+#define PIPE_CONTROL_NOWRITE 0x00
+#define PIPE_CONTROL_WRITEIMMEDIATE 0x01
+#define PIPE_CONTROL_WRITEDEPTH 0x02
+#define PIPE_CONTROL_WRITETIMESTAMP 0x03
+
+#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
+#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01
+
+#define _3DPRIM_POINTLIST 0x01
+#define _3DPRIM_LINELIST 0x02
+#define _3DPRIM_LINESTRIP 0x03
+#define _3DPRIM_TRILIST 0x04
+#define _3DPRIM_TRISTRIP 0x05
+#define _3DPRIM_TRIFAN 0x06
+#define _3DPRIM_QUADLIST 0x07
+#define _3DPRIM_QUADSTRIP 0x08
+#define _3DPRIM_LINELIST_ADJ 0x09
+#define _3DPRIM_LINESTRIP_ADJ 0x0A
+#define _3DPRIM_TRILIST_ADJ 0x0B
+#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_TRISTRIP_REVERSE 0x0D
+#define _3DPRIM_POLYGON 0x0E
+#define _3DPRIM_RECTLIST 0x0F
+#define _3DPRIM_LINELOOP 0x10
+#define _3DPRIM_POINTLIST_BF 0x11
+#define _3DPRIM_LINESTRIP_CONT 0x12
+#define _3DPRIM_LINESTRIP_BF 0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
+
+#define BRW_ANISORATIO_2 0
+#define BRW_ANISORATIO_4 1
+#define BRW_ANISORATIO_6 2
+#define BRW_ANISORATIO_8 3
+#define BRW_ANISORATIO_10 4
+#define BRW_ANISORATIO_12 5
+#define BRW_ANISORATIO_14 6
+#define BRW_ANISORATIO_16 7
+
+#define BRW_BLENDFACTOR_ONE 0x1
+#define BRW_BLENDFACTOR_SRC_COLOR 0x2
+#define BRW_BLENDFACTOR_SRC_ALPHA 0x3
+#define BRW_BLENDFACTOR_DST_ALPHA 0x4
+#define BRW_BLENDFACTOR_DST_COLOR 0x5
+#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
+#define BRW_BLENDFACTOR_CONST_COLOR 0x7
+#define BRW_BLENDFACTOR_CONST_ALPHA 0x8
+#define BRW_BLENDFACTOR_SRC1_COLOR 0x9
+#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A
+#define BRW_BLENDFACTOR_ZERO 0x11
+#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12
+#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13
+#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14
+#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15
+#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17
+#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18
+#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19
+#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
+
+#define BRW_BLENDFUNCTION_ADD 0
+#define BRW_BLENDFUNCTION_SUBTRACT 1
+#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2
+#define BRW_BLENDFUNCTION_MIN 3
+#define BRW_BLENDFUNCTION_MAX 4
+
+#define BRW_ALPHATEST_FORMAT_UNORM8 0
+#define BRW_ALPHATEST_FORMAT_FLOAT32 1
+
+#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0
+#define BRW_CHROMAKEY_REPLACE_BLACK 1
+
+#define BRW_CLIP_API_OGL 0
+#define BRW_CLIP_API_DX 1
+
+#define BRW_CLIPMODE_NORMAL 0
+#define BRW_CLIPMODE_CLIP_ALL 1
+#define BRW_CLIPMODE_CLIP_NON_REJECTED 2
+#define BRW_CLIPMODE_REJECT_ALL 3
+#define BRW_CLIPMODE_ACCEPT_ALL 4
+#define BRW_CLIPMODE_KERNEL_CLIP 5
+
+#define BRW_CLIP_NDCSPACE 0
+#define BRW_CLIP_SCREENSPACE 1
+
+#define BRW_COMPAREFUNCTION_ALWAYS 0
+#define BRW_COMPAREFUNCTION_NEVER 1
+#define BRW_COMPAREFUNCTION_LESS 2
+#define BRW_COMPAREFUNCTION_EQUAL 3
+#define BRW_COMPAREFUNCTION_LEQUAL 4
+#define BRW_COMPAREFUNCTION_GREATER 5
+#define BRW_COMPAREFUNCTION_NOTEQUAL 6
+#define BRW_COMPAREFUNCTION_GEQUAL 7
+
+#define BRW_COVERAGE_PIXELS_HALF 0
+#define BRW_COVERAGE_PIXELS_1 1
+#define BRW_COVERAGE_PIXELS_2 2
+#define BRW_COVERAGE_PIXELS_4 3
+
+#define BRW_CULLMODE_BOTH 0
+#define BRW_CULLMODE_NONE 1
+#define BRW_CULLMODE_FRONT 2
+#define BRW_CULLMODE_BACK 3
+
+#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0
+#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
+
+#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
+#define BRW_DEPTHFORMAT_D32_FLOAT 1
+#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define BRW_DEPTHFORMAT_D16_UNORM 5
+
+#define BRW_FLOATING_POINT_IEEE_754 0
+#define BRW_FLOATING_POINT_NON_IEEE_754 1
+
+#define BRW_FRONTWINDING_CW 0
+#define BRW_FRONTWINDING_CCW 1
+
+#define BRW_SPRITE_POINT_ENABLE 16
+
+#define BRW_INDEX_BYTE 0
+#define BRW_INDEX_WORD 1
+#define BRW_INDEX_DWORD 2
+
+#define BRW_LOGICOPFUNCTION_CLEAR 0
+#define BRW_LOGICOPFUNCTION_NOR 1
+#define BRW_LOGICOPFUNCTION_AND_INVERTED 2
+#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3
+#define BRW_LOGICOPFUNCTION_AND_REVERSE 4
+#define BRW_LOGICOPFUNCTION_INVERT 5
+#define BRW_LOGICOPFUNCTION_XOR 6
+#define BRW_LOGICOPFUNCTION_NAND 7
+#define BRW_LOGICOPFUNCTION_AND 8
+#define BRW_LOGICOPFUNCTION_EQUIV 9
+#define BRW_LOGICOPFUNCTION_NOOP 10
+#define BRW_LOGICOPFUNCTION_OR_INVERTED 11
+#define BRW_LOGICOPFUNCTION_COPY 12
+#define BRW_LOGICOPFUNCTION_OR_REVERSE 13
+#define BRW_LOGICOPFUNCTION_OR 14
+#define BRW_LOGICOPFUNCTION_SET 15
+
+#define BRW_MAPFILTER_NEAREST 0x0
+#define BRW_MAPFILTER_LINEAR 0x1
+#define BRW_MAPFILTER_ANISOTROPIC 0x2
+
+#define BRW_MIPFILTER_NONE 0
+#define BRW_MIPFILTER_NEAREST 1
+#define BRW_MIPFILTER_LINEAR 3
+
+#define BRW_POLYGON_FRONT_FACING 0
+#define BRW_POLYGON_BACK_FACING 1
+
+#define BRW_PREFILTER_ALWAYS 0x0
+#define BRW_PREFILTER_NEVER 0x1
+#define BRW_PREFILTER_LESS 0x2
+#define BRW_PREFILTER_EQUAL 0x3
+#define BRW_PREFILTER_LEQUAL 0x4
+#define BRW_PREFILTER_GREATER 0x5
+#define BRW_PREFILTER_NOTEQUAL 0x6
+#define BRW_PREFILTER_GEQUAL 0x7
+
+#define BRW_PROVOKING_VERTEX_0 0
+#define BRW_PROVOKING_VERTEX_1 1
+#define BRW_PROVOKING_VERTEX_2 2
+
+#define BRW_RASTRULE_UPPER_LEFT 0
+#define BRW_RASTRULE_UPPER_RIGHT 1
+/* These are listed as "Reserved, but not seen as useful"
+ * in Intel documentation (page 212, "Point Rasterization Rule",
+ * section 7.4 "SF Pipeline State Summary", of document
+ * "Intel® 965 Express Chipset Family and Intel® G35 Express
+ * Chipset Graphics Controller Programmer's Reference Manual,
+ * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+ * available at
+ * http://intellinuxgraphics.org/documentation.html
+ * at the time of this writing).
+ *
+ * These appear to be supported on at least some
+ * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT
+ * is useful when using OpenGL to render to a FBO
+ * (which has the pixel coordinate Y orientation inverted
+ * with respect to the normal OpenGL pixel coordinate system).
+ */
+#define BRW_RASTRULE_LOWER_LEFT 2
+#define BRW_RASTRULE_LOWER_RIGHT 3
+
+#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0
+#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1
+#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2
+
+#define BRW_STENCILOP_KEEP 0
+#define BRW_STENCILOP_ZERO 1
+#define BRW_STENCILOP_REPLACE 2
+#define BRW_STENCILOP_INCRSAT 3
+#define BRW_STENCILOP_DECRSAT 4
+#define BRW_STENCILOP_INCR 5
+#define BRW_STENCILOP_DECR 6
+#define BRW_STENCILOP_INVERT 7
+
+#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1
+
+#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define BRW_SURFACEFORMAT_R32G32_SINT 0x086
+#define BRW_SURFACEFORMAT_R32G32_UINT 0x087
+#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D
+#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define BRW_SURFACEFORMAT_R32_SINT 0x0D6
+#define BRW_SURFACEFORMAT_R32_UINT 0x0D7
+#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1
+#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2
+#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106
+#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107
+#define BRW_SURFACEFORMAT_R8G8_SINT 0x108
+#define BRW_SURFACEFORMAT_R8G8_UINT 0x109
+#define BRW_SURFACEFORMAT_R16_UNORM 0x10A
+#define BRW_SURFACEFORMAT_R16_SNORM 0x10B
+#define BRW_SURFACEFORMAT_R16_SINT 0x10C
+#define BRW_SURFACEFORMAT_R16_UINT 0x10D
+#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E
+#define BRW_SURFACEFORMAT_I16_UNORM 0x111
+#define BRW_SURFACEFORMAT_L16_UNORM 0x112
+#define BRW_SURFACEFORMAT_A16_UNORM 0x113
+#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114
+#define BRW_SURFACEFORMAT_I16_FLOAT 0x115
+#define BRW_SURFACEFORMAT_L16_FLOAT 0x116
+#define BRW_SURFACEFORMAT_A16_FLOAT 0x117
+#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118
+#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E
+#define BRW_SURFACEFORMAT_R16_USCALED 0x11F
+#define BRW_SURFACEFORMAT_R8_UNORM 0x140
+#define BRW_SURFACEFORMAT_R8_SNORM 0x141
+#define BRW_SURFACEFORMAT_R8_SINT 0x142
+#define BRW_SURFACEFORMAT_R8_UINT 0x143
+#define BRW_SURFACEFORMAT_A8_UNORM 0x144
+#define BRW_SURFACEFORMAT_I8_UNORM 0x145
+#define BRW_SURFACEFORMAT_L8_UNORM 0x146
+#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147
+#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148
+#define BRW_SURFACEFORMAT_R8_SSCALED 0x149
+#define BRW_SURFACEFORMAT_R8_USCALED 0x14A
+#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C
+#define BRW_SURFACEFORMAT_R1_UINT 0x181
+#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define BRW_SURFACEFORMAT_BC1_UNORM 0x186
+#define BRW_SURFACEFORMAT_BC2_UNORM 0x187
+#define BRW_SURFACEFORMAT_BC3_UNORM 0x188
+#define BRW_SURFACEFORMAT_BC4_UNORM 0x189
+#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A
+#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define BRW_SURFACEFORMAT_MONO8 0x18E
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define BRW_SURFACEFORMAT_DXT1_RGB 0x191
+#define BRW_SURFACEFORMAT_FXT1 0x192
+#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define BRW_SURFACEFORMAT_BC4_SNORM 0x199
+#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A
+#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+#define BRW_SURFACEFORMAT_INVALID 0xFFF
+
+#define BRW_SURFACERETURNFORMAT_FLOAT32 0
+#define BRW_SURFACERETURNFORMAT_S1 1
+
+#define BRW_SURFACE_1D 0
+#define BRW_SURFACE_2D 1
+#define BRW_SURFACE_3D 2
+#define BRW_SURFACE_CUBE 3
+#define BRW_SURFACE_BUFFER 4
+#define BRW_SURFACE_NULL 7
+
+#define BRW_TEXCOORDMODE_WRAP 0
+#define BRW_TEXCOORDMODE_MIRROR 1
+#define BRW_TEXCOORDMODE_CLAMP 2
+#define BRW_TEXCOORDMODE_CUBE 3
+#define BRW_TEXCOORDMODE_CLAMP_BORDER 4
+#define BRW_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define BRW_THREAD_PRIORITY_NORMAL 0
+#define BRW_THREAD_PRIORITY_HIGH 1
+
+#define BRW_TILEWALK_XMAJOR 0
+#define BRW_TILEWALK_YMAJOR 1
+
+#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0
+#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1
+
+/* Execution Unit (EU) defines
+ */
+
+#define BRW_ALIGN_1 0
+#define BRW_ALIGN_16 1
+
+#define BRW_ADDRESS_DIRECT 0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define BRW_CHANNEL_X 0
+#define BRW_CHANNEL_Y 1
+#define BRW_CHANNEL_Z 2
+#define BRW_CHANNEL_W 3
+
+#define BRW_COMPRESSION_NONE 0
+#define BRW_COMPRESSION_2NDHALF 1
+#define BRW_COMPRESSION_COMPRESSED 2
+
+#define BRW_CONDITIONAL_NONE 0
+#define BRW_CONDITIONAL_Z 1
+#define BRW_CONDITIONAL_NZ 2
+#define BRW_CONDITIONAL_EQ 1 /* Z */
+#define BRW_CONDITIONAL_NEQ 2 /* NZ */
+#define BRW_CONDITIONAL_G 3
+#define BRW_CONDITIONAL_GE 4
+#define BRW_CONDITIONAL_L 5
+#define BRW_CONDITIONAL_LE 6
+#define BRW_CONDITIONAL_R 7
+#define BRW_CONDITIONAL_O 8
+#define BRW_CONDITIONAL_U 9
+
+#define BRW_DEBUG_NONE 0
+#define BRW_DEBUG_BREAKPOINT 1
+
+#define BRW_DEPENDENCY_NORMAL 0
+#define BRW_DEPENDENCY_NOTCLEARED 1
+#define BRW_DEPENDENCY_NOTCHECKED 2
+#define BRW_DEPENDENCY_DISABLE 3
+
+#define BRW_EXECUTE_1 0
+#define BRW_EXECUTE_2 1
+#define BRW_EXECUTE_4 2
+#define BRW_EXECUTE_8 3
+#define BRW_EXECUTE_16 4
+#define BRW_EXECUTE_32 5
+
+#define BRW_HORIZONTAL_STRIDE_0 0
+#define BRW_HORIZONTAL_STRIDE_1 1
+#define BRW_HORIZONTAL_STRIDE_2 2
+#define BRW_HORIZONTAL_STRIDE_4 3
+
+#define BRW_INSTRUCTION_NORMAL 0
+#define BRW_INSTRUCTION_SATURATE 1
+
+#define BRW_MASK_ENABLE 0
+#define BRW_MASK_DISABLE 1
+
+#define BRW_OPCODE_MOV 1
+#define BRW_OPCODE_SEL 2
+#define BRW_OPCODE_NOT 4
+#define BRW_OPCODE_AND 5
+#define BRW_OPCODE_OR 6
+#define BRW_OPCODE_XOR 7
+#define BRW_OPCODE_SHR 8
+#define BRW_OPCODE_SHL 9
+#define BRW_OPCODE_RSR 10
+#define BRW_OPCODE_RSL 11
+#define BRW_OPCODE_ASR 12
+#define BRW_OPCODE_CMP 16
+#define BRW_OPCODE_CMPN 17
+#define BRW_OPCODE_JMPI 32
+#define BRW_OPCODE_IF 34
+#define BRW_OPCODE_IFF 35
+#define BRW_OPCODE_ELSE 36
+#define BRW_OPCODE_ENDIF 37
+#define BRW_OPCODE_DO 38
+#define BRW_OPCODE_WHILE 39
+#define BRW_OPCODE_BREAK 40
+#define BRW_OPCODE_CONTINUE 41
+#define BRW_OPCODE_HALT 42
+#define BRW_OPCODE_MSAVE 44
+#define BRW_OPCODE_MRESTORE 45
+#define BRW_OPCODE_PUSH 46
+#define BRW_OPCODE_POP 47
+#define BRW_OPCODE_WAIT 48
+#define BRW_OPCODE_SEND 49
+#define BRW_OPCODE_ADD 64
+#define BRW_OPCODE_MUL 65
+#define BRW_OPCODE_AVG 66
+#define BRW_OPCODE_FRC 67
+#define BRW_OPCODE_RNDU 68
+#define BRW_OPCODE_RNDD 69
+#define BRW_OPCODE_RNDE 70
+#define BRW_OPCODE_RNDZ 71
+#define BRW_OPCODE_MAC 72
+#define BRW_OPCODE_MACH 73
+#define BRW_OPCODE_LZD 74
+#define BRW_OPCODE_SAD2 80
+#define BRW_OPCODE_SADA2 81
+#define BRW_OPCODE_DP4 84
+#define BRW_OPCODE_DPH 85
+#define BRW_OPCODE_DP3 86
+#define BRW_OPCODE_DP2 87
+#define BRW_OPCODE_DPA2 88
+#define BRW_OPCODE_LINE 89
+#define BRW_OPCODE_NOP 126
+
+#define BRW_PREDICATE_NONE 0
+#define BRW_PREDICATE_NORMAL 1
+#define BRW_PREDICATE_ALIGN1_ANYV 2
+#define BRW_PREDICATE_ALIGN1_ALLV 3
+#define BRW_PREDICATE_ALIGN1_ANY2H 4
+#define BRW_PREDICATE_ALIGN1_ALL2H 5
+#define BRW_PREDICATE_ALIGN1_ANY4H 6
+#define BRW_PREDICATE_ALIGN1_ALL4H 7
+#define BRW_PREDICATE_ALIGN1_ANY8H 8
+#define BRW_PREDICATE_ALIGN1_ALL8H 9
+#define BRW_PREDICATE_ALIGN1_ANY16H 10
+#define BRW_PREDICATE_ALIGN1_ALL16H 11
+#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
+#define BRW_PREDICATE_ALIGN16_ANY4H 6
+#define BRW_PREDICATE_ALIGN16_ALL4H 7
+
+#define BRW_ARCHITECTURE_REGISTER_FILE 0
+#define BRW_GENERAL_REGISTER_FILE 1
+#define BRW_MESSAGE_REGISTER_FILE 2
+#define BRW_IMMEDIATE_VALUE 3
+
+#define BRW_REGISTER_TYPE_UD 0
+#define BRW_REGISTER_TYPE_D 1
+#define BRW_REGISTER_TYPE_UW 2
+#define BRW_REGISTER_TYPE_W 3
+#define BRW_REGISTER_TYPE_UB 4
+#define BRW_REGISTER_TYPE_B 5
+#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
+#define BRW_REGISTER_TYPE_HF 6
+#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define BRW_REGISTER_TYPE_F 7
+
+#define BRW_ARF_NULL 0x00
+#define BRW_ARF_ADDRESS 0x10
+#define BRW_ARF_ACCUMULATOR 0x20
+#define BRW_ARF_FLAG 0x30
+#define BRW_ARF_MASK 0x40
+#define BRW_ARF_MASK_STACK 0x50
+#define BRW_ARF_MASK_STACK_DEPTH 0x60
+#define BRW_ARF_STATE 0x70
+#define BRW_ARF_CONTROL 0x80
+#define BRW_ARF_NOTIFICATION_COUNT 0x90
+#define BRW_ARF_IP 0xA0
+
+#define BRW_AMASK 0
+#define BRW_IMASK 1
+#define BRW_LMASK 2
+#define BRW_CMASK 3
+
+
+
+#define BRW_THREAD_NORMAL 0
+#define BRW_THREAD_ATOMIC 1
+#define BRW_THREAD_SWITCH 2
+
+#define BRW_VERTICAL_STRIDE_0 0
+#define BRW_VERTICAL_STRIDE_1 1
+#define BRW_VERTICAL_STRIDE_2 2
+#define BRW_VERTICAL_STRIDE_4 3
+#define BRW_VERTICAL_STRIDE_8 4
+#define BRW_VERTICAL_STRIDE_16 5
+#define BRW_VERTICAL_STRIDE_32 6
+#define BRW_VERTICAL_STRIDE_64 7
+#define BRW_VERTICAL_STRIDE_128 8
+#define BRW_VERTICAL_STRIDE_256 9
+#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+
+#define BRW_WIDTH_1 0
+#define BRW_WIDTH_2 1
+#define BRW_WIDTH_4 2
+#define BRW_WIDTH_8 3
+#define BRW_WIDTH_16 4
+
+#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
+#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
+#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
+#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
+#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
+#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
+#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
+#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
+#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
+#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
+#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
+#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11
+
+#define BRW_POLYGON_FACING_FRONT 0
+#define BRW_POLYGON_FACING_BACK 1
+
+#define BRW_MESSAGE_TARGET_NULL 0
+#define BRW_MESSAGE_TARGET_MATH 1
+#define BRW_MESSAGE_TARGET_SAMPLER 2
+#define BRW_MESSAGE_TARGET_GATEWAY 3
+#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define BRW_MESSAGE_TARGET_URB 6
+#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
+
+#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
+#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3
+
+/* for IGDNG only */
+#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
+#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
+#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
+#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
+
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
+
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+
+#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
+#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
+#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
+
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
+
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
+#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+
+#define BRW_MATH_FUNCTION_INV 1
+#define BRW_MATH_FUNCTION_LOG 2
+#define BRW_MATH_FUNCTION_EXP 3
+#define BRW_MATH_FUNCTION_SQRT 4
+#define BRW_MATH_FUNCTION_RSQ 5
+#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
+#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
+#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define BRW_MATH_FUNCTION_TAN 9
+#define BRW_MATH_FUNCTION_POW 10
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define BRW_MATH_INTEGER_UNSIGNED 0
+#define BRW_MATH_INTEGER_SIGNED 1
+
+#define BRW_MATH_PRECISION_FULL 0
+#define BRW_MATH_PRECISION_PARTIAL 1
+
+#define BRW_MATH_SATURATE_NONE 0
+#define BRW_MATH_SATURATE_SATURATE 1
+
+#define BRW_MATH_DATA_VECTOR 0
+#define BRW_MATH_DATA_SCALAR 1
+
+#define BRW_URB_OPCODE_WRITE 0
+
+#define BRW_URB_SWIZZLE_NONE 0
+#define BRW_URB_SWIZZLE_INTERLEAVE 1
+#define BRW_URB_SWIZZLE_TRANSPOSE 2
+
+#define BRW_SCRATCH_SPACE_SIZE_1K 0
+#define BRW_SCRATCH_SPACE_SIZE_2K 1
+#define BRW_SCRATCH_SPACE_SIZE_4K 2
+#define BRW_SCRATCH_SPACE_SIZE_8K 3
+#define BRW_SCRATCH_SPACE_SIZE_16K 4
+#define BRW_SCRATCH_SPACE_SIZE_32K 5
+#define BRW_SCRATCH_SPACE_SIZE_64K 6
+#define BRW_SCRATCH_SPACE_SIZE_128K 7
+#define BRW_SCRATCH_SPACE_SIZE_256K 8
+#define BRW_SCRATCH_SPACE_SIZE_512K 9
+#define BRW_SCRATCH_SPACE_SIZE_1M 10
+#define BRW_SCRATCH_SPACE_SIZE_2M 11
+
+
+
+
+#define CMD_URB_FENCE 0x6000
+#define CMD_CS_URB_STATE 0x6001
+#define CMD_CONST_BUFFER 0x6002
+
+#define CMD_STATE_BASE_ADDRESS 0x6101
+#define CMD_STATE_INSN_POINTER 0x6102
+#define CMD_PIPELINE_SELECT_965 0x6104
+#define CMD_PIPELINE_SELECT_GM45 0x6904
+
+#define CMD_PIPELINED_STATE_POINTERS 0x7800
+#define CMD_BINDING_TABLE_PTRS 0x7801
+
+#define CMD_VERTEX_BUFFER 0x7808
+# define BRW_VB0_INDEX_SHIFT 27
+# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26)
+# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26)
+# define BRW_VB0_PITCH_SHIFT 0
+
+#define CMD_VERTEX_ELEMENT 0x7809
+# define BRW_VE0_INDEX_SHIFT 27
+# define BRW_VE0_FORMAT_SHIFT 16
+# define BRW_VE0_VALID (1 << 26)
+# define BRW_VE0_SRC_OFFSET_SHIFT 0
+# define BRW_VE1_COMPONENT_NOSTORE 0
+# define BRW_VE1_COMPONENT_STORE_SRC 1
+# define BRW_VE1_COMPONENT_STORE_0 2
+# define BRW_VE1_COMPONENT_STORE_1_FLT 3
+# define BRW_VE1_COMPONENT_STORE_1_INT 4
+# define BRW_VE1_COMPONENT_STORE_VID 5
+# define BRW_VE1_COMPONENT_STORE_IID 6
+# define BRW_VE1_COMPONENT_STORE_PID 7
+# define BRW_VE1_COMPONENT_0_SHIFT 28
+# define BRW_VE1_COMPONENT_1_SHIFT 24
+# define BRW_VE1_COMPONENT_2_SHIFT 20
+# define BRW_VE1_COMPONENT_3_SHIFT 16
+# define BRW_VE1_DST_OFFSET_SHIFT 0
+
+#define CMD_INDEX_BUFFER 0x780a
+#define CMD_VF_STATISTICS_965 0x780b
+#define CMD_VF_STATISTICS_GM45 0x680b
+
+#define CMD_DRAW_RECT 0x7900
+#define CMD_BLEND_CONSTANT_COLOR 0x7901
+#define CMD_CHROMA_KEY 0x7904
+#define CMD_DEPTH_BUFFER 0x7905
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+#define CMD_POLY_STIPPLE_PATTERN 0x7907
+#define CMD_LINE_STIPPLE_PATTERN 0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+#define CMD_AA_LINE_PARAMETERS 0x790a
+
+#define CMD_PIPE_CONTROL 0x7a00
+
+#define CMD_3D_PRIM 0x7b00
+
+#define CMD_MI_FLUSH 0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END 0x1
+#define R02_PRIM_START 0x2
+
+/*
+ * Per-chipset size constant (presumably the total URB size - confirm against
+ * the users of this macro): 1024 when BRW_IS_IGDNG(brw) is true, otherwise
+ * 384 when BRW_IS_G4X(brw) is true, otherwise 256.  Note that the brw
+ * argument can be expanded twice.
+ */
+#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \
+ (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c
new file mode 100644
index 00000000000..65db27248b1
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_disasm.c
@@ -0,0 +1,922 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+#include "brw_disasm.h"
+#include "brw_structs.h"
+#include "brw_reg.h"
+#include "brw_defines.h"
+
+/*
+ * Opcode description table, indexed by BRW_OPCODE_*.  Slots that are not
+ * listed keep a NULL name, which print_opcode() reports as an invalid opcode.
+ * nsrc/ndst are presumably the number of source and destination operands to
+ * print for the instruction.  Entries are listed in ascending opcode value.
+ */
+struct {
+    char *name;
+    int nsrc;
+    int ndst;
+} opcode[128] = {
+    /*                          name     nsrc ndst */
+    [BRW_OPCODE_MOV]      = { "mov",     1,   1 },
+    [BRW_OPCODE_SEL]      = { "sel",     2,   1 },
+    [BRW_OPCODE_NOT]      = { "not",     1,   1 },
+    [BRW_OPCODE_AND]      = { "and",     2,   1 },
+    [BRW_OPCODE_OR]       = { "or",      2,   1 },
+    [BRW_OPCODE_XOR]      = { "xor",     2,   1 },
+    [BRW_OPCODE_SHR]      = { "shr",     2,   1 },
+    [BRW_OPCODE_SHL]      = { "shl",     2,   1 },
+    [BRW_OPCODE_ASR]      = { "asr",     2,   1 },
+    [BRW_OPCODE_CMP]      = { "cmp",     2,   1 },
+    [BRW_OPCODE_CMPN]     = { "cmpn",    2,   1 },
+
+    /* Flow control and thread/message instructions. */
+    [BRW_OPCODE_JMPI]     = { "jmpi",    1,   0 },
+    [BRW_OPCODE_IF]       = { "if",      2,   0 },
+    /* NOTE(review): iff is the only flow-control entry with ndst == 1 (it
+     * was spelled "01" originally); confirm this is intended. */
+    [BRW_OPCODE_IFF]      = { "iff",     1,   1 },
+    [BRW_OPCODE_ELSE]     = { "else",    2,   0 },
+    [BRW_OPCODE_ENDIF]    = { "endif",   2,   0 },
+    [BRW_OPCODE_DO]       = { "do",      0,   0 },
+    [BRW_OPCODE_WHILE]    = { "while",   1,   0 },
+    [BRW_OPCODE_BREAK]    = { "break",   1,   0 },
+    [BRW_OPCODE_CONTINUE] = { "cont",    1,   0 },
+    [BRW_OPCODE_HALT]     = { "halt",    1,   0 },
+    [BRW_OPCODE_MSAVE]    = { "msave",   1,   1 },
+    [BRW_OPCODE_MRESTORE] = { "mrest",   1,   1 },
+    [BRW_OPCODE_PUSH]     = { "push",    1,   1 },
+    [BRW_OPCODE_POP]      = { "pop",     2,   0 },
+    [BRW_OPCODE_WAIT]     = { "wait",    1,   0 },
+    [BRW_OPCODE_SEND]     = { "send",    1,   1 },
+
+    /* Arithmetic. */
+    [BRW_OPCODE_ADD]      = { "add",     2,   1 },
+    [BRW_OPCODE_MUL]      = { "mul",     2,   1 },
+    [BRW_OPCODE_AVG]      = { "avg",     2,   1 },
+    [BRW_OPCODE_FRC]      = { "frc",     1,   1 },
+    [BRW_OPCODE_RNDU]     = { "rndu",    1,   1 },
+    [BRW_OPCODE_RNDD]     = { "rndd",    1,   1 },
+    [BRW_OPCODE_RNDE]     = { "rnde",    1,   1 },
+    [BRW_OPCODE_RNDZ]     = { "rndz",    1,   1 },
+    [BRW_OPCODE_MAC]      = { "mac",     2,   1 },
+    [BRW_OPCODE_MACH]     = { "mach",    2,   1 },
+    [BRW_OPCODE_LZD]      = { "lzd",     1,   1 },
+    [BRW_OPCODE_SAD2]     = { "sad2",    2,   1 },
+    [BRW_OPCODE_SADA2]    = { "sada2",   2,   1 },
+    [BRW_OPCODE_DP4]      = { "dp4",     2,   1 },
+    [BRW_OPCODE_DPH]      = { "dph",     2,   1 },
+    [BRW_OPCODE_DP3]      = { "dp3",     2,   1 },
+    [BRW_OPCODE_DP2]      = { "dp2",     2,   1 },
+    [BRW_OPCODE_LINE]     = { "line",    2,   1 },
+
+    [BRW_OPCODE_NOP]      = { "nop",     0,   0 },
+};
+
+/*
+ * Suffix printed for each conditional-modifier encoding, indexed by
+ * BRW_CONDITIONAL_*.  BRW_CONDITIONAL_EQ/_NEQ have the same values as
+ * BRW_CONDITIONAL_Z/_NZ, so one slot serves both names.  Slots that are not
+ * listed stay NULL.
+ */
+char *conditional_modifier[16] = {
+    [BRW_CONDITIONAL_NONE] = "",
+    [BRW_CONDITIONAL_EQ]   = ".e",      /* also BRW_CONDITIONAL_Z */
+    [BRW_CONDITIONAL_NEQ]  = ".ne",     /* also BRW_CONDITIONAL_NZ */
+    [BRW_CONDITIONAL_G]    = ".g",
+    [BRW_CONDITIONAL_GE]   = ".ge",
+    [BRW_CONDITIONAL_L]    = ".l",
+    [BRW_CONDITIONAL_LE]   = ".le",
+    [BRW_CONDITIONAL_R]    = ".r",
+    [BRW_CONDITIONAL_O]    = ".o",
+    [BRW_CONDITIONAL_U]    = ".u",
+};
+
+/* Source-operand prefix indexed by the negate bit: empty when 0, "-" when 1. */
+char *negate[2] = { "", "-" };
+
+/* Source-operand prefix indexed by the abs bit: empty when 0, "(abs)" when 1. */
+char *_abs[2] = { "", "(abs)" };
+
+/*
+ * Text for each vertical-stride encoding, indexed by BRW_VERTICAL_STRIDE_*.
+ * Only the encodings listed here have a name; the rest stay NULL.
+ */
+char *vert_stride[16] = {
+    [BRW_VERTICAL_STRIDE_0]               = "0",
+    [BRW_VERTICAL_STRIDE_1]               = "1",
+    [BRW_VERTICAL_STRIDE_2]               = "2",
+    [BRW_VERTICAL_STRIDE_4]               = "4",
+    [BRW_VERTICAL_STRIDE_8]               = "8",
+    [BRW_VERTICAL_STRIDE_16]              = "16",
+    [BRW_VERTICAL_STRIDE_32]              = "32",
+    [BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL] = "VxH",
+};
+
+/* Text for each region-width encoding, indexed by BRW_WIDTH_*. */
+char *width[8] = {
+    [BRW_WIDTH_1]  = "1",
+    [BRW_WIDTH_2]  = "2",
+    [BRW_WIDTH_4]  = "4",
+    [BRW_WIDTH_8]  = "8",
+    [BRW_WIDTH_16] = "16",
+};
+
+/* Text for each horizontal-stride encoding, indexed by BRW_HORIZONTAL_STRIDE_*. */
+char *horiz_stride[4] = {
+    [BRW_HORIZONTAL_STRIDE_0] = "0",
+    [BRW_HORIZONTAL_STRIDE_1] = "1",
+    [BRW_HORIZONTAL_STRIDE_2] = "2",
+    [BRW_HORIZONTAL_STRIDE_4] = "4"
+};
+
+/* Channel letter for each swizzle selector, indexed by BRW_CHANNEL_*. */
+char *chan_sel[4] = {
+    [BRW_CHANNEL_X] = "x",
+    [BRW_CHANNEL_Y] = "y",
+    [BRW_CHANNEL_Z] = "z",
+    [BRW_CHANNEL_W] = "w",
+};
+
+/* Placeholder table: every one of the 16 entries is NULL. */
+char *dest_condmod[16] = { NULL };
+
+/* Suffix indexed by the debug-control bit (BRW_DEBUG_*). */
+char *debug_ctrl[2] = {
+    [BRW_DEBUG_NONE]       = "",
+    [BRW_DEBUG_BREAKPOINT] = ".breakpoint"
+};
+
+/* Suffix indexed by the saturate bit (BRW_INSTRUCTION_*). */
+char *saturate[2] = {
+    [BRW_INSTRUCTION_NORMAL]   = "",
+    [BRW_INSTRUCTION_SATURATE] = ".sat"
+};
+
+/* Text for each execution-size encoding, indexed by BRW_EXECUTE_*. */
+char *exec_size[8] = {
+    [BRW_EXECUTE_1]  = "1",
+    [BRW_EXECUTE_2]  = "2",
+    [BRW_EXECUTE_4]  = "4",
+    [BRW_EXECUTE_8]  = "8",
+    [BRW_EXECUTE_16] = "16",
+    [BRW_EXECUTE_32] = "32"
+};
+
+/* Sign indexed by a one-bit value: "+" for 0, "-" for 1. */
+char *pred_inv[2] = { "+", "-" };
+
+/*
+ * Predicate-control suffixes for align16 access mode, indexed by
+ * BRW_PREDICATE_*.  Slot 0 (BRW_PREDICATE_NONE) is intentionally left NULL,
+ * as in the align1 table.
+ */
+char *pred_ctrl_align16[16] = {
+    [BRW_PREDICATE_NORMAL]              = "",
+    [BRW_PREDICATE_ALIGN16_REPLICATE_X] = ".x",
+    [BRW_PREDICATE_ALIGN16_REPLICATE_Y] = ".y",
+    [BRW_PREDICATE_ALIGN16_REPLICATE_Z] = ".z",
+    [BRW_PREDICATE_ALIGN16_REPLICATE_W] = ".w",
+    [BRW_PREDICATE_ALIGN16_ANY4H]       = ".any4h",
+    [BRW_PREDICATE_ALIGN16_ALL4H]       = ".all4h",
+};
+
+/* Predicate-control suffixes for align1 access mode, indexed by BRW_PREDICATE_*. */
+char *pred_ctrl_align1[16] = {
+    [BRW_PREDICATE_NORMAL]        = "",
+    [BRW_PREDICATE_ALIGN1_ANYV]   = ".anyv",
+    [BRW_PREDICATE_ALIGN1_ALLV]   = ".allv",
+    [BRW_PREDICATE_ALIGN1_ANY2H]  = ".any2h",
+    [BRW_PREDICATE_ALIGN1_ALL2H]  = ".all2h",
+    [BRW_PREDICATE_ALIGN1_ANY4H]  = ".any4h",
+    [BRW_PREDICATE_ALIGN1_ALL4H]  = ".all4h",
+    [BRW_PREDICATE_ALIGN1_ANY8H]  = ".any8h",
+    [BRW_PREDICATE_ALIGN1_ALL8H]  = ".all8h",
+    [BRW_PREDICATE_ALIGN1_ANY16H] = ".any16h",
+    [BRW_PREDICATE_ALIGN1_ALL16H] = ".all16h",
+};
+
+/*
+ * Thread-control text, indexed by BRW_THREAD_*.  BRW_THREAD_ATOMIC has no
+ * entry and therefore stays NULL.
+ */
+char *thread_ctrl[4] = {
+    [BRW_THREAD_NORMAL] = "",
+    [BRW_THREAD_SWITCH] = "switch"
+};
+
+/* Compression-control text, indexed by BRW_COMPRESSION_*. */
+char *compr_ctrl[4] = {
+    [BRW_COMPRESSION_NONE]       = "",
+    [BRW_COMPRESSION_2NDHALF]    = "sechalf",
+    [BRW_COMPRESSION_COMPRESSED] = "compr",
+};
+
+/* Dependency-control text, indexed by BRW_DEPENDENCY_*. */
+char *dep_ctrl[4] = {
+    [BRW_DEPENDENCY_NORMAL]     = "",
+    [BRW_DEPENDENCY_NOTCLEARED] = "NoDDClr",
+    [BRW_DEPENDENCY_NOTCHECKED] = "NoDDChk",
+    [BRW_DEPENDENCY_DISABLE]    = "NoDDClr,NoDDChk",
+};
+
+/* Mask-control text, indexed by BRW_MASK_*; slots 2 and 3 stay NULL. */
+char *mask_ctrl[4] = {
+    [BRW_MASK_ENABLE]  = "",
+    [BRW_MASK_DISABLE] = "nomask",
+};
+
+/* Access-mode name, indexed by BRW_ALIGN_*. */
+char *access_mode[2] = {
+    [BRW_ALIGN_1]  = "align1",
+    [BRW_ALIGN_16] = "align16",
+};
+
+/*
+ * Type suffix for register operands, indexed by BRW_REGISTER_TYPE_*.
+ * Encoding 6 has no entry, so control() reports it as invalid.
+ */
+char *reg_encoding[8] = {
+    [BRW_REGISTER_TYPE_UD] = "UD",
+    [BRW_REGISTER_TYPE_D]  = "D",
+    [BRW_REGISTER_TYPE_UW] = "UW",
+    [BRW_REGISTER_TYPE_W]  = "W",
+    [BRW_REGISTER_TYPE_UB] = "UB",
+    [BRW_REGISTER_TYPE_B]  = "B",
+    [BRW_REGISTER_TYPE_F]  = "F"
+};
+
+/*
+ * Type suffix for immediate operands, indexed by the register-type encoding
+ * (BRW_REGISTER_TYPE_*).  For immediates, encoding 5 is the packed float
+ * vector (VF) and encoding 6 the packed int vector (V); encoding 4 has no
+ * immediate form and stays NULL.
+ */
+char *imm_encoding[8] = {
+    [0] = "UD",
+    [1] = "D",
+    [2] = "UW",
+    [3] = "W",
+    [5] = "VF",     /* BRW_REGISTER_TYPE_VF */
+    [6] = "V",      /* BRW_REGISTER_TYPE_V; was a second [5], clobbering "VF" */
+    [7] = "F"
+};
+
+/* Register-file prefix, indexed by the register file encoding. */
+char *reg_file[4] = {
+    [BRW_ARCHITECTURE_REGISTER_FILE] = "A",
+    [BRW_GENERAL_REGISTER_FILE]      = "g",
+    [BRW_MESSAGE_REGISTER_FILE]      = "m",
+    [BRW_IMMEDIATE_VALUE]            = "imm",
+};
+
+/*
+ * Destination writemask suffix, indexed by the 4-bit mask (bit 0 = x,
+ * bit 1 = y, bit 2 = z, bit 3 = w).  The full mask prints nothing and the
+ * empty mask prints a bare ".".
+ */
+char *writemask[16] = {
+    ".",    ".x",   ".y",   ".xy",      /* 0x0 - 0x3 */
+    ".z",   ".xz",  ".yz",  ".xyz",     /* 0x4 - 0x7 */
+    ".w",   ".xw",  ".yw",  ".xyw",     /* 0x8 - 0xb */
+    ".zw",  ".xzw", ".yzw", "",         /* 0xc - 0xf */
+};
+
+/* Text indexed by a one-bit value: empty for 0, "EOT" for 1. */
+char *end_of_thread[2] = { "", "EOT" };
+
+/*
+ * Name of each message target, indexed by BRW_MESSAGE_TARGET_*.  Targets
+ * 8-15 have no entry and stay NULL.
+ */
+char *target_function[16] = {
+    [BRW_MESSAGE_TARGET_NULL]           = "null",
+    [BRW_MESSAGE_TARGET_MATH]           = "math",
+    [BRW_MESSAGE_TARGET_SAMPLER]        = "sampler",
+    [BRW_MESSAGE_TARGET_GATEWAY]        = "gateway",
+    [BRW_MESSAGE_TARGET_DATAPORT_READ]  = "read",
+    [BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write",
+    [BRW_MESSAGE_TARGET_URB]            = "urb",
+    [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
+};
+
+/*
+ * Name of each math function, indexed by BRW_MATH_FUNCTION_*.  Slots with no
+ * entry (0, 14, 15) stay NULL.  The quotient-only function prints as
+ * "intdiv" and the remainder-only function as "intmod"; these two labels
+ * were previously swapped.
+ */
+char *math_function[16] = {
+    [BRW_MATH_FUNCTION_INV] = "inv",
+    [BRW_MATH_FUNCTION_LOG] = "log",
+    [BRW_MATH_FUNCTION_EXP] = "exp",
+    [BRW_MATH_FUNCTION_SQRT] = "sqrt",
+    [BRW_MATH_FUNCTION_RSQ] = "rsq",
+    [BRW_MATH_FUNCTION_SIN] = "sin",
+    [BRW_MATH_FUNCTION_COS] = "cos",
+    [BRW_MATH_FUNCTION_SINCOS] = "sincos",
+    [BRW_MATH_FUNCTION_TAN] = "tan",
+    [BRW_MATH_FUNCTION_POW] = "pow",
+    [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
+    [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv",
+    [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod",
+};
+
+/* Math message saturate flag text, indexed by BRW_MATH_SATURATE_*. */
+char *math_saturate[2] = {
+    [BRW_MATH_SATURATE_NONE]     = "",
+    [BRW_MATH_SATURATE_SATURATE] = "sat"
+};
+
+/* Math message integer signedness text, indexed by BRW_MATH_INTEGER_*. */
+char *math_signed[2] = {
+    [BRW_MATH_INTEGER_UNSIGNED] = "",
+    [BRW_MATH_INTEGER_SIGNED]   = "signed"
+};
+
+/* Math message data-type text, indexed by BRW_MATH_DATA_*. */
+char *math_scalar[2] = {
+    [BRW_MATH_DATA_VECTOR] = "",
+    [BRW_MATH_DATA_SCALAR] = "scalar"
+};
+
+/* Math message precision text, indexed by BRW_MATH_PRECISION_*. */
+char *math_precision[2] = {
+    [BRW_MATH_PRECISION_FULL]    = "",
+    [BRW_MATH_PRECISION_PARTIAL] = "partial_precision"
+};
+
+/* URB swizzle-control text, indexed by BRW_URB_SWIZZLE_*; slot 3 stays NULL. */
+char *urb_swizzle[4] = {
+    [BRW_URB_SWIZZLE_NONE]       = "",
+    [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
+    [BRW_URB_SWIZZLE_TRANSPOSE]  = "transpose",
+};
+
+/* One-bit flag tables: empty string when the flag is 0, its name when 1. */
+char *urb_allocate[2] = { "", "allocate" };
+
+char *urb_used[2] = { "", "used" };
+
+char *urb_complete[2] = { "", "complete" };
+
+/*
+ * Type suffix for the sampler return format, indexed by
+ * BRW_SAMPLER_RETURN_FORMAT_*.  Encoding 1 has no entry and stays NULL.
+ */
+char *sampler_target_format[4] = {
+    [BRW_SAMPLER_RETURN_FORMAT_FLOAT32] = "F",
+    [BRW_SAMPLER_RETURN_FORMAT_UINT32]  = "UD",
+    [BRW_SAMPLER_RETURN_FORMAT_SINT32]  = "D"
+};
+
+
+/*
+ * Number of characters written on the current output line through string();
+ * newline() resets it to 0.
+ */
+static int column;
+
+/*
+ * Write a NUL-terminated string to file and advance the column counter by
+ * its length.  Always returns 0.
+ */
+static int string (FILE *file, char *string)
+{
+    column += strlen (string);
+    fputs (string, file);
+    return 0;
+}
+
+/*
+ * printf-style output helper: formats into a fixed 1024-byte buffer
+ * (longer output is truncated) and writes the result through string() so
+ * the column counter stays accurate.  Always returns 0.
+ */
+static int format (FILE *f, char *format, ...)
+{
+    char buf[1024];
+    va_list args;
+
+    va_start (args, format);
+    vsnprintf (buf, sizeof (buf) - 1, format, args);
+    /* Every va_start must be paired with va_end before returning. */
+    va_end (args);
+
+    string (f, buf);
+    return 0;
+}
+
+/* End the current output line and reset the column counter.  Returns 0. */
+static int newline (FILE *f)
+{
+    column = 0;
+    putc ('\n', f);
+    return 0;
+}
+
+/*
+ * Write spaces until the column counter reaches c.  At least one space is
+ * always written, even when the column is already at or past c.  Returns 0.
+ */
+static int pad (FILE *f, int c)
+{
+    string (f, " ");
+    while (column < c)
+        string (f, " ");
+    return 0;
+}
+
+/*
+ * Print the text for a control field.
+ *
+ *   name  - field description used in the error message
+ *   ctrl  - lookup table of strings; the caller must ensure id is in range
+ *   id    - field value used as the table index
+ *   space - optional separator state: when non-NULL and *space is set, a
+ *           blank is written before the text; *space is set to 1 once any
+ *           non-empty text has been written
+ *
+ * Returns 0 on success, or 1 (after printing an "invalid" marker) when the
+ * table has no entry for id.  The marker goes through format() so that the
+ * column counter stays in sync, and id is printed as unsigned.
+ */
+static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space)
+{
+    if (!ctrl[id]) {
+        format (file, "*** invalid %s value %u ", name, id);
+        return 1;
+    }
+
+    /* Empty strings are valid entries that simply print nothing. */
+    if (ctrl[id][0]) {
+        if (space && *space)
+            string (file, " ");
+        string (file, ctrl[id]);
+        if (space)
+            *space = 1;
+    }
+    return 0;
+}
+
+/*
+ * Print the mnemonic for opcode id.  Returns 0 on success, or 1 after
+ * printing an "invalid" marker when id is outside the opcode table or names
+ * an unpopulated slot.
+ */
+static int print_opcode (FILE *file, int id)
+{
+    const int nr_opcodes = (int) (sizeof (opcode) / sizeof (opcode[0]));
+
+    /* Range-check first so a bogus id can never index past the table. */
+    if (id < 0 || id >= nr_opcodes || !opcode[id].name) {
+        format (file, "*** invalid opcode value %d ", id);
+        return 1;
+    }
+    string (file, opcode[id].name);
+    return 0;
+}
+
+/*
+ * Print a register name.
+ *
+ * For the architecture register file the high nibble of _reg_nr selects the
+ * register class and the low nibble its index.  For every other file the
+ * file prefix from reg_file[] is printed followed by the register number.
+ *
+ * Returns -1 for the "null" and "ip" registers (the callers in this file
+ * use that to skip the sub-register/region/type suffix), 1 if the register
+ * file is invalid, and 0 otherwise.
+ */
+static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
+{
+    int err = 0;
+
+    if (_reg_file != BRW_ARCHITECTURE_REGISTER_FILE) {
+        err |= control (file, "src reg file", reg_file, _reg_file, NULL);
+        format (file, "%u", _reg_nr);
+        return err;
+    }
+
+    switch (_reg_nr & 0xf0) {
+    case BRW_ARF_NULL:
+        string (file, "null");
+        return -1;
+    case BRW_ARF_ADDRESS:
+        format (file, "a%u", _reg_nr & 0x0f);
+        break;
+    case BRW_ARF_ACCUMULATOR:
+        format (file, "acc%u", _reg_nr & 0x0f);
+        break;
+    case BRW_ARF_FLAG:
+        /* Previously fell through to the generic "ARF<n>" output. */
+        format (file, "f%u", _reg_nr & 0x0f);
+        break;
+    case BRW_ARF_MASK:
+        format (file, "mask%u", _reg_nr & 0x0f);
+        break;
+    case BRW_ARF_MASK_STACK:
+        /* NOTE(review): "msd" reads like the mask-stack-depth register,
+         * and BRW_ARF_MASK_STACK_DEPTH has no case here - confirm. */
+        format (file, "msd%u", _reg_nr & 0x0f);
+        break;
+    case BRW_ARF_STATE:
+        format (file, "sr%u", _reg_nr & 0x0f);
+        break;
+    case BRW_ARF_CONTROL:
+        format (file, "cr%u", _reg_nr & 0x0f);
+        break;
+    case BRW_ARF_NOTIFICATION_COUNT:
+        format (file, "n%u", _reg_nr & 0x0f);
+        break;
+    case BRW_ARF_IP:
+        string (file, "ip");
+        return -1;
+    default:
+        format (file, "ARF%u", _reg_nr);
+        break;
+    }
+    return err;
+}
+
+/*
+ * Print the destination operand of inst.
+ *
+ * Handles align1 direct and register-indirect addressing and align16 direct
+ * addressing; align16 indirect addressing is reported as unsupported.
+ *
+ * Returns 0 on success and nonzero if any field could not be decoded.  The
+ * accumulated error used to be dropped by an unconditional "return 0".
+ */
+static int dest (FILE *file, const struct brw_instruction *inst)
+{
+    int err = 0;
+
+    if (inst->header.access_mode == BRW_ALIGN_1)
+    {
+        if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT)
+        {
+            err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
+            /* reg() returns -1 for null/ip: nothing more to print. */
+            if (err == -1)
+                return 0;
+            if (inst->bits1.da1.dest_subreg_nr)
+                format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+            format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
+        }
+        else
+        {
+            string (file, "g[a0");
+            if (inst->bits1.ia1.dest_subreg_nr)
+                format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+            if (inst->bits1.ia1.dest_indirect_offset)
+                format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
+            string (file, "]");
+            format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride);
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
+        }
+    }
+    else
+    {
+        if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT)
+        {
+            err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
+            if (err == -1)
+                return 0;
+            if (inst->bits1.da16.dest_subreg_nr)
+                format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+            string (file, "<1>");
+            err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
+        }
+        else
+        {
+            err = 1;
+            string (file, "Indirect align16 address mode not supported");
+        }
+    }
+
+    return err;
+}
+
+/*
+ * Print an align1 source region as "<vert,width,horiz>".  Returns nonzero
+ * if any of the three encodings has no table entry.
+ */
+static int src_align1_region (FILE *file,
+                              GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
+{
+    int err;
+
+    string (file, "<");
+    err = control (file, "vert stride", vert_stride, _vert_stride, NULL);
+
+    string (file, ",");
+    err |= control (file, "width", width, _width, NULL);
+
+    string (file, ",");
+    err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
+
+    string (file, ">");
+    return err;
+}
+
+/*
+ * Print an align1, directly addressed source operand: negate/abs prefixes,
+ * register name, optional sub-register, region and type suffix.
+ *
+ * Returns 0 on success (including the null/ip registers, which print no
+ * suffix) and nonzero if any field could not be decoded.  Errors from the
+ * region printer are now included in the result instead of being dropped.
+ */
+static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
+                    GLuint _vert_stride, GLuint _width, GLuint _horiz_stride,
+                    GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    err |= reg (file, _reg_file, reg_num);
+    /* reg() returns -1 for null/ip: nothing more to print. */
+    if (err == -1)
+        return 0;
+    if (sub_reg_num)
+        format (file, ".%d", sub_reg_num);
+    err |= src_align1_region (file, _vert_stride, _width, _horiz_stride);
+    err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+    return err;
+}
+
+/*
+ * Print an align1, register-indirect source operand in the form
+ * "g[a0.<subreg> <imm>]<region><type>", preceded by the negate/abs prefixes.
+ * _reg_file and _addr_mode are accepted but not used.
+ *
+ * Returns 0 on success and nonzero if any field could not be decoded.
+ * Errors from the region printer are now included in the result instead of
+ * being dropped.
+ */
+static int src_ia1 (FILE *file,
+                    GLuint type,
+                    GLuint _reg_file,
+                    GLint _addr_imm,
+                    GLuint _addr_subreg_nr,
+                    GLuint _negate,
+                    GLuint __abs,
+                    GLuint _addr_mode,
+                    GLuint _horiz_stride,
+                    GLuint _width,
+                    GLuint _vert_stride)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    string (file, "g[a0");
+    if (_addr_subreg_nr)
+        format (file, ".%d", _addr_subreg_nr);
+    if (_addr_imm)
+        format (file, " %d", _addr_imm);
+    string (file, "]");
+    err |= src_align1_region (file, _vert_stride, _width, _horiz_stride);
+    err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+    return err;
+}
+
+/*
+ * Print a direct-addressed align16 source operand: the negate and abs
+ * modifiers, the register, an optional ".subreg" suffix, a
+ * "<vert_stride,1,1>" region, the register type and the swizzle.
+ *
+ * Swizzle display:
+ *   identity (x,y,z,w)      - nothing printed
+ *   one channel replicated  - "." plus that single channel
+ *   anything else           - "." plus all four channel selects
+ *
+ * If the accumulated status is exactly -1 once reg() has run, nothing
+ * further is printed and 0 is returned; otherwise the OR of all helper
+ * results is returned.
+ */
+static int src_da16 (FILE *file,
+                     GLuint _reg_type,
+                     GLuint _reg_file,
+                     GLuint _vert_stride,
+                     GLuint _reg_nr,
+                     GLuint _subreg_nr,
+                     GLuint __abs,
+                     GLuint _negate,
+                     GLuint swz_x,
+                     GLuint swz_y,
+                     GLuint swz_z,
+                     GLuint swz_w)
+{
+    const int swz_identity = (swz_x == BRW_CHANNEL_X &&
+                              swz_y == BRW_CHANNEL_Y &&
+                              swz_z == BRW_CHANNEL_Z &&
+                              swz_w == BRW_CHANNEL_W);
+    const int swz_replicate = (swz_x == swz_y &&
+                               swz_x == swz_z &&
+                               swz_x == swz_w);
+    int status;
+
+    status  = control (file, "negate", negate, _negate, NULL);
+    status |= control (file, "abs", _abs, __abs, NULL);
+    status |= reg (file, _reg_file, _reg_nr);
+    if (status == -1)
+        return 0;
+
+    if (_subreg_nr)
+        format (file, ".%d", _subreg_nr);
+
+    string (file, "<");
+    status |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+    string (file, ",1,1>");
+    status |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+
+    if (!swz_identity) {
+        string (file, ".");
+        status |= control (file, "channel select", chan_sel, swz_x, NULL);
+        if (!swz_replicate) {
+            status |= control (file, "channel select", chan_sel, swz_y, NULL);
+            status |= control (file, "channel select", chan_sel, swz_z, NULL);
+            status |= control (file, "channel select", chan_sel, swz_w, NULL);
+        }
+    }
+
+    return status;
+}
+
+
+/*
+ * Print the immediate operand stored in inst->bits3, formatted according
+ * to the register type 'type'.  A type without a case prints nothing.
+ * Always returns 0.
+ */
+static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) {
+    switch (type) {
+    case BRW_REGISTER_TYPE_UD:
+        format (file, "0x%08xUD", inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_D:
+        format (file, "%dD", inst->bits3.d);
+        break;
+    case BRW_REGISTER_TYPE_UW:
+        format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_W:
+        format (file, "%dW", (int16_t) inst->bits3.d);
+        break;
+    case BRW_REGISTER_TYPE_UB:
+        /* Truncate as unsigned: an int8_t cast sign-extends values >= 0x80
+         * when promoted to int, which "%02x" would print as 0xffffffXX. */
+        format (file, "0x%02xUB", (uint8_t) inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_VF:
+        format (file, "Vector Float");
+        break;
+    case BRW_REGISTER_TYPE_V:
+        format (file, "0x%08xV", inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_F:
+        format (file, "%-gF", inst->bits3.f);
+        break;
+    default:
+        /* Unknown immediate type: print nothing, as before. */
+        break;
+    }
+    return 0;
+}
+
+/*
+ * Print the first source operand of 'inst'.
+ *
+ * Dispatches on the operand kind: immediate, align1 direct, align1
+ * indirect, or align16 direct.  Align16 indirect addressing is not
+ * handled; a message is printed and 1 is returned.  Otherwise the result
+ * of the selected printer is returned.
+ */
+static int src0 (FILE *file, const struct brw_instruction *inst)
+{
+    if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
+        return imm (file, inst->bits1.da1.src0_reg_type, inst);
+
+    if (inst->header.access_mode != BRW_ALIGN_1) {
+        /* align16 access mode */
+        if (inst->bits2.da16.src0_address_mode != BRW_ADDRESS_DIRECT) {
+            string (file, "Indirect align16 address mode not supported");
+            return 1;
+        }
+
+        return src_da16 (file,
+                         inst->bits1.da16.src0_reg_type,
+                         inst->bits1.da16.src0_reg_file,
+                         inst->bits2.da16.src0_vert_stride,
+                         inst->bits2.da16.src0_reg_nr,
+                         inst->bits2.da16.src0_subreg_nr,
+                         inst->bits2.da16.src0_abs,
+                         inst->bits2.da16.src0_negate,
+                         inst->bits2.da16.src0_swz_x,
+                         inst->bits2.da16.src0_swz_y,
+                         inst->bits2.da16.src0_swz_z,
+                         inst->bits2.da16.src0_swz_w);
+    }
+
+    /* align1 access mode */
+    if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT)
+        return src_da1 (file,
+                        inst->bits1.da1.src0_reg_type,
+                        inst->bits1.da1.src0_reg_file,
+                        inst->bits2.da1.src0_vert_stride,
+                        inst->bits2.da1.src0_width,
+                        inst->bits2.da1.src0_horiz_stride,
+                        inst->bits2.da1.src0_reg_nr,
+                        inst->bits2.da1.src0_subreg_nr,
+                        inst->bits2.da1.src0_abs,
+                        inst->bits2.da1.src0_negate);
+
+    return src_ia1 (file,
+                    inst->bits1.ia1.src0_reg_type,
+                    inst->bits1.ia1.src0_reg_file,
+                    inst->bits2.ia1.src0_indirect_offset,
+                    inst->bits2.ia1.src0_subreg_nr,
+                    inst->bits2.ia1.src0_negate,
+                    inst->bits2.ia1.src0_abs,
+                    inst->bits2.ia1.src0_address_mode,
+                    inst->bits2.ia1.src0_horiz_stride,
+                    inst->bits2.ia1.src0_width,
+                    inst->bits2.ia1.src0_vert_stride);
+}
+
+/*
+ * Print the second source operand of 'inst'.
+ *
+ * Dispatches on the operand kind: immediate, align1 direct, align1
+ * indirect, or align16 direct.  Align16 indirect addressing is not
+ * handled; a message is printed and 1 is returned.  Otherwise the result
+ * of the selected printer is returned.
+ */
+static int src1 (FILE *file, const struct brw_instruction *inst)
+{
+    if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+        return imm (file, inst->bits1.da1.src1_reg_type, inst);
+
+    if (inst->header.access_mode != BRW_ALIGN_1) {
+        /* align16 access mode */
+        if (inst->bits3.da16.src1_address_mode != BRW_ADDRESS_DIRECT) {
+            string (file, "Indirect align16 address mode not supported");
+            return 1;
+        }
+
+        return src_da16 (file,
+                         inst->bits1.da16.src1_reg_type,
+                         inst->bits1.da16.src1_reg_file,
+                         inst->bits3.da16.src1_vert_stride,
+                         inst->bits3.da16.src1_reg_nr,
+                         inst->bits3.da16.src1_subreg_nr,
+                         inst->bits3.da16.src1_abs,
+                         inst->bits3.da16.src1_negate,
+                         inst->bits3.da16.src1_swz_x,
+                         inst->bits3.da16.src1_swz_y,
+                         inst->bits3.da16.src1_swz_z,
+                         inst->bits3.da16.src1_swz_w);
+    }
+
+    /* align1 access mode */
+    if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT)
+        return src_da1 (file,
+                        inst->bits1.da1.src1_reg_type,
+                        inst->bits1.da1.src1_reg_file,
+                        inst->bits3.da1.src1_vert_stride,
+                        inst->bits3.da1.src1_width,
+                        inst->bits3.da1.src1_horiz_stride,
+                        inst->bits3.da1.src1_reg_nr,
+                        inst->bits3.da1.src1_subreg_nr,
+                        inst->bits3.da1.src1_abs,
+                        inst->bits3.da1.src1_negate);
+
+    return src_ia1 (file,
+                    inst->bits1.ia1.src1_reg_type,
+                    inst->bits1.ia1.src1_reg_file,
+                    inst->bits3.ia1.src1_indirect_offset,
+                    inst->bits3.ia1.src1_subreg_nr,
+                    inst->bits3.ia1.src1_negate,
+                    inst->bits3.ia1.src1_abs,
+                    inst->bits3.ia1.src1_address_mode,
+                    inst->bits3.ia1.src1_horiz_stride,
+                    inst->bits3.ia1.src1_width,
+                    inst->bits3.ia1.src1_vert_stride);
+}
+/*
+ * Disassemble one instruction to 'file'.
+ *
+ * Output order, as emitted below: optional predicate prefix, opcode,
+ * saturate / debug / conditional-modifier suffixes, "(execution size)",
+ * then the destination and source operands (each preceded by a pad()
+ * call with 16, 32 and 48), the SEND message description on a new line,
+ * the "{...}" instruction options after pad(file, 64), and a final ";"
+ * plus newline.
+ *
+ * Returns the bitwise OR of the results of every printing helper called.
+ */
+int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
+{
+ int err = 0;
+ int space = 0; /* handed to control() by address; presumably tracks whether a separating blank is pending - TODO confirm */
+ /* Optional predicate prefix: "(", inverse flag, "f0", optional ".N", predicate control, ") ". */
+ if (inst->header.predicate_control) {
+ string (file, "(");
+ err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
+ string (file, "f0");
+ if (inst->bits2.da1.flag_reg_nr)
+ format (file, ".%d", inst->bits2.da1.flag_reg_nr);
+ if (inst->header.access_mode == BRW_ALIGN_1) /* the predicate control table depends on the access mode */
+ err |= control (file, "predicate control align1", pred_ctrl_align1,
+ inst->header.predicate_control, NULL);
+ else
+ err |= control (file, "predicate control align16", pred_ctrl_align16,
+ inst->header.predicate_control, NULL);
+ string (file, ") ");
+ }
+ /* Opcode followed by its suffix modifiers. */
+ err |= print_opcode (file, inst->header.opcode);
+ err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
+ err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
+ /* For SEND the destreg__conditionalmod field is printed as a plain number further down instead. */
+ if (inst->header.opcode != BRW_OPCODE_SEND)
+ err |= control (file, "conditional modifier", conditional_modifier,
+ inst->header.destreg__conditionalmod, NULL);
+ /* NOP gets no "(execution size)". */
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "(");
+ err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
+ string (file, ")");
+ }
+ /* SEND: the same header field is shown numerically. */
+ if (inst->header.opcode == BRW_OPCODE_SEND)
+ format (file, " %d", inst->header.destreg__conditionalmod);
+ /* Operands: only as many as the opcode table entry declares (ndst / nsrc). */
+ if (opcode[inst->header.opcode].ndst > 0) {
+ pad (file, 16);
+ err |= dest (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 0) {
+ pad (file, 32);
+ err |= src0 (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 1) {
+ pad (file, 48);
+ err |= src1 (file, inst);
+ }
+ /* SEND: describe the message descriptor (bits3) on a second line. */
+ if (inst->header.opcode == BRW_OPCODE_SEND) {
+ newline (file);
+ pad (file, 16);
+ space = 0;
+ err |= control (file, "target function", target_function,
+ inst->bits3.generic.msg_target, &space);
+ switch (inst->bits3.generic.msg_target) { /* per-target decoding of the remaining bits3 fields */
+ case BRW_MESSAGE_TARGET_MATH:
+ err |= control (file, "math function", math_function,
+ inst->bits3.math.function, &space);
+ err |= control (file, "math saturate", math_saturate,
+ inst->bits3.math.saturate, &space);
+ err |= control (file, "math signed", math_signed,
+ inst->bits3.math.int_type, &space);
+ err |= control (file, "math scalar", math_scalar,
+ inst->bits3.math.data_type, &space);
+ err |= control (file, "math precision", math_precision,
+ inst->bits3.math.precision, &space);
+ break;
+ case BRW_MESSAGE_TARGET_SAMPLER: /* printed as " (binding table index, sampler, return format)" */
+ format (file, " (%d, %d, ",
+ inst->bits3.sampler.binding_table_index,
+ inst->bits3.sampler.sampler);
+ err |= control (file, "sampler target format", sampler_target_format,
+ inst->bits3.sampler.return_format, NULL);
+ string (file, ")");
+ break;
+ case BRW_MESSAGE_TARGET_DATAPORT_WRITE: /* second value packs pixel_scoreboard_clear (bit 3) with msg_control */
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.dp_write.binding_table_index,
+ (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
+ inst->bits3.dp_write.msg_control,
+ inst->bits3.dp_write.msg_type,
+ inst->bits3.dp_write.send_commit_msg);
+ break;
+ case BRW_MESSAGE_TARGET_URB:
+ format (file, " %d", inst->bits3.urb.offset);
+ space = 1;
+ err |= control (file, "urb swizzle", urb_swizzle,
+ inst->bits3.urb.swizzle_control, &space);
+ err |= control (file, "urb allocate", urb_allocate,
+ inst->bits3.urb.allocate, &space);
+ err |= control (file, "urb used", urb_used,
+ inst->bits3.urb.used, &space);
+ err |= control (file, "urb complete", urb_complete,
+ inst->bits3.urb.complete, &space);
+ break;
+ case BRW_MESSAGE_TARGET_THREAD_SPAWNER: /* no target-specific fields are printed */
+ break;
+ default:
+ format (file, "unsupported target %d", inst->bits3.generic.msg_target);
+ break;
+ }
+ if (space)
+ string (file, " ");
+ format (file, "mlen %d",
+ inst->bits3.generic.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic.response_length);
+ }
+ pad (file, 64); /* the "{...}" instruction options follow */
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "{");
+ space = 1;
+ err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
+ err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
+ err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
+ err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
+ err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
+ if (inst->header.opcode == BRW_OPCODE_SEND) /* end-of-thread is read from the message descriptor, so SEND only */
+ err |= control (file, "end of thread", end_of_thread,
+ inst->bits3.generic.end_of_thread, &space);
+ if (space)
+ string (file, " ");
+ string (file, "}");
+ }
+ string (file, ";");
+ newline (file);
+ return err;
+}
+
+
+/*
+ * Disassemble 'count' consecutive instructions starting at 'inst',
+ * writing them to 'file', followed by one blank line.
+ *
+ * Stops at the first instruction for which brw_disasm_insn() reports a
+ * non-zero status and returns that status; returns 0 when every
+ * instruction was printed.
+ */
+int brw_disasm (FILE *file,
+                const struct brw_instruction *inst,
+                unsigned count)
+{
+    unsigned i;   /* same type as 'count': no signed/unsigned comparison */
+    int err;
+
+    for (i = 0; i < count; i++) {
+        /* Write to the caller's stream; this used to be hard-wired to
+         * stderr, leaving 'file' with nothing but the trailing newline. */
+        err = brw_disasm_insn(file, &inst[i]);
+        if (err)
+            return err;
+    }
+
+    fprintf(file, "\n");
+    return 0;
+}
+
diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h
new file mode 100644
index 00000000000..77d402d35e6
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_disasm.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef BRW_DISASM_H
+#define BRW_DISASM_H
+
+/* FILE appears in the prototypes below, so make this header self-contained. */
+#include <stdio.h>
+
+struct brw_instruction;
+
+/* Disassemble a single instruction; returns 0 or a non-zero status
+ * accumulated from the field printers. */
+int brw_disasm_insn (FILE *file, const struct brw_instruction *inst);
+
+/* Disassemble 'count' instructions starting at 'inst'; returns 0, or the
+ * first non-zero status reported by brw_disasm_insn(). */
+int brw_disasm (FILE *file,
+                const struct brw_instruction *inst,
+                unsigned count);
+
+#endif
+
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
new file mode 100644
index 00000000000..852fd229828
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -0,0 +1,291 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_prim.h"
+#include "util/u_upload_mgr.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_debug.h"
+#include "brw_screen.h"
+
+#include "brw_batchbuffer.h"
+
+
+/* Lookup table from a gallium primitive type to the hardware _3DPRIM_*
+ * topology.  It is indexed directly with the PIPE_PRIM_* value, so the
+ * entries must stay in PIPE_PRIM_* order (assumed POINTS .. POLYGON -
+ * confirm against the gallium headers).  Read-only, hence const.
+ */
+static const uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = {
+   _3DPRIM_POINTLIST,
+   _3DPRIM_LINELIST,
+   _3DPRIM_LINELOOP,
+   _3DPRIM_LINESTRIP,
+   _3DPRIM_TRILIST,
+   _3DPRIM_TRISTRIP,
+   _3DPRIM_TRIFAN,
+   _3DPRIM_QUADLIST,
+   _3DPRIM_QUADSTRIP,
+   _3DPRIM_POLYGON
+};
+
+
+
+/* When the primitive changes, set a state bit and re-validate. Not
+ * the nicest and would rather deal with this by having all the
+ * programs be immune to the active primitive (ie. cope with all
+ * possibilities). That may not be realistic however.
+ */
+static int brw_set_prim(struct brw_context *brw, unsigned prim )
+{
+   unsigned reduced_prim;
+
+   if (BRW_DEBUG & DEBUG_PRIMS)
+      debug_printf("PRIM: %s\n", u_prim_name(prim));
+
+   /* Same primitive as last time: no state to dirty, just translate. */
+   if (prim == brw->primitive)
+      return prim_to_hw_prim[prim];
+
+   brw->primitive = prim;
+   brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
+
+   /* The reduced primitive gets its own dirty bit and is only flagged
+    * when it actually changes.
+    */
+   reduced_prim = u_reduced_prim(prim);
+   if (brw->reduced_primitive != reduced_prim) {
+      brw->reduced_primitive = reduced_prim;
+      brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
+   }
+
+   return prim_to_hw_prim[prim];
+}
+
+
+/*
+ * Build a brw_3d_primitive packet for one draw and append it to the
+ * batch buffer.
+ *
+ * start / count give the vertex range, 'indexed' selects indexed
+ * drawing and hw_prim is the hardware topology.  Nothing is emitted
+ * when count is zero.
+ *
+ * Returns 0 on success, or the non-zero result of
+ * brw_batchbuffer_data().
+ */
+static int brw_emit_prim(struct brw_context *brw,
+ unsigned start,
+ unsigned count,
+ boolean indexed,
+ uint32_t hw_prim)
+{
+ struct brw_3d_primitive prim_packet;
+ int ret;
+ /* Optional debug trace of the draw parameters. */
+ if (BRW_DEBUG & DEBUG_PRIMS)
+ debug_printf("%s start %d count %d indexed %d hw_prim %d\n",
+ __FUNCTION__, start, count, indexed, hw_prim);
+ /* Packet header. */
+ prim_packet.header.opcode = CMD_3D_PRIM;
+ prim_packet.header.length = sizeof(prim_packet)/4 - 2; /* packet size in 4-byte units, minus 2 */
+ prim_packet.header.pad = 0;
+ prim_packet.header.topology = hw_prim;
+ prim_packet.header.indexed = indexed;
+ /* Vertex range; indexed draws are additionally offset by brw->ib.start_vertex_offset. */
+ prim_packet.verts_per_instance = count;
+ prim_packet.start_vert_location = start;
+ if (indexed)
+ prim_packet.start_vert_location += brw->ib.start_vertex_offset;
+ prim_packet.instance_count = 1;
+ prim_packet.start_instance_location = 0;
+ prim_packet.base_vert_location = 0; /* prim->basevertex; XXX: add this to gallium */
+
+
+ /* If we're set to always flush, do it before and after the primitive emit.
+ * We want to catch both missed flushes that hurt instruction/state cache
+ * and missed flushes of the render cache as it heads to other parts of
+ * the driver besides the draw code.
+ */
+ if (0) { /* debug flush before the primitive; currently compiled out */
+ BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
+ ADVANCE_BATCH();
+ }
+ if (prim_packet.verts_per_instance) { /* zero-vertex draws emit nothing */
+ ret = brw_batchbuffer_data( brw->batch, &prim_packet,
+ sizeof(prim_packet), LOOP_CLIPRECTS);
+ if (ret)
+ return ret;
+ }
+ if (0) { /* debug flush after the primitive; currently compiled out */
+ BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
+ ADVANCE_BATCH();
+ }
+
+ return 0;
+}
+
+
+/* May fail if out of video memory for texture or vbo upload, or on
+ * fallback conditions.
+ */
+static int
+try_draw_range_elements(struct brw_context *brw,
+                        struct pipe_buffer *index_buffer,
+                        unsigned hw_prim,
+                        unsigned start, unsigned count)
+{
+   int ret;
+
+   /* Each step returns non-zero on failure; the first failure aborts the
+    * attempt and is handed straight back to the caller.
+    */
+   if ((ret = brw_validate_state(brw)) != 0)
+      return ret;
+
+   /* Make sure the validated buffers fit alongside the current batch. */
+   if ((ret = brw->sws->check_aperture_space(brw->sws,
+                                             brw->state.validated_bos,
+                                             brw->state.validated_bo_count)) != 0)
+      return ret;
+
+   if ((ret = brw_upload_state(brw)) != 0)
+      return ret;
+
+   /* The draw is indexed exactly when an index buffer was supplied. */
+   if ((ret = brw_emit_prim(brw, start, count, index_buffer != NULL, hw_prim)) != 0)
+      return ret;
+
+   if (brw->flags.always_flush_batch)
+      brw_context_flush( brw );
+
+   return 0;
+}
+
+
+/*
+ * Draw entry point installed as pipe_context::draw_range_elements by
+ * brw_draw_init().
+ *
+ * Records the primitive type, index buffer and index range on the
+ * context (setting the matching dirty bits when they change), then tries
+ * to draw.  If the first attempt fails the context is flushed and the
+ * draw is retried once.
+ *
+ * Returns TRUE when the draw was emitted, FALSE if the retry failed too.
+ */
+static boolean
+brw_draw_range_elements(struct pipe_context *pipe,
+                        struct pipe_buffer *index_buffer,
+                        unsigned index_size,
+                        unsigned min_index,
+                        unsigned max_index,
+                        unsigned mode, unsigned start, unsigned count)
+{
+   struct brw_context *brw = brw_context(pipe);
+   int ret;
+   uint32_t hw_prim;
+
+   hw_prim = brw_set_prim(brw, mode);
+
+   if (BRW_DEBUG & DEBUG_PRIMS)
+      debug_printf("PRIM: %s start %d count %d index_buffer %p\n",
+                   u_prim_name(mode), start, count, (void *)index_buffer);
+
+   /* Potentially trigger upload of new index buffer.
+    *
+    * XXX: do we need to go through state validation to achieve this?
+    * Could just call upload code directly.
+    */
+   if (brw->curr.index_buffer != index_buffer ||
+       brw->curr.index_size != index_size) {
+      pipe_buffer_reference( &brw->curr.index_buffer, index_buffer );
+      brw->curr.index_size = index_size;
+      brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER;
+   }
+
+   /* XXX: do we really care?
+    */
+   if (brw->curr.min_index != min_index ||
+       brw->curr.max_index != max_index)
+   {
+      brw->curr.min_index = min_index;
+      brw->curr.max_index = max_index;
+      brw->state.dirty.mesa |= PIPE_NEW_INDEX_RANGE;
+   }
+
+
+   /* Make a first attempt at drawing:
+    */
+   ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
+
+   /* Otherwise, flush and retry:
+    */
+   if (ret != 0) {
+      brw_context_flush( brw );
+      ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
+      assert(ret == 0);
+   }
+
+   /* With asserts compiled out a failed retry used to be reported as
+    * success; tell the caller the truth instead.
+    */
+   return ret == 0 ? TRUE : FALSE;
+}
+
+/* Indexed draw without caller-supplied index bounds: forwards to
+ * brw_draw_range_elements() with the widest possible index range.
+ */
+static boolean
+brw_draw_elements(struct pipe_context *pipe,
+                  struct pipe_buffer *index_buffer,
+                  unsigned index_size,
+                  unsigned mode,
+                  unsigned start, unsigned count)
+{
+   const unsigned lowest_index = 0;
+   const unsigned highest_index = 0xffffffff;
+
+   return brw_draw_range_elements( pipe, index_buffer, index_size,
+                                   lowest_index, highest_index,
+                                   mode, start, count );
+}
+
+/* Non-indexed draw: forwards to brw_draw_elements() with no index buffer
+ * and an index size of zero.
+ */
+static boolean
+brw_draw_arrays(struct pipe_context *pipe, unsigned mode,
+                unsigned start, unsigned count)
+{
+   struct pipe_buffer *no_index_buffer = NULL;
+   const unsigned no_index_size = 0;
+
+   return brw_draw_elements(pipe, no_index_buffer, no_index_size,
+                            mode, start, count);
+}
+
+
+
+/* Hook the draw entry points into the pipe context and create the two
+ * upload managers used for user-supplied vertex and index data.
+ *
+ * Returns FALSE as soon as either upload manager cannot be created,
+ * TRUE otherwise.
+ */
+boolean brw_draw_init( struct brw_context *brw )
+{
+   /* Draw callbacks. */
+   brw->base.draw_arrays = brw_draw_arrays;
+   brw->base.draw_elements = brw_draw_elements;
+   brw->base.draw_range_elements = brw_draw_range_elements;
+
+   /* Vertex upload helper: 128 KiB default size, alignment 64. */
+   brw->vb.upload_vertex = u_upload_create( brw->base.screen,
+                                            128 * 1024,
+                                            64,
+                                            PIPE_BUFFER_USAGE_VERTEX );
+   if (!brw->vb.upload_vertex)
+      return FALSE;
+
+   /* Index upload helper: 32 KiB default size, alignment 64. */
+   brw->vb.upload_index = u_upload_create( brw->base.screen,
+                                           32 * 1024,
+                                           64,
+                                           PIPE_BUFFER_USAGE_INDEX );
+
+   return brw->vb.upload_index ? TRUE : FALSE;
+}
+/* Tear down draw state: destroy both upload managers and reset brw->ib.bo through bo_reference(). */
+void brw_draw_cleanup( struct brw_context *brw )
+{
+ u_upload_destroy( brw->vb.upload_vertex );
+ u_upload_destroy( brw->vb.upload_index );
+
+ bo_reference(&brw->ib.bo, NULL); /* presumably releases the reference held on the index buffer bo - TODO confirm */
+}
diff --git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h
new file mode 100644
index 00000000000..8dc5dbce622
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw.h
@@ -0,0 +1,39 @@
+ /**************************************************************************
+ *
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_DRAW_H
+#define BRW_DRAW_H
+
+#include "brw_types.h"
+
+struct brw_context;
+/* Draw-module setup and teardown for a brw_context. */
+boolean brw_draw_init( struct brw_context *brw ); /* returns FALSE if an upload manager could not be created */
+void brw_draw_cleanup( struct brw_context *brw ); /* destroys the upload managers */
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
new file mode 100644
index 00000000000..a27da5f1c17
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -0,0 +1,542 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_context.h"
+
+#include "util/u_upload_mgr.h"
+#include "util/u_math.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_screen.h"
+#include "brw_batchbuffer.h"
+#include "brw_debug.h"
+
+
+
+/*
+ * Translate a PIPE_FORMAT_* vertex element format into the
+ * BRW_SURFACEFORMAT_* value of the same name.
+ *
+ * Covers the R / RG / RGB / RGBA variants of 64-bit float, 32-bit float
+ * and the 32-, 16- and 8-bit UNORM / USCALED / SNORM / SSCALED formats.
+ * Any other format trips assert(0); with asserts compiled out, 0 is
+ * returned.
+ */
+static unsigned brw_translate_surface_format( unsigned id )
+{
+ switch (id) {
+ case PIPE_FORMAT_R64_FLOAT:
+ return BRW_SURFACEFORMAT_R64_FLOAT;
+ case PIPE_FORMAT_R64G64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64_FLOAT;
+ case PIPE_FORMAT_R64G64B64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
+ case PIPE_FORMAT_R64G64B64A64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ return BRW_SURFACEFORMAT_R32_FLOAT;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+ case PIPE_FORMAT_R32_UNORM:
+ return BRW_SURFACEFORMAT_R32_UNORM;
+ case PIPE_FORMAT_R32G32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32_UNORM;
+ case PIPE_FORMAT_R32G32B32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32B32_UNORM;
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
+
+ case PIPE_FORMAT_R32_USCALED:
+ return BRW_SURFACEFORMAT_R32_USCALED;
+ case PIPE_FORMAT_R32G32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32_USCALED;
+ case PIPE_FORMAT_R32G32B32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32B32_USCALED;
+ case PIPE_FORMAT_R32G32B32A32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
+
+ case PIPE_FORMAT_R32_SNORM:
+ return BRW_SURFACEFORMAT_R32_SNORM;
+ case PIPE_FORMAT_R32G32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32_SNORM;
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32B32_SNORM;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
+
+ case PIPE_FORMAT_R32_SSCALED:
+ return BRW_SURFACEFORMAT_R32_SSCALED;
+ case PIPE_FORMAT_R32G32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32_SSCALED;
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return BRW_SURFACEFORMAT_R16_UNORM;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16_UNORM;
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16B16_UNORM;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
+
+ case PIPE_FORMAT_R16_USCALED:
+ return BRW_SURFACEFORMAT_R16_USCALED;
+ case PIPE_FORMAT_R16G16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16_USCALED;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16B16_USCALED;
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
+
+ case PIPE_FORMAT_R16_SNORM:
+ return BRW_SURFACEFORMAT_R16_SNORM;
+ case PIPE_FORMAT_R16G16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16_SNORM;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16B16_SNORM;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
+
+ case PIPE_FORMAT_R16_SSCALED:
+ return BRW_SURFACEFORMAT_R16_SSCALED;
+ case PIPE_FORMAT_R16G16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16_SSCALED;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
+
+ case PIPE_FORMAT_R8_UNORM:
+ return BRW_SURFACEFORMAT_R8_UNORM;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8_UNORM;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8_UNORM;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R8_USCALED:
+ return BRW_SURFACEFORMAT_R8_USCALED;
+ case PIPE_FORMAT_R8G8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8_USCALED;
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8B8_USCALED;
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
+
+ case PIPE_FORMAT_R8_SNORM:
+ return BRW_SURFACEFORMAT_R8_SNORM;
+ case PIPE_FORMAT_R8G8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8_SNORM;
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8_SNORM;
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+ case PIPE_FORMAT_R8_SSCALED:
+ return BRW_SURFACEFORMAT_R8_SSCALED;
+ case PIPE_FORMAT_R8G8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8_SSCALED;
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
+
+ default: /* format not listed above */
+ assert(0);
+ return 0;
+ }
+}
+
+/* Map 'type' (1, 2 or 4 - presumably the index size in bytes, confirm
+ * against the caller) to BRW_INDEX_BYTE / WORD / DWORD.  Any other value
+ * trips assert(0); with asserts compiled out, 0 is returned.
+ */
+static unsigned get_index_type(int type)
+{
+   if (type == 1)
+      return BRW_INDEX_BYTE;
+   if (type == 2)
+      return BRW_INDEX_WORD;
+   if (type == 4)
+      return BRW_INDEX_DWORD;
+
+   assert(0);
+   return 0;
+}
+
+/*
+ * Resolve every current vertex buffer to a winsys buffer object plus
+ * offset, record the result in brw->vb.vb[], and add each of those
+ * buffer objects to the validated list.
+ *
+ * User buffers are first copied through the brw->vb.upload_vertex
+ * upload manager; other buffers are referenced in place.
+ *
+ * Returns 0 on success, or the non-zero result of u_upload_buffer().
+ */
+static int brw_prepare_vertices(struct brw_context *brw)
+{
+ unsigned int min_index = brw->curr.min_index;
+ unsigned int max_index = brw->curr.max_index;
+ GLuint i;
+ int ret;
+
+ if (BRW_DEBUG & DEBUG_VERTS)
+ debug_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
+
+ /* Pass 1: resolve each vertex buffer, uploading user buffers as needed. */
+ for (i = 0; i < brw->curr.num_vertex_buffers; i++) {
+ struct pipe_vertex_buffer *vb = &brw->curr.vertex_buffer[i];
+ struct brw_winsys_buffer *bo;
+ struct pipe_buffer *upload_buf = NULL; /* only set on the user-buffer path */
+ unsigned offset;
+
+ if (BRW_DEBUG & DEBUG_VERTS)
+ debug_printf("%s vb[%d] user:%d offset:0x%x sz:0x%x stride:0x%x\n",
+ __FUNCTION__, i,
+ brw_buffer_is_user_buffer(vb->buffer),
+ vb->buffer_offset,
+ vb->buffer->size,
+ vb->stride);
+
+ if (brw_buffer_is_user_buffer(vb->buffer)) {
+
+ /* XXX: simplify this. Stop the state trackers from generating
+ * zero-stride buffers & have them use additional constants (or
+ * add support for >1 constant buffer) instead.
+ */
+ unsigned size = (vb->stride == 0 ?
+ vb->buffer->size - vb->buffer_offset :
+ MAX2(vb->buffer->size - vb->buffer_offset,
+ vb->stride * (max_index + 1 - min_index))); /* NOTE(review): unsigned arithmetic - max_index + 1 wraps to 0 when max_index is 0xffffffff, which brw_draw_elements passes */
+
+ ret = u_upload_buffer( brw->vb.upload_vertex,
+ vb->buffer_offset + min_index * vb->stride,
+ size,
+ vb->buffer,
+ &offset,
+ &upload_buf );
+ if (ret)
+ return ret;
+
+ bo = brw_buffer(upload_buf)->bo;
+
+ assert(offset + size <= bo->size);
+ }
+ else
+ {
+ offset = vb->buffer_offset; /* non-user buffer: reference it in place */
+ bo = brw_buffer(vb->buffer)->bo;
+ }
+
+ assert(offset < bo->size);
+
+ /* Set up post-upload info about this vertex buffer:
+ */
+ brw->vb.vb[i].offset = offset;
+ brw->vb.vb[i].stride = vb->stride;
+ brw->vb.vb[i].vertex_count = (vb->stride == 0 ?
+ 1 :
+ (bo->size - offset) / vb->stride); /* zero stride counts as one vertex and avoids dividing by zero */
+
+ bo_reference( &brw->vb.vb[i].bo, bo );
+
+ /* Don't need to retain this reference. We have a reference on
+ * the underlying winsys buffer:
+ */
+ pipe_buffer_reference( &upload_buf, NULL );
+ }
+
+ brw->vb.nr_vb = i;
+ brw_prepare_query_begin(brw);
+ /* Pass 2: add every resolved buffer object to the validated list. */
+ for (i = 0; i < brw->vb.nr_vb; i++) {
+ brw_add_validated_bo(brw, brw->vb.vb[i].bo);
+ }
+
+ return 0;
+}
+/*
+ * Emit one CMD_VERTEX_BUFFER packet describing all brw->vb.nr_vb vertex
+ * buffers: one header dword followed by four dwords per buffer.
+ * Emits nothing when there are no vertex buffers.  Always returns 0.
+ */
+static int brw_emit_vertex_buffers( struct brw_context *brw )
+{
+ int i;
+
+ /* If the VS doesn't read any inputs (calculating vertex position from
+ * a state variable for some reason, for example), just bail.
+ *
+ * The stale VB state stays in place, but they don't do anything unless
+ * a VE loads from them.
+ */
+ if (brw->vb.nr_vb == 0) {
+ if (BRW_DEBUG & DEBUG_VERTS)
+ debug_printf("%s: no active vertex buffers\n", __FUNCTION__);
+
+ return 0;
+ }
+
+ /* Emit VB state packets.
+ */
+ BEGIN_BATCH(1 + brw->vb.nr_vb * 4, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
+ ((1 + brw->vb.nr_vb * 4) - 2)); /* length field: total dwords minus 2 */
+ /* Four dwords per buffer: index/access/pitch, start relocation, end relocation or vertex count, step rate. */
+ for (i = 0; i < brw->vb.nr_vb; i++) {
+ OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
+ BRW_VB0_ACCESS_VERTEXDATA |
+ (brw->vb.vb[i].stride << BRW_VB0_PITCH_SHIFT));
+ OUT_RELOC(brw->vb.vb[i].bo,
+ BRW_USAGE_VERTEX,
+ brw->vb.vb[i].offset);
+ if (BRW_IS_IGDNG(brw)) { /* IGDNG: relocation to the last byte of the bo */
+ OUT_RELOC(brw->vb.vb[i].bo,
+ BRW_USAGE_VERTEX,
+ brw->vb.vb[i].bo->size - 1);
+ } else
+ OUT_BATCH(brw->vb.vb[i].stride ? brw->vb.vb[i].vertex_count : 0); /* otherwise: vertex count, 0 for zero-stride buffers */
+ OUT_BATCH(0); /* Instance data step rate */
+ }
+ ADVANCE_BATCH();
+ return 0;
+}
+
+
+
+
+/**
+ * Emit the CMD_VERTEX_ELEMENT packet for the current vertex elements.
+ *
+ * brw_emit_query_begin() is always called first.  When there are no
+ * vertex elements a single pad element is emitted whose components 0-2
+ * use BRW_VE1_COMPONENT_STORE_0 and whose component 3 uses
+ * BRW_VE1_COMPONENT_STORE_1_FLT.  Otherwise two dwords are emitted per
+ * element.
+ *
+ * Always returns 0.
+ */
+static int brw_emit_vertex_elements(struct brw_context *brw)
+{
+   const GLuint nr = brw->curr.num_vertex_elements;
+   GLuint idx;
+
+   brw_emit_query_begin(brw);
+
+   /* No inputs read by the VS: emit one pad VERTEX_ELEMENT and stop.
+    * Stale VB state is left alone; it is inert unless a VE loads from it.
+    */
+   if (nr == 0) {
+      BEGIN_BATCH(3, IGNORE_CLIPRECTS);
+      OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
+      OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
+                BRW_VE0_VALID |
+                (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+                (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
+      ADVANCE_BATCH();
+      return 0;
+   }
+
+   /* One header dword plus two dwords per vertex element. */
+   BEGIN_BATCH(1 + nr * 2, IGNORE_CLIPRECTS);
+   OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr * 2) - 2));
+
+   for (idx = 0; idx < nr; idx++) {
+      const struct pipe_vertex_element *ve = &brw->curr.vertex_element[idx];
+      const uint32_t hw_format = brw_translate_surface_format( ve->src_format );
+      const unsigned ncomp = ve->nr_components;
+      uint32_t store[4];
+      uint32_t dw1;
+
+      /* Components present in the source are stored as-is ... */
+      store[0] = BRW_VE1_COMPONENT_STORE_SRC;
+      store[1] = BRW_VE1_COMPONENT_STORE_SRC;
+      store[2] = BRW_VE1_COMPONENT_STORE_SRC;
+      store[3] = BRW_VE1_COMPONENT_STORE_SRC;
+
+      /* ... missing ones default to 0 for components 0-2 and to
+       * STORE_1_FLT for component 3.  Counts above 3 change nothing.
+       */
+      if (ncomp <= 3)
+         store[3] = BRW_VE1_COMPONENT_STORE_1_FLT;
+      if (ncomp <= 2)
+         store[2] = BRW_VE1_COMPONENT_STORE_0;
+      if (ncomp <= 1)
+         store[1] = BRW_VE1_COMPONENT_STORE_0;
+      if (ncomp == 0)
+         store[0] = BRW_VE1_COMPONENT_STORE_0;
+
+      OUT_BATCH((ve->vertex_buffer_index << BRW_VE0_INDEX_SHIFT) |
+                BRW_VE0_VALID |
+                (hw_format << BRW_VE0_FORMAT_SHIFT) |
+                (ve->src_offset << BRW_VE0_SRC_OFFSET_SHIFT));
+
+      dw1 = ((store[0] << BRW_VE1_COMPONENT_0_SHIFT) |
+             (store[1] << BRW_VE1_COMPONENT_1_SHIFT) |
+             (store[2] << BRW_VE1_COMPONENT_2_SHIFT) |
+             (store[3] << BRW_VE1_COMPONENT_3_SHIFT));
+
+      /* Only non-IGDNG parts get an explicit destination offset. */
+      if (!BRW_IS_IGDNG(brw))
+         dw1 |= ((idx * 4) << BRW_VE1_DST_OFFSET_SHIFT);
+
+      OUT_BATCH(dw1);
+   }
+
+   ADVANCE_BATCH();
+   return 0;
+}
+
+
+/**
+ * Emit vertex buffer state followed by vertex element state.
+ *
+ * \return 0 on success, otherwise the first non-zero result from
+ *         brw_emit_vertex_buffers() or brw_emit_vertex_elements().
+ */
+static int brw_emit_vertices( struct brw_context *brw )
+{
+   int err = brw_emit_vertex_buffers( brw );
+
+   /* Elements are only emitted once the buffers went out cleanly. */
+   if (err == 0)
+      err = brw_emit_vertex_elements( brw );
+
+   return err;
+}
+
+
+/* Tracked state atom for vertex buffers and vertex elements.
+ * Flagged dirty by PIPE_NEW_INDEX_RANGE, PIPE_NEW_VERTEX_BUFFER and
+ * BRW_NEW_BATCH; provides both a prepare and an emit hook.
+ */
+const struct brw_tracked_state brw_vertices = {
+ .dirty = {
+ .mesa = (PIPE_NEW_INDEX_RANGE |
+ PIPE_NEW_VERTEX_BUFFER),
+ .brw = BRW_NEW_BATCH,
+ .cache = 0,
+ },
+ .prepare = brw_prepare_vertices,
+ .emit = brw_emit_vertices,
+};
+
+
+/**
+ * Prepare step for the index buffer state.
+ *
+ * Returns 0 immediately when no index buffer is bound.  A user buffer is
+ * first uploaded through brw->vb.upload_index; any other buffer is used
+ * directly at offset 0 with the size of its winsys buffer.  The byte
+ * offset is converted to an index count in brw->ib.start_vertex_offset,
+ * and BRW_NEW_INDEX_BUFFER is raised only when the winsys buffer or its
+ * size differs from what brw->ib already holds.
+ *
+ * \return 0 on success, or the non-zero result of u_upload_buffer().
+ */
+static int brw_prepare_indices(struct brw_context *brw)
+{
+   struct pipe_buffer *ibuf = brw->curr.index_buffer;
+   struct pipe_buffer *uploaded = NULL;
+   struct brw_winsys_buffer *hw_bo = NULL;
+   GLuint start_bytes;
+   GLuint elt_size;
+   GLuint nbytes;
+
+   if (!ibuf)
+      return 0;
+
+   if (BRW_DEBUG & DEBUG_VERTS)
+      debug_printf("%s: index_size:%d index_buffer->size:%d\n",
+                   __FUNCTION__,
+                   brw->curr.index_size,
+                   brw->curr.index_buffer->size);
+
+   elt_size = brw->curr.index_size;
+   nbytes = ibuf->size;
+
+   if (!brw_buffer_is_user_buffer(ibuf)) {
+      /* Already a hardware buffer: use it whole, from the start. */
+      hw_bo = brw_buffer(ibuf)->bo;
+      nbytes = hw_bo->size;
+      start_bytes = 0;
+   }
+   else {
+      /* Turn the user buffer into a proper hardware buffer.
+       *
+       * XXX: annotate the userbuffer with the upload information so
+       * that successive calls don't get re-uploaded.
+       */
+      const int err = u_upload_buffer( brw->vb.upload_index,
+                                       0,
+                                       nbytes,
+                                       ibuf,
+                                       &start_bytes,
+                                       &uploaded );
+      if (err)
+         return err;
+
+      hw_bo = brw_buffer(uploaded)->bo;
+   }
+
+   /* Express the start of the indices through CMD_3D_PRIM's
+    * start_vertex_offset rather than through new index buffer state.
+    * With successive user buffers packed by the upload manager into one
+    * winsys buffer this avoids re-emitting the index buffer packet.
+    *
+    * This raises no state flags; it is merged into the draw packet.
+    */
+   assert((start_bytes & (elt_size - 1)) == 0);
+   brw->ib.start_vertex_offset = start_bytes / elt_size;
+
+   /* A different buffer or size needs a new CMD_INDEX_BUFFER packet. */
+   if (!(brw->ib.bo == hw_bo && brw->ib.size == nbytes)) {
+      bo_reference(&brw->ib.bo, hw_bo);
+      brw->ib.size = nbytes;
+      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
+   }
+
+   /* Drop the upload reference (a no-op pointer when nothing was uploaded). */
+   pipe_buffer_reference( &uploaded, NULL );
+   brw_add_validated_bo(brw, brw->ib.bo);
+   return 0;
+}
+
+/* Tracked state atom for index buffer preparation.  Flagged dirty by
+ * PIPE_NEW_INDEX_BUFFER; it has a prepare hook only and no emit hook.
+ */
+const struct brw_tracked_state brw_indices = {
+ .dirty = {
+ .mesa = PIPE_NEW_INDEX_BUFFER,
+ .brw = 0,
+ .cache = 0,
+ },
+ .prepare = brw_prepare_indices,
+};
+
+/**
+ * Emit a four-dword CMD_INDEX_BUFFER packet when an index buffer bo is
+ * set in brw->ib: header, start relocation, end relocation (last byte)
+ * and a trailing zero dword.  Emits nothing when brw->ib.bo is NULL.
+ *
+ * Always returns 0.
+ */
+static int brw_emit_index_buffer(struct brw_context *brw)
+{
+ /* Emit the indexbuffer packet:
+ */
+ if (brw->ib.bo)
+ {
+ struct brw_indexbuffer ib;
+
+ memset(&ib, 0, sizeof(ib));
+
+ ib.header.bits.opcode = CMD_INDEX_BUFFER;
+ ib.header.bits.length = sizeof(ib)/4 - 2;
+ /* NOTE(review): get_index_type() is handed brw->ib.size, which
+ * brw_prepare_indices() sets to the buffer size in bytes rather than
+ * the per-index size (brw->curr.index_size) - confirm this is intended.
+ */
+ ib.header.bits.index_format = get_index_type(brw->ib.size);
+ ib.header.bits.cut_index_enable = 0;
+
+ /* NOTE(review): brw->ib.offset is not assigned anywhere in the
+ * visible part of this file - confirm where it is initialised.
+ */
+ BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+ OUT_BATCH( ib.header.dword );
+ OUT_RELOC(brw->ib.bo,
+ BRW_USAGE_VERTEX,
+ brw->ib.offset);
+ OUT_RELOC(brw->ib.bo,
+ BRW_USAGE_VERTEX,
+ brw->ib.offset + brw->ib.size - 1);
+ OUT_BATCH( 0 );
+ ADVANCE_BATCH();
+ }
+
+ return 0;
+}
+
+/* Tracked state atom that emits the index buffer packet.  Flagged dirty
+ * by BRW_NEW_BATCH and BRW_NEW_INDEX_BUFFER; it has an emit hook only.
+ */
+const struct brw_tracked_state brw_index_buffer = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER,
+ .cache = 0,
+ },
+ .emit = brw_emit_index_buffer,
+};
diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c
new file mode 100644
index 00000000000..a8fcb5f97eb
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu.c
@@ -0,0 +1,262 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+/**
+ * Select predication according to a flag value.
+ *
+ * A value of 0xff turns predication off.  Any other value turns normal
+ * predication on, first loading the flag register with \p value through
+ * a MOV if p->flag_value does not already match.
+ *
+ * Open question carried over from the original author: how does
+ * predicate control work when execution_size != 8?  Is a test/set for
+ * 0xffff needed when execution_size is 16?
+ */
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value )
+{
+   /* Predication is off while the flag is (possibly) reloaded; the state
+    * pushed below is a copy of this unpredicated state.
+    */
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   if (value == 0xff)
+      return;
+
+   if (p->flag_value != value) {
+      brw_push_insn_state(p);
+      brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
+      p->flag_value = value;
+      brw_pop_insn_state(p);
+   }
+
+   p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+}
+
+/** Set the predicate control field of the current default instruction state. */
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.predicate_control = pc;
+}
+
+/** Set the destreg/conditional-modifier field of the current instruction state. */
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.destreg__conditionalmod = conditional;
+}
+
+/** Set the access mode field of the current default instruction state. */
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.access_mode = access_mode;
+}
+
+/** Set the compression control field of the current default instruction state. */
+void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.compression_control = compression_control;
+}
+
+/** Set the mask control field of the current default instruction state. */
+void brw_set_mask_control( struct brw_compile *p, GLuint value )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.mask_control = value;
+}
+
+/** Set the saturate field of the current default instruction state. */
+void brw_set_saturate( struct brw_compile *p, GLuint value )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.saturate = value;
+}
+
+/**
+ * Push a copy of the current default instruction state onto the state
+ * stack and make the copy current.  Asserts that the stack is not full.
+ */
+void brw_push_insn_state( struct brw_compile *p )
+{
+   struct brw_instruction *top = p->current;
+
+   assert(top != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
+   memcpy(&top[1], &top[0], sizeof top[0]);
+   p->current = top + 1;
+}
+
+/**
+ * Discard the current default instruction state and return to the one
+ * below it on the stack.  Asserts that the stack is not already empty.
+ */
+void brw_pop_insn_state( struct brw_compile *p )
+{
+   assert(p->current != p->stack);
+   --p->current;
+}
+
+
+/***********************************************************************
+ */
+/**
+ * Initialise a compile: attach the context, reset the instruction count
+ * and install a zeroed bottom-of-stack default instruction state with a
+ * few defaults applied.
+ *
+ * NOTE(review): flag_value, first_label, first_call and error are not
+ * initialised here - presumably callers zero the struct first; confirm.
+ */
+void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
+{
+   struct brw_instruction *base = p->stack;
+
+   p->brw = brw;
+   p->nr_insn = 0;
+
+   memset(base, 0, sizeof(*base));
+   p->current = base;
+
+   /* Some defaults?
+    */
+   brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
+   brw_set_saturate(p, 0);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+
+/**
+ * Finish a program and hand back its instruction store.
+ *
+ * Eight NOPs are appended first.  On success *data points at p->store
+ * and *sz is the program size in bytes.
+ *
+ * \return PIPE_OK, or PIPE_ERROR_BAD_INPUT when p->error is set (in
+ *         which case *data and *sz are left untouched).
+ */
+enum pipe_error brw_get_program( struct brw_compile *p,
+                                 const GLuint **data,
+                                 GLuint *sz )
+{
+   GLuint pad;
+
+   for (pad = 8; pad != 0; pad--)
+      brw_NOP(p);
+
+   /* Only report a program that was generated without errors. */
+   if (!p->error) {
+      *sz = p->nr_insn * sizeof(struct brw_instruction);
+      *data = (const GLuint *)p->store;
+      return PIPE_OK;
+   }
+
+   return PIPE_ERROR_BAD_INPUT;
+}
+
+
+
+/**
+ * Subroutine calls require special attention.
+ * Mesa instructions may be expanded into multiple hardware instructions
+ * so the prog_instruction::BranchTarget field can't be used as an index
+ * into the hardware instructions.
+ *
+ * The BranchTarget field isn't needed, however. Mesa's GLSL compiler
+ * emits CAL and BGNSUB instructions with labels that can be used to map
+ * subroutine calls to actual subroutine code blocks.
+ *
+ * The structures and function here implement patching of CAL instructions
+ * so they jump to the right subroutine code...
+ */
+
+
+/**
+ * For each OPCODE_BGNSUB we create one of these.
+ *
+ * Nodes form a singly linked list headed by brw_compile::first_label;
+ * brw_save_label() pushes at the front and brw_resolve_cals() frees them.
+ */
+struct brw_eu_label
+{
+ GLuint label; /**< the label number */
+ GLuint position; /**< the position of the brw instruction for this label */
+ struct brw_eu_label *next; /**< next in linked list */
+};
+
+
+/**
+ * For each OPCODE_CAL we create one of these.
+ *
+ * Nodes form a singly linked list headed by brw_compile::first_call;
+ * brw_save_call() pushes at the front and brw_resolve_cals() patches the
+ * recorded instruction and frees the node.
+ */
+struct brw_eu_call
+{
+ GLuint call_inst_pos; /**< location of the CAL instruction */
+ GLuint label; /**< label number of the subroutine being called */
+ struct brw_eu_call *next; /**< next in linked list */
+};
+
+
+/**
+ * Called for each OPCODE_BGNSUB: record that label \p l lives at
+ * instruction index \p position.
+ *
+ * The new entry is pushed on the front of c->first_label.  If the
+ * allocation fails nothing is recorded and c->error is set instead of
+ * dereferencing NULL.
+ *
+ * NOTE(review): a label dropped this way makes brw_lookup_label() abort
+ * for any call that targets it; callers should check c->error before
+ * resolving calls.
+ */
+void
+brw_save_label(struct brw_compile *c, unsigned l, GLuint position)
+{
+   struct brw_eu_label *label = CALLOC_STRUCT(brw_eu_label);
+
+   if (label == NULL) {
+      c->error = 1;
+      return;
+   }
+
+   label->label = l;
+   label->position = position;
+   label->next = c->first_label;
+   c->first_label = label;
+}
+
+
+/**
+ * Called for each OPCODE_CAL: record that the CAL instruction at index
+ * \p call_pos targets the subroutine labelled \p label.
+ *
+ * The new entry is pushed on the front of c->first_call.  If the
+ * allocation fails nothing is recorded (the CAL will not be patched) and
+ * c->error is set instead of dereferencing NULL.
+ */
+void
+brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos)
+{
+   struct brw_eu_call *call = CALLOC_STRUCT(brw_eu_call);
+
+   if (call == NULL) {
+      c->error = 1;
+      return;
+   }
+
+   call->call_inst_pos = call_pos;
+   call->label = label;
+   call->next = c->first_call;
+   c->first_call = call;
+}
+
+
+/**
+ * Find label \p l in the compile's label list and return the recorded
+ * instruction position.  Aborts the process if the label is unknown.
+ */
+static GLuint
+brw_lookup_label(struct brw_compile *c, unsigned l)
+{
+   const struct brw_eu_label *node = c->first_label;
+
+   while (node != NULL) {
+      if (node->label == l)
+         return node->position;
+      node = node->next;
+   }
+
+   abort(); /* should never happen */
+   return ~0;
+}
+
+
+/**
+ * Resolve subroutine calls once code generation is finished.
+ *
+ * Every recorded CAL gets its src1 replaced by an immediate holding the
+ * distance, in instructions, from the CAL to its subroutine, multiplied
+ * by 16 (presumably the instruction size in bytes - confirm).  Both the
+ * call list and the label list are then freed and reset to NULL.
+ */
+void
+brw_resolve_cals(struct brw_compile *c)
+{
+   const struct brw_eu_call *pending;
+   struct brw_eu_call *dead_call;
+   struct brw_eu_label *dead_label;
+
+   for (pending = c->first_call; pending != NULL; pending = pending->next) {
+      const GLuint sub_pos = brw_lookup_label(c, pending->label);
+      struct brw_instruction *cal_insn = c->store + pending->call_inst_pos;
+      struct brw_instruction *sub_insn = c->store + sub_pos;
+      const GLint insn_delta = sub_insn - cal_insn;
+
+      /* patch the CAL so that it points at the subroutine */
+      brw_set_src1(cal_insn, brw_imm_d(insn_delta * 16));
+   }
+
+   /* release the list of calls */
+   dead_call = c->first_call;
+   while (dead_call != NULL) {
+      struct brw_eu_call *rest = dead_call->next;
+      FREE(dead_call);
+      dead_call = rest;
+   }
+   c->first_call = NULL;
+
+   /* release the list of labels */
+   dead_label = c->first_label;
+   while (dead_label != NULL) {
+      struct brw_eu_label *rest = dead_label->next;
+      FREE(dead_label);
+      dead_label = rest;
+   }
+   c->first_label = NULL;
+}
diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h
new file mode 100644
index 00000000000..af509b2e5f4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu.h
@@ -0,0 +1,992 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_EU_H
+#define BRW_EU_H
+
+#include "util/u_debug.h"
+#include "pipe/p_defines.h"
+
+#include "brw_structs.h"
+#include "brw_defines.h"
+
+/* Pack four 2-bit channel selectors into one 8-bit swizzle; a occupies
+ * the lowest two bits and d the highest.
+ */
+#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
+/* Extract the 2-bit selector at position idx (0..3) of a packed swizzle. */
+#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
+
+/* Commonly used swizzles; NOOP and XYZW are the same identity mapping. */
+#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
+
+/* Write masks: one bit per channel, X = 0x1, Y = 0x2, Z = 0x4, W = 0x8. */
+#define BRW_WRITEMASK_NONE 0x00
+#define BRW_WRITEMASK_X 0x01
+#define BRW_WRITEMASK_Y 0x02
+#define BRW_WRITEMASK_XY 0x03
+#define BRW_WRITEMASK_Z 0x04
+#define BRW_WRITEMASK_XZ 0x05
+#define BRW_WRITEMASK_YZ 0x06
+#define BRW_WRITEMASK_XYZ 0x07
+#define BRW_WRITEMASK_W 0x08
+#define BRW_WRITEMASK_XW 0x09
+#define BRW_WRITEMASK_YW 0x0A
+#define BRW_WRITEMASK_XYW 0x0B
+#define BRW_WRITEMASK_ZW 0x0C
+#define BRW_WRITEMASK_XZW 0x0D
+#define BRW_WRITEMASK_YZW 0x0E
+#define BRW_WRITEMASK_XYZW 0x0F
+
+
+/* Size of one register in bytes (8 * 4 = 32), as used by byte_offset()
+ * and brw_address() to convert between register numbers and bytes.
+ */
+#define REG_SIZE (8*4)
+
+
+/* These aren't hardware structs, just something useful for us to pass around:
+ *
+ * Align1 operation has a lot of control over input ranges. Used in
+ * WM programs to implement shaders decomposed into "channel serial"
+ * or "structure of array" form:
+ *
+ * The bitfields below add up to 32 bits and dw1 is a further 32 bits,
+ * hence "two dwords total".  dw1 holds either the align16/indirect bits
+ * or an immediate value, depending on how the register is used.
+ */
+struct brw_reg
+{
+ GLuint type:4;
+ GLuint file:2;
+ GLuint nr:8;
+ GLuint subnr:5; /* :1 in align16 */
+ GLuint negate:1; /* source only */
+ GLuint abs:1; /* source only */
+ GLuint vstride:4; /* source only */
+ GLuint width:3; /* src only, align1 only */
+ GLuint hstride:2; /* align1 only */
+ GLuint address_mode:1; /* relative addressing, hopefully! */
+ GLuint pad0:1;
+
+ union {
+ struct {
+ GLuint swizzle:8; /* src only, align16 only */
+ GLuint writemask:4; /* dest only, align16 only */
+ GLint indirect_offset:10; /* relative addressing offset */
+ GLuint pad1:10; /* two dwords total */
+ } bits;
+
+ /* Immediate value views, written by the brw_imm_*() constructors. */
+ GLfloat f;
+ GLint d;
+ GLuint ud;
+ } dw1;
+};
+
+
+/* Describes an indirect (address-register relative) location: the address
+ * register sub-number plus a signed offset.  Built by brw_indirect() and
+ * consumed by the deref_*() helpers.  The fields total 32 bits.
+ */
+struct brw_indirect {
+ GLuint addr_subnr:4;
+ GLint addr_offset:10;
+ GLuint pad:18;
+};
+
+
+struct brw_eu_label;
+struct brw_eu_call;
+
+
+
+/* Depth of the default-instruction-state stack and capacity of the
+ * instruction store in struct brw_compile.
+ */
+#define BRW_EU_MAX_INSN_STACK 5
+#define BRW_EU_MAX_INSN 10000
+
+/* State of one program being assembled. */
+struct brw_compile {
+ /* Generated instructions; nr_insn counts how many are in use. */
+ struct brw_instruction store[BRW_EU_MAX_INSN];
+ GLuint nr_insn;
+
+ /* Allow clients to push/pop instruction state:
+ */
+ struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+ struct brw_instruction *current;
+
+ /* Value last loaded into the flag register by
+ * brw_set_predicate_control_flag_value().
+ */
+ GLuint flag_value;
+ GLboolean single_program_flow;
+ struct brw_context *brw;
+
+ struct brw_eu_label *first_label; /**< linked list of labels */
+ struct brw_eu_call *first_call; /**< linked list of CALs */
+
+ /* When set, brw_get_program() returns PIPE_ERROR_BAD_INPUT. */
+ boolean error;
+};
+
+
+/* Record the instruction position of a subroutine label (OPCODE_BGNSUB). */
+void
+brw_save_label(struct brw_compile *c, unsigned label, GLuint position);
+
+/* Record a CAL instruction position and the label it targets (OPCODE_CAL). */
+void
+brw_save_call(struct brw_compile *c, unsigned label, GLuint call_pos);
+
+/* Patch all recorded CALs and free the label and call lists. */
+void
+brw_resolve_cals(struct brw_compile *c);
+
+
+
+/**
+ * Size in bytes of one element of the given BRW_REGISTER_TYPE_x:
+ * 4 for UD/D/F, 2 for HF/UW/W, 1 for UB/B and 0 for anything else.
+ */
+static INLINE int type_sz( GLuint type )
+{
+   int bytes = 0;
+
+   switch( type ) {
+   case BRW_REGISTER_TYPE_UD:
+   case BRW_REGISTER_TYPE_D:
+   case BRW_REGISTER_TYPE_F:
+      bytes = 4;
+      break;
+   case BRW_REGISTER_TYPE_HF:
+   case BRW_REGISTER_TYPE_UW:
+   case BRW_REGISTER_TYPE_W:
+      bytes = 2;
+      break;
+   case BRW_REGISTER_TYPE_UB:
+   case BRW_REGISTER_TYPE_B:
+      bytes = 1;
+      break;
+   default:
+      break;
+   }
+
+   return bytes;
+}
+
+/**
+ * Construct a brw_reg.
+ * \param file one of the BRW_x_REGISTER_FILE values
+ * \param nr register number/index
+ * \param subnr register sub number, in units of \p type elements
+ * \param type one of BRW_REGISTER_TYPE_x
+ * \param vstride one of BRW_VERTICAL_STRIDE_x
+ * \param width one of BRW_WIDTH_x
+ * \param hstride one of BRW_HORIZONTAL_STRIDE_x
+ * \param swizzle one of BRW_SWIZZLE_x
+ * \param writemask BRW_WRITEMASK_X/Y/Z/W bitfield
+ *
+ * The returned register uses direct addressing with negate and abs
+ * cleared.  subnr is stored in bytes (subnr * type_sz(type)).
+ */
+static INLINE struct brw_reg brw_reg( GLuint file,
+                                      GLuint nr,
+                                      GLuint subnr,
+                                      GLuint type,
+                                      GLuint vstride,
+                                      GLuint width,
+                                      GLuint hstride,
+                                      GLuint swizzle,
+                                      GLuint writemask )
+{
+   struct brw_reg reg;
+
+   /* Range-check the register number against the register *file*.  The
+    * previous code compared the element type against the file constants,
+    * so these asserts never tested what they were meant to.
+    */
+   if (file == BRW_GENERAL_REGISTER_FILE)
+      assert(nr < BRW_MAX_GRF);
+   else if (file == BRW_MESSAGE_REGISTER_FILE)
+      assert(nr < BRW_MAX_MRF);
+   else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(nr <= BRW_ARF_IP);
+
+   reg.type = type;
+   reg.file = file;
+   reg.nr = nr;
+   reg.subnr = subnr * type_sz(type);
+   reg.negate = 0;
+   reg.abs = 0;
+   reg.vstride = vstride;
+   reg.width = width;
+   reg.hstride = hstride;
+   reg.address_mode = BRW_ADDRESS_DIRECT;
+   reg.pad0 = 0;
+
+   /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
+    * set swizzle and writemask to W, as the lower bits of subnr will
+    * be lost when converted to align16. This is probably too much to
+    * keep track of as you'd want it adjusted by suboffset(), etc.
+    * Perhaps fix up when converting to align16?
+    */
+   reg.dw1.bits.swizzle = swizzle;
+   reg.dw1.bits.writemask = writemask;
+   reg.dw1.bits.indirect_offset = 0;
+   reg.dw1.bits.pad1 = 0;
+   return reg;
+}
+
+/**
+ * Build a float[16] register: type F, vertical stride 16, width 16,
+ * horizontal stride 1, XYZW swizzle and full writemask.
+ */
+static INLINE struct brw_reg brw_vec16_reg( GLuint file,
+                                            GLuint nr,
+                                            GLuint subnr )
+{
+   const struct brw_reg r = brw_reg(file, nr, subnr,
+                                    BRW_REGISTER_TYPE_F,
+                                    BRW_VERTICAL_STRIDE_16,
+                                    BRW_WIDTH_16,
+                                    BRW_HORIZONTAL_STRIDE_1,
+                                    BRW_SWIZZLE_XYZW,
+                                    BRW_WRITEMASK_XYZW);
+   return r;
+}
+
+/**
+ * Build a float[8] register: type F, vertical stride 8, width 8,
+ * horizontal stride 1, XYZW swizzle and full writemask.
+ */
+static INLINE struct brw_reg brw_vec8_reg( GLuint file,
+                                           GLuint nr,
+                                           GLuint subnr )
+{
+   const struct brw_reg r = brw_reg(file, nr, subnr,
+                                    BRW_REGISTER_TYPE_F,
+                                    BRW_VERTICAL_STRIDE_8,
+                                    BRW_WIDTH_8,
+                                    BRW_HORIZONTAL_STRIDE_1,
+                                    BRW_SWIZZLE_XYZW,
+                                    BRW_WRITEMASK_XYZW);
+   return r;
+}
+
+/**
+ * Build a float[4] register: type F, vertical stride 4, width 4,
+ * horizontal stride 1, XYZW swizzle and full writemask.
+ */
+static INLINE struct brw_reg brw_vec4_reg( GLuint file,
+                                           GLuint nr,
+                                           GLuint subnr )
+{
+   const struct brw_reg r = brw_reg(file, nr, subnr,
+                                    BRW_REGISTER_TYPE_F,
+                                    BRW_VERTICAL_STRIDE_4,
+                                    BRW_WIDTH_4,
+                                    BRW_HORIZONTAL_STRIDE_1,
+                                    BRW_SWIZZLE_XYZW,
+                                    BRW_WRITEMASK_XYZW);
+   return r;
+}
+
+/**
+ * Build a float[2] register: type F, vertical stride 2, width 2,
+ * horizontal stride 1, XYXY swizzle and XY writemask.
+ */
+static INLINE struct brw_reg brw_vec2_reg( GLuint file,
+                                           GLuint nr,
+                                           GLuint subnr )
+{
+   const struct brw_reg r = brw_reg(file, nr, subnr,
+                                    BRW_REGISTER_TYPE_F,
+                                    BRW_VERTICAL_STRIDE_2,
+                                    BRW_WIDTH_2,
+                                    BRW_HORIZONTAL_STRIDE_1,
+                                    BRW_SWIZZLE_XYXY,
+                                    BRW_WRITEMASK_XY);
+   return r;
+}
+
+/**
+ * Build a float[1] (scalar) register: type F, vertical stride 0,
+ * width 1, horizontal stride 0, XXXX swizzle and X writemask.
+ */
+static INLINE struct brw_reg brw_vec1_reg( GLuint file,
+                                           GLuint nr,
+                                           GLuint subnr )
+{
+   const struct brw_reg r = brw_reg(file, nr, subnr,
+                                    BRW_REGISTER_TYPE_F,
+                                    BRW_VERTICAL_STRIDE_0,
+                                    BRW_WIDTH_1,
+                                    BRW_HORIZONTAL_STRIDE_0,
+                                    BRW_SWIZZLE_XXXX,
+                                    BRW_WRITEMASK_X);
+   return r;
+}
+
+
+/** Return a copy of \p reg whose element type is \p type; subnr is not rescaled. */
+static INLINE struct brw_reg retype( struct brw_reg reg,
+                                     GLuint type )
+{
+   struct brw_reg retyped = reg;
+
+   retyped.type = type;
+   return retyped;
+}
+
+/**
+ * Return a copy of \p reg advanced by \p delta elements of its current
+ * type within the register (subnr grows by delta * type_sz(type) bytes).
+ */
+static INLINE struct brw_reg suboffset( struct brw_reg reg,
+                                        GLuint delta )
+{
+   const GLuint step_bytes = delta * type_sz(reg.type);
+
+   reg.subnr = reg.subnr + step_bytes;
+   return reg;
+}
+
+
+/** Return a copy of \p reg advanced by \p delta whole registers. */
+static INLINE struct brw_reg offset( struct brw_reg reg,
+                                     GLuint delta )
+{
+   reg.nr = reg.nr + delta;
+   return reg;
+}
+
+
+/**
+ * Return a copy of \p reg advanced by \p bytes bytes, carrying from the
+ * sub-register into the register number every REG_SIZE bytes.
+ */
+static INLINE struct brw_reg byte_offset( struct brw_reg reg,
+                                          GLuint bytes )
+{
+   const GLuint linear = reg.nr * REG_SIZE + reg.subnr + bytes;
+
+   reg.subnr = linear % REG_SIZE;
+   reg.nr = linear / REG_SIZE;
+   return reg;
+}
+
+
+/**
+ * Build an unsigned word[16] register; \p subnr counts UW elements.
+ */
+static INLINE struct brw_reg brw_uw16_reg( GLuint file,
+                                           GLuint nr,
+                                           GLuint subnr )
+{
+   const struct brw_reg base = brw_vec16_reg(file, nr, 0);
+   const struct brw_reg words = retype(base, BRW_REGISTER_TYPE_UW);
+
+   return suboffset(words, subnr);
+}
+
+/**
+ * Build an unsigned word[8] register; \p subnr counts UW elements.
+ */
+static INLINE struct brw_reg brw_uw8_reg( GLuint file,
+                                          GLuint nr,
+                                          GLuint subnr )
+{
+   const struct brw_reg base = brw_vec8_reg(file, nr, 0);
+   const struct brw_reg words = retype(base, BRW_REGISTER_TYPE_UW);
+
+   return suboffset(words, subnr);
+}
+
+/**
+ * Build an unsigned word[1] (scalar) register; \p subnr counts UW elements.
+ */
+static INLINE struct brw_reg brw_uw1_reg( GLuint file,
+                                          GLuint nr,
+                                          GLuint subnr )
+{
+   const struct brw_reg base = brw_vec1_reg(file, nr, 0);
+   const struct brw_reg words = retype(base, BRW_REGISTER_TYPE_UW);
+
+   return suboffset(words, subnr);
+}
+
+/**
+ * Build the skeleton of an immediate operand of the given type: file
+ * BRW_IMMEDIATE_VALUE, scalar region (vstride 0, width 1, hstride 0) and
+ * a zeroed dw1.  Callers fill in the immediate value afterwards.
+ */
+static INLINE struct brw_reg brw_imm_reg( GLuint type )
+{
+   const struct brw_reg imm = brw_reg( BRW_IMMEDIATE_VALUE,
+                                       0,
+                                       0,
+                                       type,
+                                       BRW_VERTICAL_STRIDE_0,
+                                       BRW_WIDTH_1,
+                                       BRW_HORIZONTAL_STRIDE_0,
+                                       0,
+                                       0);
+   return imm;
+}
+
+/** Build a float immediate operand holding \p f. */
+static INLINE struct brw_reg brw_imm_f( GLfloat f )
+{
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_F);
+
+   result.dw1.f = f;
+   return result;
+}
+
+/** Build a signed 32-bit integer immediate operand holding \p d. */
+static INLINE struct brw_reg brw_imm_d( GLint d )
+{
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_D);
+
+   result.dw1.d = d;
+   return result;
+}
+
+/** Build an unsigned 32-bit integer immediate operand holding \p ud. */
+static INLINE struct brw_reg brw_imm_ud( GLuint ud )
+{
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_UD);
+
+   result.dw1.ud = ud;
+   return result;
+}
+
+/**
+ * Build an unsigned 16-bit immediate operand.  The value is replicated
+ * into both halves of the 32-bit immediate dword.
+ */
+static INLINE struct brw_reg brw_imm_uw( GLushort uw )
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
+   /* Widen to unsigned before shifting: a GLushort promotes to signed
+    * int, and shifting a value >= 0x8000 left by 16 would overflow it.
+    */
+   const GLuint half = uw;
+
+   imm.dw1.ud = half | (half << 16);
+   return imm;
+}
+
+/**
+ * Build a signed 16-bit immediate operand.  The 16-bit pattern of \p w is
+ * replicated into both halves of the 32-bit immediate dword.
+ */
+static INLINE struct brw_reg brw_imm_w( GLshort w )
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
+   /* Work on the raw 16-bit pattern.  Using the promoted signed value
+    * would sign-extend negative inputs over the upper half (giving
+    * 0xffff there instead of a copy of w) and left-shift a negative int.
+    */
+   const GLuint half = (GLushort) w;
+
+   imm.dw1.ud = half | (half << 16);
+   return imm;
+}
+
+/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
+ * numbers alias with _V and _VF below:
+ */
+
+/**
+ * Build a V-type immediate: a vector of eight signed half-byte values
+ * packed in \p v, with region vstride 0, width 8, hstride 1.
+ */
+static INLINE struct brw_reg brw_imm_v( GLuint v )
+{
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_V);
+
+   result.dw1.ud = v;
+   result.hstride = BRW_HORIZONTAL_STRIDE_1;
+   result.width = BRW_WIDTH_8;
+   result.vstride = BRW_VERTICAL_STRIDE_0;
+   return result;
+}
+
+/**
+ * Build a VF-type immediate: a vector of four 8-bit float values packed
+ * in \p v, with region vstride 0, width 4, hstride 1.
+ */
+static INLINE struct brw_reg brw_imm_vf( GLuint v )
+{
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+
+   result.dw1.ud = v;
+   result.hstride = BRW_HORIZONTAL_STRIDE_1;
+   result.width = BRW_WIDTH_4;
+   result.vstride = BRW_VERTICAL_STRIDE_0;
+   return result;
+}
+
+/* 8-bit values for use as the components passed to brw_imm_vf4().
+ * VF_NEG is bit 7; presumably it is OR-ed onto a value to negate it -
+ * confirm against the hardware documentation.
+ */
+#define VF_ZERO 0x0
+#define VF_ONE 0x30
+#define VF_NEG (1<<7)
+
+/**
+ * Build a VF-type immediate from four separate 8-bit components; v0
+ * lands in the lowest byte and v3 in the highest.
+ */
+static INLINE struct brw_reg brw_imm_vf4( GLuint v0,
+                                          GLuint v1,
+                                          GLuint v2,
+                                          GLuint v3)
+{
+   const GLuint packed = (v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24);
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+
+   result.vstride = BRW_VERTICAL_STRIDE_0;
+   result.width = BRW_WIDTH_4;
+   result.hstride = BRW_HORIZONTAL_STRIDE_1;
+   result.dw1.ud = packed;
+   return result;
+}
+
+
+/**
+ * Build a UW immediate holding the byte address of \p reg within its
+ * register file: nr * REG_SIZE + subnr.
+ */
+static INLINE struct brw_reg brw_address( struct brw_reg reg )
+{
+   const GLuint byte_addr = reg.nr * REG_SIZE + reg.subnr;
+
+   return brw_imm_uw(byte_addr);
+}
+
+/** Build a float[1] register in the general register file. */
+static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr )
+{
+   const GLuint file = BRW_GENERAL_REGISTER_FILE;
+
+   return brw_vec1_reg(file, nr, subnr);
+}
+
+/** Build a float[2] register in the general register file. */
+static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr )
+{
+   const GLuint file = BRW_GENERAL_REGISTER_FILE;
+
+   return brw_vec2_reg(file, nr, subnr);
+}
+
+/** Build a float[4] register in the general register file. */
+static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr )
+{
+   const GLuint file = BRW_GENERAL_REGISTER_FILE;
+
+   return brw_vec4_reg(file, nr, subnr);
+}
+
+/** Build a float[8] register in the general register file. */
+static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr )
+{
+   const GLuint file = BRW_GENERAL_REGISTER_FILE;
+
+   return brw_vec8_reg(file, nr, subnr);
+}
+
+
+/** Build an unsigned word[8] register in the general register file. */
+static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr )
+{
+   const GLuint file = BRW_GENERAL_REGISTER_FILE;
+
+   return brw_uw8_reg(file, nr, subnr);
+}
+
+/** Build an unsigned word[16] register in the general register file. */
+static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr )
+{
+   const GLuint file = BRW_GENERAL_REGISTER_FILE;
+
+   return brw_uw16_reg(file, nr, subnr);
+}
+
+
+/**
+ * Build the architecture null register as a float[8] region (usually
+ * used as a destination when only condition codes are wanted).
+ */
+static INLINE struct brw_reg brw_null_reg( void )
+{
+   const struct brw_reg null_reg = brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                                                BRW_ARF_NULL,
+                                                0);
+   return null_reg;
+}
+
+/** Build a scalar UW view of the architecture address register at \p subnr. */
+static INLINE struct brw_reg brw_address_reg( GLuint subnr )
+{
+   const struct brw_reg addr = brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                                           BRW_ARF_ADDRESS,
+                                           subnr);
+   return addr;
+}
+
+/**
+ * Build the architecture instruction-pointer register as a UD scalar.
+ *
+ * Unlike the other scalar registers this one deliberately carries an
+ * XYZW swizzle and a full writemask: if/else instructions break in
+ * align16 mode when they are anything else.
+ */
+static INLINE struct brw_reg brw_ip_reg( void )
+{
+   const struct brw_reg ip = brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                                     BRW_ARF_IP,
+                                     0,
+                                     BRW_REGISTER_TYPE_UD,
+                                     BRW_VERTICAL_STRIDE_4, /* ? */
+                                     BRW_WIDTH_1,
+                                     BRW_HORIZONTAL_STRIDE_0,
+                                     BRW_SWIZZLE_XYZW,    /* NOTE! */
+                                     BRW_WRITEMASK_XYZW); /* NOTE! */
+   return ip;
+}
+
+/** Build the architecture accumulator register as a float[8] region. */
+static INLINE struct brw_reg brw_acc_reg( void )
+{
+   const struct brw_reg acc = brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                                           BRW_ARF_ACCUMULATOR,
+                                           0);
+   return acc;
+}
+
+
+/** Build a scalar UW view of the architecture flag register. */
+static INLINE struct brw_reg brw_flag_reg( void )
+{
+   const struct brw_reg flag = brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                                           BRW_ARF_FLAG,
+                                           0);
+   return flag;
+}
+
+
+/** Build a scalar UW view of the architecture mask register at \p subnr. */
+static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
+{
+   const struct brw_reg mask = brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                                           BRW_ARF_MASK,
+                                           subnr);
+   return mask;
+}
+
+/**
+ * Build message register \p nr as a float[8] region.  Asserts that
+ * \p nr is below BRW_MAX_MRF.
+ */
+static INLINE struct brw_reg brw_message_reg( GLuint nr )
+{
+   struct brw_reg mrf;
+
+   assert(nr < BRW_MAX_MRF);
+   mrf = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
+   return mrf;
+}
+
+
+
+
+/**
+ * Encode an element count for stride(): 0 -> 0, 1 -> 1, 2 -> 2, 4 -> 3,
+ * 8 -> 4, 16 -> 5, 32 -> 6.  Any other input encodes as 0.
+ *
+ * This is almost always called with a numeric constant argument, so it
+ * is kept as a plain switch that is easy to evaluate at compile time.
+ */
+static INLINE GLuint cvt( GLuint val )
+{
+   GLuint encoded = 0;
+
+   switch (val) {
+   case 1:  encoded = 1; break;
+   case 2:  encoded = 2; break;
+   case 4:  encoded = 3; break;
+   case 8:  encoded = 4; break;
+   case 16: encoded = 5; break;
+   case 32: encoded = 6; break;
+   default: break; /* includes 0 */
+   }
+
+   return encoded;
+}
+
+/**
+ * Return a copy of \p reg with a new region.  The three arguments are
+ * element counts as accepted by cvt(); vstride and hstride are stored as
+ * cvt(x) and width as cvt(width) - 1.
+ */
+static INLINE struct brw_reg stride( struct brw_reg reg,
+                                     GLuint vstride,
+                                     GLuint width,
+                                     GLuint hstride )
+{
+   const GLuint enc_v = cvt(vstride);
+   const GLuint enc_w = cvt(width) - 1;
+   const GLuint enc_h = cvt(hstride);
+
+   reg.vstride = enc_v;
+   reg.width = enc_w;
+   reg.hstride = enc_h;
+   return reg;
+}
+
+
+/** View \p reg as a 16-wide region: stride(reg, 16, 16, 1). */
+static INLINE struct brw_reg vec16( struct brw_reg reg )
+{
+   const struct brw_reg wide = stride(reg, 16, 16, 1);
+   return wide;
+}
+
+/** View \p reg as an 8-wide region: stride(reg, 8, 8, 1). */
+static INLINE struct brw_reg vec8( struct brw_reg reg )
+{
+   const struct brw_reg view = stride(reg, 8, 8, 1);
+   return view;
+}
+
+/** View \p reg as a 4-wide region: stride(reg, 4, 4, 1). */
+static INLINE struct brw_reg vec4( struct brw_reg reg )
+{
+   const struct brw_reg view = stride(reg, 4, 4, 1);
+   return view;
+}
+
+/** View \p reg as a 2-wide region: stride(reg, 2, 2, 1). */
+static INLINE struct brw_reg vec2( struct brw_reg reg )
+{
+   const struct brw_reg view = stride(reg, 2, 2, 1);
+   return view;
+}
+
+/** View \p reg as a scalar region: stride(reg, 0, 1, 0). */
+static INLINE struct brw_reg vec1( struct brw_reg reg )
+{
+   const struct brw_reg scalar = stride(reg, 0, 1, 0);
+   return scalar;
+}
+
+
+/** Scalar view of element \p elt of \p reg, counted in reg's own type. */
+static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt )
+{
+   const struct brw_reg moved = suboffset(reg, elt);
+
+   return vec1(moved);
+}
+
+/** Scalar UD view of element \p elt of \p reg, counted in UD elements. */
+static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
+{
+   const struct brw_reg as_ud = retype(reg, BRW_REGISTER_TYPE_UD);
+   const struct brw_reg moved = suboffset(as_ud, elt);
+
+   return vec1(moved);
+}
+
+
+/**
+ * Compose a swizzle onto \p reg: output channel X takes whatever the
+ * existing swizzle selected for channel \p x, and likewise for Y/Z/W.
+ */
+static INLINE struct brw_reg brw_swizzle( struct brw_reg reg,
+                                          GLuint x,
+                                          GLuint y,
+                                          GLuint z,
+                                          GLuint w)
+{
+   const GLuint old = reg.dw1.bits.swizzle;
+   const GLuint sel_x = BRW_GET_SWZ(old, x);
+   const GLuint sel_y = BRW_GET_SWZ(old, y);
+   const GLuint sel_z = BRW_GET_SWZ(old, z);
+   const GLuint sel_w = BRW_GET_SWZ(old, w);
+
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(sel_x, sel_y, sel_z, sel_w);
+   return reg;
+}
+
+
+/** Replicate the channel currently selected at position \p x to all four channels. */
+static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg,
+                                           GLuint x )
+{
+   const struct brw_reg splat = brw_swizzle(reg, x, x, x, x);
+   return splat;
+}
+
+/** Narrow the writemask of \p reg to its intersection with \p mask. */
+static INLINE struct brw_reg brw_writemask( struct brw_reg reg,
+                                            GLuint mask )
+{
+   reg.dw1.bits.writemask = reg.dw1.bits.writemask & mask;
+   return reg;
+}
+
+/** Replace the writemask of \p reg with \p mask. */
+static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg,
+                                                GLuint mask )
+{
+   struct brw_reg masked = reg;
+
+   masked.dw1.bits.writemask = mask;
+   return masked;
+}
+
+/** Toggle the negate source modifier of \p reg (negating twice is a no-op). */
+static INLINE struct brw_reg negate( struct brw_reg reg )
+{
+   reg.negate = !reg.negate;
+   return reg;
+}
+
+/** Return a copy of \p reg with the absolute-value source modifier set. */
+static INLINE struct brw_reg brw_abs( struct brw_reg reg )
+{
+   struct brw_reg result = reg;
+
+   result.abs = 1;
+   return result;
+}
+
+/***********************************************************************
+ */
+/**
+ * Build a register-indirect float[4] GRF operand.  \p subnr is stored in
+ * the subnr field and \p offset in the indirect offset field.
+ */
+static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr,
+                                                GLint offset )
+{
+   struct brw_reg ind = brw_vec4_grf(0, 0);
+
+   ind.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   ind.subnr = subnr;
+   ind.dw1.bits.indirect_offset = offset;
+   return ind;
+}
+
+/**
+ * Build a register-indirect float[1] GRF operand.  \p subnr is stored in
+ * the subnr field and \p offset in the indirect offset field.
+ */
+static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr,
+                                                GLint offset )
+{
+   struct brw_reg ind = brw_vec1_grf(0, 0);
+
+   ind.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   ind.subnr = subnr;
+   ind.dw1.bits.indirect_offset = offset;
+   return ind;
+}
+
+/** Indirect float[4] operand at \p ptr plus an extra \p offset. */
+static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset)
+{
+   const GLint total = ptr.addr_offset + offset;
+
+   return brw_vec4_indirect(ptr.addr_subnr, total);
+}
+
+/** Indirect float[1] operand at \p ptr plus an extra \p offset. */
+static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset)
+{
+   const GLint total = ptr.addr_offset + offset;
+
+   return brw_vec1_indirect(ptr.addr_subnr, total);
+}
+
+/** Same operand as deref_4f(), retyped to signed bytes. */
+static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset)
+{
+   const struct brw_reg as_float = deref_4f(ptr, offset);
+
+   return retype(as_float, BRW_REGISTER_TYPE_B);
+}
+
+/** Same operand as deref_1f(), retyped to unsigned words. */
+static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset)
+{
+   const struct brw_reg as_float = deref_1f(ptr, offset);
+
+   return retype(as_float, BRW_REGISTER_TYPE_UW);
+}
+
+/** Same operand as deref_1f(), retyped to signed dwords. */
+static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset)
+{
+   const struct brw_reg as_float = deref_1f(ptr, offset);
+
+   return retype(as_float, BRW_REGISTER_TYPE_D);
+}
+
+/** Same operand as deref_1f(), retyped to unsigned dwords. */
+static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset)
+{
+   const struct brw_reg as_float = deref_1f(ptr, offset);
+
+   return retype(as_float, BRW_REGISTER_TYPE_UD);
+}
+
+/** The address register (sub-register addr_subnr) that \p ptr is based on. */
+static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr)
+{
+   const GLuint sub = ptr.addr_subnr;
+
+   return brw_address_reg(sub);
+}
+
+/** Return a copy of \p ptr with \p offset added to its address offset. */
+static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset )
+{
+   ptr.addr_offset = ptr.addr_offset + offset;
+   return ptr;
+}
+
+/**
+ * Build a brw_indirect from an address sub-register number and an
+ * offset; the padding bits are cleared.
+ */
+static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset )
+{
+   struct brw_indirect result;
+
+   result.pad = 0;
+   result.addr_offset = offset;
+   result.addr_subnr = addr_subnr;
+   return result;
+}
+
+/**
+ * Do two brw_regs refer to the same register?  Only the register file
+ * and register number are compared; sub-register, type and region are
+ * ignored.
+ */
+static INLINE GLboolean
+brw_same_reg(struct brw_reg r1, struct brw_reg r2)
+{
+   if (r1.file != r2.file)
+      return 0;
+
+   return r1.nr == r2.nr;
+}
+
+/**
+ * Address of the instruction store slot at index p->nr_insn (presumably
+ * the slot the next emitted instruction will occupy - confirm).
+ */
+static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
+{
+   return p->store + p->nr_insn;
+}
+
+/* Default instruction state stack (see brw_compile::stack/current). */
+void brw_pop_insn_state( struct brw_compile *p );
+void brw_push_insn_state( struct brw_compile *p );
+/* Setters for individual header fields of the current default state. */
+void brw_set_mask_control( struct brw_compile *p, GLuint value );
+void brw_set_saturate( struct brw_compile *p, GLuint value );
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
+void brw_set_compression_control( struct brw_compile *p, GLboolean control );
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
+
+/* Reset a compile and attach it to a context. */
+void brw_init_compile( struct brw_context *, struct brw_compile *p );
+
+/* Finish the program; on PIPE_OK *program points at the instruction store
+ * and *sz is its size in bytes.
+ */
+enum pipe_error brw_get_program( struct brw_compile *p,
+ const GLuint **program,
+ GLuint *sz );
+
+
+/* Helpers for regular instructions:
+ *
+ * ALU1(OP) / ALU2(OP) expand to the prototype of brw_<OP>() taking one
+ * or two source registers.  The expansion already ends in ';', which is
+ * why the invocations below carry no semicolon.  Both macros are
+ * #undef'd once the list is complete.
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0);
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1);
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(JMPI)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU1(RNDZ)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+#undef ALU1
+#undef ALU2
+
+
+
+/* Helpers for SEND instruction:
+ *
+ * brw_urb_WRITE() and brw_ff_sync() share one parameter list.
+ *
+ * NOTE(review): the parameter names mirror the static
+ * brw_set_*_message() helpers in brw_eu_emit.c (allocate, used,
+ * msg_length, response_length, eot, ...); presumably these entry points
+ * forward them to those helpers -- confirm against the definitions.
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle);
+
+void brw_ff_sync(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle);
+
+void brw_fb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot);
+
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint sampler,
+ GLuint writemask,
+ GLuint msg_type,
+ GLuint response_length,
+ GLuint msg_length,
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode);
+
+void brw_math_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint precision );
+
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint data_type,
+ GLuint precision );
+
+void brw_dp_READ_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint scratch_offset );
+
+void brw_dp_READ_4( struct brw_compile *p,
+ struct brw_reg dest,
+ GLboolean relAddr,
+ GLuint location,
+ GLuint bind_table_index );
+
+void brw_dp_READ_4_vs( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint oword,
+ GLboolean relAddr,
+ struct brw_reg addrReg,
+ GLuint location,
+ GLuint bind_table_index );
+
+void brw_dp_WRITE_16( struct brw_compile *p,
+ struct brw_reg src,
+ GLuint scratch_offset );
+
+/* If/else/endif. Works by manipulating the execution flags on each
+ * channel.
+ *
+ * brw_IF() returns the instruction it emitted; hand it to brw_ELSE() or
+ * brw_ENDIF(), which patch its jump target.  brw_ELSE() in turn returns
+ * the instruction that the closing brw_ENDIF() must patch.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p,
+ GLuint execute_size);
+
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+ struct brw_instruction *if_insn);
+
+void brw_ENDIF(struct brw_compile *p,
+ struct brw_instruction *if_or_else_insn);
+
+
+/* DO/WHILE loops:
+ *
+ * brw_DO() returns the loop-head instruction; pass it to brw_WHILE() so
+ * the backward jump can be computed.
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p,
+ GLuint execute_size);
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+ struct brw_instruction *patch_insn);
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p);
+struct brw_instruction *brw_CONT(struct brw_compile *p);
+/* Forward jumps:
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+ struct brw_instruction *jmp_insn);
+
+
+
+void brw_NOP(struct brw_compile *p);
+
+/* Special case: there is never a destination, execution size will be
+ * taken from src0:
+ *
+ * NOTE(review): the prototype nevertheless takes a dest argument --
+ * confirm how brw_CMP() uses it and bring this comment in line.
+ */
+void brw_CMP(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint conditional,
+ struct brw_reg src0,
+ struct brw_reg src1);
+
+void brw_print_reg( struct brw_reg reg );
+
+
+/***********************************************************************
+ * brw_eu_util.c:
+ *
+ * NOTE: brw_set_src1(), declared last in this group, is defined in
+ * brw_eu_emit.c rather than in brw_eu_util.c.
+ */
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+ struct brw_indirect dst_ptr,
+ struct brw_indirect src_ptr,
+ GLuint count);
+
+void brw_copy_from_indirect(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_indirect ptr,
+ GLuint count);
+
+void brw_copy4(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ GLuint count);
+
+void brw_copy8(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ GLuint count);
+
+void brw_math_invert( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src);
+
+void brw_set_src1( struct brw_instruction *insn,
+ struct brw_reg reg );
+#endif
diff --git a/src/gallium/drivers/i965/brw_eu_debug.c b/src/gallium/drivers/i965/brw_eu_debug.c
new file mode 100644
index 00000000000..5989f5a04ee
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_debug.c
@@ -0,0 +1,94 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_debug.h"
+
+#include "brw_eu.h"
+
+/* Print a short description of hwreg through debug_printf().
+ *
+ * An "abs/" and/or "-" prefix is printed first.  Float GRF registers with
+ * the <8;8,1> region, subnr 0 and an even nr are shown as "vecN"; float
+ * GRF registers with the <0;1,0> region as "sclN.M"; immediates as
+ * "imm <float>" (dw1.f is printed whatever the register type says).
+ * Everything else gets the generic file/nr/subnr/region/type form.
+ * No trailing newline is written.
+ */
+void brw_print_reg( struct brw_reg hwreg )
+{
+   static const char *file[] = { "arf", "grf", "msg", "imm" };
+   static const char *type[] = { "ud", "d", "uw", "w", "ub", "vf", "hf", "f" };
+
+   const int float_grf = (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
+                          hwreg.type == BRW_REGISTER_TYPE_F);
+
+   debug_printf("%s%s",
+                hwreg.abs ? "abs/" : "",
+                hwreg.negate ? "-" : "");
+
+   /* vector register */
+   if (float_grf &&
+       hwreg.nr % 2 == 0 &&
+       hwreg.subnr == 0 &&
+       hwreg.vstride == BRW_VERTICAL_STRIDE_8 &&
+       hwreg.width == BRW_WIDTH_8 &&
+       hwreg.hstride == BRW_HORIZONTAL_STRIDE_1) {
+      debug_printf("vec%d", hwreg.nr);
+      return;
+   }
+
+   /* "scalar" register */
+   if (float_grf &&
+       hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
+       hwreg.width == BRW_WIDTH_1 &&
+       hwreg.hstride == BRW_HORIZONTAL_STRIDE_0) {
+      debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+      return;
+   }
+
+   if (hwreg.file == BRW_IMMEDIATE_VALUE) {
+      debug_printf("imm %f", hwreg.dw1.f);
+      return;
+   }
+
+   /* Generic form: strides are stored as log2+1 (0 meaning stride 0),
+    * width as log2.
+    */
+   debug_printf("%s%d.%d<%d;%d,%d>:%s",
+                file[hwreg.file],
+                hwreg.nr,
+                hwreg.subnr / type_sz(hwreg.type),
+                hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0,
+                1<<hwreg.width,
+                hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0,
+                type[hwreg.type]);
+}
+
+
+
diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
new file mode 100644
index 00000000000..4fe7b6acc16
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -0,0 +1,1433 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_debug.h"
+#include "brw_disasm.h"
+
+
+
+
+/***********************************************************************
+ * Internal helper for constructing instructions
+ */
+
+/* Derive insn->header.execution_size from a register's width: a width-8
+ * register in a compressed instruction yields BRW_EXECUTE_16, otherwise
+ * the width value is stored as-is.
+ */
+static void guess_execution_size( struct brw_instruction *insn,
+                                  struct brw_reg reg )
+{
+   const int compressed =
+      (insn->header.compression_control == BRW_COMPRESSION_COMPRESSED);
+
+   /* note - the width and execution-size definitions are compatible */
+   insn->header.execution_size =
+      (reg.width == BRW_WIDTH_8 && compressed) ? BRW_EXECUTE_16 : reg.width;
+}
+
+
+/* Encode the destination operand into insn.
+ *
+ * Writes file, type and address mode into bits1, then either the
+ * register / sub-register number (direct addressing) or the address
+ * sub-register and indirect offset (indirect addressing), using the
+ * align1 or align16 layout selected by insn->header.access_mode.  In the
+ * align1 encodings a horizontal stride of 0 is promoted to 1.  Finally
+ * the execution size is derived from dest via guess_execution_size().
+ */
+static void brw_set_dest( struct brw_instruction *insn,
+                          struct brw_reg dest )
+{
+   /* The nr < 128 bound is skipped for the architecture register file.
+    * The test has to look at dest.file: comparing dest.type (a register
+    * *type*) against a register-file constant checked an unrelated field.
+    */
+   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(dest.nr < 128);
+
+   insn->bits1.da1.dest_reg_file = dest.file;
+   insn->bits1.da1.dest_reg_type = dest.type;
+   insn->bits1.da1.dest_address_mode = dest.address_mode;
+
+   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+      insn->bits1.da1.dest_reg_nr = dest.nr;
+
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+         insn->bits1.da1.dest_subreg_nr = dest.subnr;
+         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+         insn->bits1.da1.dest_horiz_stride = dest.hstride;
+      }
+      else {
+         /* align16 stores the sub-register in units of 16 */
+         insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
+         insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+      }
+   }
+   else {
+      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
+
+      /* These are different sizes in align1 vs align16:
+       */
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+         insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+         insn->bits1.ia1.dest_horiz_stride = dest.hstride;
+      }
+      else {
+         insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+      }
+   }
+
+   /* Set the execution size based on dest.width and
+    * insn->header.compression_control:
+    */
+   guess_execution_size(insn, dest);
+}
+
+/* Encode the first source operand into insn.
+ *
+ * Message registers are rejected.  File, type, abs, negate and address
+ * mode are always written.  For an immediate the 32-bit value goes into
+ * bits3 and the src1 file/type fields are filled in as well.  Otherwise
+ * the register number / sub-register (direct) or address sub-register /
+ * indirect offset (indirect) is written, followed by the align1 region
+ * or the align16 swizzle and vertical stride.
+ */
+static void brw_set_src0( struct brw_instruction *insn,
+                          struct brw_reg reg )
+{
+   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+   /* The nr < 128 bound is skipped for the architecture register file.
+    * The test has to look at reg.file: comparing reg.type (a register
+    * *type*) against a register-file constant checked an unrelated field.
+    */
+   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(reg.nr < 128);
+
+   insn->bits1.da1.src0_reg_file = reg.file;
+   insn->bits1.da1.src0_reg_type = reg.type;
+   insn->bits2.da1.src0_abs = reg.abs;
+   insn->bits2.da1.src0_negate = reg.negate;
+   insn->bits2.da1.src0_address_mode = reg.address_mode;
+
+   if (reg.file == BRW_IMMEDIATE_VALUE) {
+      insn->bits3.ud = reg.dw1.ud;
+
+      /* Required to set some fields in src1 as well:
+       */
+      insn->bits1.da1.src1_reg_file = 0; /* arf */
+      insn->bits1.da1.src1_reg_type = reg.type;
+   }
+   else
+   {
+      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits2.da1.src0_subreg_nr = reg.subnr;
+            insn->bits2.da1.src0_reg_nr = reg.nr;
+         }
+         else {
+            /* align16 stores the sub-register in units of 16 */
+            insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+            insn->bits2.da16.src0_reg_nr = reg.nr;
+         }
+      }
+      else {
+         insn->bits2.ia1.src0_subreg_nr = reg.subnr;
+
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
+         }
+         else {
+            /* NOTE(review): this stores the indirect offset into the
+             * align16 *subreg_nr* field, whereas brw_set_dest() uses
+             * ia16.dest_indirect_offset for the same case.  It looks like
+             * ia16.src0_indirect_offset was intended -- confirm against
+             * the bits2.ia16 layout before changing it.
+             */
+            insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
+         }
+      }
+
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+         /* A width-1 source of a single-channel instruction is encoded
+          * as the <0;1,0> region.
+          */
+         if (reg.width == BRW_WIDTH_1 &&
+             insn->header.execution_size == BRW_EXECUTE_1) {
+            insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+            insn->bits2.da1.src0_width = BRW_WIDTH_1;
+            insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+         }
+         else {
+            insn->bits2.da1.src0_horiz_stride = reg.hstride;
+            insn->bits2.da1.src0_width = reg.width;
+            insn->bits2.da1.src0_vert_stride = reg.vstride;
+         }
+      }
+      else {
+         insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+         insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+         insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+         insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+         /* This is an oddity of the fact we're using the same
+          * descriptions for registers in align_16 as align_1:
+          */
+         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+            insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+         else
+            insn->bits2.da16.src0_vert_stride = reg.vstride;
+      }
+   }
+}
+
+
+/* Encode the second source operand into insn.
+ *
+ * Message registers and register numbers >= 128 are rejected, and src0
+ * must not already be an immediate.  An immediate src1 has its 32-bit
+ * value stored in bits3.ud.  A register src1 must use direct addressing
+ * and is written in the align1 (region) or align16 (swizzle) layout
+ * selected by insn->header.access_mode.
+ */
+void brw_set_src1( struct brw_instruction *insn,
+                   struct brw_reg reg )
+{
+   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+   assert(reg.nr < 128);
+
+   insn->bits1.da1.src1_reg_file = reg.file;
+   insn->bits1.da1.src1_reg_type = reg.type;
+   insn->bits3.da1.src1_abs = reg.abs;
+   insn->bits3.da1.src1_negate = reg.negate;
+
+   /* Only src1 can be immediate in two-argument instructions.
+    */
+   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
+
+   if (reg.file == BRW_IMMEDIATE_VALUE) {
+      insn->bits3.ud = reg.dw1.ud;
+      return;
+   }
+
+   /* This is a hardware restriction, which may or may not be lifted
+    * in the future:
+    */
+   assert (reg.address_mode == BRW_ADDRESS_DIRECT);
+
+   if (insn->header.access_mode != BRW_ALIGN_1) {
+      /* align16: sub-register in units of 16, then swizzle */
+      insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+      insn->bits3.da16.src1_reg_nr = reg.nr;
+
+      insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+      insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+      insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+      insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+      /* This is an oddity of the fact we're using the same
+       * descriptions for registers in align_16 as align_1:
+       */
+      if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+         insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
+      else
+         insn->bits3.da16.src1_vert_stride = reg.vstride;
+      return;
+   }
+
+   /* align1: register numbers, then the region */
+   insn->bits3.da1.src1_subreg_nr = reg.subnr;
+   insn->bits3.da1.src1_reg_nr = reg.nr;
+
+   if (reg.width == BRW_WIDTH_1 &&
+       insn->header.execution_size == BRW_EXECUTE_1) {
+      insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+      insn->bits3.da1.src1_width = BRW_WIDTH_1;
+      insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
+   }
+   else {
+      insn->bits3.da1.src1_horiz_stride = reg.hstride;
+      insn->bits3.da1.src1_width = reg.width;
+      insn->bits3.da1.src1_vert_stride = reg.vstride;
+   }
+}
+
+
+
+/* Fill in the math message fields of insn.
+ *
+ * src1 is first set to the immediate 0; the bits3 math fields are then
+ * written from the arguments, using the IGDNG layout (which also places
+ * the target in bits2.send_igdng.sfid) when BRW_IS_IGDNG(brw) holds and
+ * the original layout otherwise.  end_of_thread is always 0.
+ */
+static void brw_set_math_message( struct brw_context *brw,
+                                  struct brw_instruction *insn,
+                                  GLuint msg_length,
+                                  GLuint response_length,
+                                  GLuint function,
+                                  GLuint integer_type,
+                                  GLboolean low_precision,
+                                  GLboolean saturate,
+                                  GLuint dataType )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   if (BRW_IS_IGDNG(brw)) {
+      insn->bits3.math_igdng.function = function;
+      insn->bits3.math_igdng.int_type = integer_type;
+      insn->bits3.math_igdng.precision = low_precision;
+      insn->bits3.math_igdng.saturate = saturate;
+      insn->bits3.math_igdng.data_type = dataType;
+      insn->bits3.math_igdng.snapshot = 0;
+      insn->bits3.math_igdng.header_present = 0;
+      insn->bits3.math_igdng.response_length = response_length;
+      insn->bits3.math_igdng.msg_length = msg_length;
+      insn->bits3.math_igdng.end_of_thread = 0;
+      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
+      insn->bits2.send_igdng.end_of_thread = 0;
+      return;
+   }
+
+   insn->bits3.math.function = function;
+   insn->bits3.math.int_type = integer_type;
+   insn->bits3.math.precision = low_precision;
+   insn->bits3.math.saturate = saturate;
+   insn->bits3.math.data_type = dataType;
+   insn->bits3.math.response_length = response_length;
+   insn->bits3.math.msg_length = msg_length;
+   insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
+   insn->bits3.math.end_of_thread = 0;
+}
+
+
+/* Fill in the FF_SYNC message fields of insn.
+ *
+ * src1 is first set to the immediate 0.  The urb_igdng layout is written
+ * unconditionally (brw is not consulted), with opcode 1 and
+ * header_present 1; the target is placed in bits2.send_igdng.sfid.
+ */
+static void brw_set_ff_sync_message( struct brw_context *brw,
+                                     struct brw_instruction *insn,
+                                     GLboolean allocate,
+                                     GLboolean used,
+                                     GLuint msg_length,
+                                     GLuint response_length,
+                                     GLboolean end_of_thread,
+                                     GLboolean complete,
+                                     GLuint offset,
+                                     GLuint swizzle_control )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   /* send header word */
+   insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+   insn->bits2.send_igdng.end_of_thread = end_of_thread;
+
+   /* message word */
+   insn->bits3.urb_igdng.opcode = 1;
+   insn->bits3.urb_igdng.offset = offset;
+   insn->bits3.urb_igdng.swizzle_control = swizzle_control;
+   insn->bits3.urb_igdng.allocate = allocate;
+   insn->bits3.urb_igdng.used = used;
+   insn->bits3.urb_igdng.complete = complete;
+   insn->bits3.urb_igdng.header_present = 1;
+   insn->bits3.urb_igdng.response_length = response_length;
+   insn->bits3.urb_igdng.msg_length = msg_length;
+   insn->bits3.urb_igdng.end_of_thread = end_of_thread;
+}
+
+/* Fill in the URB message fields of insn.
+ *
+ * src1 is first set to the immediate 0; the bits3 URB fields are then
+ * written with opcode 0, in the IGDNG layout (header_present 1, target
+ * in bits2.send_igdng.sfid) when BRW_IS_IGDNG(brw) holds and in the
+ * original layout otherwise.
+ */
+static void brw_set_urb_message( struct brw_context *brw,
+                                 struct brw_instruction *insn,
+                                 GLboolean allocate,
+                                 GLboolean used,
+                                 GLuint msg_length,
+                                 GLuint response_length,
+                                 GLboolean end_of_thread,
+                                 GLboolean complete,
+                                 GLuint offset,
+                                 GLuint swizzle_control )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   if (BRW_IS_IGDNG(brw)) {
+      insn->bits3.urb_igdng.opcode = 0; /* ? */
+      insn->bits3.urb_igdng.offset = offset;
+      insn->bits3.urb_igdng.swizzle_control = swizzle_control;
+      insn->bits3.urb_igdng.allocate = allocate;
+      insn->bits3.urb_igdng.used = used; /* ? */
+      insn->bits3.urb_igdng.complete = complete;
+      insn->bits3.urb_igdng.header_present = 1;
+      insn->bits3.urb_igdng.response_length = response_length;
+      insn->bits3.urb_igdng.msg_length = msg_length;
+      insn->bits3.urb_igdng.end_of_thread = end_of_thread;
+      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+      insn->bits2.send_igdng.end_of_thread = end_of_thread;
+      return;
+   }
+
+   insn->bits3.urb.opcode = 0; /* ? */
+   insn->bits3.urb.offset = offset;
+   insn->bits3.urb.swizzle_control = swizzle_control;
+   insn->bits3.urb.allocate = allocate;
+   insn->bits3.urb.used = used; /* ? */
+   insn->bits3.urb.complete = complete;
+   insn->bits3.urb.response_length = response_length;
+   insn->bits3.urb.msg_length = msg_length;
+   insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
+   insn->bits3.urb.end_of_thread = end_of_thread;
+}
+
+/* Fill in the dataport-write message fields of insn.
+ *
+ * src1 is first set to the immediate 0; the bits3 dp_write fields are
+ * then written with send_commit_msg 0, in the IGDNG layout
+ * (header_present 1, target in bits2.send_igdng.sfid) when
+ * BRW_IS_IGDNG(brw) holds and in the original layout otherwise.
+ */
+static void brw_set_dp_write_message( struct brw_context *brw,
+                                      struct brw_instruction *insn,
+                                      GLuint binding_table_index,
+                                      GLuint msg_control,
+                                      GLuint msg_type,
+                                      GLuint msg_length,
+                                      GLuint pixel_scoreboard_clear,
+                                      GLuint response_length,
+                                      GLuint end_of_thread )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   if (BRW_IS_IGDNG(brw)) {
+      insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
+      insn->bits3.dp_write_igdng.msg_control = msg_control;
+      insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
+      insn->bits3.dp_write_igdng.msg_type = msg_type;
+      insn->bits3.dp_write_igdng.send_commit_msg = 0;
+      insn->bits3.dp_write_igdng.header_present = 1;
+      insn->bits3.dp_write_igdng.response_length = response_length;
+      insn->bits3.dp_write_igdng.msg_length = msg_length;
+      insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
+      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+      insn->bits2.send_igdng.end_of_thread = end_of_thread;
+      return;
+   }
+
+   insn->bits3.dp_write.binding_table_index = binding_table_index;
+   insn->bits3.dp_write.msg_control = msg_control;
+   insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
+   insn->bits3.dp_write.msg_type = msg_type;
+   insn->bits3.dp_write.send_commit_msg = 0;
+   insn->bits3.dp_write.response_length = response_length;
+   insn->bits3.dp_write.msg_length = msg_length;
+   insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+   insn->bits3.dp_write.end_of_thread = end_of_thread;
+}
+
+/* Fill in the dataport-read message fields of insn.
+ *
+ * src1 is first set to the immediate 0; the bits3 dp_read fields are
+ * then written, in the IGDNG layout (header_present 1, target in
+ * bits2.send_igdng.sfid) when BRW_IS_IGDNG(brw) holds and in the
+ * original layout otherwise.  pad1 is cleared in both layouts.
+ */
+static void brw_set_dp_read_message( struct brw_context *brw,
+                                     struct brw_instruction *insn,
+                                     GLuint binding_table_index,
+                                     GLuint msg_control,
+                                     GLuint msg_type,
+                                     GLuint target_cache,
+                                     GLuint msg_length,
+                                     GLuint response_length,
+                                     GLuint end_of_thread )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   if (BRW_IS_IGDNG(brw)) {
+      insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
+      insn->bits3.dp_read_igdng.msg_control = msg_control;
+      insn->bits3.dp_read_igdng.msg_type = msg_type;
+      insn->bits3.dp_read_igdng.target_cache = target_cache;
+      insn->bits3.dp_read_igdng.header_present = 1;
+      insn->bits3.dp_read_igdng.response_length = response_length;
+      insn->bits3.dp_read_igdng.msg_length = msg_length;
+      insn->bits3.dp_read_igdng.pad1 = 0;
+      insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
+      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
+      insn->bits2.send_igdng.end_of_thread = end_of_thread;
+      return;
+   }
+
+   /* Bit ranges as annotated by the original author. */
+   insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
+   insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
+   insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
+   insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
+   insn->bits3.dp_read.response_length = response_length; /*16:19*/
+   insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
+   insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
+   insn->bits3.dp_read.pad1 = 0; /*28:30*/
+   insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
+}
+
+/* Fill in the sampler message fields of insn.
+ *
+ * eot must be 0.  src1 is first set to the immediate 0; then one of
+ * three bits3 layouts is written, tested in this order: IGDNG (the only
+ * one that stores simd_mode and header_present), G4X, and the original
+ * layout (the only one that stores a return format, always FLOAT32).
+ */
+static void brw_set_sampler_message(struct brw_context *brw,
+                                    struct brw_instruction *insn,
+                                    GLuint binding_table_index,
+                                    GLuint sampler,
+                                    GLuint msg_type,
+                                    GLuint response_length,
+                                    GLuint msg_length,
+                                    GLboolean eot,
+                                    GLuint header_present,
+                                    GLuint simd_mode)
+{
+   assert(eot == 0);
+   brw_set_src1(insn, brw_imm_d(0));
+
+   if (BRW_IS_IGDNG(brw)) {
+      insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
+      insn->bits3.sampler_igdng.sampler = sampler;
+      insn->bits3.sampler_igdng.msg_type = msg_type;
+      insn->bits3.sampler_igdng.simd_mode = simd_mode;
+      insn->bits3.sampler_igdng.header_present = header_present;
+      insn->bits3.sampler_igdng.response_length = response_length;
+      insn->bits3.sampler_igdng.msg_length = msg_length;
+      insn->bits3.sampler_igdng.end_of_thread = eot;
+      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
+      insn->bits2.send_igdng.end_of_thread = eot;
+      return;
+   }
+
+   if (BRW_IS_G4X(brw)) {
+      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
+      insn->bits3.sampler_g4x.sampler = sampler;
+      insn->bits3.sampler_g4x.msg_type = msg_type;
+      insn->bits3.sampler_g4x.response_length = response_length;
+      insn->bits3.sampler_g4x.msg_length = msg_length;
+      insn->bits3.sampler_g4x.end_of_thread = eot;
+      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+      return;
+   }
+
+   insn->bits3.sampler.binding_table_index = binding_table_index;
+   insn->bits3.sampler.sampler = sampler;
+   insn->bits3.sampler.msg_type = msg_type;
+   insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+   insn->bits3.sampler.response_length = response_length;
+   insn->bits3.sampler.msg_length = msg_length;
+   insn->bits3.sampler.end_of_thread = eot;
+   insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+}
+
+
+
+/* Claim the next slot in p->store, initialise it as a copy of the
+ * p->current template and stamp it with opcode.
+ *
+ * A conditional modifier on the template is one-shot: once it has been
+ * copied into the new instruction it is cleared on p->current, and the
+ * template's predicate control is set to BRW_PREDICATE_NORMAL.
+ */
+static struct brw_instruction *next_insn( struct brw_compile *p,
+                                          GLuint opcode )
+{
+   struct brw_instruction *slot;
+
+   /* Disassembly of the previous instruction; compiled in but switched
+    * off by the constant 0.
+    */
+   if (0 && (BRW_DEBUG & DEBUG_DISASSEM))
+   {
+      if (p->nr_insn)
+         brw_disasm_insn(stderr, &p->store[p->nr_insn-1]);
+   }
+
+   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
+
+   slot = p->store + p->nr_insn;
+   p->nr_insn++;
+   memcpy(slot, p->current, sizeof(*slot));
+
+   /* Reset this one-shot flag:
+    */
+   if (p->current->header.destreg__conditionalmod) {
+      p->current->header.destreg__conditionalmod = 0;
+      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+   }
+
+   slot->header.opcode = opcode;
+   return slot;
+}
+
+
+/* Emit a one-source instruction: allocate it with next_insn(), then
+ * encode the destination followed by src0.
+ */
+static struct brw_instruction *brw_alu1( struct brw_compile *p,
+                                         GLuint opcode,
+                                         struct brw_reg dest,
+                                         struct brw_reg src )
+{
+   struct brw_instruction *inst;
+
+   inst = next_insn(p, opcode);
+   brw_set_dest(inst, dest);
+   brw_set_src0(inst, src);
+
+   return inst;
+}
+
+/* Emit a two-source instruction: allocate it with next_insn(), then
+ * encode the destination, src0 and src1 in that order.
+ */
+static struct brw_instruction *brw_alu2(struct brw_compile *p,
+                                        GLuint opcode,
+                                        struct brw_reg dest,
+                                        struct brw_reg src0,
+                                        struct brw_reg src1 )
+{
+   struct brw_instruction *inst;
+
+   inst = next_insn(p, opcode);
+   brw_set_dest(inst, dest);
+   brw_set_src0(inst, src0);
+   brw_set_src1(inst, src1);
+
+   return inst;
+}
+
+
+/***********************************************************************
+ * Convenience routines.
+ *
+ * ALU1(OP) / ALU2(OP) define brw_<OP>() as a thin wrapper that forwards
+ * to brw_alu1() / brw_alu2() with BRW_OPCODE_<OP>.  JMPI is not
+ * generated here: brw_JMPI() is written out by hand further down
+ * because it also overrides header fields.
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0) \
+{ \
+ return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
+}
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1) \
+{ \
+ return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
+}
+
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU1(RNDZ)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+
+
+
+/* Emit a NOP whose destination and src0 are both brw_vec4_grf(0,0)
+ * retyped to UD and whose src1 is the immediate 0.
+ */
+void brw_NOP(struct brw_compile *p)
+{
+   struct brw_instruction *nop = next_insn(p, BRW_OPCODE_NOP);
+   struct brw_reg g0_ud = retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD);
+
+   brw_set_dest(nop, g0_ud);
+   brw_set_src0(nop, g0_ud);
+   brw_set_src1(nop, brw_imm_ud(0x0));
+}
+
+
+
+
+
+/***********************************************************************
+ * Comparisons, if/else/endif
+ */
+
+/* Emit a JMPI through brw_alu2() and then override its header: no
+ * compression, mask control disabled.  Predication on the p->current
+ * template is switched off afterwards, so it applies to this jump only.
+ *
+ * NOTE(review): execution_size is set to the literal 1 rather than a
+ * BRW_EXECUTE_* name -- confirm which encoding is intended.
+ */
+struct brw_instruction *brw_JMPI(struct brw_compile *p,
+                                 struct brw_reg dest,
+                                 struct brw_reg src0,
+                                 struct brw_reg src1)
+{
+   struct brw_instruction *jmp;
+
+   jmp = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+
+   jmp->header.execution_size = 1;
+   jmp->header.compression_control = BRW_COMPRESSION_NONE;
+   jmp->header.mask_control = BRW_MASK_DISABLE;
+
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   return jmp;
+}
+
+/* EU takes the value from the flag register and pushes it onto some
+ * sort of a stack (presumably merging with any flag value already on
+ * the stack).  Within an if block, the flags at the top of the stack
+ * control execution on each channel of the unit, eg. on each of the
+ * 16 pixel values in our wm programs.
+ *
+ * When the matching 'else' instruction is reached (presumably by
+ * countdown of the instruction count patched in by our ELSE/ENDIF
+ * functions), the relevant flags are inverted.
+ *
+ * When the matching 'endif' instruction is reached, the flags are
+ * popped off.  If the stack is now empty, normal execution resumes.
+ *
+ * No attempt is made to deal with stack overflow (14 elements?).
+ *
+ * In single_program_flow mode no IF is emitted: a predicate-inverted
+ * ADD on the ip register takes its place (execute_size must then be
+ * BRW_EXECUTE_1) and no thread switch is requested.  The returned
+ * instruction is the one brw_ELSE()/brw_ENDIF() patch later.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
+{
+   struct brw_instruction *if_insn;
+
+   if (!p->single_program_flow) {
+      if_insn = next_insn(p, BRW_OPCODE_IF);
+   } else {
+      assert(execute_size == BRW_EXECUTE_1);
+
+      if_insn = next_insn(p, BRW_OPCODE_ADD);
+      if_insn->header.predicate_inverse = 1;
+   }
+
+   /* Operands: ip = ip + 0; the immediate is patched by ELSE/ENDIF. */
+   brw_set_dest(if_insn, brw_ip_reg());
+   brw_set_src0(if_insn, brw_ip_reg());
+   brw_set_src1(if_insn, brw_imm_d(0x0));
+
+   /* Override the defaults for this instruction:
+    */
+   if_insn->header.execution_size = execute_size;
+   if_insn->header.compression_control = BRW_COMPRESSION_NONE;
+   if_insn->header.predicate_control = BRW_PREDICATE_NORMAL;
+   if_insn->header.mask_control = BRW_MASK_ENABLE;
+   if (!p->single_program_flow)
+      if_insn->header.thread_control = BRW_THREAD_SWITCH;
+
+   /* Instructions emitted after this one are unpredicated by default. */
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   return if_insn;
+}
+
+
+/* Emit the ELSE of an if/else/endif and patch if_insn to jump to it.
+ *
+ * In single_program_flow mode an ADD on the ip register is emitted
+ * instead and if_insn (itself an ADD) receives a byte distance of 16
+ * per instruction, landing just past the new instruction.  Otherwise
+ * if_insn's jump_count becomes the instruction distance, doubled on
+ * IGDNG.  The returned instruction is the one brw_ENDIF() must patch.
+ */
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+                                 struct brw_instruction *if_insn)
+{
+   const GLuint br = BRW_IS_IGDNG(p->brw) ? 2 : 1;
+   struct brw_instruction *else_insn;
+
+   else_insn = next_insn(p, p->single_program_flow ? BRW_OPCODE_ADD
+                                                   : BRW_OPCODE_ELSE);
+
+   brw_set_dest(else_insn, brw_ip_reg());
+   brw_set_src0(else_insn, brw_ip_reg());
+   brw_set_src1(else_insn, brw_imm_d(0x0));
+
+   else_insn->header.compression_control = BRW_COMPRESSION_NONE;
+   else_insn->header.execution_size = if_insn->header.execution_size;
+   else_insn->header.mask_control = BRW_MASK_ENABLE;
+
+   /* Patch the if instruction to point at this instruction.
+    */
+   if (p->single_program_flow) {
+      assert(if_insn->header.opcode == BRW_OPCODE_ADD);
+
+      if_insn->bits3.ud = (else_insn - if_insn + 1) * 16;
+   } else {
+      else_insn->header.thread_control = BRW_THREAD_SWITCH;
+
+      assert(if_insn->header.opcode == BRW_OPCODE_IF);
+
+      if_insn->bits3.if_else.jump_count = br * (else_insn - if_insn);
+      if_insn->bits3.if_else.pop_count = 0;
+      if_insn->bits3.if_else.pad0 = 0;
+   }
+
+   return else_insn;
+}
+
+/* Close an if/else/endif and patch the pending IF or ELSE (patch_insn).
+ *
+ * In single_program_flow mode nothing is emitted; patch_insn (an ADD)
+ * is pointed at the next instruction slot, 16 bytes per instruction.
+ * Otherwise an ENDIF with pop_count 1 is emitted and patch_insn, whose
+ * jump_count must still be 0, is aimed one instruction past it
+ * (distance doubled on IGDNG).  A pending IF is additionally rewritten
+ * to IFF with pop_count 0; a pending ELSE gets pop_count 1.
+ */
+void brw_ENDIF(struct brw_compile *p,
+               struct brw_instruction *patch_insn)
+{
+   const GLuint br = BRW_IS_IGDNG(p->brw) ? 2 : 1;
+   struct brw_instruction *endif_insn;
+
+   if (p->single_program_flow) {
+      /* In single program flow mode, there's no need to execute an ENDIF,
+       * since we don't need to do any stack operations, and if we're executing
+       * currently, we want to just continue executing.
+       */
+      struct brw_instruction *next = p->store + p->nr_insn;
+
+      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
+
+      patch_insn->bits3.ud = (next - patch_insn) * 16;
+      return;
+   }
+
+   endif_insn = next_insn(p, BRW_OPCODE_ENDIF);
+
+   brw_set_dest(endif_insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src0(endif_insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src1(endif_insn, brw_imm_d(0x0));
+
+   endif_insn->header.compression_control = BRW_COMPRESSION_NONE;
+   endif_insn->header.execution_size = patch_insn->header.execution_size;
+   endif_insn->header.mask_control = BRW_MASK_ENABLE;
+   endif_insn->header.thread_control = BRW_THREAD_SWITCH;
+
+   assert(patch_insn->bits3.if_else.jump_count == 0);
+
+   /* Patch the if or else instructions to point at this or the next
+    * instruction respectively.
+    */
+   switch (patch_insn->header.opcode) {
+   case BRW_OPCODE_IF:
+      /* Automagically turn it into an IFF:
+       */
+      patch_insn->header.opcode = BRW_OPCODE_IFF;
+      patch_insn->bits3.if_else.jump_count = br * (endif_insn - patch_insn + 1);
+      patch_insn->bits3.if_else.pop_count = 0;
+      patch_insn->bits3.if_else.pad0 = 0;
+      break;
+   case BRW_OPCODE_ELSE:
+      patch_insn->bits3.if_else.jump_count = br * (endif_insn - patch_insn + 1);
+      patch_insn->bits3.if_else.pop_count = 1;
+      patch_insn->bits3.if_else.pad0 = 0;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   /* Also pop item off the stack in the endif instruction:
+    */
+   endif_insn->bits3.if_else.jump_count = 0;
+   endif_insn->bits3.if_else.pop_count = 1;
+   endif_insn->bits3.if_else.pad0 = 0;
+}
+
+/* Emit a BREAK operating on the ip register (src1 is the immediate 0),
+ * uncompressed and with execution size 8.  mask_control is left at
+ * whatever the p->current template carries.
+ */
+struct brw_instruction *brw_BREAK(struct brw_compile *p)
+{
+   struct brw_instruction *brk = next_insn(p, BRW_OPCODE_BREAK);
+
+   brw_set_dest(brk, brw_ip_reg());
+   brw_set_src0(brk, brw_ip_reg());
+   brw_set_src1(brk, brw_imm_d(0x0));
+
+   brk->header.compression_control = BRW_COMPRESSION_NONE;
+   brk->header.execution_size = BRW_EXECUTE_8;
+   brk->bits3.if_else.pad0 = 0;
+
+   return brk;
+}
+
+/* Emit a CONTINUE operating on the ip register (src1 is the immediate
+ * 0), uncompressed and with execution size 8.  mask_control is left at
+ * whatever the p->current template carries.
+ */
+struct brw_instruction *brw_CONT(struct brw_compile *p)
+{
+   struct brw_instruction *cont = next_insn(p, BRW_OPCODE_CONTINUE);
+
+   brw_set_dest(cont, brw_ip_reg());
+   brw_set_src0(cont, brw_ip_reg());
+   brw_set_src1(cont, brw_imm_d(0x0));
+
+   cont->header.compression_control = BRW_COMPRESSION_NONE;
+   cont->header.execution_size = BRW_EXECUTE_8;
+   cont->bits3.if_else.pad0 = 0;
+
+   return cont;
+}
+
+/* DO/WHILE loop:
+ *
+ * brw_DO() marks the top of a loop; the value it returns is what the
+ * matching brw_WHILE() expects as its do_insn argument.
+ *
+ * With p->single_program_flow set, no instruction is emitted: the
+ * address of the next free slot in p->store is returned instead.
+ * Otherwise a DO instruction is emitted with null dest/src0/src1,
+ * no compression, no predication and the requested execution size.
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
+{
+ if (p->single_program_flow) {
+ /* Nothing to emit: just remember where the loop body starts. */
+ return &p->store[p->nr_insn];
+ } else {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
+
+ /* Override the defaults for this instruction:
+ */
+ brw_set_dest(insn, brw_null_reg());
+ brw_set_src0(insn, brw_null_reg());
+ brw_set_src1(insn, brw_null_reg());
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = execute_size;
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ /* insn->header.mask_control = BRW_MASK_ENABLE; */
+ /* insn->header.mask_control = BRW_MASK_DISABLE; */
+
+ return insn;
+ }
+}
+
+
+
+/* Close a loop opened with brw_DO() by emitting the backward jump.
+ *
+ * do_insn is the value returned by the matching brw_DO().
+ *
+ * - single_program_flow: an ADD on the IP register is emitted with
+ *   execution size 1 and an immediate of (do_insn - insn) * 16
+ *   (presumably 16 bytes per instruction - TODO confirm).
+ * - otherwise: a WHILE is emitted with the DO's execution size and a
+ *   jump_count of br * (do_insn - insn + 1), where br is 2 on IGDNG
+ *   and 1 elsewhere.
+ *
+ * As a side effect the default predicate control for following
+ * instructions (p->current) is reset to BRW_PREDICATE_NONE.
+ *
+ * Returns the emitted instruction.
+ */
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+ struct brw_instruction *do_insn)
+{
+ struct brw_instruction *insn;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ br = 2;
+
+ if (p->single_program_flow)
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ else
+ insn = next_insn(p, BRW_OPCODE_WHILE);
+
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+ if (p->single_program_flow) {
+ insn->header.execution_size = BRW_EXECUTE_1;
+
+ /* do_insn precedes insn, so this offset is negative. */
+ insn->bits3.d = (do_insn - insn) * 16;
+ } else {
+ insn->header.execution_size = do_insn->header.execution_size;
+
+ assert(do_insn->header.opcode == BRW_OPCODE_DO);
+ insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
+ insn->bits3.if_else.pop_count = 0;
+ insn->bits3.if_else.pad0 = 0;
+ }
+
+/* insn->header.mask_control = BRW_MASK_ENABLE; */
+
+ /* insn->header.mask_control = BRW_MASK_DISABLE; */
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+ return insn;
+}
+
+
+/* FORWARD JUMPS:
+ *
+ * Patch a previously emitted JMPI so that it lands on the next
+ * instruction to be emitted (the current end of p->store).
+ *
+ * jmp_insn must be a JMPI whose src1 is an immediate; its immediate is
+ * overwritten with jmpi * (distance - 1), where distance is the number
+ * of instructions between the jump and the landing point and jmpi is
+ * 2 on IGDNG and 1 elsewhere.
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+                       struct brw_instruction *jmp_insn)
+{
+   struct brw_instruction *landing = &p->store[p->nr_insn];
+   GLuint jmpi = 1;
+
+   if (BRW_IS_IGDNG(p->brw))
+      jmpi = 2;
+
+   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
+   /* Must be a comparison: the previous '=' silently rewrote the
+    * register file in debug builds and could never fail.
+    */
+   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
+
+   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
+}
+
+
+
+/* Emit a CMP of src0 against src1 using the given conditional modifier.
+ *
+ * To integrate with the above, it makes sense that the comparison
+ * instruction should populate the flag register. It might be simpler
+ * just to use the flag reg for most WM tasks?
+ *
+ * When dest is architecture register 0, the default state for following
+ * instructions is switched to normal predication and p->flag_value is
+ * set to 0xff.
+ */
+void brw_CMP(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint conditional,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
+
+ insn->header.destreg__conditionalmod = conditional;
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, src1);
+
+/* guess_execution_size(insn, src0); */
+
+
+ /* Make it so that future instructions will use the computed flag
+ * value until brw_set_predicate_control_flag_value() is called
+ * again.
+ */
+ if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+ dest.nr == 0) {
+ p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+ p->flag_value = 0xff;
+ }
+}
+
+
+
+/***********************************************************************
+ * Helpers for the various SEND message types:
+ */
+
+/** Extended math function, float[8].
+ *
+ * Emits a single SEND carrying a math message.
+ *
+ * \param dest        register receiving the result
+ * \param function    BRW_MATH_FUNCTION_x; POW uses a message length of 2
+ *                    and SINCOS a response length of 2, all others 1
+ * \param saturate    forwarded to brw_set_math_message()
+ * \param msg_reg_nr  message register number, stored in the
+ *                    destreg__conditionalmod header field
+ * \param src         source operand
+ * \param data_type   forwarded to brw_set_math_message()
+ * \param precision   forwarded to brw_set_math_message()
+ *
+ * Note the parameter order: data_type comes before precision.
+ */
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint data_type,
+ GLuint precision )
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+ GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+ GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+ /* Example code doesn't set predicate_control for send
+ * instructions.
+ */
+ insn->header.predicate_control = 0;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ brw_set_math_message(p->brw,
+ insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ data_type);
+}
+
+/**
+ * Extended math function, float[16].
+ * Use 2 send instructions.
+ *
+ * Both SENDs are emitted uncompressed inside a push/pop of the
+ * instruction state, with the predicate flag value set to 0xff, and
+ * always use BRW_MATH_DATA_VECTOR.  The second SEND is marked
+ * BRW_COMPRESSION_2NDHALF, uses message register msg_reg_nr+1 and
+ * writes to the register following dest.
+ *
+ * Message/response lengths follow the same rule as brw_math():
+ * POW sends 2 registers, SINCOS returns 2, everything else 1.
+ */
+void brw_math_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint precision )
+{
+ struct brw_instruction *insn;
+ GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+ GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+ /* First instruction:
+ */
+ brw_push_insn_state(p);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ brw_set_math_message(p->brw,
+ insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ BRW_MATH_DATA_VECTOR);
+
+ /* Second instruction:
+ */
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
+ insn->header.destreg__conditionalmod = msg_reg_nr+1;
+
+ brw_set_dest(insn, offset(dest,1));
+ /* NOTE(review): src0 is the same register as in the first SEND, not
+ * offset(src,1) - presumably the operands come from the message
+ * registers rather than src0; confirm.
+ */
+ brw_set_src0(insn, src);
+ brw_set_math_message(p->brw,
+ insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ BRW_MATH_DATA_VECTOR);
+
+ brw_pop_insn_state(p);
+}
+
+
+/**
+ * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
+ * Scratch offset should be a multiple of 64.
+ * Used for register spilling.
+ *
+ * Emits two instructions: a MOV that stores scratch_offset into
+ * element 2 of GRF 0 (the message header's global offset field), then
+ * a SEND with a null destination that performs a 4-OWord block write
+ * through binding table index 255 (stateless).
+ *
+ * \param src             passed as src0 of the SEND
+ * \param scratch_offset  offset into the scratch buffer
+ */
+void brw_dp_WRITE_16( struct brw_compile *p,
+ struct brw_reg src,
+ GLuint scratch_offset )
+{
+ GLuint msg_reg_nr = 1;
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ /* set message header global offset field (reg 0, element 2) */
+ brw_MOV(p,
+ retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+ brw_imm_d(scratch_offset));
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ GLuint msg_length = 3;
+ struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+
+ brw_set_dp_write_message(p->brw,
+ insn,
+ 255, /* binding table index (255=stateless) */
+ BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
+ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+ msg_length,
+ 0, /* pixel scoreboard */
+ 0, /* response_length */
+ 0); /* eot */
+ }
+}
+
+
+/**
+ * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
+ * Scratch offset should be a multiple of 64.
+ * Used for register spilling.
+ *
+ * Emits two instructions: a MOV that stores scratch_offset into
+ * element 2 of GRF 0 (the message header's global offset field), then
+ * a SEND that performs a 4-OWord block read through binding table
+ * index 255 (stateless) with a response length of 2 registers.
+ *
+ * \param dest            first register receiving the data
+ * \param scratch_offset  offset into the scratch buffer
+ */
+void brw_dp_READ_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint scratch_offset )
+{
+ GLuint msg_reg_nr = 1;
+ {
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ /* set message header global offset field (reg 0, element 2) */
+ brw_MOV(p,
+ retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+ brw_imm_d(scratch_offset));
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest); /* UW? */
+ brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
+
+ brw_set_dp_read_message(p->brw,
+ insn,
+ 255, /* binding table index (255=stateless) */
+ 3, /* msg_control (3 means 4 Owords) */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 1, /* target cache (render/scratch) */
+ 1, /* msg_length */
+ 2, /* response_length */
+ 0); /* eot */
+ }
+}
+
+
+/**
+ * Read a float[4] vector from the data port Data Cache (const buffer).
+ * Location (in buffer) should be a multiple of 16.
+ * Used for fetching shader constants.
+ *
+ * Emits a MOV of 'location' into message register 1, followed by a
+ * SEND doing a 1-OWord block read with execution masking disabled.
+ *
+ * \param dest              destination; retyped to a UW vec8 for the SEND
+ * \param relAddr           intended to select an indirect fetch through
+ *                          the address register, but currently ignored
+ *                          (see the XXX below)
+ * \param location          byte offset into the constant buffer
+ * \param bind_table_index  binding table entry of the constant buffer
+ */
+void brw_dp_READ_4( struct brw_compile *p,
+ struct brw_reg dest,
+ GLboolean relAddr,
+ GLuint location,
+ GLuint bind_table_index )
+{
+ /* XXX: relAddr not implemented */
+ GLuint msg_reg_nr = 1;
+ {
+ struct brw_reg b;
+ brw_push_insn_state(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ /* Setup MRF[1] with location/offset into const buffer */
+ b = brw_message_reg(msg_reg_nr);
+ b = retype(b, BRW_REGISTER_TYPE_UD);
+ /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
+ * when the docs say only dword[2] should be set. Hmmm. But it works.
+ */
+ brw_MOV(p, b, brw_imm_ud(location));
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+
+ /* cast dest to a uword[8] vector */
+ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_null_reg());
+
+ brw_set_dp_read_message(p->brw,
+ insn,
+ bind_table_index,
+ 0, /* msg_control (0 means 1 Oword) */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 0, /* source cache = data cache */
+ 1, /* msg_length */
+ 1, /* response_length (1 Oword) */
+ 0); /* eot */
+ }
+}
+
+
+/**
+ * Read float[4] constant(s) from VS constant buffer.
+ * For relative addressing, two float[4] constants will be read into 'dest'.
+ * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
+ *
+ * Emits one instruction that loads message register 1 with the buffer
+ * offset (addrReg + location when relAddr is set, plain location
+ * otherwise), then a SEND doing an OWord block read with execution
+ * masking disabled.
+ *
+ * \param dest              destination register
+ * \param oword             0 = lower OWord, 1 = upper OWord (asserted < 2);
+ *                          passed as the message control value
+ * \param relAddr           add addrReg to location when true
+ * \param addrReg           register holding the relative offset
+ * \param location          byte offset into the constant buffer
+ * \param bind_table_index  binding table entry of the constant buffer
+ */
+void brw_dp_READ_4_vs(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint oword,
+ GLboolean relAddr,
+ struct brw_reg addrReg,
+ GLuint location,
+ GLuint bind_table_index)
+{
+ GLuint msg_reg_nr = 1;
+
+ assert(oword < 2);
+ /*
+ printf("vs const read msg, location %u, msg_reg_nr %d\n",
+ location, msg_reg_nr);
+ */
+
+ /* Setup MRF[1] with location/offset into const buffer */
+ {
+ struct brw_reg b;
+
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ /*brw_set_access_mode(p, BRW_ALIGN_16);*/
+
+ /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
+ * when the docs say only dword[2] should be set. Hmmm. But it works.
+ */
+ b = brw_message_reg(msg_reg_nr);
+ b = retype(b, BRW_REGISTER_TYPE_UD);
+ /*b = get_element_ud(b, 2);*/
+ if (relAddr) {
+ brw_ADD(p, b, addrReg, brw_imm_ud(location));
+ }
+ else {
+ brw_MOV(p, b, brw_imm_ud(location));
+ }
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+ /*insn->header.access_mode = BRW_ALIGN_16;*/
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_null_reg());
+
+ brw_set_dp_read_message(p->brw,
+ insn,
+ bind_table_index,
+ oword, /* 0 = lower Oword, 1 = upper Oword */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 0, /* source cache = data cache */
+ 1, /* msg_length */
+ 1, /* response_length (1 Oword) */
+ 0); /* eot */
+ }
+}
+
+
+
+/* Emit a render-target write SEND.
+ *
+ * The message always uses the SIMD16 single-source render target write
+ * control and enables the pixel scoreboard; the instruction itself is
+ * emitted unpredicated and uncompressed.
+ *
+ * dest                 destination register of the SEND
+ * msg_reg_nr           message register number (stored in the
+ *                      destreg__conditionalmod header field)
+ * src0                 src0 of the SEND
+ * binding_table_index  binding table entry of the render target
+ * msg_length           message length, forwarded to the message setup
+ * response_length      response length, forwarded to the message setup
+ * eot                  end-of-thread flag, forwarded to the message setup
+ */
+void brw_fb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_dp_write_message(p->brw,
+ insn,
+ binding_table_index,
+ BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
+ BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
+ msg_length,
+ 1, /* pixel scoreboard */
+ response_length,
+ eot);
+}
+
+
+/**
+ * Texture sample instruction.
+ * Note: the msg_type plus msg_length values determine exactly what kind
+ * of sampling operation is performed. See volume 4, page 161 of docs.
+ *
+ * Behaviour depends on writemask:
+ *  - 0: nothing is emitted.
+ *  - BRW_WRITEMASK_XYZW: a single SEND is emitted with the arguments
+ *    as given.
+ *  - one contiguous run of channels: a message header is built in
+ *    message register msg_reg_nr (copy of GRF 0 with dword 2 set to the
+ *    disabled-channel mask shifted left by 12), src0 is replaced by the
+ *    null register, dest is advanced by two registers per leading
+ *    disabled channel and response_length becomes twice the number of
+ *    enabled channels.
+ *  - any other (non-contiguous) mask: the SEND is emitted unmodified
+ *    and followed by a MOV of the last response register onto itself
+ *    to force the dependency.
+ *
+ * The remaining parameters are forwarded to brw_set_sampler_message().
+ */
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint sampler,
+ GLuint writemask,
+ GLuint msg_type,
+ GLuint response_length,
+ GLuint msg_length,
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode)
+{
+ GLboolean need_stall = 0;
+
+ if (writemask == 0) {
+ /*debug_printf("%s: zero writemask??\n", __FUNCTION__); */
+ return;
+ }
+
+ /* Hardware doesn't do destination dependency checking on send
+ * instructions properly. Add a workaround which generates the
+ * dependency by other means. In practice it seems like this bug
+ * only crops up for texture samples, and only where registers are
+ * written by the send and then written again later without being
+ * read in between. Luckily for us, we already track that
+ * information and use it to modify the writemask for the
+ * instruction, so that is a guide for whether a workaround is
+ * needed.
+ */
+ if (writemask != BRW_WRITEMASK_XYZW) {
+ GLuint dst_offset = 0;
+ GLuint i, newmask = 0, len = 0;
+
+ /* Skip leading disabled channels; each one is two registers wide. */
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i))
+ break;
+ dst_offset += 2;
+ }
+ /* Collect the first contiguous run of enabled channels. */
+ for (; i < 4; i++) {
+ if (!(writemask & (1<<i)))
+ break;
+ newmask |= 1<<i;
+ len++;
+ }
+
+ if (newmask != writemask) {
+ /* The mask has a hole in it: fall back to the stall workaround. */
+ need_stall = 1;
+ /* debug_printf("need stall %x %x\n", newmask , writemask); */
+ }
+ else {
+ struct brw_reg m1 = brw_message_reg(msg_reg_nr);
+
+ /* From here on newmask holds the channels that are NOT written. */
+ newmask = ~newmask & BRW_WRITEMASK_XYZW;
+
+ brw_push_insn_state(p);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ brw_MOV(p, m1, brw_vec8_grf(0,0));
+ brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
+
+ brw_pop_insn_state(p);
+
+ src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ dest = offset(dest, dst_offset);
+ response_length = len * 2;
+ }
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_sampler_message(p->brw, insn,
+ binding_table_index,
+ sampler,
+ msg_type,
+ response_length,
+ msg_length,
+ eot,
+ header_present,
+ simd_mode);
+ }
+
+ if (need_stall) {
+ struct brw_reg reg = vec8(offset(dest, response_length-1));
+
+ /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
+ */
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, reg, reg);
+ brw_pop_insn_state(p);
+ }
+
+}
+
+/* Emit a SEND carrying a URB write message.
+ *
+ * src1 is set to an immediate 0 and msg_reg_nr is stored in the
+ * destreg__conditionalmod header field; allocate, used, msg_length,
+ * response_length, eot, writes_complete, offset and swizzle are all
+ * forwarded unchanged to brw_set_urb_message().  msg_length must be
+ * below BRW_MAX_MRF (asserted).
+ *
+ * All these variables are pretty confusing - we might be better off
+ * using bitmasks and macros for this, in the old style. Or perhaps
+ * just having the caller instantiate the fields in dword3 itself.
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ assert(msg_length < BRW_MAX_MRF);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_urb_message(p->brw,
+ insn,
+ allocate,
+ used,
+ msg_length,
+ response_length,
+ eot,
+ writes_complete,
+ offset,
+ swizzle);
+}
+
+/* Emit a SEND carrying an FF_SYNC message.
+ *
+ * Takes the same parameters as brw_urb_WRITE() and encodes the
+ * instruction the same way, except that the message descriptor is
+ * filled in by brw_set_ff_sync_message() and msg_length is asserted
+ * to be below 16 rather than below BRW_MAX_MRF.
+ */
+void brw_ff_sync(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ assert(msg_length < 16);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_ff_sync_message(p->brw,
+ insn,
+ allocate,
+ used,
+ msg_length,
+ response_length,
+ eot,
+ writes_complete,
+ offset,
+ swizzle);
+}
diff --git a/src/gallium/drivers/i965/brw_eu_util.c b/src/gallium/drivers/i965/brw_eu_util.c
new file mode 100644
index 00000000000..5405cf17a4e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_util.c
@@ -0,0 +1,126 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+/* Emit dst = 1/src using the extended math unit.
+ *
+ * Thin wrapper around brw_math() selecting BRW_MATH_FUNCTION_INV with
+ * no saturation, message register 0, vector data and full precision.
+ */
+void brw_math_invert( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg src)
+{
+   /* brw_math() takes data_type before precision; the two trailing
+    * arguments used to be passed the other way round.
+    */
+   brw_math( p,
+             dst,
+             BRW_MATH_FUNCTION_INV,
+             BRW_MATH_SATURATE_NONE,
+             0,
+             src,
+             BRW_MATH_DATA_VECTOR,
+             BRW_MATH_PRECISION_FULL );
+}
+
+
+
+/* Copy 'count' 32-byte steps from src to dst.
+ *
+ * Both registers are first narrowed with vec4(); each step then emits
+ * two MOVs, one at byte offset +0 and one at +16 within the step.
+ */
+void brw_copy4(struct brw_compile *p,
+               struct brw_reg dst,
+               struct brw_reg src,
+               GLuint count)
+{
+   const struct brw_reg to = vec4(dst);
+   const struct brw_reg from = vec4(src);
+   GLuint step;
+
+   for (step = 0; step < count; step++) {
+      const GLuint lo = step * 32;
+      const GLuint hi = lo + 16;
+
+      brw_MOV(p, byte_offset(to, lo), byte_offset(from, lo));
+      brw_MOV(p, byte_offset(to, hi), byte_offset(from, hi));
+   }
+}
+
+
+/* Copy 'count' 32-byte steps from src to dst.
+ *
+ * Both registers are first widened with vec8(); each step emits a
+ * single MOV at the step's byte offset.
+ */
+void brw_copy8(struct brw_compile *p,
+               struct brw_reg dst,
+               struct brw_reg src,
+               GLuint count)
+{
+   const struct brw_reg to = vec8(dst);
+   const struct brw_reg from = vec8(src);
+   GLuint step;
+
+   for (step = 0; step < count; step++) {
+      const GLuint at = step * 32;
+
+      brw_MOV(p, byte_offset(to, at), byte_offset(from, at));
+   }
+}
+
+
+/* Copy 'count' 32-byte steps between two indirectly addressed
+ * locations.
+ *
+ * Each step emits two MOVs through deref_4f(), at byte offsets +0 and
+ * +16 within the step.
+ */
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+                                   struct brw_indirect dst_ptr,
+                                   struct brw_indirect src_ptr,
+                                   GLuint count)
+{
+   GLuint step;
+
+   for (step = 0; step < count; step++) {
+      const GLuint lo = step * 32;
+      const GLuint hi = lo + 16;
+
+      brw_MOV(p, deref_4f(dst_ptr, lo), deref_4f(src_ptr, lo));
+      brw_MOV(p, deref_4f(dst_ptr, hi), deref_4f(src_ptr, hi));
+   }
+}
+
+
+/* Copy 'count' 32-byte steps from an indirectly addressed location
+ * into dst.
+ *
+ * dst is first narrowed with vec4(); each step emits two MOVs, at byte
+ * offsets +0 and +16 within the step, reading through deref_4f().
+ */
+void brw_copy_from_indirect(struct brw_compile *p,
+                            struct brw_reg dst,
+                            struct brw_indirect ptr,
+                            GLuint count)
+{
+   const struct brw_reg to = vec4(dst);
+   GLuint step;
+
+   for (step = 0; step < count; step++) {
+      const GLuint lo = step * 32;
+      const GLuint hi = lo + 16;
+
+      brw_MOV(p, byte_offset(to, lo), deref_4f(ptr, lo));
+      brw_MOV(p, byte_offset(to, hi), deref_4f(ptr, hi));
+   }
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
new file mode 100644
index 00000000000..921b201bae2
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -0,0 +1,216 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_gs.h"
+
+
+
+/* Build the geometry-shader program described by 'key' and upload it
+ * to the program cache.
+ *
+ * The per-vertex register count is derived from key->nr_attrs (two
+ * attributes per register, plus 3 extra registers on IGDNG or 1
+ * elsewhere), the program is generated by the brw_gs_*() emitter
+ * matching key->primitive, and the result is handed to
+ * brw_upload_cache() which fills in *bo_out.
+ *
+ * Returns PIPE_OK on success, PIPE_ERROR_BAD_INPUT for an unexpected
+ * primitive, or the error from brw_get_program()/brw_upload_cache().
+ * Note that for LINES, TRIANGLES and POINTS without hint_gs_always the
+ * function returns PIPE_OK early without uploading anything, leaving
+ * *bo_out untouched.
+ */
+static enum pipe_error compile_gs_prog( struct brw_context *brw,
+ struct brw_gs_prog_key *key,
+ struct brw_winsys_buffer **bo_out )
+{
+ struct brw_gs_compile c;
+ enum pipe_error ret;
+ const GLuint *program;
+ GLuint program_size;
+
+ memset(&c, 0, sizeof(c));
+
+ c.key = *key;
+ c.need_ff_sync = BRW_IS_IGDNG(brw);
+ /* Need to locate the two positions present in vertex + header.
+ * These are currently hardcoded:
+ */
+ c.nr_attrs = c.key.nr_attrs;
+
+ if (BRW_IS_IGDNG(brw))
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
+ else
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
+ c.nr_bytes = c.nr_regs * REG_SIZE;
+
+
+ /* Begin the compilation:
+ */
+ brw_init_compile(brw, &c.func);
+
+ c.func.single_program_flow = 1;
+
+ /* For some reason the thread is spawned with only 4 channels
+ * unmasked.
+ */
+ brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+ /* Note that primitives which don't require a GS program have
+ * already been weeded out by this stage:
+ */
+ switch (key->primitive) {
+ case PIPE_PRIM_QUADS:
+ brw_gs_quads( &c );
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ brw_gs_quad_strip( &c );
+ break;
+ case PIPE_PRIM_LINE_LOOP:
+ brw_gs_lines( &c );
+ break;
+ case PIPE_PRIM_LINES:
+ if (key->hint_gs_always)
+ brw_gs_lines( &c );
+ else {
+ return PIPE_OK;
+ }
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ if (key->hint_gs_always)
+ brw_gs_tris( &c );
+ else {
+ return PIPE_OK;
+ }
+ break;
+ case PIPE_PRIM_POINTS:
+ if (key->hint_gs_always)
+ brw_gs_points( &c );
+ else {
+ return PIPE_OK;
+ }
+ break;
+ default:
+ assert(0);
+ return PIPE_ERROR_BAD_INPUT;
+ }
+
+ /* get the program
+ */
+ ret = brw_get_program(&c.func, &program, &program_size);
+ if (ret)
+ return ret;
+
+ /* Upload
+ */
+ ret = brw_upload_cache( &brw->cache, BRW_GS_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ &brw->gs.prog_data,
+ bo_out );
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+/* Maps the current draw primitive (brw->primitive) to the reduced
+ * primitive stored in the GS program key.
+ *
+ * Presumably indexed in PIPE_PRIM_* enum order (points, lines, line
+ * loop, line strip, triangles, triangle strip, triangle fan, quads,
+ * quad strip, polygon) - confirm against the PIPE_PRIM_* definitions.
+ * Any entries beyond the ten listed are zero-initialized.
+ */
+static const unsigned gs_prim[PIPE_PRIM_MAX] = {
+ PIPE_PRIM_POINTS,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINE_LOOP,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_QUADS,
+ PIPE_PRIM_QUAD_STRIP,
+ PIPE_PRIM_TRIANGLES
+};
+
+/* Fill in the GS program key from current context state.
+ *
+ * The key is zeroed first so that it can be compared bytewise.  A GS
+ * program is requested for quads, quad strips and line loops, or
+ * whenever hint_gs_always is set (it is currently hard-wired to 0).
+ */
+static void populate_key( struct brw_context *brw,
+                          struct brw_gs_prog_key *key )
+{
+   const struct brw_fs_signature *fs_sig = &brw->curr.fragment_shader->signature;
+
+   memset(key, 0, sizeof(*key));
+
+   /* PIPE_NEW_FRAGMENT_SIGNATURE */
+   key->nr_attrs = fs_sig->nr_inputs + 1;
+
+   /* BRW_NEW_PRIMITIVE */
+   key->primitive = gs_prim[brw->primitive];
+
+   key->hint_gs_always = 0; /* debug code? */
+
+   switch (brw->primitive) {
+   case PIPE_PRIM_QUADS:
+   case PIPE_PRIM_QUAD_STRIP:
+   case PIPE_PRIM_LINE_LOOP:
+      key->need_gs_prog = 1;
+      break;
+   default:
+      /* Every other primitive only gets a GS when forced. */
+      key->need_gs_prog = key->hint_gs_always;
+      break;
+   }
+}
+
+/* Make sure the GS program matching the current state is available.
+ *
+ * Builds the program key, flags CACHE_NEW_GS_PROG whenever the GS
+ * switches between active and inactive, and - if a GS is needed -
+ * looks the program up in the cache, compiling it on a miss.
+ *
+ * Returns PIPE_OK, or the error reported by compile_gs_prog().
+ */
+static int prepare_gs_prog(struct brw_context *brw)
+{
+   struct brw_gs_prog_key wanted;
+   enum pipe_error err;
+
+   populate_key(brw, &wanted);
+
+   if (brw->gs.prog_active != wanted.need_gs_prog) {
+      brw->gs.prog_active = wanted.need_gs_prog;
+      brw->state.dirty.cache |= CACHE_NEW_GS_PROG;
+   }
+
+   if (brw->gs.prog_active) {
+      if (!brw_search_cache(&brw->cache, BRW_GS_PROG,
+                            &wanted, sizeof(wanted),
+                            NULL, 0,
+                            &brw->gs.prog_data,
+                            &brw->gs.prog_bo)) {
+         /* Cache miss: build and upload the program. */
+         err = compile_gs_prog( brw, &wanted, &brw->gs.prog_bo );
+         if (err)
+            return err;
+      }
+   }
+
+   return PIPE_OK;
+}
+
+
+/* Tracked-state atom for the GS program: prepare_gs_prog() is run when
+ * the fragment shader signature or the primitive changes, which are the
+ * two inputs populate_key() reads.
+ */
+const struct brw_tracked_state brw_gs_prog = {
+ .dirty = {
+ .mesa = PIPE_NEW_FRAGMENT_SIGNATURE,
+ .brw = BRW_NEW_PRIMITIVE,
+ .cache = 0,
+ },
+ .prepare = prepare_gs_prog
+};
diff --git a/src/gallium/drivers/i965/brw_gs.h b/src/gallium/drivers/i965/brw_gs.h
new file mode 100644
index 00000000000..6e616dcb875
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_GS_H
+#define BRW_GS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_GS_VERTS (4)
+
+/* Cache key identifying a GS program.  The bitfields add up to 32 bits
+ * and the key is handed to the program cache together with its sizeof.
+ */
+struct brw_gs_prog_key {
+ GLuint nr_attrs:8; /* number of vertex attributes */
+ GLuint primitive:4; /* reduced PIPE_PRIM_* value */
+ GLuint hint_gs_always:1; /* force a GS program for every primitive */
+ GLuint need_gs_prog:1; /* non-zero when a GS program is required */
+ GLuint pad:18; /* unused; pads the key to 32 bits */
+};
+
+/* Working state for compiling one GS program. */
+struct brw_gs_compile {
+ struct brw_compile func; /* instruction emitter state */
+ struct brw_gs_prog_key key; /* copy of the key being compiled */
+ struct brw_gs_prog_data prog_data; /* results stored alongside the program */
+
+ /* Fixed register assignments. */
+ struct {
+ struct brw_reg R0;
+ struct brw_reg vertex[MAX_GS_VERTS];
+ } reg;
+
+ /* 3 different ways of expressing vertex size:
+ */
+ GLuint nr_attrs;
+ GLuint nr_regs;
+ GLuint nr_bytes;
+ GLboolean need_ff_sync; /* emit an FF_SYNC message before the vertices */
+};
+
+#define ATTR_SIZE (4*4)
+
+void brw_gs_quads( struct brw_gs_compile *c );
+void brw_gs_quad_strip( struct brw_gs_compile *c );
+void brw_gs_tris( struct brw_gs_compile *c );
+void brw_gs_lines( struct brw_gs_compile *c );
+void brw_gs_points( struct brw_gs_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_gs_emit.c b/src/gallium/drivers/i965/brw_gs_emit.c
new file mode 100644
index 00000000000..fd8e2accedd
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs_emit.c
@@ -0,0 +1,181 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_gs.h"
+
+/* Assign the fixed register layout for a GS program handling nr_verts
+ * vertices: GRF 0 is R0, followed by c->nr_regs registers per vertex.
+ * Also records the URB read length and total GRF count in prog_data.
+ */
+static void brw_gs_alloc_regs( struct brw_gs_compile *c,
+                               GLuint nr_verts )
+{
+   GLuint next_grf = 0;
+   GLuint v;
+
+   /* Register usage is static, precompute here:
+    */
+   c->reg.R0 = retype(brw_vec8_grf(next_grf, 0), BRW_REGISTER_TYPE_UD);
+   next_grf++;
+
+   /* Payload vertices plus space for more generated vertices:
+    */
+   for (v = 0; v < nr_verts; v++, next_grf += c->nr_regs)
+      c->reg.vertex[v] = brw_vec4_grf(next_grf, 0);
+
+   c->prog_data.urb_read_length = c->nr_regs;
+   c->prog_data.total_grf = next_grf;
+}
+
+
+/* Emit one vertex to the URB.
+ *
+ * 'header' is written into dword 2 of R0 (PrimType / PrimStart /
+ * PrimEnd bits), the vertex registers are copied into m1..mN and a URB
+ * write is issued.  Every vertex except the last also allocates the
+ * next URB entry; the last one ends the thread instead.
+ */
+static void brw_gs_emit_vue(struct brw_gs_compile *c,
+                            struct brw_reg vert,
+                            GLboolean last,
+                            GLuint header)
+{
+   struct brw_compile *p = &c->func;
+   const GLboolean allocate = !last;
+   struct brw_reg write_dest;
+
+   /* Overwrite PrimType and PrimStart in the message header, for
+    * each vertex in turn:
+    */
+   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+   /* Copy the vertex from vertn into m1..mN+1:
+    */
+   brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);
+
+   /* Only an allocating write returns something, so only then is a
+    * real destination needed.
+    */
+   if (allocate)
+      write_dest = c->reg.R0;
+   else
+      write_dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UD);
+
+   /* Send each vertex as a separate write to the urb. This is
+    * different to the concept in brw_sf_emit.c, where subsequent
+    * writes are used to build up a single urb entry. Each of these
+    * writes instantiates a separate urb entry, and a new one must be
+    * allocated each time.
+    */
+   brw_urb_WRITE(p,
+                 write_dest,
+                 0,
+                 c->reg.R0,
+                 allocate,
+                 1,                /* used */
+                 c->nr_regs + 1,   /* msg length */
+                 allocate ? 1 : 0, /* response length */
+                 allocate ? 0 : 1, /* eot */
+                 1,                /* writes_complete */
+                 0,                /* urb offset */
+                 BRW_URB_SWIZZLE_NONE);
+}
+
+/* Emit an FF_SYNC message: num_prim is stored in dword 1 of R0, then
+ * R0 is sent as a one-register message whose one-register response is
+ * written back into R0.
+ */
+static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
+{
+   struct brw_compile *p = &c->func;
+   const struct brw_reg r0 = c->reg.R0;
+
+   brw_MOV(p, get_element_ud(r0, 1), brw_imm_ud(num_prim));
+
+   brw_ff_sync(p,
+               r0,
+               0,
+               r0,
+               1, /* allocate */
+               1, /* used */
+               1, /* msg length */
+               1, /* response length */
+               0, /* eot */
+               1, /* writes complete */
+               0, /* urb offset */
+               BRW_URB_SWIZZLE_NONE);
+}
+
+
+/* Emit a GS program that turns a quad into a 4-vertex polygon. */
+void brw_gs_quads( struct brw_gs_compile *c )
+{
+   const GLuint polygon = _3DPRIM_POLYGON << 2;
+
+   brw_gs_alloc_regs(c, 4);
+
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+
+   /* Use polygons for correct edgeflag behaviour. Note that vertex 3
+    * is the PV for quads, but vertex 0 for polygons:
+    */
+   brw_gs_emit_vue(c, c->reg.vertex[3], 0, polygon | R02_PRIM_START);
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, polygon);
+   brw_gs_emit_vue(c, c->reg.vertex[1], 0, polygon);
+   brw_gs_emit_vue(c, c->reg.vertex[2], 1, polygon | R02_PRIM_END);
+}
+
+/* Emit a GS program that turns one quad-strip segment into a 4-vertex
+ * polygon, sending the vertices in the order 2, 3, 0, 1.
+ */
+void brw_gs_quad_strip( struct brw_gs_compile *c )
+{
+   const GLuint polygon = _3DPRIM_POLYGON << 2;
+
+   brw_gs_alloc_regs(c, 4);
+
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+
+   brw_gs_emit_vue(c, c->reg.vertex[2], 0, polygon | R02_PRIM_START);
+   brw_gs_emit_vue(c, c->reg.vertex[3], 0, polygon);
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, polygon);
+   brw_gs_emit_vue(c, c->reg.vertex[1], 1, polygon | R02_PRIM_END);
+}
+
+/* Emit a GS program that passes a triangle through as a 3-vertex
+ * TRILIST primitive.
+ */
+void brw_gs_tris( struct brw_gs_compile *c )
+{
+   const GLuint trilist = _3DPRIM_TRILIST << 2;
+
+   brw_gs_alloc_regs(c, 3);
+
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, trilist | R02_PRIM_START);
+   brw_gs_emit_vue(c, c->reg.vertex[1], 0, trilist);
+   brw_gs_emit_vue(c, c->reg.vertex[2], 1, trilist | R02_PRIM_END);
+}
+
+/* Emit a GS program that passes a line through as a 2-vertex
+ * LINESTRIP primitive.
+ */
+void brw_gs_lines( struct brw_gs_compile *c )
+{
+   const GLuint linestrip = _3DPRIM_LINESTRIP << 2;
+
+   brw_gs_alloc_regs(c, 2);
+
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, linestrip | R02_PRIM_START);
+   brw_gs_emit_vue(c, c->reg.vertex[1], 1, linestrip | R02_PRIM_END);
+}
+
+/* Emit a GS program that passes a point through as a single-vertex
+ * POINTLIST primitive; the one vertex both starts and ends it.
+ */
+void brw_gs_points( struct brw_gs_compile *c )
+{
+   const GLuint pointlist = _3DPRIM_POINTLIST << 2;
+
+   brw_gs_alloc_regs(c, 1);
+
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+
+   brw_gs_emit_vue(c, c->reg.vertex[0], 1,
+                   pointlist | R02_PRIM_START | R02_PRIM_END);
+}
+
+
+
+
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
new file mode 100644
index 00000000000..b64ec286cea
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -0,0 +1,169 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+struct brw_gs_unit_key { /* Lookup key for cached GS unit state.  It is
+   * zeroed and filled by gs_unit_populate_key() and then handed, together
+   * with its size, to brw_search_cache() / brw_upload_cache().
+   */
+ unsigned int total_grf; /* GS program's total_grf, or 1 when no program is active */
+ unsigned int urb_entry_read_length; /* GS program's urb_read_length, or 1 when inactive */
+
+ unsigned int curbe_offset; /* copied from brw->curbe.clip_start */
+
+ unsigned int nr_urb_entries, urb_size; /* brw->urb.nr_gs_entries and brw->urb.vsize */
+ GLboolean prog_active; /* copy of brw->gs.prog_active */
+};
+
+/* Build the GS unit cache key from the current context state.
+ *
+ * The key is zeroed in full before any field is written.  When no GS
+ * program is active, total_grf and urb_entry_read_length both fall back
+ * to 1.
+ */
+static void
+gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
+{
+   memset(key, 0, sizeof(*key));
+
+   /* CACHE_NEW_GS_PROG */
+   key->prog_active = brw->gs.prog_active;
+   key->total_grf = key->prog_active ? brw->gs.prog_data->total_grf : 1;
+   key->urb_entry_read_length =
+      key->prog_active ? brw->gs.prog_data->urb_read_length : 1;
+
+   /* BRW_NEW_URB_FENCE */
+   key->urb_size = brw->urb.vsize;
+   key->nr_urb_entries = brw->urb.nr_gs_entries;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   key->curbe_offset = brw->curbe.clip_start;
+}
+
+/* Build a brw_gs_unit_state from a populated key and upload it to the
+ * BRW_GS_UNIT cache.
+ *
+ * key       values produced by gs_unit_populate_key()
+ * reloc     relocations to attach to the uploaded state (may be unused)
+ * nr_reloc  number of entries in reloc
+ * bo_out    receives the buffer returned by brw_upload_cache()
+ *
+ * Returns whatever error brw_upload_cache() reports, or PIPE_OK.
+ */
+static enum pipe_error
+gs_unit_create_from_key(struct brw_context *brw,
+                        struct brw_gs_unit_key *key,
+                        struct brw_winsys_reloc *reloc,
+                        unsigned nr_reloc,
+                        struct brw_winsys_buffer **bo_out)
+{
+   struct brw_gs_unit_state gs;
+   enum pipe_error err;
+
+   memset(&gs, 0, sizeof(gs));
+
+   /* thread0: the kernel start pointer is left at zero here; callers pass
+    * a relocation targeting thread0 when a program is active.
+    */
+   gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+   gs.thread0.kernel_start_pointer = 0;
+
+   gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   gs.thread1.single_program_flow = 1;
+
+   gs.thread3.dispatch_grf_start_reg = 1;
+   gs.thread3.const_urb_entry_read_offset = 0;
+   gs.thread3.const_urb_entry_read_length = 0;
+   gs.thread3.urb_entry_read_offset = 0;
+   gs.thread3.urb_entry_read_length = key->urb_entry_read_length;
+
+   gs.thread4.nr_urb_entries = key->nr_urb_entries;
+   gs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+   gs.thread4.max_threads = (key->nr_urb_entries >= 8) ? 1 : 0;
+
+   if (BRW_IS_IGDNG(brw)) {
+      gs.thread4.rendering_enable = 1;
+   }
+
+   if (BRW_DEBUG & DEBUG_STATS) {
+      gs.thread4.stats_enable = 1;
+   }
+
+   err = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
+                          key, sizeof(*key),
+                          reloc, nr_reloc,
+                          &gs, sizeof(gs),
+                          NULL, NULL,
+                          bo_out);
+
+   return err ? err : PIPE_OK;
+}
+
+/* Prepare hook for the GS unit state.
+ *
+ * Populates the cache key, adds a relocation for the GS program buffer
+ * when a program is active, and then either reuses a matching cached
+ * state buffer or builds and uploads a new one into brw->gs.state_bo.
+ */
+static enum pipe_error prepare_gs_unit(struct brw_context *brw)
+{
+   struct brw_winsys_reloc reloc[1];
+   struct brw_gs_unit_key key;
+   unsigned nr_reloc = 0;
+   enum pipe_error err;
+
+   gs_unit_populate_key(brw, &key);
+
+   /* GS program relocation */
+   if (key.prog_active) {
+      /* Same value gs_unit_create_from_key() stores in
+       * thread0.grf_reg_count; it is passed, shifted left by one, as the
+       * relocation delta.
+       */
+      const unsigned grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
+
+      make_reloc(&reloc[nr_reloc++],
+                 BRW_USAGE_STATE,
+                 grf_reg_count << 1,
+                 offsetof(struct brw_gs_unit_state, thread0),
+                 brw->gs.prog_bo);
+   }
+
+   if (brw_search_cache(&brw->cache, BRW_GS_UNIT,
+                        &key, sizeof(key),
+                        reloc, nr_reloc,
+                        NULL,
+                        &brw->gs.state_bo)) {
+      return PIPE_OK;
+   }
+
+   err = gs_unit_create_from_key(brw, &key,
+                                 reloc, nr_reloc,
+                                 &brw->gs.state_bo);
+
+   return err ? err : PIPE_OK;
+}
+
+const struct brw_tracked_state brw_gs_unit = { /* tracked-state entry for the GS unit state */
+ .dirty = {
+ .mesa = 0,
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_URB_FENCE), /* matches the inputs annotated in gs_unit_populate_key() */
+ .cache = CACHE_NEW_GS_PROG /* GS program state read by gs_unit_populate_key() */
+ },
+ .prepare = prepare_gs_unit, /* only a prepare hook is set; no .emit hook */
+};
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
new file mode 100644
index 00000000000..e4b24229db3
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -0,0 +1,513 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_debug.h"
+#include "brw_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_screen.h"
+#include "brw_pipe_rast.h"
+
+
+
+
+
+/***********************************************************************
+ * Blend color
+ */
+
+static int upload_blend_constant_color(struct brw_context *brw)
+{ /* Emit hook: hands the blend-constant-colour packet held in
+   * brw->curr.bcc to BRW_CACHED_BATCH_STRUCT and reports success.
+   * NOTE(review): the macro is defined elsewhere; presumably it skips
+   * re-emitting an unchanged packet -- confirm.
+   */
+ BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bcc);
+ return 0;
+}
+
+
+const struct brw_tracked_state brw_blend_constant_color = { /* tracked-state entry for the blend constant colour */
+ .dirty = {
+ .mesa = PIPE_NEW_BLEND_COLOR, /* flag raised by brw_set_blend_color() */
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_blend_constant_color /* emit-only: no prepare hook */
+};
+
+/***********************************************************************
+ * Drawing rectangle - framebuffer dimensions
+ */
+/* Emit the four-dword drawing-rectangle packet covering the whole
+ * framebuffer.
+ *
+ * The second and fourth dwords are zero.  The third dword packs
+ * (width - 1) in its low 16 bits and (height - 1) from bit 16 upward.
+ */
+static int upload_drawing_rect(struct brw_context *brw)
+{
+   const unsigned xmax = (brw->curr.fb.width - 1) & 0xffff;
+   const unsigned ymax = (brw->curr.fb.height - 1) << 16;
+
+   BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
+   OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
+   OUT_BATCH(0);
+   OUT_BATCH(xmax | ymax);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   return 0;
+}
+
+const struct brw_tracked_state brw_drawing_rect = { /* tracked-state entry for the drawing rectangle */
+ .dirty = {
+ .mesa = PIPE_NEW_FRAMEBUFFER_DIMENSIONS, /* upload_drawing_rect() reads fb width and height */
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_drawing_rect /* emit-only: no prepare hook */
+};
+
+
+/***********************************************************************
+ * Binding table pointers
+ */
+
+static int prepare_binding_table_pointers(struct brw_context *brw)
+{ /* Prepare hook: registers the VS and WM binding-table buffers with
+   * brw_add_validated_bo() and reports success.
+   * NOTE(review): upload_binding_table_pointers() treats a NULL
+   * brw->vs.bind_bo as valid, yet it is passed here unchecked -- confirm
+   * that brw_add_validated_bo() tolerates NULL.
+   */
+ brw_add_validated_bo(brw, brw->vs.bind_bo);
+ brw_add_validated_bo(brw, brw->wm.bind_bo);
+ return 0;
+}
+
+/**
+ * Upload the binding table pointers, which point to each stage's array of
+ * surface state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is 0.
+ *
+ * The packet is six dwords: a header followed by one slot each for VS, GS,
+ * clip, SF and WM.  The VS slot is a relocation to brw->vs.bind_bo when that
+ * buffer exists and 0 otherwise; the GS, clip and SF slots are always 0; the
+ * WM slot is always a relocation to brw->wm.bind_bo.
+ *
+ * NOTE(review): unlike the VS slot, brw->wm.bind_bo is not NULL-checked --
+ * confirm it is always allocated before this hook runs.
+ */
+static int upload_binding_table_pointers(struct brw_context *brw)
+{
+ BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); /* header: opcode and (packet length - 2) */
+ if (brw->vs.bind_bo != NULL)
+ OUT_RELOC(brw->vs.bind_bo,
+ BRW_USAGE_SAMPLER,
+ 0); /* vs */
+ else
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* gs */
+ OUT_BATCH(0); /* clip */
+ OUT_BATCH(0); /* sf */
+ OUT_RELOC(brw->wm.bind_bo,
+ BRW_USAGE_SAMPLER,
+ 0); /* wm/ps */
+ ADVANCE_BATCH();
+ return 0;
+}
+
+const struct brw_tracked_state brw_binding_table_pointers = { /* tracked-state entry for the binding table pointers packet */
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH,
+ .cache = CACHE_NEW_SURF_BIND,
+ },
+ .prepare = prepare_binding_table_pointers, /* registers the VS and WM bind buffers */
+ .emit = upload_binding_table_pointers, /* writes the six-dword packet */
+};
+
+
+/**********************************************************************
+ * Upload pointers to the per-stage state.
+ *
+ * The state pointers in this packet are all relative to the general state
+ * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
+ *
+ * The packet is seven dwords: a header followed by relocations to the VS,
+ * GS, clip, SF, WM and CC state buffers, in that order.  The GS slot is a
+ * relocation only while brw->gs.prog_active is set and is 0 otherwise.
+ * The GS and clip relocations use a delta of 1, the others a delta of 0.
+ * NOTE(review): the delta of 1 presumably sets an enable bit in the low
+ * bit of the pointer -- confirm against the hardware documentation.
+ *
+ * BRW_NEW_PSP is raised in brw->state.dirty.brw after the packet is written.
+ */
+static int upload_pipelined_state_pointers(struct brw_context *brw )
+{
+ BEGIN_BATCH(7, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); /* header: opcode and (packet length - 2) */
+ OUT_RELOC(brw->vs.state_bo,
+ BRW_USAGE_STATE,
+ 0);
+ if (brw->gs.prog_active)
+ OUT_RELOC(brw->gs.state_bo,
+ BRW_USAGE_STATE,
+ 1);
+ else
+ OUT_BATCH(0);
+ OUT_RELOC(brw->clip.state_bo,
+ BRW_USAGE_STATE,
+ 1);
+ OUT_RELOC(brw->sf.state_bo,
+ BRW_USAGE_STATE,
+ 0);
+ OUT_RELOC(brw->wm.state_bo,
+ BRW_USAGE_STATE,
+ 0);
+ OUT_RELOC(brw->cc.state_bo,
+ BRW_USAGE_STATE,
+ 0);
+ ADVANCE_BATCH();
+
+ brw->state.dirty.brw |= BRW_NEW_PSP;
+ return 0;
+}
+
+
+static int prepare_psp_urb_cbs(struct brw_context *brw)
+{ /* Prepare hook: registers the six per-stage unit state buffers (VS, GS,
+   * clip, SF, WM, CC) with brw_add_validated_bo() and reports success.
+   * NOTE(review): brw->gs.state_bo is registered unconditionally although
+   * the emit path only references it while brw->gs.prog_active is set --
+   * confirm this is intended.
+   */
+ brw_add_validated_bo(brw, brw->vs.state_bo);
+ brw_add_validated_bo(brw, brw->gs.state_bo);
+ brw_add_validated_bo(brw, brw->clip.state_bo);
+ brw_add_validated_bo(brw, brw->sf.state_bo);
+ brw_add_validated_bo(brw, brw->wm.state_bo);
+ brw_add_validated_bo(brw, brw->cc.state_bo);
+ return 0;
+}
+
+/* Emit hook: writes the pipelined state pointers, then the URB fence, then
+ * the CS URB state.  The sequence stops at the first step that returns a
+ * non-zero code, and that code is returned; 0 means all three succeeded.
+ */
+static int upload_psp_urb_cbs(struct brw_context *brw )
+{
+   int err = upload_pipelined_state_pointers(brw);
+
+   if (err == 0)
+      err = brw_upload_urb_fence(brw);
+
+   if (err == 0)
+      err = brw_upload_cs_urb_state(brw);
+
+   return err;
+}
+
+const struct brw_tracked_state brw_psp_urb_cbs = { /* tracked-state entry for state pointers + URB fence + CS URB state */
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_URB_FENCE | BRW_NEW_BATCH,
+ .cache = (CACHE_NEW_VS_UNIT |
+ CACHE_NEW_GS_UNIT |
+ CACHE_NEW_GS_PROG |
+ CACHE_NEW_CLIP_UNIT |
+ CACHE_NEW_SF_UNIT |
+ CACHE_NEW_WM_UNIT |
+ CACHE_NEW_CC_UNIT) /* one flag per unit state buffer referenced by the packet, plus the GS program */
+ },
+ .prepare = prepare_psp_urb_cbs, /* registers the unit state buffers */
+ .emit = upload_psp_urb_cbs, /* writes the three packets in order */
+};
+
+
+/***********************************************************************
+ * Depth buffer
+ */
+
+/* Prepare hook: when a depth/stencil surface is bound, register its buffer
+ * with brw_add_validated_bo().  Always returns 0.
+ */
+static int prepare_depthbuffer(struct brw_context *brw)
+{
+   struct pipe_surface *depth = brw->curr.fb.zsbuf;
+
+   if (depth != NULL) {
+      brw_add_validated_bo(brw, brw_surface(depth)->bo);
+   }
+
+   return 0;
+}
+
+/* Emit the CMD_DEPTH_BUFFER packet for the bound depth/stencil surface.
+ *
+ * The packet is six dwords on G4X and IGDNG and five dwords otherwise.
+ * With no zsbuf bound, a null-surface packet is written.  Otherwise the
+ * gallium format is mapped to a hardware depth format and a bytes-per-pixel
+ * value, and the surface's pitch, layout, height and buffer relocation are
+ * encoded into the packet.
+ *
+ * Returns 0 on success, or PIPE_ERROR_BAD_INPUT (after asserting) when the
+ * surface format is not one of the handled depth formats.
+ */
+static int emit_depthbuffer(struct brw_context *brw)
+{
+   struct pipe_surface *zsbuf = brw->curr.fb.zsbuf;
+   const int six_dwords = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw));
+   const unsigned int ndwords = six_dwords ? 6 : 5;
+   struct brw_winsys_buffer *bo;
+   unsigned int hw_format;
+   unsigned int pitch;
+   unsigned int cpp;
+
+   if (zsbuf == NULL) {
+      /* Nothing bound: describe a null surface. */
+      BEGIN_BATCH(ndwords, IGNORE_CLIPRECTS);
+      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (ndwords - 2));
+      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
+                (BRW_SURFACE_NULL << 29));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+
+      if (six_dwords) {
+         OUT_BATCH(0);
+      }
+
+      ADVANCE_BATCH();
+      return 0;
+   }
+
+   switch (zsbuf->format) {
+   case PIPE_FORMAT_Z16_UNORM:
+      hw_format = BRW_DEPTHFORMAT_D16_UNORM;
+      cpp = 2;
+      break;
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_S8Z24_UNORM:
+      hw_format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+      cpp = 4;
+      break;
+   case PIPE_FORMAT_Z32_FLOAT:
+      hw_format = BRW_DEPTHFORMAT_D32_FLOAT;
+      cpp = 4;
+      break;
+   default:
+      assert(0);
+      return PIPE_ERROR_BAD_INPUT;
+   }
+
+   bo = brw_surface(zsbuf)->bo;
+   pitch = brw_surface(zsbuf)->pitch;
+
+   BEGIN_BATCH(ndwords, IGNORE_CLIPRECTS);
+   OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (ndwords - 2));
+   OUT_BATCH(((pitch * cpp) - 1) |
+             (hw_format << 18) |
+             (BRW_TILEWALK_YMAJOR << 26) |
+             ((zsbuf->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) |
+             (BRW_SURFACE_2D << 29));
+   OUT_RELOC(bo,
+             BRW_USAGE_DEPTH_BUFFER,
+             zsbuf->offset);
+   OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
+             ((pitch - 1) << 6) |
+             ((zsbuf->height - 1) << 19));
+   OUT_BATCH(0);
+
+   if (six_dwords) {
+      OUT_BATCH(0);
+   }
+
+   ADVANCE_BATCH();
+
+   return 0;
+}
+
+const struct brw_tracked_state brw_depthbuffer = { /* tracked-state entry for the depth buffer packet */
+ .dirty = {
+ .mesa = PIPE_NEW_DEPTH_BUFFER,
+ .brw = BRW_NEW_BATCH,
+ .cache = 0,
+ },
+ .prepare = prepare_depthbuffer, /* registers the zsbuf buffer, if any */
+ .emit = emit_depthbuffer, /* writes CMD_DEPTH_BUFFER */
+};
+
+
+
+/***********************************************************************
+ * Polygon stipple packet
+ */
+
+static int upload_polygon_stipple(struct brw_context *brw)
+{ /* Emit hook: hands the polygon stipple packet held in brw->curr.bps to
+   * BRW_CACHED_BATCH_STRUCT and reports success.
+   */
+ BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bps);
+ return 0;
+}
+
+const struct brw_tracked_state brw_polygon_stipple = { /* tracked-state entry for the polygon stipple packet */
+ .dirty = {
+ .mesa = PIPE_NEW_POLYGON_STIPPLE,
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_polygon_stipple /* emit-only: no prepare hook */
+};
+
+
+/***********************************************************************
+ * Line stipple packet
+ */
+
+/* Emit hook for the line stipple packet stored in the bound rasterizer
+ * state.  The packet is only submitted when its header opcode is non-zero.
+ * Always returns 0.
+ */
+static int upload_line_stipple(struct brw_context *brw)
+{
+   const struct brw_line_stipple *bls = &brw->curr.rast->bls;
+
+   if (!bls->header.opcode)
+      return 0;
+
+   BRW_CACHED_BATCH_STRUCT(brw, bls);
+   return 0;
+}
+
+const struct brw_tracked_state brw_line_stipple = { /* tracked-state entry for the line stipple packet */
+ .dirty = {
+ .mesa = PIPE_NEW_RAST, /* the packet is read from brw->curr.rast */
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_line_stipple /* emit-only: no prepare hook */
+};
+
+
+/***********************************************************************
+ * Misc invariant state packets
+ */
+
+static int upload_invarient_state( struct brw_context *brw )
+{ /* Emit hook: writes a fixed series of packets to the batch, in this
+   * order: pipeline select, global depth offset clamp, system instruction
+   * pointer, VF statistics, AA line parameters (skipped when
+   * BRW_IS_965(brw) is true) and polygon stipple offset.  Each packet is
+   * built in a zeroed local struct and submitted with BRW_BATCH_STRUCT.
+   * Always returns 0.
+   */
+ {
+ /* 0x61040000 Pipeline Select */
+ /* PipelineSelect : 0 */
+ struct brw_pipeline_select ps;
+
+ memset(&ps, 0, sizeof(ps));
+ if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) /* the opcode differs between chipset generations */
+ ps.header.opcode = CMD_PIPELINE_SELECT_GM45;
+ else
+ ps.header.opcode = CMD_PIPELINE_SELECT_965;
+ ps.header.pipeline_select = 0;
+ BRW_BATCH_STRUCT(brw, &ps);
+ }
+
+ {
+ struct brw_global_depth_offset_clamp gdo;
+ memset(&gdo, 0, sizeof(gdo));
+
+ /* Disable depth offset clamping.
+ */
+ gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
+ gdo.header.length = sizeof(gdo)/4 - 2; /* size in dwords, minus 2 */
+ gdo.depth_offset_clamp = 0.0;
+
+ BRW_BATCH_STRUCT(brw, &gdo);
+ }
+
+
+ /* 0x61020000 State Instruction Pointer */
+ {
+ struct brw_system_instruction_pointer sip;
+ memset(&sip, 0, sizeof(sip));
+
+ sip.header.opcode = CMD_STATE_INSN_POINTER;
+ sip.header.length = 0;
+ sip.bits0.pad = 0;
+ sip.bits0.system_instruction_pointer = 0;
+ BRW_BATCH_STRUCT(brw, &sip);
+ }
+
+ /* VF Statistics */
+ {
+ struct brw_vf_statistics vfs;
+ memset(&vfs, 0, sizeof(vfs));
+
+ if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) /* the opcode differs between chipset generations */
+ vfs.opcode = CMD_VF_STATISTICS_GM45;
+ else
+ vfs.opcode = CMD_VF_STATISTICS_965;
+
+ if (BRW_DEBUG & DEBUG_STATS) /* statistics are only enabled with the stats debug flag */
+ vfs.statistics_enable = 1;
+
+ BRW_BATCH_STRUCT(brw, &vfs);
+ }
+
+ if (!BRW_IS_965(brw))
+ {
+ struct brw_aa_line_parameters balp;
+
+ /* use legacy aa line coverage computation */
+ memset(&balp, 0, sizeof(balp));
+ balp.header.opcode = CMD_AA_LINE_PARAMETERS;
+ balp.header.length = sizeof(balp) / 4 - 2;
+
+ BRW_BATCH_STRUCT(brw, &balp);
+ }
+
+ {
+ struct brw_polygon_stipple_offset bpso;
+
+ /* This is invariant state in gallium:
+ */
+ memset(&bpso, 0, sizeof(bpso));
+ bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
+ bpso.header.length = sizeof(bpso)/4-2;
+ bpso.bits0.y_offset = 0;
+ bpso.bits0.x_offset = 0;
+
+ BRW_BATCH_STRUCT(brw, &bpso);
+ }
+
+ return 0;
+}
+
+const struct brw_tracked_state brw_invarient_state = { /* tracked-state entry for the fixed packets above */
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_CONTEXT, /* the only dirty flag this entry lists */
+ .cache = 0
+ },
+ .emit = upload_invarient_state /* emit-only: no prepare hook */
+};
+
+
+/***********************************************************************
+ * State base address
+ */
+
+/**
+ * Define the base addresses which some state is referenced from.
+ *
+ * This allows us to avoid having to emit relocations in many places for
+ * cached state, and instead emit pointers inside of large, mostly-static
+ * state pools. This comes at the expense of memory, and more expensive cache
+ * misses.
+ *
+ * The packet is eight dwords on IGDNG, which adds an instruction base
+ * address and an instruction access upper bound, and six dwords otherwise.
+ * Every payload dword is written as the literal value 1.
+ * NOTE(review): a value of 1 presumably means "address 0 with the
+ * modify-enable bit set" -- confirm against the hardware documentation.
+ *
+ * Always returns 0.
+ */
+static int upload_state_base_address( struct brw_context *brw )
+{
+ /* Output the structure (brw_state_base_address) directly to the
+ * batchbuffer, so we can emit relocations inline.
+ * NOTE(review): no relocation is actually emitted below; only literal
+ * dwords are written.
+ */
+ if (BRW_IS_IGDNG(brw)) {
+ BEGIN_BATCH(8, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
+ OUT_BATCH(1); /* General state base address */
+ OUT_BATCH(1); /* Surface state base address */
+ OUT_BATCH(1); /* Indirect object base address */
+ OUT_BATCH(1); /* Instruction base address */
+ OUT_BATCH(1); /* General state upper bound */
+ OUT_BATCH(1); /* Indirect object upper bound */
+ OUT_BATCH(1); /* Instruction access upper bound */
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+ OUT_BATCH(1); /* General state base address */
+ OUT_BATCH(1); /* Surface state base address */
+ OUT_BATCH(1); /* Indirect object base address */
+ OUT_BATCH(1); /* General state upper bound */
+ OUT_BATCH(1); /* Indirect object upper bound */
+ ADVANCE_BATCH();
+ }
+ return 0;
+}
+
+const struct brw_tracked_state brw_state_base_address = { /* tracked-state entry for the state base address packet */
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_CONTEXT, /* the only dirty flag this entry lists */
+ .cache = 0,
+ },
+ .emit = upload_state_base_address /* emit-only: no prepare hook */
+};
diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c
new file mode 100644
index 00000000000..b759a910b63
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_blend.c
@@ -0,0 +1,208 @@
+
+#include "util/u_memory.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+/* Map a PIPE_LOGICOP_* value to the matching BRW_LOGICOPFUNCTION_* value.
+ * An unrecognised input asserts and falls back to
+ * BRW_LOGICOPFUNCTION_SET.
+ */
+static int translate_logicop(unsigned logicop)
+{
+   int hw;
+
+   switch (logicop) {
+   case PIPE_LOGICOP_CLEAR:         hw = BRW_LOGICOPFUNCTION_CLEAR; break;
+   case PIPE_LOGICOP_AND:           hw = BRW_LOGICOPFUNCTION_AND; break;
+   case PIPE_LOGICOP_AND_REVERSE:   hw = BRW_LOGICOPFUNCTION_AND_REVERSE; break;
+   case PIPE_LOGICOP_COPY:          hw = BRW_LOGICOPFUNCTION_COPY; break;
+   case PIPE_LOGICOP_COPY_INVERTED: hw = BRW_LOGICOPFUNCTION_COPY_INVERTED; break;
+   case PIPE_LOGICOP_AND_INVERTED:  hw = BRW_LOGICOPFUNCTION_AND_INVERTED; break;
+   case PIPE_LOGICOP_NOOP:          hw = BRW_LOGICOPFUNCTION_NOOP; break;
+   case PIPE_LOGICOP_XOR:           hw = BRW_LOGICOPFUNCTION_XOR; break;
+   case PIPE_LOGICOP_OR:            hw = BRW_LOGICOPFUNCTION_OR; break;
+   case PIPE_LOGICOP_OR_INVERTED:   hw = BRW_LOGICOPFUNCTION_OR_INVERTED; break;
+   case PIPE_LOGICOP_NOR:           hw = BRW_LOGICOPFUNCTION_NOR; break;
+   case PIPE_LOGICOP_EQUIV:         hw = BRW_LOGICOPFUNCTION_EQUIV; break;
+   case PIPE_LOGICOP_INVERT:        hw = BRW_LOGICOPFUNCTION_INVERT; break;
+   case PIPE_LOGICOP_OR_REVERSE:    hw = BRW_LOGICOPFUNCTION_OR_REVERSE; break;
+   case PIPE_LOGICOP_NAND:          hw = BRW_LOGICOPFUNCTION_NAND; break;
+   case PIPE_LOGICOP_SET:           hw = BRW_LOGICOPFUNCTION_SET; break;
+   default:
+      assert(0);
+      hw = BRW_LOGICOPFUNCTION_SET;
+      break;
+   }
+
+   return hw;
+}
+
+
+/* Map a PIPE_BLEND_* equation to the matching BRW_BLENDFUNCTION_* value.
+ * An unrecognised input asserts and falls back to BRW_BLENDFUNCTION_ADD.
+ */
+static unsigned translate_blend_equation( unsigned mode )
+{
+   unsigned hw;
+
+   switch (mode) {
+   case PIPE_BLEND_ADD:              hw = BRW_BLENDFUNCTION_ADD; break;
+   case PIPE_BLEND_MIN:              hw = BRW_BLENDFUNCTION_MIN; break;
+   case PIPE_BLEND_MAX:              hw = BRW_BLENDFUNCTION_MAX; break;
+   case PIPE_BLEND_SUBTRACT:         hw = BRW_BLENDFUNCTION_SUBTRACT; break;
+   case PIPE_BLEND_REVERSE_SUBTRACT: hw = BRW_BLENDFUNCTION_REVERSE_SUBTRACT; break;
+   default:
+      assert(0);
+      hw = BRW_BLENDFUNCTION_ADD;
+      break;
+   }
+
+   return hw;
+}
+
+/* Map a PIPE_BLENDFACTOR_* value to the matching BRW_BLENDFACTOR_* value.
+ * An unrecognised input asserts and falls back to BRW_BLENDFACTOR_ZERO.
+ */
+static unsigned translate_blend_factor( unsigned factor )
+{
+   unsigned hw;
+
+   switch (factor) {
+   case PIPE_BLENDFACTOR_ZERO:               hw = BRW_BLENDFACTOR_ZERO; break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:          hw = BRW_BLENDFACTOR_SRC_ALPHA; break;
+   case PIPE_BLENDFACTOR_ONE:                hw = BRW_BLENDFACTOR_ONE; break;
+   case PIPE_BLENDFACTOR_SRC_COLOR:          hw = BRW_BLENDFACTOR_SRC_COLOR; break;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:      hw = BRW_BLENDFACTOR_INV_SRC_COLOR; break;
+   case PIPE_BLENDFACTOR_DST_COLOR:          hw = BRW_BLENDFACTOR_DST_COLOR; break;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:      hw = BRW_BLENDFACTOR_INV_DST_COLOR; break;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:      hw = BRW_BLENDFACTOR_INV_SRC_ALPHA; break;
+   case PIPE_BLENDFACTOR_DST_ALPHA:          hw = BRW_BLENDFACTOR_DST_ALPHA; break;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:      hw = BRW_BLENDFACTOR_INV_DST_ALPHA; break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: hw = BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; break;
+   case PIPE_BLENDFACTOR_CONST_COLOR:        hw = BRW_BLENDFACTOR_CONST_COLOR; break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:    hw = BRW_BLENDFACTOR_INV_CONST_COLOR; break;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:        hw = BRW_BLENDFACTOR_CONST_ALPHA; break;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:    hw = BRW_BLENDFACTOR_INV_CONST_ALPHA; break;
+   default:
+      assert(0);
+      hw = BRW_BLENDFACTOR_ZERO;
+      break;
+   }
+
+   return hw;
+}
+
+/* Create a driver blend-state object from a gallium blend template.
+ *
+ * Logic ops take precedence over blending: when the template enables a
+ * logic op, only the logic-op fields are filled in.  Otherwise, when
+ * blending is enabled, the colour and alpha factors and equations are
+ * translated, and independent-alpha blending is switched on only if the
+ * stored alpha settings differ from the stored colour settings.  Dither,
+ * the optional statistics bit and the per-channel write-disable bits are
+ * set in every case.
+ *
+ * Returns a zero-initialised-then-filled brw_blend_state, or NULL when the
+ * allocation fails.  The object is released by brw_delete_blend_state().
+ */
+static void *brw_create_blend_state( struct pipe_context *pipe,
+                                     const struct pipe_blend_state *templ )
+{
+   struct brw_blend_state *bs = CALLOC_STRUCT(brw_blend_state);
+
+   if (!bs)
+      return NULL;
+
+   if (templ->logicop_enable) {
+      bs->cc2.logicop_enable = 1;
+      bs->cc5.logicop_func = translate_logicop(templ->logicop_func);
+   }
+   else if (templ->blend_enable) {
+      /* colour channel settings */
+      bs->cc6.dest_blend_factor = translate_blend_factor(templ->rgb_dst_factor);
+      bs->cc6.src_blend_factor = translate_blend_factor(templ->rgb_src_factor);
+      bs->cc6.blend_function = translate_blend_equation(templ->rgb_func);
+
+      /* alpha channel settings */
+      bs->cc5.ia_dest_blend_factor = translate_blend_factor(templ->alpha_dst_factor);
+      bs->cc5.ia_src_blend_factor = translate_blend_factor(templ->alpha_src_factor);
+      bs->cc5.ia_blend_function = translate_blend_equation(templ->alpha_func);
+
+      bs->cc3.blend_enable = 1;
+
+      /* Compare the values as stored in the state words, not the raw
+       * translation results.
+       */
+      bs->cc3.ia_blend_enable =
+         !(bs->cc6.dest_blend_factor == bs->cc5.ia_dest_blend_factor &&
+           bs->cc6.src_blend_factor == bs->cc5.ia_src_blend_factor &&
+           bs->cc6.blend_function == bs->cc5.ia_blend_function);
+
+      /* Per-surface blend enables, currently just follow global
+       * state:
+       */
+      bs->ss0.color_blend = 1;
+   }
+
+   bs->cc5.dither_enable = templ->dither;
+
+   if (BRW_DEBUG & DEBUG_STATS) {
+      bs->cc5.statistics_enable = 1;
+   }
+
+   /* Per-surface color mask -- just follow global state: a channel is
+    * write-disabled exactly when its bit is absent from the colormask.
+    */
+   bs->ss0.writedisable_red   = !(templ->colormask & PIPE_MASK_R);
+   bs->ss0.writedisable_green = !(templ->colormask & PIPE_MASK_G);
+   bs->ss0.writedisable_blue  = !(templ->colormask & PIPE_MASK_B);
+   bs->ss0.writedisable_alpha = !(templ->colormask & PIPE_MASK_A);
+
+   return (void *)bs;
+}
+
+/* Make the given blend-state object current and flag PIPE_NEW_BLEND as
+ * dirty.  cso is stored as-is; no copy is taken.
+ */
+static void brw_bind_blend_state(struct pipe_context *pipe,
+                                 void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+   const struct brw_blend_state *blend = cso;
+
+   brw->curr.blend = blend;
+   brw->state.dirty.mesa |= PIPE_NEW_BLEND;
+}
+
+/* Free a blend-state object created by brw_create_blend_state().
+ *
+ * The object must not be the currently bound blend state; this is checked
+ * by an assertion only.
+ */
+static void brw_delete_blend_state(struct pipe_context *pipe,
+                                   void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   /* brw is otherwise referenced only by the assert below, which makes it
+    * an unused variable when asserts are compiled out.
+    */
+   (void) brw;
+
+   assert((const void *)cso != (const void *)brw->curr.blend);
+   FREE(cso);
+}
+
+
+/* Copy the four blend colour components into the context's blend constant
+ * colour packet and flag PIPE_NEW_BLEND_COLOR as dirty.
+ */
+static void brw_set_blend_color(struct pipe_context *pipe,
+                                const struct pipe_blend_color *blend_color)
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_blend_constant_color *packet = &brw->curr.bcc;
+   unsigned chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      packet->blend_constant_color[chan] = blend_color->color[chan];
+   }
+
+   brw->state.dirty.mesa |= PIPE_NEW_BLEND_COLOR;
+}
+
+
+/* Install the blend-related pipe_context callbacks and initialise the
+ * blend constant colour packet: the packet is zeroed, then its header
+ * opcode and length (size in dwords, minus 2) are filled in.
+ */
+void brw_pipe_blend_init( struct brw_context *brw )
+{
+   struct brw_blend_constant_color *packet = &brw->curr.bcc;
+
+   brw->base.create_blend_state = brw_create_blend_state;
+   brw->base.bind_blend_state = brw_bind_blend_state;
+   brw->base.delete_blend_state = brw_delete_blend_state;
+   brw->base.set_blend_color = brw_set_blend_color;
+
+   memset(packet, 0, sizeof(*packet));
+   packet->header.opcode = CMD_BLEND_CONSTANT_COLOR;
+   packet->header.length = sizeof(*packet)/4-2;
+}
+
+void brw_pipe_blend_cleanup( struct brw_context *brw )
+{ /* Counterpart of brw_pipe_blend_init(); intentionally empty. */
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c
new file mode 100644
index 00000000000..211be881789
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_clear.c
@@ -0,0 +1,218 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_pack_color.h"
+
+#include "pipe/p_state.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_screen.h"
+#include "brw_context.h"
+
+#define MASK16 0xffff
+#define MASK24 0xffffff
+
+
+/**
+ * Fill an entire surface with a raw value using an XY_COLOR_BLT command.
+ *
+ * The rectangle always starts at (0,0) and spans the surface's width and
+ * height; a surface with zero width or height is a no-op.
+ *
+ * \param brw      context whose batch receives the blit
+ * \param surface  destination; its width, height, pitch, cpp, tiling,
+ *                 buffer and offset are read
+ * \param value    fill value, written to the packet unmodified
+ * \return PIPE_OK.  No failure path exists in this function at present,
+ *         although callers are written to retry on a non-zero result.
+ */
+static enum pipe_error
+try_clear( struct brw_context *brw,
+           struct brw_surface *surface,
+           unsigned value )
+{
+   uint32_t BR13, CMD;
+   int x1 = 0;
+   int y1 = 0;
+   int x2 = surface->base.width;
+   int y2 = surface->base.height;
+   int pitch = surface->pitch;
+   int cpp = surface->cpp;
+
+   if (x2 == 0 || y2 == 0)
+      return PIPE_OK;
+
+   debug_printf("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
+                __FUNCTION__,
+                (void *)surface->bo, pitch * cpp,
+                surface->base.offset,
+                x1, y1, x2 - x1, y2 - y1);
+
+   BR13 = 0xf0 << 16;
+
+   /* The RGB and alpha write bits are set for every surface depth, so no
+    * further command bits are needed in the 32bpp case below.
+    */
+   CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_RGB | XY_BLT_WRITE_ALPHA;
+
+   /* Select the blitter colour depth */
+   if (cpp == 4) {
+      BR13 |= BR13_8888;
+   }
+   else {
+      assert(cpp == 2);
+      BR13 |= BR13_565;
+   }
+
+   /* XXX: nasty hack for clearing depth buffers
+    */
+   if (surface->tiling == BRW_TILING_Y) {
+      x2 = pitch;
+   }
+
+   if (surface->tiling == BRW_TILING_X) {
+      CMD |= XY_DST_TILED;
+      pitch /= 4;
+   }
+
+   BR13 |= (pitch * cpp);
+
+   BEGIN_BATCH(6, 0);
+   OUT_BATCH(CMD);
+   OUT_BATCH(BR13);
+   OUT_BATCH((y1 << 16) | x1);
+   OUT_BATCH((y2 << 16) | x2);
+   OUT_RELOC(surface->bo,
+             BRW_USAGE_BLIT_DEST,
+             surface->base.offset);
+   OUT_BATCH(value);
+   ADVANCE_BATCH();
+
+   return PIPE_OK;
+}
+
+
+
+
+/* Clear a colour surface to the given RGBA colour.
+ *
+ * The colour is packed into the surface's format.  For 2-byte-per-pixel
+ * surfaces the packed value is replicated into the upper 16 bits before
+ * being handed to try_clear().  If the first attempt reports an error, the
+ * context is flushed and the clear is retried once.
+ */
+static void color_clear(struct brw_context *brw,
+                        struct brw_surface *bsurface,
+                        const float *rgba )
+{
+   enum pipe_error ret;
+   /* Start from zero so that a format util_pack_color() does not handle
+    * cannot leave an indeterminate value to be read below.
+    */
+   unsigned value = 0;
+
+   util_pack_color( rgba, bsurface->base.format, &value );
+
+   if (bsurface->cpp == 2)
+      value |= value << 16;
+
+   ret = try_clear( brw, bsurface, value );
+
+   if (ret != 0) {
+      brw_context_flush( brw );
+      ret = try_clear( brw, bsurface, value );
+      assert( ret == 0 );
+   }
+}
+
+/* Clear a depth/stencil surface.
+ *
+ * For the 24-bit depth formats, depth is scaled to 24 bits and the stencil
+ * value is placed in bits 24 and up.  For Z16, depth is scaled to 16 bits
+ * and replicated into the upper half of the word; stencil is ignored.  Any
+ * other format asserts and returns without clearing.  If the first clear
+ * attempt reports an error, the context is flushed and the clear is
+ * retried once.
+ */
+static void zstencil_clear(struct brw_context *brw,
+                           struct brw_surface *bsurface,
+                           double depth,
+                           unsigned stencil )
+{
+   enum pipe_error ret;
+   unsigned value;
+
+   switch (bsurface->base.format) {
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_S8Z24_UNORM:
+      value = ((unsigned)(depth * MASK24) & MASK24);
+      value = value | (stencil << 24);
+      break;
+
+   case PIPE_FORMAT_Z16_UNORM:
+      value = ((unsigned)(depth * MASK16) & MASK16);
+      value = value | (value << 16);
+      break;
+
+   default:
+      assert(0);
+      return;
+   }
+
+   ret = try_clear( brw, bsurface, value );
+   if (ret == 0)
+      return;
+
+   brw_context_flush( brw );
+   ret = try_clear( brw, bsurface, value );
+   assert( ret == 0 );
+}
+
+
+
+/**
+ * Clear the bound framebuffer surfaces to the specified values.
+ * No masking, no scissor (clear entire buffer).
+ *
+ * \param pipe     the context
+ * \param buffers  bitmask; PIPE_CLEAR_COLOR clears every bound colour
+ *                 buffer, PIPE_CLEAR_DEPTHSTENCIL clears the bound
+ *                 depth/stencil buffer when there is one
+ * \param rgba     clear colour, used for the colour buffers
+ * \param depth    clear depth
+ * \param stencil  clear stencil
+ */
+static void brw_clear(struct pipe_context *pipe,
+                      unsigned buffers,
+                      const float *rgba,
+                      double depth,
+                      unsigned stencil)
+{
+   struct brw_context *brw = brw_context( pipe );
+   /* Unsigned to match the colour-buffer count it is compared against. */
+   unsigned i;
+
+   if (buffers & PIPE_CLEAR_COLOR) {
+      for (i = 0; i < brw->curr.fb.nr_cbufs; i++) {
+         color_clear( brw,
+                      brw_surface(brw->curr.fb.cbufs[i]),
+                      rgba );
+      }
+   }
+
+   if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
+      if (brw->curr.fb.zsbuf) {
+         zstencil_clear( brw,
+                         brw_surface(brw->curr.fb.zsbuf),
+                         depth, stencil );
+      }
+   }
+}
+
+
+void brw_pipe_clear_init( struct brw_context *brw )
+{ /* Installs brw_clear() as the context's clear callback. */
+ brw->base.clear = brw_clear;
+}
+
+
+void brw_pipe_clear_cleanup( struct brw_context *brw )
+{ /* Counterpart of brw_pipe_clear_init(); intentionally empty. */
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c
new file mode 100644
index 00000000000..e010d76e0d3
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_depth.c
@@ -0,0 +1,172 @@
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+
+/* XXX: Fixme - include this to get IZ_ defines
+ */
+#include "brw_wm.h"
+
+ /* Map a PIPE_FUNC_* comparison to the matching BRW_COMPAREFUNCTION_* code.
+  * Unrecognised values assert and fall back to ALWAYS.
+  */
+ static unsigned brw_translate_compare_func(unsigned func)
+ {
+    switch (func) {
+    case PIPE_FUNC_NEVER:    return BRW_COMPAREFUNCTION_NEVER;
+    case PIPE_FUNC_LESS:     return BRW_COMPAREFUNCTION_LESS;
+    case PIPE_FUNC_LEQUAL:   return BRW_COMPAREFUNCTION_LEQUAL;
+    case PIPE_FUNC_GREATER:  return BRW_COMPAREFUNCTION_GREATER;
+    case PIPE_FUNC_GEQUAL:   return BRW_COMPAREFUNCTION_GEQUAL;
+    case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL;
+    case PIPE_FUNC_EQUAL:    return BRW_COMPAREFUNCTION_EQUAL;
+    case PIPE_FUNC_ALWAYS:   return BRW_COMPAREFUNCTION_ALWAYS;
+    default:
+       break;
+    }
+
+    assert(0);
+    return BRW_COMPAREFUNCTION_ALWAYS;
+ }
+
+ /* Map a PIPE_STENCIL_OP_* value to the matching BRW_STENCILOP_* code.
+  * The gallium INCR/DECR ops map to the hardware's saturating variants and
+  * the *_WRAP ops to the plain INCR/DECR codes.  Unrecognised values
+  * assert and fall back to ZERO.
+  */
+ static unsigned translate_stencil_op(unsigned op)
+ {
+    switch (op) {
+    case PIPE_STENCIL_OP_KEEP:      return BRW_STENCILOP_KEEP;
+    case PIPE_STENCIL_OP_ZERO:      return BRW_STENCILOP_ZERO;
+    case PIPE_STENCIL_OP_REPLACE:   return BRW_STENCILOP_REPLACE;
+    case PIPE_STENCIL_OP_INCR:      return BRW_STENCILOP_INCRSAT;
+    case PIPE_STENCIL_OP_DECR:      return BRW_STENCILOP_DECRSAT;
+    case PIPE_STENCIL_OP_INCR_WRAP: return BRW_STENCILOP_INCR;
+    case PIPE_STENCIL_OP_DECR_WRAP: return BRW_STENCILOP_DECR;
+    case PIPE_STENCIL_OP_INVERT:    return BRW_STENCILOP_INVERT;
+    default:
+       break;
+    }
+
+    assert(0);
+    return BRW_STENCILOP_ZERO;
+ }
+
+ /* Fill in the color-calculator (cc0..cc7) fields of zstencil from the
+  * gallium depth/stencil/alpha template.  Fields belonging to a disabled
+  * test are left untouched.
+  */
+ static void create_bcc_state( struct brw_depth_stencil_state *zstencil,
+                               const struct pipe_depth_stencil_alpha_state *templ )
+ {
+    /* Depth test:
+     */
+    if (templ->depth.enabled) {
+       zstencil->cc2.depth_test = 1;
+       zstencil->cc2.depth_test_function = brw_translate_compare_func(templ->depth.func);
+       zstencil->cc2.depth_write_enable = templ->depth.writemask;
+    }
+
+    /* Alpha test:
+     */
+    if (templ->alpha.enabled) {
+       zstencil->cc3.alpha_test = 1;
+       zstencil->cc3.alpha_test_func = brw_translate_compare_func(templ->alpha.func);
+       zstencil->cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+       zstencil->cc7.alpha_ref.ub[0] = float_to_ubyte(templ->alpha.ref_value);
+    }
+
+    /* Stencil test.  Back-face state is only looked at when the front-face
+     * test is enabled.
+     */
+    if (!templ->stencil[0].enabled)
+       return;
+
+    zstencil->cc0.stencil_enable = 1;
+    zstencil->cc0.stencil_func =
+       brw_translate_compare_func(templ->stencil[0].func);
+    zstencil->cc0.stencil_fail_op =
+       translate_stencil_op(templ->stencil[0].fail_op);
+    zstencil->cc0.stencil_pass_depth_fail_op =
+       translate_stencil_op(templ->stencil[0].zfail_op);
+    zstencil->cc0.stencil_pass_depth_pass_op =
+       translate_stencil_op(templ->stencil[0].zpass_op);
+    zstencil->cc1.stencil_ref = templ->stencil[0].ref_value;
+    zstencil->cc1.stencil_write_mask = templ->stencil[0].writemask;
+    zstencil->cc1.stencil_test_mask = templ->stencil[0].valuemask;
+
+    if (templ->stencil[1].enabled) {
+       zstencil->cc0.bf_stencil_enable = 1;
+       zstencil->cc0.bf_stencil_func =
+          brw_translate_compare_func(templ->stencil[1].func);
+       zstencil->cc0.bf_stencil_fail_op =
+          translate_stencil_op(templ->stencil[1].fail_op);
+       zstencil->cc0.bf_stencil_pass_depth_fail_op =
+          translate_stencil_op(templ->stencil[1].zfail_op);
+       zstencil->cc0.bf_stencil_pass_depth_pass_op =
+          translate_stencil_op(templ->stencil[1].zpass_op);
+       zstencil->cc1.bf_stencil_ref = templ->stencil[1].ref_value;
+       zstencil->cc2.bf_stencil_write_mask = templ->stencil[1].writemask;
+       zstencil->cc2.bf_stencil_test_mask = templ->stencil[1].valuemask;
+    }
+
+    /* Writes are enabled if either face has a non-zero write mask. */
+    zstencil->cc0.stencil_write_enable = (zstencil->cc1.stencil_write_mask ||
+                                          zstencil->cc2.bf_stencil_write_mask);
+ }
+
+ /* Derive the IZ_* lookup bits from the cc state already stored in
+  * zstencil, and OR them into zstencil->iz_lookup.
+  */
+ static void create_wm_iz_state( struct brw_depth_stencil_state *zstencil )
+ {
+    unsigned bits = 0;
+
+    if (zstencil->cc3.alpha_test)
+       bits |= IZ_PS_KILL_ALPHATEST_BIT;
+
+    if (zstencil->cc2.depth_test)
+       bits |= IZ_DEPTH_TEST_ENABLE_BIT;
+
+    if (zstencil->cc2.depth_write_enable)
+       bits |= IZ_DEPTH_WRITE_ENABLE_BIT;
+
+    if (zstencil->cc0.stencil_enable)
+       bits |= IZ_STENCIL_TEST_ENABLE_BIT;
+
+    if (zstencil->cc0.stencil_write_enable)
+       bits |= IZ_STENCIL_WRITE_ENABLE_BIT;
+
+    zstencil->iz_lookup |= bits;
+ }
+
+
+ /**
+  * Create a depth/stencil/alpha state object from the gallium template.
+  *
+  * Returns the new object (to be released with the matching delete hook),
+  * or NULL if the allocation fails.
+  */
+ static void *
+ brw_create_depth_stencil_state( struct pipe_context *pipe,
+                                 const struct pipe_depth_stencil_alpha_state *templ )
+ {
+    struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state);
+
+    /* Both helpers below write through the pointer unconditionally, so an
+     * allocation failure must be caught here.
+     */
+    if (zstencil == NULL)
+       return NULL;
+
+    create_bcc_state( zstencil, templ );
+    create_wm_iz_state( zstencil );
+
+    return (void *)zstencil;
+ }
+
+
+ /* Make cso the current depth/stencil/alpha state and flag it dirty. */
+ static void brw_bind_depth_stencil_state(struct pipe_context *pipe,
+                                          void *cso)
+ {
+    struct brw_context *brw = brw_context(pipe);
+
+    brw->state.dirty.mesa |= PIPE_NEW_DEPTH_STENCIL_ALPHA;
+    brw->curr.zstencil = (const struct brw_depth_stencil_state *)cso;
+ }
+
+ /* Free a depth/stencil/alpha state object.  The object must not be the
+  * one currently bound (checked by assert only).
+  */
+ static void brw_delete_depth_stencil_state(struct pipe_context *pipe,
+                                            void *cso)
+ {
+    assert((const void *)cso !=
+           (const void *)brw_context(pipe)->curr.zstencil);
+
+    FREE(cso);
+ }
+
+
+ /* Install the depth/stencil/alpha CSO entry points on the pipe context. */
+ void brw_pipe_depth_stencil_init( struct brw_context *brw )
+ {
+    struct pipe_context *pipe = &brw->base;
+
+    pipe->create_depth_stencil_alpha_state = brw_create_depth_stencil_state;
+    pipe->bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state;
+    pipe->delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state;
+ }
+
+ /* Counterpart of brw_pipe_depth_stencil_init(); currently a no-op. */
+ void brw_pipe_depth_stencil_cleanup( struct brw_context *brw )
+ {
+    (void) brw;
+ }
diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c
new file mode 100644
index 00000000000..6b03094f502
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_fb.c
@@ -0,0 +1,77 @@
+#include "util/u_math.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "brw_context.h"
+
+/**
+ * called from intelDrawBuffer()
+ */
+static void brw_set_framebuffer_state( struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb )
+{
+ struct brw_context *brw = brw_context(pipe);
+ unsigned i;
+
+ /* Dimensions:
+ */
+ if (brw->curr.fb.width != fb->width ||
+ brw->curr.fb.height != fb->height) {
+ brw->curr.fb.width = fb->width;
+ brw->curr.fb.height = fb->height;
+ brw->state.dirty.mesa |= PIPE_NEW_FRAMEBUFFER_DIMENSIONS;
+ }
+
+ /* Z/Stencil
+ */
+ if (brw->curr.fb.zsbuf != fb->zsbuf) {
+ pipe_surface_reference(&brw->curr.fb.zsbuf, fb->zsbuf);
+ brw->state.dirty.mesa |= PIPE_NEW_DEPTH_BUFFER;
+ }
+
+ /* Color buffers:
+ */
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ if (brw->curr.fb.cbufs[i] != fb->cbufs[i]) {
+ brw->state.dirty.mesa |= PIPE_NEW_COLOR_BUFFERS;
+ pipe_surface_reference(&brw->curr.fb.cbufs[i], fb->cbufs[i]);
+ }
+ }
+
+ if (brw->curr.fb.nr_cbufs != fb->nr_cbufs) {
+ brw->curr.fb.nr_cbufs = MIN2(BRW_MAX_DRAW_BUFFERS, fb->nr_cbufs);
+ brw->state.dirty.mesa |= PIPE_NEW_NR_CBUFS;
+ }
+}
+
+
+ /* Store a copy of the viewport and flag it dirty.  The depth range kept
+  * in curr.ccv is currently hard-wired to [0, 1].
+  */
+ static void brw_set_viewport_state( struct pipe_context *pipe,
+                                     const struct pipe_viewport_state *viewport )
+ {
+    struct brw_context *brw = brw_context(pipe);
+
+    brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT;
+
+    brw->curr.ccv.min_depth = 0.0; /* XXX: near */
+    brw->curr.ccv.max_depth = 1.0; /* XXX: far */
+    brw->curr.viewport = *viewport;
+ }
+
+
+ /* Install the framebuffer and viewport entry points on the pipe context. */
+ void brw_pipe_framebuffer_init( struct brw_context *brw )
+ {
+    struct pipe_context *pipe = &brw->base;
+
+    pipe->set_framebuffer_state = brw_set_framebuffer_state;
+    pipe->set_viewport_state = brw_set_viewport_state;
+ }
+
+ /* Drop every surface reference held by the current framebuffer state. */
+ void brw_pipe_framebuffer_cleanup( struct brw_context *brw )
+ {
+    int i = 0;
+
+    while (i < PIPE_MAX_COLOR_BUFS) {
+       pipe_surface_reference(&brw->curr.fb.cbufs[i], NULL);
+       i++;
+    }
+
+    pipe_surface_reference(&brw->curr.fb.zsbuf, NULL);
+ }
diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
new file mode 100644
index 00000000000..fdc4814b221
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -0,0 +1,83 @@
+
+#include "util/u_upload_mgr.h"
+
+#include "brw_context.h"
+#include "brw_screen.h"
+#include "brw_batchbuffer.h"
+
+
+
+ /* All batchbuffer flushes must go through this function.
+  *
+  * Emits the ending query packet (if a query is active), flushes the
+  * vertex/index upload managers and the batchbuffer, then marks all state
+  * dirty.
+  */
+ void brw_context_flush( struct brw_context *brw )
+ {
+    brw_emit_query_end(brw);
+
+    /* Move to the end of the current upload buffer so that we'll force choosing
+     * a new buffer next time.
+     */
+    u_upload_flush( brw->vb.upload_vertex );
+    u_upload_flush( brw->vb.upload_index );
+
+    _brw_batchbuffer_flush( brw->batch, __FILE__, __LINE__ );
+
+    brw->curbe.need_new_bo = GL_TRUE;
+
+    /* Mark all context state as needing to be re-emitted.
+     * This is probably not as severe as on 915, since almost all of our state
+     * is just in referenced buffers.
+     *
+     * Setting every bit of dirty.brw also covers BRW_NEW_CONTEXT.
+     */
+    brw->state.dirty.mesa |= ~0;
+    brw->state.dirty.brw |= ~0;
+    brw->state.dirty.cache |= ~0;
+ }
+
+ /* pipe_context flush hook: flush the batch.  No fence is produced; when
+  * a fence pointer is supplied it is set to NULL.  flags is ignored.
+  */
+ static void
+ brw_flush( struct pipe_context *pipe,
+            unsigned flags,
+            struct pipe_fence_handle **fence )
+ {
+    struct brw_context *brw = brw_context( pipe );
+
+    brw_context_flush( brw );
+
+    if (fence != NULL)
+       *fence = NULL;
+ }
+
+ /* Ask the screen whether buffer is referenced by the current batch BO. */
+ static unsigned brw_is_buffer_referenced(struct pipe_context *pipe,
+                                          struct pipe_buffer *buffer)
+ {
+    struct brw_context *brw = brw_context(pipe);
+
+    return brw_is_buffer_referenced_by_bo( brw_screen(brw->base.screen),
+                                           buffer,
+                                           brw->batch->buf );
+ }
+
+ /* Ask the screen whether the given face/level of texture is referenced by
+  * the current batch BO.
+  */
+ static unsigned brw_is_texture_referenced(struct pipe_context *pipe,
+                                           struct pipe_texture *texture,
+                                           unsigned face,
+                                           unsigned level)
+ {
+    struct brw_context *brw = brw_context(pipe);
+
+    return brw_is_texture_referenced_by_bo( brw_screen(brw->base.screen),
+                                            texture, face, level,
+                                            brw->batch->buf );
+ }
+
+ /* Install the flush and is-referenced entry points on the pipe context. */
+ void brw_pipe_flush_init( struct brw_context *brw )
+ {
+    struct pipe_context *pipe = &brw->base;
+
+    pipe->flush = brw_flush;
+    pipe->is_buffer_referenced = brw_is_buffer_referenced;
+    pipe->is_texture_referenced = brw_is_texture_referenced;
+ }
+
+
+ /* Counterpart of brw_pipe_flush_init(); currently a no-op. */
+ void brw_pipe_flush_cleanup( struct brw_context *brw )
+ {
+    (void) brw;
+ }
diff --git a/src/gallium/drivers/i965/brw_pipe_misc.c b/src/gallium/drivers/i965/brw_pipe_misc.c
new file mode 100644
index 00000000000..30359078079
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_misc.c
@@ -0,0 +1,54 @@
+
+#include "brw_context.h"
+#include "brw_structs.h"
+#include "brw_defines.h"
+
+ /* Rebuild the polygon stipple packet in brw->curr.bps from the gallium
+  * stipple pattern and flag it dirty.
+  */
+ static void brw_set_polygon_stipple( struct pipe_context *pipe,
+                                      const struct pipe_poly_stipple *stip )
+ {
+    struct brw_context *brw = brw_context(pipe);
+    struct brw_polygon_stipple *packet = &brw->curr.bps;
+    GLuint row;
+
+    memset(packet, 0, sizeof *packet);
+
+    /* Packet length is in dwords, minus two. */
+    packet->header.opcode = CMD_POLY_STIPPLE_PATTERN;
+    packet->header.length = sizeof *packet/4-2;
+
+    for (row = 0; row < 32; row++) {
+       packet->stipple[row] = stip->stipple[row]; /* don't invert */
+    }
+
+    brw->state.dirty.mesa |= PIPE_NEW_POLYGON_STIPPLE;
+ }
+
+
+ /* Store a copy of the scissor rectangle and flag it dirty. */
+ static void brw_set_scissor_state( struct pipe_context *pipe,
+                                    const struct pipe_scissor_state *scissor )
+ {
+    struct brw_context *brw = brw_context(pipe);
+
+    brw->state.dirty.mesa |= PIPE_NEW_SCISSOR;
+    brw->curr.scissor = *scissor;
+ }
+
+
+ /* Store a copy of the user clip state and flag it dirty. */
+ static void brw_set_clip_state( struct pipe_context *pipe,
+                                 const struct pipe_clip_state *clip )
+ {
+    struct brw_context *brw = brw_context(pipe);
+
+    brw->state.dirty.mesa |= PIPE_NEW_CLIP;
+    brw->curr.ucp = *clip;
+ }
+
+
+ /* Install the stipple, scissor and clip entry points on the pipe context. */
+ void brw_pipe_misc_init( struct brw_context *brw )
+ {
+    struct pipe_context *pipe = &brw->base;
+
+    pipe->set_polygon_stipple = brw_set_polygon_stipple;
+    pipe->set_scissor_state = brw_set_scissor_state;
+    pipe->set_clip_state = brw_set_clip_state;
+ }
+
+
+ /* Counterpart of brw_pipe_misc_init(); currently a no-op. */
+ void brw_pipe_misc_cleanup( struct brw_context *brw )
+ {
+    (void) brw;
+ }
diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c
new file mode 100644
index 00000000000..2eb862635cc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_query.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file support for ARB_query_object
+ *
+ * ARB_query_object is implemented by using the PIPE_CONTROL command to stall
+ * execution on the completion of previous depth tests, and write the
+ * current PS_DEPTH_COUNT to a buffer object.
+ *
+ * We use before and after counts when drawing during a query so that
+ * we don't pick up other clients' query data in ours. To reduce overhead,
+ * a single BO is used to record the query data for all active queries at
+ * once. This also gives us a simple bound on how much batchbuffer space is
+ * required for handling queries, so that we can be sure that we won't
+ * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT.
+ */
+#include "util/u_simple_list.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+#include "brw_reg.h"
+
+ /**
+  * Total the results for this query.
+  *
+  * If the query still holds a BO, the BO is mapped, every (begin, end)
+  * counter pair between first_index and last_index is accumulated into
+  * query->result, and the BO reference is dropped.  Returns FALSE without
+  * touching *result when the BO is busy and wait is not set, or when the
+  * map fails; otherwise stores the running total in *result and returns
+  * TRUE.
+  */
+ static boolean
+ brw_query_get_result(struct pipe_context *pipe,
+                      struct pipe_query *q,
+                      boolean wait,
+                      uint64_t *result)
+ {
+    struct brw_context *brw = brw_context(pipe);
+    struct brw_query_object *query = (struct brw_query_object *)q;
+
+    /* Map and count the pixels from the current query BO */
+    if (query->bo != NULL) {
+       uint64_t *counts;
+       int slot;
+
+       if (brw->sws->bo_is_busy(query->bo) && !wait)
+          return FALSE;
+
+       counts = bo_map_read(brw->sws, query->bo);
+       if (counts == NULL)
+          return FALSE;
+
+       /* Each slot holds two values: the count at begin, then at end. */
+       for (slot = query->first_index; slot <= query->last_index; slot++)
+          query->result += counts[slot * 2 + 1] - counts[slot * 2];
+
+       brw->sws->bo_unmap(query->bo);
+       bo_reference(&query->bo, NULL);
+    }
+
+    *result = query->result;
+    return TRUE;
+ }
+
+ /* Allocate a query object.  Only PIPE_QUERY_OCCLUSION_COUNTER is
+  * supported; any other type, or an allocation failure, yields NULL.
+  */
+ static struct pipe_query *
+ brw_query_create(struct pipe_context *pipe, unsigned type )
+ {
+    if (type != PIPE_QUERY_OCCLUSION_COUNTER)
+       return NULL;
+
+    /* A NULL allocation result is passed straight through to the caller. */
+    return (struct pipe_query *)CALLOC_STRUCT( brw_query_object );
+ }
+
+ /* Drop the query's BO reference and free the query object. */
+ static void
+ brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q)
+ {
+    struct brw_query_object *obj = (struct brw_query_object *)q;
+
+    bo_reference(&obj->bo, NULL);
+    FREE(obj);
+ }
+
+ /* Start a query: reset its accumulated state, put it on the context's
+  * active list and flag the query state dirty.
+  */
+ static void
+ brw_query_begin(struct pipe_context *pipe, struct pipe_query *q)
+ {
+    struct brw_context *brw = brw_context(pipe);
+    struct brw_query_object *query = (struct brw_query_object *)q;
+
+    /* Reset our driver's tracking of query state. */
+    bo_reference(&query->bo, NULL);
+    query->first_index = -1;
+    query->last_index = -1;
+    query->result = 0;
+
+    insert_at_head(&brw->query.active_head, query);
+    brw->query.stats_wm++;
+
+    brw->state.dirty.mesa |= PIPE_NEW_QUERY;
+ }
+
+ /* End a query: flush any pending writes to its BO, take it off the
+  * active list and flag the query state dirty.
+  */
+ static void
+ brw_query_end(struct pipe_context *pipe, struct pipe_query *q)
+ {
+    struct brw_context *brw = brw_context(pipe);
+    struct brw_query_object *query = (struct brw_query_object *)q;
+
+    /* Flush the batchbuffer in case it has writes to our query BO.
+     * Have later queries write to a new query BO so that further rendering
+     * doesn't delay the collection of our results.
+     */
+    if (query->bo != NULL) {
+       brw_emit_query_end(brw);
+       brw_context_flush( brw );
+
+       bo_reference(&brw->query.bo, NULL);
+    }
+
+    remove_from_list(query);
+    brw->query.stats_wm--;
+
+    brw->state.dirty.mesa |= PIPE_NEW_QUERY;
+ }
+
+/***********************************************************************
+ * Internal functions and callbacks to implement queries
+ */
+
+ /**
+  * Called to set up the query BO and account for its aperture space.
+  *
+  * Does nothing when no query is active.  Otherwise makes sure
+  * brw->query.bo exists and has room for another counter pair (allocating
+  * a fresh 4096-byte BO and resetting the index if not), then adds it to
+  * the validated BO list.  Returns PIPE_OK or the allocation error.
+  */
+ enum pipe_error
+ brw_prepare_query_begin(struct brw_context *brw)
+ {
+    /* Skip if we're not doing any queries. */
+    if (is_empty_list(&brw->query.active_head))
+       return PIPE_OK;
+
+    /* Get a new query BO if we're going to need it. */
+    if (brw->query.bo == NULL ||
+        brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
+       enum pipe_error ret;
+
+       ret = brw->sws->bo_alloc(brw->sws, BRW_BUFFER_TYPE_QUERY, 4096, 1,
+                                &brw->query.bo);
+       if (ret)
+          return ret;
+
+       brw->query.index = 0;
+    }
+
+    brw_add_validated_bo(brw, brw->query.bo);
+
+    return PIPE_OK;
+ }
+
+ /**
+  * Called just before primitive drawing to get a beginning PS_DEPTH_COUNT.
+  *
+  * Emits a PIPE_CONTROL that writes the depth count into the "begin" half
+  * of the current slot of brw->query.bo, then points every active query at
+  * that BO/slot.  Does nothing if no query is active or the begin packet
+  * has already been emitted.
+  */
+ void
+ brw_emit_query_begin(struct brw_context *brw)
+ {
+    struct brw_query_object *query;
+
+    /* Skip if we're not doing any queries, or we've emitted the start. */
+    if (brw->query.active || is_empty_list(&brw->query.active_head))
+       return;
+
+    BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+    OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+              PIPE_CONTROL_DEPTH_STALL |
+              PIPE_CONTROL_WRITE_DEPTH_COUNT);
+    /* This object could be mapped cacheable, but we don't have an exposed
+     * mechanism to support that. Since it's going uncached, tell GEM that
+     * we're writing to it. The usual clflush should be all that's required
+     * to pick up the results.
+     */
+    OUT_RELOC(brw->query.bo,
+              BRW_USAGE_QUERY_RESULT,
+              PIPE_CONTROL_GLOBAL_GTT_WRITE |
+              ((brw->query.index * 2) * sizeof(uint64_t)));
+    OUT_BATCH(0);
+    OUT_BATCH(0);
+    ADVANCE_BATCH();
+
+    foreach(query, &brw->query.active_head) {
+       if (query->bo != brw->query.bo) {
+          uint64_t tmp;
+
+          /* Propagate the results from this buffer to all of the
+           * active queries, as the bo is going away.
+           *
+           * This must wait for the old buffer: a non-blocking read that
+           * finds it busy returns without accumulating anything, and the
+           * counts would be lost once the reference is replaced below.
+           */
+          if (query->bo != NULL)
+             brw_query_get_result( &brw->base,
+                                   (struct pipe_query *)query,
+                                   TRUE,
+                                   &tmp );
+
+          bo_reference( &query->bo, brw->query.bo );
+          query->first_index = brw->query.index;
+       }
+       query->last_index = brw->query.index;
+    }
+    brw->query.active = GL_TRUE;
+ }
+
+ /**
+  * Called at batchbuffer flush to get an ending PS_DEPTH_COUNT.
+  *
+  * When a begin packet is outstanding, emits a PIPE_CONTROL that writes
+  * the depth count into the "end" half of the current slot, then moves on
+  * to the next slot.
+  */
+ void
+ brw_emit_query_end(struct brw_context *brw)
+ {
+    if (brw->query.active) {
+       BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+       OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+                 PIPE_CONTROL_DEPTH_STALL |
+                 PIPE_CONTROL_WRITE_DEPTH_COUNT);
+       OUT_RELOC(brw->query.bo,
+                 BRW_USAGE_QUERY_RESULT,
+                 PIPE_CONTROL_GLOBAL_GTT_WRITE |
+                 ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
+       OUT_BATCH(0);
+       OUT_BATCH(0);
+       ADVANCE_BATCH();
+
+       brw->query.active = GL_FALSE;
+       brw->query.index++;
+    }
+ }
+
+ /* Install the query entry points on the pipe context. */
+ void brw_pipe_query_init( struct brw_context *brw )
+ {
+    struct pipe_context *pipe = &brw->base;
+
+    pipe->create_query = brw_query_create;
+    pipe->destroy_query = brw_query_destroy;
+    pipe->begin_query = brw_query_begin;
+    pipe->end_query = brw_query_end;
+    pipe->get_query_result = brw_query_get_result;
+ }
+
+
+ /* Release the context's reference on the shared query BO, if any.
+  *
+  * brw_prepare_query_begin() stores an allocated BO in brw->query.bo and
+  * nothing else releases it once the last query has ended without a BO.
+  * NOTE(review): assumes this runs at context teardown, after the last
+  * batch that could reference the BO has been flushed -- confirm against
+  * the context destroy path.
+  */
+ void brw_pipe_query_cleanup( struct brw_context *brw )
+ {
+    bo_reference(&brw->query.bo, NULL);
+ }
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c
new file mode 100644
index 00000000000..2117e91a9e4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_rast.c
@@ -0,0 +1,161 @@
+
+#include "util/u_memory.h"
+#include "pipe/p_defines.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_pipe_rast.h"
+#include "brw_wm.h"
+
+
+ /* Map a PIPE_POLYGON_MODE_* value to the clipper's CLIP_* fill mode.
+  * Unrecognised values assert and fall back to CLIP_FILL.
+  */
+ static unsigned translate_fill( unsigned fill )
+ {
+    switch (fill) {
+    case PIPE_POLYGON_MODE_FILL:  return CLIP_FILL;
+    case PIPE_POLYGON_MODE_LINE:  return CLIP_LINE;
+    case PIPE_POLYGON_MODE_POINT: return CLIP_POINT;
+    default:
+       break;
+    }
+
+    assert(0);
+    return CLIP_FILL;
+ }
+
+
+ /* Calculates the key for triangle-mode clipping.  Non-triangle
+  * clipping keys use much less information and are computed on the
+  * fly.
+  *
+  * The key is zeroed first; when both windings are culled only the clip
+  * mode and flat-shading flag are filled in.  The rast argument is not
+  * used.
+  */
+ static void
+ calculate_clip_key_rast( const struct brw_context *brw,
+                          const struct pipe_rasterizer_state *templ,
+                          const struct brw_rasterizer_state *rast,
+                          struct brw_clip_prog_key *key)
+ {
+    memset(key, 0, sizeof *key);
+
+    key->clip_mode = brw->chipset.is_igdng ? BRW_CLIPMODE_KERNEL_CLIP
+                                           : BRW_CLIPMODE_NORMAL;
+
+    key->do_flat_shading = templ->flatshade;
+
+    /* Everything culled: reject all triangles, nothing else matters. */
+    if (templ->cull_mode == PIPE_WINDING_BOTH) {
+       key->clip_mode = BRW_CLIPMODE_REJECT_ALL;
+       return;
+    }
+
+    /* A culled winding gets CLIP_CULL, otherwise its translated fill mode. */
+    if (templ->cull_mode & PIPE_WINDING_CCW)
+       key->fill_ccw = CLIP_CULL;
+    else
+       key->fill_ccw = translate_fill(templ->fill_ccw);
+
+    if (templ->cull_mode & PIPE_WINDING_CW)
+       key->fill_cw = CLIP_CULL;
+    else
+       key->fill_cw = translate_fill(templ->fill_cw);
+
+    /* Any line or point fill mode switches to the unfilled path. */
+    if (key->fill_cw == CLIP_LINE ||
+        key->fill_ccw == CLIP_LINE ||
+        key->fill_cw == CLIP_POINT ||
+        key->fill_ccw == CLIP_POINT) {
+       key->do_unfilled = 1;
+       key->clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
+    }
+
+    key->offset_ccw = templ->offset_ccw;
+    key->offset_cw = templ->offset_cw;
+
+    /* Two-sided lighting: copy back-face colors for non-culled windings. */
+    if (templ->light_twoside) {
+       if (key->fill_cw != CLIP_CULL)
+          key->copy_bfc_cw = 1;
+
+       if (key->fill_ccw != CLIP_CULL)
+          key->copy_bfc_ccw = 1;
+    }
+ }
+
+
+ /* Build the line stipple packet from the rasterizer template.  The repeat
+  * count is line_stipple_factor + 1; its reciprocal is stored scaled by
+  * 1<<13.
+  */
+ static void
+ calculate_line_stipple_rast( const struct pipe_rasterizer_state *templ,
+                              struct brw_line_stipple *bls )
+ {
+    GLfloat inv_repeat = 1.0f / (templ->line_stipple_factor + 1);
+    GLint inv_repeat_fixed = inv_repeat * (1<<13);
+
+    /* Packet length is in dwords, minus two. */
+    bls->header.opcode = CMD_LINE_STIPPLE_PATTERN;
+    bls->header.length = sizeof(*bls)/4 - 2;
+
+    bls->bits0.pattern = templ->line_stipple_pattern;
+
+    bls->bits1.repeat_count = templ->line_stipple_factor + 1;
+    bls->bits1.inverse_repeat_count = inv_repeat_fixed;
+ }
+
+ /* Create a rasterizer state object: keep a copy of the template and
+  * precalculate the clip key, the line stipple packet (when stippling is
+  * enabled) and the unfilled-AA-line value.  Returns NULL if the
+  * allocation fails.
+  */
+ static void *brw_create_rasterizer_state( struct pipe_context *pipe,
+                                           const struct pipe_rasterizer_state *templ )
+ {
+    struct brw_context *brw = brw_context(pipe);
+    struct brw_rasterizer_state *rast = CALLOC_STRUCT(brw_rasterizer_state);
+
+    if (rast == NULL)
+       return NULL;
+
+    rast->templ = *templ;
+
+    calculate_clip_key_rast( brw, templ, rast, &rast->clip_key );
+
+    if (templ->line_stipple_enable)
+       calculate_line_stipple_rast( templ, &rast->bls );
+
+    /* Calculate lookup value for WM IZ table.
+     *
+     * With line smoothing on: ALWAYS when both windings are drawn as
+     * lines, SOMETIMES when exactly one is, NEVER otherwise.
+     */
+    rast->unfilled_aa_line = AA_NEVER;
+
+    if (templ->line_smooth) {
+       const int cw_lines = (templ->fill_cw == PIPE_POLYGON_MODE_LINE);
+       const int ccw_lines = (templ->fill_ccw == PIPE_POLYGON_MODE_LINE);
+
+       if (cw_lines && ccw_lines)
+          rast->unfilled_aa_line = AA_ALWAYS;
+       else if (cw_lines || ccw_lines)
+          rast->unfilled_aa_line = AA_SOMETIMES;
+    }
+
+    return (void *)rast;
+ }
+
+
+ /* Make cso the current rasterizer state and flag it dirty. */
+ static void brw_bind_rasterizer_state(struct pipe_context *pipe,
+                                       void *cso)
+ {
+    struct brw_context *brw = brw_context(pipe);
+
+    brw->state.dirty.mesa |= PIPE_NEW_RAST;
+    brw->curr.rast = (const struct brw_rasterizer_state *)cso;
+ }
+
+ /* Free a rasterizer state object.  The object must not be the one
+  * currently bound (checked by assert only).
+  */
+ static void brw_delete_rasterizer_state(struct pipe_context *pipe,
+                                         void *cso)
+ {
+    assert((const void *)cso !=
+           (const void *)brw_context(pipe)->curr.rast);
+
+    FREE(cso);
+ }
+
+
+
+ /* Install the rasterizer CSO entry points on the pipe context. */
+ void brw_pipe_rast_init( struct brw_context *brw )
+ {
+    struct pipe_context *pipe = &brw->base;
+
+    pipe->create_rasterizer_state = brw_create_rasterizer_state;
+    pipe->bind_rasterizer_state = brw_bind_rasterizer_state;
+    pipe->delete_rasterizer_state = brw_delete_rasterizer_state;
+ }
+
+ /* Counterpart of brw_pipe_rast_init(); currently a no-op. */
+ void brw_pipe_rast_cleanup( struct brw_context *brw )
+ {
+    (void) brw;
+ }
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.h b/src/gallium/drivers/i965/brw_pipe_rast.h
new file mode 100644
index 00000000000..9354f01e18a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_rast.h
@@ -0,0 +1,16 @@
+#ifndef BRW_PIPE_RAST_H
+#define BRW_PIPE_RAST_H
+
+#include "brw_clip.h"
+
+ struct brw_rasterizer_state {
+ struct pipe_rasterizer_state templ; /* for draw module */
+
+ /* Precalculated hardware state:
+ */
+ struct brw_clip_prog_key clip_key; /* triangle-mode clip program key */
+ struct brw_line_stipple bls; /* line stipple packet; only filled in when the template enables line stippling */
+ unsigned unfilled_aa_line; /* AA_NEVER, AA_SOMETIMES or AA_ALWAYS */
+ };
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c
new file mode 100644
index 00000000000..5ddc63f57ec
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_sampler.c
@@ -0,0 +1,233 @@
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+
+
+ /* Map a PIPE_TEX_WRAP_* mode to the hardware BRW_TEXCOORDMODE_* code.
+  *
+  * The brw (and related graphics cores) do not support GL_CLAMP.  The
+  * Intel drivers for "other operating systems" implement GL_CLAMP as
+  * GL_CLAMP_TO_EDGE, so the same is done here.  All three mirror-clamp
+  * variants map to MIRROR_ONCE; unrecognised values fall back to WRAP.
+  */
+ static GLuint translate_wrap_mode( unsigned wrap )
+ {
+    switch( wrap ) {
+    case PIPE_TEX_WRAP_REPEAT:
+       return BRW_TEXCOORDMODE_WRAP;
+
+    case PIPE_TEX_WRAP_CLAMP:
+    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+       return BRW_TEXCOORDMODE_CLAMP;
+
+    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+       return BRW_TEXCOORDMODE_CLAMP_BORDER;
+
+    case PIPE_TEX_WRAP_MIRROR_REPEAT:
+       return BRW_TEXCOORDMODE_MIRROR;
+
+    case PIPE_TEX_WRAP_MIRROR_CLAMP:
+    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+       return BRW_TEXCOORDMODE_MIRROR_ONCE;
+
+    default:
+       break;
+    }
+
+    return BRW_TEXCOORDMODE_WRAP;
+ }
+
+ /* Map a PIPE_TEX_FILTER_* value to the hardware BRW_MAPFILTER_* code.
+  * Unrecognised values assert and fall back to NEAREST.
+  */
+ static GLuint translate_img_filter( unsigned filter )
+ {
+    switch (filter) {
+    case PIPE_TEX_FILTER_NEAREST: return BRW_MAPFILTER_NEAREST;
+    case PIPE_TEX_FILTER_LINEAR:  return BRW_MAPFILTER_LINEAR;
+    case PIPE_TEX_FILTER_ANISO:   return BRW_MAPFILTER_ANISOTROPIC;
+    default:
+       break;
+    }
+
+    assert(0);
+    return BRW_MAPFILTER_NEAREST;
+ }
+
+ /* Map a PIPE_TEX_MIPFILTER_* value to the hardware BRW_MIPFILTER_* code.
+  * Unrecognised values assert and fall back to NONE.
+  */
+ static GLuint translate_mip_filter( unsigned filter )
+ {
+    switch (filter) {
+    case PIPE_TEX_MIPFILTER_NONE:    return BRW_MIPFILTER_NONE;
+    case PIPE_TEX_MIPFILTER_NEAREST: return BRW_MIPFILTER_NEAREST;
+    case PIPE_TEX_MIPFILTER_LINEAR:  return BRW_MIPFILTER_LINEAR;
+    default:
+       break;
+    }
+
+    assert(0);
+    return BRW_MIPFILTER_NONE;
+ }
+
+ /* Map a PIPE_FUNC_* shadow comparison to a BRW_COMPAREFUNCTION_* code.
+  * Every function is translated to its logical complement.
+  *
+  * XXX: not sure why there are special translations for the shadow tex
+  * compare functions.  In particular ALWAYS is translated to NEVER.
+  * Is this a hardware issue?  Does i965 really suffer from this?
+  */
+ static GLuint translate_shadow_compare_func( unsigned func )
+ {
+    switch (func) {
+    case PIPE_FUNC_NEVER:    return BRW_COMPAREFUNCTION_ALWAYS;
+    case PIPE_FUNC_LESS:     return BRW_COMPAREFUNCTION_LEQUAL;
+    case PIPE_FUNC_LEQUAL:   return BRW_COMPAREFUNCTION_LESS;
+    case PIPE_FUNC_GREATER:  return BRW_COMPAREFUNCTION_GEQUAL;
+    case PIPE_FUNC_GEQUAL:   return BRW_COMPAREFUNCTION_GREATER;
+    case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_EQUAL;
+    case PIPE_FUNC_EQUAL:    return BRW_COMPAREFUNCTION_NOTEQUAL;
+    case PIPE_FUNC_ALWAYS:   return BRW_COMPAREFUNCTION_NEVER;
+    default:
+       break;
+    }
+
+    assert(0);
+    return BRW_COMPAREFUNCTION_NEVER;
+ }
+
+
+
+
+ /**
+  * Create a sampler state object from the gallium sampler template.
+  *
+  * Translates filters, wrap modes, LOD bias/clamps and the shadow compare
+  * function into the ss0/ss1/ss3 sampler words.  Returns the new object,
+  * or NULL if the allocation fails.
+  */
+ static void *
+ brw_create_sampler_state( struct pipe_context *pipe,
+                           const struct pipe_sampler_state *template )
+ {
+    struct brw_sampler *sampler = CALLOC_STRUCT(brw_sampler);
+
+    /* Everything below writes through the pointer. */
+    if (sampler == NULL)
+       return NULL;
+
+    sampler->ss0.min_filter = translate_img_filter( template->min_img_filter );
+    sampler->ss0.mag_filter = translate_img_filter( template->mag_img_filter );
+    sampler->ss0.mip_filter = translate_mip_filter( template->min_mip_filter );
+
+
+    /* XXX: anisotropy logic slightly changed:
+     */
+    if (template->max_anisotropy > 1.0) {
+       sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
+       sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
+
+       if (template->max_anisotropy > 2.0) {
+          sampler->ss3.max_aniso = MIN2((template->max_anisotropy - 2) / 2,
+                                        BRW_ANISORATIO_16);
+       }
+    }
+
+    sampler->ss1.r_wrap_mode = translate_wrap_mode(template->wrap_r);
+    sampler->ss1.s_wrap_mode = translate_wrap_mode(template->wrap_s);
+    sampler->ss1.t_wrap_mode = translate_wrap_mode(template->wrap_t);
+
+    /* Set LOD bias:
+     */
+    sampler->ss0.lod_bias =
+       util_signed_fixed(CLAMP(template->lod_bias, -16, 15), 6);
+
+
+    sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
+    sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+
+    /* Set shadow function:
+     */
+    if (template->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+
+       /* Shadowing is "enabled" by emitting a particular sampler
+        * message (sample_c).  So need to recompile WM program when
+        * shadow comparison is enabled on each/any texture unit.
+        */
+       sampler->ss0.shadow_function =
+          translate_shadow_compare_func(template->compare_func);
+    }
+
+    /* Set BaseMipLevel, MaxLOD, MinLOD:
+     */
+    sampler->ss0.base_level =
+       util_unsigned_fixed(0, 1);
+
+    sampler->ss1.max_lod =
+       util_unsigned_fixed(CLAMP(template->max_lod, 0, 13), 6);
+
+    sampler->ss1.min_lod =
+       util_unsigned_fixed(CLAMP(template->min_lod, 0, 13), 6);
+
+    return (void *)sampler;
+ }
+
+ /* Bind the first num sampler objects, clear any previously bound slots
+  * beyond num, and flag the samplers dirty.
+  */
+ static void brw_bind_sampler_state(struct pipe_context *pipe,
+                                    unsigned num, void **sampler)
+ {
+    struct brw_context *brw = brw_context(pipe);
+    int i;
+
+    for (i = 0; i < num; i++) {
+       brw->curr.sampler[i] = sampler[i];
+    }
+
+    /* Slots that were bound before but are no longer covered. */
+    for (i = num; i < brw->curr.num_samplers; i++) {
+       brw->curr.sampler[i] = NULL;
+    }
+
+    brw->curr.num_samplers = num;
+    brw->state.dirty.mesa |= PIPE_NEW_SAMPLERS;
+ }
+
+ /* Free a sampler state object. */
+ static void brw_delete_sampler_state(struct pipe_context *pipe,
+                                      void *cso)
+ {
+    (void) pipe;
+
+    FREE(cso);
+ }
+
+ /* Take references on the first num textures, release any previously
+  * bound textures beyond num, and flag the bound textures dirty.
+  */
+ static void brw_set_sampler_textures(struct pipe_context *pipe,
+                                      unsigned num,
+                                      struct pipe_texture **texture)
+ {
+    struct brw_context *brw = brw_context(pipe);
+    int i;
+
+    for (i = 0; i < num; i++) {
+       pipe_texture_reference(&brw->curr.texture[i], texture[i]);
+    }
+
+    /* Slots that were bound before but are no longer covered. */
+    for (i = num; i < brw->curr.num_textures; i++) {
+       pipe_texture_reference(&brw->curr.texture[i], NULL);
+    }
+
+    brw->curr.num_textures = num;
+    brw->state.dirty.mesa |= PIPE_NEW_BOUND_TEXTURES;
+ }
+
+ /* Vertex-stage textures: the arguments are ignored. */
+ static void brw_set_vertex_sampler_textures(struct pipe_context *pipe,
+                                             unsigned num,
+                                             struct pipe_texture **texture)
+ {
+    (void) pipe;
+    (void) num;
+    (void) texture;
+ }
+
+ /* Vertex-stage samplers: the arguments are ignored. */
+ static void brw_bind_vertex_sampler_state(struct pipe_context *pipe,
+                                           unsigned num, void **sampler)
+ {
+    (void) pipe;
+    (void) num;
+    (void) sampler;
+ }
+
+
+ /* Install the sampler and sampler-texture entry points on the pipe
+  * context.
+  */
+ void brw_pipe_sampler_init( struct brw_context *brw )
+ {
+    struct pipe_context *pipe = &brw->base;
+
+    pipe->create_sampler_state = brw_create_sampler_state;
+    pipe->delete_sampler_state = brw_delete_sampler_state;
+
+    /* Fragment stage: */
+    pipe->set_fragment_sampler_textures = brw_set_sampler_textures;
+    pipe->bind_fragment_sampler_states = brw_bind_sampler_state;
+
+    /* Vertex stage (no-op hooks): */
+    pipe->set_vertex_sampler_textures = brw_set_vertex_sampler_textures;
+    pipe->bind_vertex_sampler_states = brw_bind_vertex_sampler_state;
+ }
+ /* Counterpart of brw_pipe_sampler_init(); currently a no-op. */
+ void brw_pipe_sampler_cleanup( struct brw_context *brw )
+ {
+    (void) brw;
+ }
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
new file mode 100644
index 00000000000..31a715ab655
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -0,0 +1,299 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+
+#include "brw_context.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+
+
+/**
+ * Report whether a scanned shader uses any of the opcodes listed below:
+ * ARL, IF/ENDIF, CAL/RET or BGNLOOP/BRK.
+ *
+ * \param info  per-opcode instruction counts for the shader
+ * \return non-zero if at least one listed opcode has a non-zero count
+ */
+static GLboolean has_flow_control(const struct tgsi_shader_info *info)
+{
+   static const unsigned flow_opcodes[] = {
+      TGSI_OPCODE_ARL,
+      TGSI_OPCODE_IF,
+      TGSI_OPCODE_ENDIF,     /* redundant - IF */
+      TGSI_OPCODE_CAL,
+      TGSI_OPCODE_BRK,       /* redundant - BGNLOOP */
+      TGSI_OPCODE_RET,       /* redundant - CAL */
+      TGSI_OPCODE_BGNLOOP
+   };
+   unsigned n;
+
+   for (n = 0; n < sizeof flow_opcodes / sizeof flow_opcodes[0]; n++) {
+      if (info->opcode_count[flow_opcodes[n]] > 0)
+         return 1;
+   }
+
+   return 0;
+}
+
+
+/**
+ * Collect the immediates that precede a shader's first instruction.
+ *
+ * Room for info->immediate_count four-float vectors is obtained with
+ * MALLOC() and stored in imm->data; this function never frees it.  Each
+ * immediate is copied into the next vector, and components the token does
+ * not supply are taken from (0, 0, 0, 1).  Scanning stops at the first
+ * instruction token.
+ *
+ * If MALLOC() yields NULL, imm->data is NULL, imm->nr stays 0 and nothing
+ * is parsed.
+ *
+ * \param tokens  token stream to walk
+ * \param info    scan results for \p tokens; only immediate_count is read
+ * \param imm     receives the vectors; imm->nr is the number stored
+ */
+static void scan_immediates(const struct tgsi_token *tokens,
+                            const struct tgsi_shader_info *info,
+                            struct brw_immediate_data *imm)
+{
+   struct tgsi_parse_context parse;
+   boolean done = FALSE;
+
+   imm->nr = 0;
+   imm->data = MALLOC(info->immediate_count * 4 * sizeof(float));
+   if (imm->data == NULL)
+      return;
+
+   tgsi_parse_init( &parse, tokens );
+   while (!tgsi_parse_end_of_tokens( &parse ) && !done) {
+      tgsi_parse_token( &parse );
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE: {
+         static const float id[4] = {0,0,0,1};
+         const float *value = &parse.FullToken.FullImmediate.u[0].Float;
+         unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+         unsigned i;
+
+         /* Each destination vector holds four floats; never write past it. */
+         if (size > 4)
+            size = 4;
+
+         for (i = 0; i < size; i++)
+            imm->data[imm->nr][i] = value[i];
+
+         /* Pad the missing components from the identity vector. */
+         for (; i < 4; i++)
+            imm->data[imm->nr][i] = id[i];
+
+         imm->nr++;
+         break;
+      }
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         /* Only immediates ahead of the first instruction are collected. */
+         done = TRUE;
+         break;
+      }
+   }
+
+   tgsi_parse_free( &parse );
+}
+
+
+/**
+ * Make \p prog the current fragment shader.
+ *
+ * Nothing happens when \p prog is already current.  Otherwise
+ * PIPE_NEW_FRAGMENT_SHADER is always flagged.  PIPE_NEW_FRAGMENT_SIGNATURE
+ * is flagged as well, unless both the old and the new shader are non-NULL
+ * and their signatures compare equal over brw_fs_signature_size() bytes.
+ */
+static void brw_bind_fs_state( struct pipe_context *pipe, void *prog )
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_fragment_shader *incoming = (struct brw_fragment_shader *)prog;
+   struct brw_fragment_shader *bound = brw->curr.fragment_shader;
+   boolean same_signature = FALSE;
+
+   if (bound == incoming)
+      return;
+
+   /* Signatures can only match when there are two shaders to compare. */
+   if (bound != NULL && incoming != NULL) {
+      same_signature =
+         memcmp(&bound->signature, &incoming->signature,
+                brw_fs_signature_size(&incoming->signature)) == 0;
+   }
+
+   if (!same_signature)
+      brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SIGNATURE;
+
+   brw->curr.fragment_shader = incoming;
+   brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SHADER;
+}
+
+/**
+ * Make \p prog the current vertex shader and flag PIPE_NEW_VERTEX_SHADER.
+ * The flag is raised unconditionally, even when \p prog is already bound.
+ */
+static void brw_bind_vs_state( struct pipe_context *pipe, void *prog )
+{
+   struct brw_vertex_shader *vs = (struct brw_vertex_shader *)prog;
+   struct brw_context *brw = brw_context(pipe);
+
+   brw->state.dirty.mesa |= PIPE_NEW_VERTEX_SHADER;
+   brw->curr.vertex_shader = vs;
+}
+
+
+
+/**
+ * Create a fragment shader object from \p shader.
+ *
+ * The tokens are duplicated and scanned, the immediates are collected, and
+ * the input signature (interpolation mode, semantic name and semantic
+ * index per input) is recorded.  uses_depth is set when any input has the
+ * POSITION semantic; iz_lookup receives the kill and computes-depth bits
+ * according to the scan results.
+ *
+ * \return the new shader as an opaque pointer, or NULL if an allocation
+ *         fails
+ */
+static void *brw_create_fs_state( struct pipe_context *pipe,
+                                  const struct pipe_shader_state *shader )
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_fragment_shader *fs;
+   int i;
+
+   fs = CALLOC_STRUCT(brw_fragment_shader);
+   if (fs == NULL)
+      return NULL;
+
+   /* Duplicate tokens, scan shader
+    */
+   fs->tokens = tgsi_dup_tokens(shader->tokens);
+   if (fs->tokens == NULL)
+      goto fail;
+
+   tgsi_scan_shader(fs->tokens, &fs->info);
+   scan_immediates(fs->tokens, &fs->info, &fs->immediates);
+
+   /* has_flow_control() reads the opcode counts, so it has to run after
+    * tgsi_scan_shader() has filled in fs->info.  The id is assigned here,
+    * after the last failure point, as brw_create_vs_state() does.
+    */
+   fs->id = brw->program_id++;
+   fs->has_flow_control = has_flow_control(&fs->info);
+
+   fs->signature.nr_inputs = fs->info.num_inputs;
+   for (i = 0; i < fs->info.num_inputs; i++) {
+      fs->signature.input[i].interp = fs->info.input_interpolate[i];
+      fs->signature.input[i].semantic = fs->info.input_semantic_name[i];
+      fs->signature.input[i].semantic_index = fs->info.input_semantic_index[i];
+
+      if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION)
+         fs->uses_depth = 1;
+   }
+
+   if (fs->info.uses_kill)
+      fs->iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+   if (fs->info.writes_z)
+      fs->iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+   return (void *)fs;
+
+fail:
+   FREE(fs);
+   return NULL;
+}
+
+
+/**
+ * Create a vertex shader object from \p shader.
+ *
+ * The tokens are duplicated and scanned, the immediates are collected, and
+ * the output slots carrying the POSITION, COLOR and BCOLOR semantics are
+ * recorded.  For COLOR and BCOLOR, semantic index 0 selects the "0" slot
+ * and any other index the "1" slot.
+ *
+ * \return the new shader as an opaque pointer, or NULL if an allocation
+ *         fails
+ */
+static void *brw_create_vs_state( struct pipe_context *pipe,
+                                  const struct pipe_shader_state *shader )
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_vertex_shader *vs = CALLOC_STRUCT(brw_vertex_shader);
+   unsigned slot;
+
+   if (vs == NULL)
+      return NULL;
+
+   /* Take a private copy of the tokens before analysing them. */
+   vs->tokens = tgsi_dup_tokens(shader->tokens);
+   if (vs->tokens == NULL) {
+      FREE(vs);
+      return NULL;
+   }
+
+   tgsi_scan_shader(vs->tokens, &vs->info);
+   scan_immediates(vs->tokens, &vs->info, &vs->immediates);
+
+   vs->id = brw->program_id++;
+   vs->has_flow_control = has_flow_control(&vs->info);
+
+   /* Remember which output slot carries each semantic of interest. */
+   for (slot = 0; slot < vs->info.num_outputs; slot++) {
+      const int sem_index = vs->info.output_semantic_index[slot];
+      const unsigned sem_name = vs->info.output_semantic_name[slot];
+
+      if (sem_name == TGSI_SEMANTIC_POSITION) {
+         vs->output_hpos = slot;
+      }
+      else if (sem_name == TGSI_SEMANTIC_COLOR) {
+         if (sem_index == 0)
+            vs->output_color0 = slot;
+         else
+            vs->output_color1 = slot;
+      }
+      else if (sem_name == TGSI_SEMANTIC_BCOLOR) {
+         if (sem_index == 0)
+            vs->output_bfc0 = slot;
+         else
+            vs->output_bfc1 = slot;
+      }
+#if 0
+      else if (sem_name == TGSI_SEMANTIC_EDGEFLAG) {
+         vs->output_edgeflag = slot;
+      }
+#endif
+   }
+
+   return (void *)vs;
+}
+
+
+/**
+ * Destroy a fragment shader created by brw_create_fs_state().
+ *
+ * Drops the const_buffer reference, then frees the immediates array filled
+ * in by scan_immediates(), the duplicated tokens and the shader itself.
+ */
+static void brw_delete_fs_state( struct pipe_context *pipe, void *prog )
+{
+   struct brw_fragment_shader *fs = (struct brw_fragment_shader *)prog;
+
+   bo_reference(&fs->const_buffer, NULL);
+   FREE( fs->immediates.data );   /* MALLOC'ed in scan_immediates() */
+   FREE( (void *)fs->tokens );
+   FREE( fs );
+}
+
+
+/**
+ * Destroy a vertex shader created by brw_create_vs_state().
+ *
+ * Frees the immediates array filled in by scan_immediates(), the
+ * duplicated tokens and the shader itself.
+ */
+static void brw_delete_vs_state( struct pipe_context *pipe, void *prog )
+{
+   /* brw_create_vs_state() allocates a brw_vertex_shader, so cast to that
+    * type rather than to brw_fragment_shader.
+    */
+   struct brw_vertex_shader *vs = (struct brw_vertex_shader *)prog;
+
+   /* Delete draw shader
+    */
+   FREE( vs->immediates.data );   /* MALLOC'ed in scan_immediates() */
+   FREE( (void *)vs->tokens );
+   FREE( vs );
+}
+
+
+/**
+ * Bind a constant buffer for one shader stage.
+ *
+ * PIPE_SHADER_FRAGMENT updates the fragment constants; every other value
+ * of \p shader is treated as the vertex stage.  The matching
+ * PIPE_NEW_*_CONSTANTS flag is raised.
+ *
+ * \param shader  stage selector
+ * \param index   constant buffer slot; only slot 0 is accepted (asserted)
+ * \param buf     buffer description; a NULL \p buf unbinds the current
+ *                buffer, the same as a description whose buffer is NULL
+ */
+static void brw_set_constant_buffer(struct pipe_context *pipe,
+                                    uint shader, uint index,
+                                    const struct pipe_constant_buffer *buf)
+{
+   struct brw_context *brw = brw_context(pipe);
+   /* Tolerate a NULL description instead of dereferencing it. */
+   struct pipe_buffer *buffer = buf ? buf->buffer : NULL;
+
+   assert(index == 0);
+
+   if (shader == PIPE_SHADER_FRAGMENT) {
+      pipe_buffer_reference( &brw->curr.fragment_constants,
+                             buffer );
+
+      brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_CONSTANTS;
+   }
+   else {
+      pipe_buffer_reference( &brw->curr.vertex_constants,
+                             buffer );
+
+      brw->state.dirty.mesa |= PIPE_NEW_VERTEX_CONSTANTS;
+   }
+}
+
+
+/**
+ * Install this file's shader and constant-buffer entry points into
+ * brw->base.
+ */
+void brw_pipe_shader_init( struct brw_context *brw )
+{
+   /* Fragment shader objects. */
+   brw->base.create_fs_state = brw_create_fs_state;
+   brw->base.bind_fs_state = brw_bind_fs_state;
+   brw->base.delete_fs_state = brw_delete_fs_state;
+
+   /* Vertex shader objects. */
+   brw->base.create_vs_state = brw_create_vs_state;
+   brw->base.bind_vs_state = brw_bind_vs_state;
+   brw->base.delete_vs_state = brw_delete_vs_state;
+
+   /* Constants for both stages. */
+   brw->base.set_constant_buffer = brw_set_constant_buffer;
+}
+
+/**
+ * Drop the references held on the currently bound vertex and fragment
+ * constant buffers.
+ */
+void brw_pipe_shader_cleanup( struct brw_context *brw )
+{
+   pipe_buffer_reference( &brw->curr.vertex_constants, NULL );
+   pipe_buffer_reference( &brw->curr.fragment_constants, NULL );
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
new file mode 100644
index 00000000000..3d87a2853f7
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -0,0 +1,78 @@
+#include "brw_context.h"
+
+
+/**
+ * Copy \p count vertex element descriptions into the context, record the
+ * count and flag PIPE_NEW_VERTEX_ELEMENT.
+ *
+ * NOTE(review): \p count is not checked against the capacity of
+ * brw->curr.vertex_element here; presumably the caller guarantees it.
+ */
+static void brw_set_vertex_elements( struct pipe_context *pipe,
+                                     unsigned count,
+                                     const struct pipe_vertex_element *elements )
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   brw->state.dirty.mesa |= PIPE_NEW_VERTEX_ELEMENT;
+
+   brw->curr.num_vertex_elements = count;
+   memcpy(brw->curr.vertex_element, elements, count * sizeof *elements);
+}
+
+
+/**
+ * Replace the set of bound vertex buffers.
+ *
+ * Returns early when \p count and the raw contents of \p buffers match the
+ * current binding.  Otherwise the per-slot buffer pointers are updated
+ * through pipe_buffer_reference(), slots beyond \p count are released,
+ * the descriptions are copied and PIPE_NEW_VERTEX_BUFFER is flagged.
+ */
+static void brw_set_vertex_buffers(struct pipe_context *pipe,
+                                   unsigned count,
+                                   const struct pipe_vertex_buffer *buffers)
+{
+   struct brw_context *brw = brw_context(pipe);
+   unsigned slot = 0;
+
+   /* Nothing to do if the binding is byte-for-byte unchanged. */
+   if (count == brw->curr.num_vertex_buffers &&
+       memcmp(brw->curr.vertex_buffer,
+              buffers,
+              count * sizeof buffers[0]) == 0)
+      return;
+
+   /* Take references on the incoming buffers... */
+   while (slot < count) {
+      pipe_buffer_reference(&brw->curr.vertex_buffer[slot].buffer,
+                            buffers[slot].buffer);
+      slot++;
+   }
+
+   /* ...and release the slots that are no longer in use. */
+   while (slot < brw->curr.num_vertex_buffers) {
+      pipe_buffer_reference(&brw->curr.vertex_buffer[slot].buffer,
+                            NULL);
+      slot++;
+   }
+
+   /* Copy the remaining fields; the buffer pointers are rewritten with
+    * the same values the reference calls just stored.
+    */
+   memcpy(brw->curr.vertex_buffer, buffers, count * sizeof buffers[0]);
+   brw->curr.num_vertex_buffers = count;
+
+   brw->state.dirty.mesa |= PIPE_NEW_VERTEX_BUFFER;
+}
+
+/* Stub for the set_edgeflags hook: \p bitfield is ignored (see XXX). */
+static void brw_set_edgeflags( struct pipe_context *pipe,
+ const unsigned *bitfield )
+{
+ /* XXX */
+}
+
+
+/**
+ * Install this file's vertex entry points into brw->base.
+ */
+void
+brw_pipe_vertex_init( struct brw_context *brw )
+{
+   brw->base.set_vertex_elements = brw_set_vertex_elements;
+   brw->base.set_vertex_buffers = brw_set_vertex_buffers;
+   brw->base.set_edgeflags = brw_set_edgeflags;   /* stub */
+}
+
+
+/**
+ * Release the vertex state held by the context.
+ *
+ * Drops the references brw_set_vertex_buffers() took on the bound vertex
+ * buffers and resets the bound count to zero.
+ */
+void
+brw_pipe_vertex_cleanup( struct brw_context *brw )
+{
+   unsigned i;
+
+   /* Release bound pipe vertex_buffers
+    */
+   for (i = 0; i < brw->curr.num_vertex_buffers; i++)
+      pipe_buffer_reference(&brw->curr.vertex_buffer[i].buffer, NULL);
+
+   brw->curr.num_vertex_buffers = 0;
+
+   /* Release some other stuff
+    */
+#if 0
+   for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+      bo_reference(&brw->vb.inputs[i].bo, NULL);
+      brw->vb.inputs[i].bo = NULL;
+   }
+#endif
+}
diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h
new file mode 100644
index 00000000000..a63403b6afd
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_reg.h
@@ -0,0 +1,115 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_REG_H
+#define BRW_REG_H
+
+#define CMD_MI (0x0 << 29)
+#define CMD_2D (0x2 << 29)
+#define CMD_3D (0x3 << 29)
+
+#define MI_NOOP (CMD_MI | 0)
+#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23)
+#define MI_FLUSH (CMD_MI | (4 << 23))
+
+#define _3DSTATE_DRAWRECT_INFO_I965 (CMD_3D | (3 << 27) | (1 << 24) | 0x2)
+
+/** @{
+ *
+ * PIPE_CONTROL operation, a combination of MI_FLUSH and a register write
+ * with additional flushing control.
+ */
+#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | 2)
+#define PIPE_CONTROL_NO_WRITE (0 << 14)
+#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14)
+#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14)
+#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14)
+#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
+#define PIPE_CONTROL_WRITE_FLUSH (1 << 12)
+#define PIPE_CONTROL_INSTRUCTION_FLUSH (1 << 11)
+#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
+#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
+#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
+
+/** @} */
+
+#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6)
+#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4)
+#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6)
+
+/* BR00 */
+#define XY_BLT_WRITE_ALPHA (1 << 21)
+#define XY_BLT_WRITE_RGB (1 << 20)
+#define XY_SRC_TILED (1 << 15)
+#define XY_DST_TILED (1 << 11)
+
+/* BR13 */
+#define BR13_565 (0x1 << 24)
+#define BR13_8888 (0x3 << 24)
+
+#define FENCE_LINEAR 0
+#define FENCE_XMAJOR 1
+#define FENCE_YMAJOR 2
+
+
+
+/* PCI IDs
+ */
+#define PCI_CHIP_I965_G 0x29A2
+#define PCI_CHIP_I965_Q 0x2992
+#define PCI_CHIP_I965_G_1 0x2982
+#define PCI_CHIP_I946_GZ 0x2972
+#define PCI_CHIP_I965_GM 0x2A02
+#define PCI_CHIP_I965_GME 0x2A12
+
+#define PCI_CHIP_GM45_GM 0x2A42
+
+#define PCI_CHIP_IGD_E_G 0x2E02
+#define PCI_CHIP_Q45_G 0x2E12
+#define PCI_CHIP_G45_G 0x2E22
+#define PCI_CHIP_G41_G 0x2E32
+#define PCI_CHIP_B43_G 0x2E42
+
+#define PCI_CHIP_ILD_G 0x0042
+#define PCI_CHIP_ILM_G 0x0046
+
+/* Chipset identification packed into 32 bits of bitfields.
+ * brw_create_screen() zeroes the struct, stores the PCI id and sets
+ * exactly one of the is_* flags according to that id.
+ */
+struct brw_chipset {
+ unsigned pci_id:16;
+ unsigned is_965:1;
+ unsigned is_igdng:1;
+ unsigned is_g4x:1;
+ unsigned pad:13;
+};
+
+
+/* XXX: hacks
+ */
+#define VERT_RESULT_HPOS 0 /* not always true */
+#define VERT_RESULT_PSIZ 10000 /* disabled */
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
new file mode 100644
index 00000000000..0ecacac9a3a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -0,0 +1,403 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+
+#include "brw_reg.h"
+#include "brw_context.h"
+#include "brw_screen.h"
+#include "brw_winsys.h"
+#include "brw_debug.h"
+
+#ifdef DEBUG
+/* Name -> flag table handed to debug_get_flags_option() for the BRW_DEBUG
+ * and INTEL_DEBUG options in brw_create_screen().
+ * NOTE(review): "sing" and "thre" both map to DEBUG_SINGLE_THREAD --
+ * confirm that "thre" is not meant to select a different flag.
+ */
+static const struct debug_named_value debug_names[] = {
+ { "tex", DEBUG_TEXTURE},
+ { "state", DEBUG_STATE},
+ { "ioctl", DEBUG_IOCTL},
+ { "blit", DEBUG_BLIT},
+ { "curbe", DEBUG_CURBE},
+ { "fall", DEBUG_FALLBACKS},
+ { "verb", DEBUG_VERBOSE},
+ { "bat", DEBUG_BATCH},
+ { "pix", DEBUG_PIXEL},
+ { "wins", DEBUG_WINSYS},
+ { "min", DEBUG_MIN_URB},
+ { "dis", DEBUG_DISASSEM},
+ { "sync", DEBUG_SYNC},
+ { "prim", DEBUG_PRIMS },
+ { "vert", DEBUG_VERTS },
+ { "dma", DEBUG_DMA },
+ { "san", DEBUG_SANITY },
+ { "sleep", DEBUG_SLEEP },
+ { "stats", DEBUG_STATS },
+ { "sing", DEBUG_SINGLE_THREAD },
+ { "thre", DEBUG_SINGLE_THREAD },
+ { "wm", DEBUG_WM },
+ { "urb", DEBUG_URB },
+ { "vs", DEBUG_VS },
+ { NULL, 0 }
+};
+
+/* Name -> flag table handed to debug_get_flags_option() for the BRW_DUMP
+ * option in brw_create_screen().
+ */
+static const struct debug_named_value dump_names[] = {
+ { "asm", DUMP_ASM},
+ { "state", DUMP_STATE},
+ { "batch", DUMP_BATCH},
+ { NULL, 0 }
+};
+
+/* Flag words filled in by brw_create_screen(); only defined in DEBUG
+ * builds.
+ */
+int BRW_DEBUG = 0;
+int BRW_DUMP = 0;
+
+#endif
+
+
+/*
+ * Probe functions
+ */
+
+
+/* Return the constant vendor string; \p screen is not used. */
+static const char *
+brw_get_vendor(struct pipe_screen *screen)
+{
+ return "VMware, Inc.";
+}
+
+/**
+ * Return a human-readable name for the screen: "i965 (chipset: NAME)",
+ * where NAME is chosen from the screen's PCI id.
+ *
+ * The string lives in a function-local static buffer, so each call
+ * overwrites the result of the previous one.
+ *
+ * \return pointer to the static, NUL-terminated name buffer
+ */
+static const char *
+brw_get_name(struct pipe_screen *screen)
+{
+   static char buffer[128];
+   const char *chipset;
+
+   switch (brw_screen(screen)->chipset.pci_id) {
+   case PCI_CHIP_I965_G:
+      chipset = "I965_G";
+      break;
+   case PCI_CHIP_I965_Q:
+      chipset = "I965_Q";
+      break;
+   case PCI_CHIP_I965_G_1:
+      chipset = "I965_G_1";
+      break;
+   case PCI_CHIP_I946_GZ:
+      chipset = "I946_GZ";
+      break;
+   case PCI_CHIP_I965_GM:
+      chipset = "I965_GM";
+      break;
+   case PCI_CHIP_I965_GME:
+      chipset = "I965_GME";
+      break;
+   case PCI_CHIP_GM45_GM:
+      chipset = "GM45_GM";
+      break;
+   case PCI_CHIP_IGD_E_G:
+      chipset = "IGD_E_G";
+      break;
+   case PCI_CHIP_Q45_G:
+      chipset = "Q45_G";
+      break;
+   case PCI_CHIP_G45_G:
+      chipset = "G45_G";
+      break;
+   case PCI_CHIP_G41_G:
+      chipset = "G41_G";
+      break;
+   case PCI_CHIP_B43_G:
+      chipset = "B43_G";
+      break;
+   case PCI_CHIP_ILD_G:
+      chipset = "ILD_G";
+      break;
+   case PCI_CHIP_ILM_G:
+      chipset = "ILM_G";
+      break;
+   default:
+      /* Never hand an uninitialized pointer to the "%s" below, even if
+       * an id missing from this table reaches us.
+       */
+      chipset = "unknown";
+      break;
+   }
+
+   util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset);
+   return buffer;
+}
+
+/**
+ * Answer an integer capability query.
+ *
+ * \param screen  unused
+ * \param param   PIPE_CAP_* value being queried
+ * \return the value reported for \p param; 0 for any capability that is
+ *         not listed
+ */
+static int
+brw_get_param(struct pipe_screen *screen, int param)
+{
+   switch (param) {
+   /* Capabilities reported with the value 1. */
+   case PIPE_CAP_NPOT_TEXTURES:
+   case PIPE_CAP_TWO_SIDED_STENCIL:
+   case PIPE_CAP_MAX_RENDER_TARGETS:
+   case PIPE_CAP_TEXTURE_SHADOW_MAP:
+      return 1;
+
+   case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+      return 8;
+
+   /* Mipmap level limits. */
+   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+      return 11; /* max 1024x1024 */
+   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+      return 8; /* max 128x128x128 */
+
+   /* Explicitly reported as 0, the same as anything unlisted. */
+   case PIPE_CAP_GLSL:
+   case PIPE_CAP_ANISOTROPIC_FILTER:
+   case PIPE_CAP_POINT_SPRITE:
+   case PIPE_CAP_OCCLUSION_QUERY:
+   default:
+      return 0;
+   }
+}
+
+/**
+ * Answer a floating-point capability query.
+ *
+ * \param screen  unused
+ * \param param   PIPE_CAP_* value being queried
+ * \return the value reported for \p param; 0 for any capability that is
+ *         not listed
+ */
+static float
+brw_get_paramf(struct pipe_screen *screen, int param)
+{
+   float limit = 0;
+
+   if (param == PIPE_CAP_MAX_LINE_WIDTH ||
+       param == PIPE_CAP_MAX_LINE_WIDTH_AA)
+      limit = 7.5;
+   else if (param == PIPE_CAP_MAX_POINT_WIDTH ||
+            param == PIPE_CAP_MAX_POINT_WIDTH_AA)
+      limit = 255.0;
+   else if (param == PIPE_CAP_MAX_TEXTURE_ANISOTROPY)
+      limit = 4.0;
+   else if (param == PIPE_CAP_MAX_TEXTURE_LOD_BIAS)
+      limit = 16.0;
+
+   return limit;
+}
+
+/**
+ * Report whether \p format appears in the list selected by \p tex_usage.
+ *
+ * The depth/stencil list is used when PIPE_TEXTURE_USAGE_DEPTH_STENCIL is
+ * set, otherwise the render-target list when
+ * PIPE_TEXTURE_USAGE_RENDER_TARGET is set, otherwise the general texture
+ * list.  \p screen, \p target and \p geom_flags are not consulted.
+ *
+ * \return TRUE if \p format is in the selected list, FALSE otherwise
+ */
+static boolean
+brw_is_format_supported(struct pipe_screen *screen,
+                        enum pipe_format format,
+                        enum pipe_texture_target target,
+                        unsigned tex_usage,
+                        unsigned geom_flags)
+{
+   static const enum pipe_format tex_supported[] = {
+      PIPE_FORMAT_L8_UNORM,
+      PIPE_FORMAT_I8_UNORM,
+      PIPE_FORMAT_A8_UNORM,
+      PIPE_FORMAT_L16_UNORM,
+      /*PIPE_FORMAT_I16_UNORM,*/
+      /*PIPE_FORMAT_A16_UNORM,*/
+      PIPE_FORMAT_A8L8_UNORM,
+      PIPE_FORMAT_R5G6B5_UNORM,
+      PIPE_FORMAT_A1R5G5B5_UNORM,
+      PIPE_FORMAT_A4R4G4B4_UNORM,
+      PIPE_FORMAT_X8R8G8B8_UNORM,
+      PIPE_FORMAT_A8R8G8B8_UNORM,
+      /* video */
+      PIPE_FORMAT_YCBCR,
+      PIPE_FORMAT_YCBCR_REV,
+      /* compressed */
+      /*PIPE_FORMAT_FXT1_RGBA,*/
+      PIPE_FORMAT_DXT1_RGB,
+      PIPE_FORMAT_DXT1_RGBA,
+      PIPE_FORMAT_DXT3_RGBA,
+      PIPE_FORMAT_DXT5_RGBA,
+      /* sRGB */
+      PIPE_FORMAT_R8G8B8A8_SRGB,
+      PIPE_FORMAT_A8L8_SRGB,
+      PIPE_FORMAT_L8_SRGB,
+      PIPE_FORMAT_DXT1_SRGB,
+      /* depth */
+      PIPE_FORMAT_Z32_FLOAT,
+      PIPE_FORMAT_X8Z24_UNORM,
+      PIPE_FORMAT_S8Z24_UNORM,
+      PIPE_FORMAT_Z16_UNORM,
+      /* signed */
+      PIPE_FORMAT_R8G8_SNORM,
+      PIPE_FORMAT_R8G8B8A8_SNORM,
+      PIPE_FORMAT_NONE /* list terminator */
+   };
+   static const enum pipe_format render_supported[] = {
+      PIPE_FORMAT_X8R8G8B8_UNORM,
+      PIPE_FORMAT_A8R8G8B8_UNORM,
+      PIPE_FORMAT_R5G6B5_UNORM,
+      PIPE_FORMAT_NONE /* list terminator */
+   };
+   static const enum pipe_format depth_supported[] = {
+      PIPE_FORMAT_Z32_FLOAT,
+      PIPE_FORMAT_X8Z24_UNORM,
+      PIPE_FORMAT_S8Z24_UNORM,
+      PIPE_FORMAT_Z16_UNORM,
+      PIPE_FORMAT_NONE /* list terminator */
+   };
+   const enum pipe_format *entry;
+
+   /* Depth/stencil usage wins over render-target usage. */
+   if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL)
+      entry = depth_supported;
+   else if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET)
+      entry = render_supported;
+   else
+      entry = tex_supported;
+
+   /* Walk the chosen list up to its PIPE_FORMAT_NONE terminator. */
+   for (; *entry != PIPE_FORMAT_NONE; entry++) {
+      if (*entry == format)
+         return TRUE;
+   }
+
+   return FALSE;
+}
+
+
+/*
+ * Fence functions
+ */
+
+
+/* Stub for the fence_reference hook: the body is empty, so *ptr is not
+ * updated and \p fence is ignored.
+ */
+static void
+brw_fence_reference(struct pipe_screen *screen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+}
+
+/* Stub for the fence_signalled hook: always returns 0 and ignores its
+ * arguments.
+ */
+static int
+brw_fence_signalled(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flags)
+{
+ return 0; /* XXX shouldn't this be a boolean? */
+}
+
+/* Stub for the fence_finish hook: always returns 0 without waiting on
+ * anything; the arguments are ignored.
+ */
+static int
+brw_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flags)
+{
+ return 0;
+}
+
+
+/*
+ * Generic functions
+ */
+
+
+/**
+ * Destroy a screen created by brw_create_screen().
+ *
+ * If a winsys screen is attached, its destroy hook is called first; the
+ * brw_screen itself is then freed.
+ */
+static void
+brw_destroy_screen(struct pipe_screen *screen)
+{
+   struct brw_screen *bscreen = brw_screen(screen);
+   struct brw_winsys_screen *sws = bscreen->sws;
+
+   if (sws != NULL)
+      sws->destroy(sws);
+
+   FREE(bscreen);
+}
+
+/**
+ * Create a new brw_screen object
+ *
+ * The PCI id is classified into one of the 965, G4x or IGDNG families; an
+ * id outside these groups is reported with debug_printf() and rejected.
+ * On success the screen's function table is filled in and the texture,
+ * surface and buffer sub-modules are initialised.
+ *
+ * In DEBUG builds, BRW_DEBUG is read from the BRW_DEBUG and INTEL_DEBUG
+ * options and BRW_DUMP from the BRW_DUMP option.
+ *
+ * \param sws     winsys screen; stored in the new screen and destroyed by
+ *                brw_destroy_screen()
+ * \param pci_id  PCI device id of the chipset
+ * \return the new screen, or NULL for an unknown \p pci_id or on
+ *         allocation failure
+ */
+struct pipe_screen *
+brw_create_screen(struct brw_winsys_screen *sws, uint pci_id)
+{
+   struct brw_screen *bscreen;
+   struct brw_chipset chipset;
+
+#ifdef DEBUG
+   BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0);
+   BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0);
+   /* NOTE(review): these three flags are forced on regardless of the
+    * environment -- confirm this is still wanted.
+    */
+   BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB | DEBUG_WM;
+
+   BRW_DUMP = debug_get_flags_option("BRW_DUMP", dump_names, 0);
+#endif
+
+   memset(&chipset, 0, sizeof chipset);
+
+   chipset.pci_id = pci_id;
+
+   switch (pci_id) {
+   case PCI_CHIP_I965_G:
+   case PCI_CHIP_I965_Q:
+   case PCI_CHIP_I965_G_1:
+   case PCI_CHIP_I946_GZ:
+   case PCI_CHIP_I965_GM:
+   case PCI_CHIP_I965_GME:
+      chipset.is_965 = TRUE;
+      break;
+
+   case PCI_CHIP_GM45_GM:
+   case PCI_CHIP_IGD_E_G:
+   case PCI_CHIP_Q45_G:
+   case PCI_CHIP_G45_G:
+   case PCI_CHIP_G41_G:
+   case PCI_CHIP_B43_G:
+      chipset.is_g4x = TRUE;
+      break;
+
+   case PCI_CHIP_ILD_G:
+   case PCI_CHIP_ILM_G:
+      chipset.is_igdng = TRUE;
+      break;
+
+   default:
+      debug_printf("%s: unknown pci id 0x%x, cannot create screen\n",
+                   __FUNCTION__, pci_id);
+      return NULL;
+   }
+
+
+   bscreen = CALLOC_STRUCT(brw_screen);
+   if (!bscreen)
+      return NULL;
+
+   bscreen->chipset = chipset;
+   bscreen->sws = sws;
+   bscreen->base.winsys = NULL;
+   bscreen->base.destroy = brw_destroy_screen;
+   bscreen->base.get_name = brw_get_name;
+   bscreen->base.get_vendor = brw_get_vendor;
+   bscreen->base.get_param = brw_get_param;
+   bscreen->base.get_paramf = brw_get_paramf;
+   bscreen->base.is_format_supported = brw_is_format_supported;
+   bscreen->base.fence_reference = brw_fence_reference;
+   bscreen->base.fence_signalled = brw_fence_signalled;
+   bscreen->base.fence_finish = brw_fence_finish;
+
+   brw_screen_tex_init(bscreen);
+   brw_screen_tex_surface_init(bscreen);
+   brw_screen_buffer_init(bscreen);
+
+   /* Parse the option as a boolean so that BRW_NO_TILING=0 leaves tiling
+    * enabled; testing only for the variable's presence treated every
+    * value as "on".
+    */
+   bscreen->no_tiling = debug_get_bool_option("BRW_NO_TILING", FALSE);
+
+
+   return &bscreen->base;
+}
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
new file mode 100644
index 00000000000..7226d9228b7
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -0,0 +1,199 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_SCREEN_H
+#define BRW_SCREEN_H
+
+#include "pipe/p_state.h"
+#include "pipe/p_screen.h"
+
+#include "brw_reg.h"
+#include "brw_structs.h"
+
+struct brw_winsys_screen;
+
+
+/**
+ * Subclass of pipe_screen
+ *
+ * 'base' is the first member, which is what lets the brw_screen() cast
+ * wrapper convert a pipe_screen pointer back to this type.
+ */
+struct brw_screen
+{
+ struct pipe_screen base;
+ struct brw_chipset chipset; /* filled in by brw_create_screen() from the PCI id */
+ struct brw_winsys_screen *sws; /* winsys screen passed to brw_create_screen() */
+ boolean no_tiling; /* set in brw_create_screen() from the BRW_NO_TILING option */
+};
+
+/**
+ * Subclass of pipe_transfer
+ */
+struct brw_transfer
+{
+ struct pipe_transfer base;
+
+ unsigned offset;
+};
+
+struct brw_buffer
+{
+ struct pipe_buffer base;
+
+ /* One of either bo or user_buffer will be non-null, depending on
+ * whether this is a hardware or user buffer.
+ */
+ struct brw_winsys_buffer *bo;
+ void *user_buffer;
+
+ /* Mapped pointer??
+ */
+ void *ptr;
+};
+
+
+/* Surface identifier: face (3 bits), zslice (13 bits) and level (16 bits)
+ * as bitfields, overlaid with 'value' so that the three fields can be
+ * read or written as a single unsigned.
+ */
+union brw_surface_id {
+ struct {
+ unsigned face:3;
+ unsigned zslice:13;
+ unsigned level:16;
+ } bits;
+ unsigned value;
+};
+
+
+struct brw_surface
+{
+ struct pipe_surface base;
+
+ union brw_surface_id id;
+ unsigned cpp;
+ unsigned pitch;
+ unsigned draw_offset;
+ unsigned tiling;
+
+ struct brw_surface_state ss;
+ struct brw_winsys_buffer *bo;
+ struct brw_surface *next, *prev;
+};
+
+
+
+struct brw_texture
+{
+ struct pipe_texture base;
+ struct brw_winsys_buffer *bo;
+ struct brw_surface_state ss;
+
+ unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS];
+
+ boolean compressed;
+ unsigned brw_target;
+ unsigned pitch;
+ unsigned tiling;
+ unsigned cpp;
+ unsigned total_height;
+
+ struct brw_surface views[2];
+};
+
+
+
+/*
+ * Cast wrappers
+ *
+ * Each helper converts a pointer to a gallium base object into a pointer
+ * to the driver subclass with a plain cast.  No checking is done.
+ */
+static INLINE struct brw_screen *
+brw_screen(struct pipe_screen *pscreen)
+{
+ return (struct brw_screen *) pscreen;
+}
+
+static INLINE struct brw_transfer *
+brw_transfer(struct pipe_transfer *transfer)
+{
+ return (struct brw_transfer *)transfer;
+}
+
+static INLINE struct brw_surface *
+brw_surface(struct pipe_surface *surface)
+{
+ return (struct brw_surface *)surface;
+}
+
+static INLINE struct brw_buffer *
+brw_buffer(struct pipe_buffer *buffer)
+{
+ return (struct brw_buffer *)buffer;
+}
+
+static INLINE struct brw_texture *
+brw_texture(struct pipe_texture *texture)
+{
+ return (struct brw_texture *)texture;
+}
+
+
+/* Pipe buffer helpers
+ */
+/* Return non-zero when \p buf, viewed as a brw_buffer, has a non-NULL
+ * user_buffer pointer.
+ */
+static INLINE boolean
+brw_buffer_is_user_buffer( const struct pipe_buffer *buf )
+{
+ return ((const struct brw_buffer *)buf)->user_buffer != NULL;
+}
+
+unsigned
+brw_surface_pitch( const struct pipe_surface *surface );
+
+/***********************************************************************
+ * Internal functions
+ */
+GLboolean brw_texture_layout(struct brw_screen *brw_screen,
+ struct brw_texture *tex );
+
+void brw_update_texture( struct brw_screen *brw_screen,
+ struct brw_texture *tex );
+
+
+void brw_screen_tex_init( struct brw_screen *brw_screen );
+void brw_screen_tex_surface_init( struct brw_screen *brw_screen );
+
+void brw_screen_buffer_init(struct brw_screen *brw_screen);
+
+
+boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen,
+ struct pipe_texture *texture,
+ unsigned face,
+ unsigned level,
+ struct brw_winsys_buffer *bo );
+
+boolean brw_is_buffer_referenced_by_bo( struct brw_screen *brw_screen,
+ struct pipe_buffer *buffer,
+ struct brw_winsys_buffer *bo );
+
+
+
+#endif /* BRW_SCREEN_H */
diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c
new file mode 100644
index 00000000000..d8141a3f5b9
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_buffers.c
@@ -0,0 +1,202 @@
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "brw_screen.h"
+#include "brw_winsys.h"
+
+
+
+/**
+ * Map a range of a buffer.
+ *
+ * User buffers return their user pointer unchanged; \p offset is not
+ * applied to it.  Hardware buffers are forwarded to the winsys bo_map
+ * hook, with the CPU_WRITE, DISCARD and FLUSH_EXPLICIT bits of \p usage
+ * converted to booleans.
+ *
+ * \return whatever the user pointer or the winsys hook yields
+ */
+static void *
+brw_buffer_map_range( struct pipe_screen *screen,
+                      struct pipe_buffer *buffer,
+                      unsigned offset,
+                      unsigned length,
+                      unsigned usage )
+{
+   struct brw_buffer *buf = brw_buffer( buffer );
+   struct brw_winsys_screen *sws;
+   boolean write, discard, flush_explicit;
+
+   if (buf->user_buffer)
+      return buf->user_buffer;
+
+   write = (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE;
+   discard = (usage & PIPE_BUFFER_USAGE_DISCARD) ? TRUE : FALSE;
+   flush_explicit = (usage & PIPE_BUFFER_USAGE_FLUSH_EXPLICIT) ? TRUE : FALSE;
+
+   sws = brw_screen(screen)->sws;
+   return sws->bo_map( buf->bo,
+                       BRW_DATA_OTHER,
+                       offset,
+                       length,
+                       write,
+                       discard,
+                       flush_explicit );
+}
+
+/**
+ * Map a whole buffer.
+ *
+ * User buffers return their user pointer.  Hardware buffers are mapped
+ * through the winsys bo_map hook over [0, size), passing only the
+ * CPU_WRITE bit of \p usage; the last two bo_map arguments are always
+ * FALSE.
+ */
+static void *
+brw_buffer_map( struct pipe_screen *screen,
+                struct pipe_buffer *buffer,
+                unsigned usage )
+{
+   struct brw_buffer *buf = brw_buffer( buffer );
+   struct brw_winsys_screen *sws;
+   boolean write;
+
+   if (buf->user_buffer)
+      return buf->user_buffer;
+
+   write = (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE;
+
+   sws = brw_screen(screen)->sws;
+   return sws->bo_map( buf->bo,
+                       BRW_DATA_OTHER,
+                       0,
+                       buf->base.size,
+                       write,
+                       FALSE,
+                       FALSE );
+}
+
+
+/**
+ * Flush a sub-range of a mapped buffer through the winsys bo_flush_range
+ * hook.  Buffers with a user pointer are skipped.
+ */
+static void
+brw_buffer_flush_mapped_range( struct pipe_screen *screen,
+                               struct pipe_buffer *buffer,
+                               unsigned offset,
+                               unsigned length )
+{
+   struct brw_buffer *buf = brw_buffer( buffer );
+
+   if (!buf->user_buffer) {
+      struct brw_winsys_screen *sws = brw_screen(screen)->sws;
+
+      sws->bo_flush_range( buf->bo, offset, length );
+   }
+}
+
+
+/**
+ * Unmap a buffer.  Only buffers with a winsys bo are handed to the
+ * bo_unmap hook; buffers without one need no action.
+ */
+static void
+brw_buffer_unmap( struct pipe_screen *screen,
+                  struct pipe_buffer *buffer )
+{
+   struct brw_buffer *buf = brw_buffer( buffer );
+
+   if (buf->bo == NULL)
+      return;
+
+   brw_screen(screen)->sws->bo_unmap(buf->bo);
+}
+
+/* Destroy a buffer: asserts that its reference count has reached zero,
+ * drops the reference on the winsys bo and frees the brw_buffer.
+ */
+static void
+brw_buffer_destroy( struct pipe_buffer *buffer )
+{
+ struct brw_buffer *buf = brw_buffer( buffer );
+
+ assert(!p_atomic_read(&buffer->reference.count));
+
+ bo_reference(&buf->bo, NULL);
+ FREE(buf);
+}
+
+
+/**
+ * Create a hardware buffer backed by a winsys bo.
+ *
+ * The winsys buffer type is chosen from the VERTEX, INDEX, PIXEL and
+ * CONSTANT bits of \p usage: vertex and/or index usage maps to the vertex
+ * type, pixel-only to the pixel type, constant-only to the shader
+ * constants type, and every other combination to the generic type.
+ *
+ * \param screen     screen that owns the buffer
+ * \param alignment  alignment, stored in the buffer and passed to bo_alloc
+ * \param usage      PIPE_BUFFER_USAGE_* bits
+ * \param size       size, stored in the buffer and passed to bo_alloc
+ * \return the new buffer with a reference count of 1, or NULL if either
+ *         allocation fails
+ */
+static struct pipe_buffer *
+brw_buffer_create(struct pipe_screen *screen,
+                  unsigned alignment,
+                  unsigned usage,
+                  unsigned size)
+{
+   struct brw_screen *bscreen = brw_screen(screen);
+   struct brw_winsys_screen *sws = bscreen->sws;
+   struct brw_buffer *buf;
+   unsigned buffer_type;
+   enum pipe_error ret;
+
+   buf = CALLOC_STRUCT(brw_buffer);
+   if (!buf)
+      return NULL;
+
+   pipe_reference_init(&buf->base.reference, 1);
+   buf->base.screen = screen;
+   buf->base.alignment = alignment;
+   buf->base.usage = usage;
+   buf->base.size = size;
+
+   switch (usage & (PIPE_BUFFER_USAGE_VERTEX |
+                    PIPE_BUFFER_USAGE_INDEX |
+                    PIPE_BUFFER_USAGE_PIXEL |
+                    PIPE_BUFFER_USAGE_CONSTANT))
+   {
+   case PIPE_BUFFER_USAGE_VERTEX:
+   case PIPE_BUFFER_USAGE_INDEX:
+   case (PIPE_BUFFER_USAGE_VERTEX|PIPE_BUFFER_USAGE_INDEX):
+      buffer_type = BRW_BUFFER_TYPE_VERTEX;
+      break;
+
+   case PIPE_BUFFER_USAGE_PIXEL:
+      buffer_type = BRW_BUFFER_TYPE_PIXEL;
+      break;
+
+   case PIPE_BUFFER_USAGE_CONSTANT:
+      buffer_type = BRW_BUFFER_TYPE_SHADER_CONSTANTS;
+      break;
+
+   default:
+      buffer_type = BRW_BUFFER_TYPE_GENERIC;
+      break;
+   }
+
+   ret = sws->bo_alloc( sws, buffer_type,
+                        size, alignment,
+                        &buf->bo );
+   if (ret != PIPE_OK) {
+      /* Do not leak the wrapper when the winsys allocation fails. */
+      FREE(buf);
+      return NULL;
+   }
+
+   return &buf->base;
+}
+
+
+/**
+ * Wrap caller-provided memory in a pipe_buffer.
+ *
+ * No winsys bo is allocated; the buffer only records \p ptr and \p bytes,
+ * with alignment 1 and usage 0.
+ *
+ * \return the new buffer with a reference count of 1, or NULL if the
+ *         allocation fails
+ */
+static struct pipe_buffer *
+brw_user_buffer_create(struct pipe_screen *screen,
+                       void *ptr,
+                       unsigned bytes)
+{
+   struct brw_buffer *buf = CALLOC_STRUCT(brw_buffer);
+
+   if (buf == NULL)
+      return NULL;
+
+   pipe_reference_init(&buf->base.reference, 1);
+   buf->base.screen = screen;
+   buf->base.size = bytes;
+   buf->base.alignment = 1;
+   buf->base.usage = 0;
+
+   buf->user_buffer = ptr;
+
+   return &buf->base;
+}
+
+
+/**
+ * Ask the winsys bo_references hook about \p bo and the bo behind
+ * \p buffer.  Buffers without a bo always yield FALSE.
+ */
+boolean brw_is_buffer_referenced_by_bo( struct brw_screen *brw_screen,
+                                        struct pipe_buffer *buffer,
+                                        struct brw_winsys_buffer *bo )
+{
+   struct brw_buffer *buf = brw_buffer(buffer);
+
+   if (buf->bo != NULL)
+      return brw_screen->sws->bo_references( bo, buf->bo );
+
+   return FALSE;
+}
+
+
+/**
+ * Install this file's buffer entry points into the screen's function
+ * table.
+ */
+void brw_screen_buffer_init(struct brw_screen *brw_screen)
+{
+   /* Creation and destruction. */
+   brw_screen->base.buffer_create = brw_buffer_create;
+   brw_screen->base.user_buffer_create = brw_user_buffer_create;
+   brw_screen->base.buffer_destroy = brw_buffer_destroy;
+
+   /* Mapping. */
+   brw_screen->base.buffer_map = brw_buffer_map;
+   brw_screen->base.buffer_map_range = brw_buffer_map_range;
+   brw_screen->base.buffer_flush_mapped_range = brw_buffer_flush_mapped_range;
+   brw_screen->base.buffer_unmap = brw_buffer_unmap;
+}
diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c
new file mode 100644
index 00000000000..e2b9954e596
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_surface.c
@@ -0,0 +1,262 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+#include "util/u_math.h"
+
+#include "pipe/p_screen.h"
+#include "brw_screen.h"
+#include "brw_defines.h"
+#include "brw_winsys.h"
+
+/* Kinds of surface view a texture can have.  The values are used to
+ * index the texture's views[] lists.
+ */
+enum {
+   BRW_VIEW_LINEAR = 0,
+   BRW_VIEW_IN_PLACE = 1
+};
+
+
+/**
+ * Decide whether a surface of \p brw_texture has to be rendered to via
+ * a separate linear view instead of directly in the texture's storage.
+ *
+ * At present the answer is always FALSE and none of the arguments are
+ * examined.  The disabled block below is kept as a sketch of the one
+ * known restriction (a surface starting part-way into a tile on the
+ * original gen4 parts); it has not been ported to Gallium.
+ *
+ * \return FALSE.  The function is declared with the Gallium boolean
+ *         type, so the Gallium constant is returned rather than
+ *         GL_FALSE.
+ */
+static boolean need_linear_view( struct brw_screen *brw_screen,
+                                 struct brw_texture *brw_texture,
+                                 union brw_surface_id id,
+                                 unsigned usage )
+{
+#if 0
+   /* XXX: what about IDGNG?
+    */
+   if (!BRW_IS_G4X(brw->brw_screen->pci_id))
+   {
+      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+      struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+      /* The original gen4 hardware couldn't set up WM surfaces pointing
+       * at an offset within a tile, which can happen when rendering to
+       * anything but the base level of a texture or the +X face/0 depth.
+       * This was fixed with the 4 Series hardware.
+       *
+       * For these original chips, you would have to make the depth and
+       * color destination surfaces include information on the texture
+       * type, LOD, face, and various limits to use them as a destination.
+       *
+       * This is easy in Gallium as surfaces are all backed by
+       * textures, but there's also a nasty requirement that the depth
+       * and the color surfaces all be of the same LOD, which is
+       * harder to get around as we can't look at a surface in
+       * isolation and decide if it's legal.
+       *
+       * Instead, end up being pessimistic and say that for i965,
+       * ... ??
+       */
+      if (brw_tex->tiling != I915_TILING_NONE &&
+          (brw_tex_image_offset(brw_tex, face, level, zslize) & 4095)) {
+         if (BRW_DEBUG & DEBUG_VIEW)
+            debug_printf("%s: need surface view for non-aligned tex image\n",
+                         __FUNCTION__);
+         return GL_TRUE;
+      }
+   }
+#endif
+
+   /* Tiled 3d textures don't have subsets that look like 2d surfaces:
+    */
+
+   /* Everything else should be fine to render to in-place:
+    */
+   return FALSE;
+}
+
+/* Look at all texture views and figure out if any of them need to be
+ * back-copied into the texture for sampling.
+ *
+ * This is currently a placeholder: the body is empty and neither
+ * argument is used.
+ */
+void brw_update_texture( struct brw_screen *brw_screen,
+ struct brw_texture *tex )
+{
+ /* currently nothing to do */
+}
+
+
+/* Create a new surface with linear layout to serve as a render-target
+ * where it would be illegal (perhaps due to tiling constraints) to do
+ * this in-place.
+ *
+ * Currently not implemented, not sure if it's needed.  The stub
+ * ignores its arguments and always returns NULL.
+ */
+static struct brw_surface *create_linear_view( struct brw_screen *brw_screen,
+ struct brw_texture *tex,
+ union brw_surface_id id,
+ unsigned usage )
+{
+ return NULL;
+}
+
+
+/* Build a surface describing one face/level of a texture in place,
+ * i.e. pointing straight into the texture's existing storage.
+ *
+ * The surface starts with a reference count of one and records the
+ * texture and its buffer object through pipe_texture_reference() and
+ * bo_reference().  Returns NULL if the surface struct cannot be
+ * allocated.
+ */
+static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen,
+                                                 struct brw_texture *tex,
+                                                 union brw_surface_id id,
+                                                 unsigned usage )
+{
+   struct brw_surface *view = CALLOC_STRUCT(brw_surface);
+
+   if (!view)
+      return NULL;
+
+   pipe_reference_init(&view->base.reference, 1);
+
+   /* XXX: ignoring render-to-slice-of-3d-texture
+    */
+   assert(id.bits.zslice == 0);
+
+   /* Generic pipe_surface description:
+    */
+   view->base.format = tex->base.format;
+   view->base.usage = usage;
+   view->base.face = id.bits.face;
+   view->base.level = id.bits.level;
+   view->base.zslice = id.bits.zslice;
+   view->base.width = u_minify(tex->base.width0, id.bits.level);
+   view->base.height = u_minify(tex->base.height0, id.bits.level);
+   view->base.offset = tex->image_offset[id.bits.level][id.bits.face];
+
+   /* Driver-private copies of the texture's layout parameters:
+    */
+   view->id = id;
+   view->cpp = tex->cpp;
+   view->pitch = tex->pitch;
+   view->tiling = tex->tiling;
+
+   bo_reference( &view->bo, tex->bo );
+   pipe_texture_reference( &view->base.texture, &tex->base );
+
+   /* Hardware surface state.  The view is always described as a 2D
+    * surface, whatever the texture's own target is.
+    */
+   view->ss.ss0.surface_type = BRW_SURFACE_2D;
+   view->ss.ss0.surface_format = tex->ss.ss0.surface_format;
+
+   if (tex->tiling == BRW_TILING_NONE) {
+      view->ss.ss1.base_addr = view->base.offset;
+   }
+   else {
+      /* Split the offset into a 4096-byte-aligned base address and
+       * the remainder below it.
+       */
+      const uint32_t tile_offset = view->base.offset % 4096;
+
+      view->ss.ss1.base_addr = view->base.offset - tile_offset;
+
+      if (!brw_screen->chipset.is_g4x) {
+         /* No x/y offset fields are programmed on these parts, so the
+          * remainder has to be zero.
+          */
+         assert(tile_offset == 0);
+      }
+      else {
+         /* 512 bytes per row for X tiling, 128 otherwise. */
+         const uint32_t row_bytes = (tex->tiling == BRW_TILING_X) ? 512 : 128;
+
+         /* Note that the low bits of these fields are missing, so
+          * there's the possibility of getting in trouble.
+          */
+         view->ss.ss5.x_offset = (tile_offset % row_bytes) / tex->cpp / 4;
+         view->ss.ss5.y_offset = tile_offset / row_bytes / 2;
+      }
+   }
+
+#if 0
+   if (region_bo != NULL)
+      surface->ss.ss1.base_addr += region_bo->offset; /* reloc */
+#endif
+
+   /* Size fields are stored minus one; tiling and pitch are inherited
+    * from the texture's own surface state.
+    */
+   view->ss.ss2.width = view->base.width - 1;
+   view->ss.ss2.height = view->base.height - 1;
+   view->ss.ss3.pitch = tex->ss.ss3.pitch;
+   view->ss.ss3.tile_walk = tex->ss.ss3.tile_walk;
+   view->ss.ss3.tiled_surface = tex->ss.ss3.tiled_surface;
+
+   return view;
+}
+
+/* Get a surface which is a view into a texture.
+ *
+ * If the texture already has a view of the required kind with the same
+ * face/level/zslice id, that view is returned as-is (no additional
+ * reference is taken here).  Otherwise a new view is created and put
+ * at the head of the texture's list for that kind.
+ *
+ * Returns NULL when a new view is needed but cannot be created: the
+ * allocation failed, or a linear view was requested (creating those
+ * is not implemented).
+ */
+static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen,
+                                                struct pipe_texture *pt,
+                                                unsigned face, unsigned level,
+                                                unsigned zslice,
+                                                unsigned usage )
+{
+   struct brw_texture *tex = brw_texture(pt);
+   struct brw_screen *bscreen = brw_screen(screen);
+   struct brw_surface *surface;
+   union brw_surface_id id;
+   int type;
+
+   /* Clear the whole id first so that any bits of 'value' not covered
+    * by the bitfields compare equal in the lookup below.
+    */
+   id.value = 0;
+   id.bits.face = face;
+   id.bits.level = level;
+   id.bits.zslice = zslice;
+
+   if (need_linear_view(bscreen, tex, id, usage))
+      type = BRW_VIEW_LINEAR;
+   else
+      type = BRW_VIEW_IN_PLACE;
+
+   /* Reuse an existing view if there is one:
+    */
+   foreach (surface, &tex->views[type]) {
+      if (id.value == surface->id.value)
+         return &surface->base;
+   }
+
+   switch (type) {
+   case BRW_VIEW_LINEAR:
+      surface = create_linear_view( bscreen, tex, id, usage );
+      break;
+   case BRW_VIEW_IN_PLACE:
+      surface = create_in_place_view( bscreen, tex, id, usage );
+      break;
+   default:
+      return NULL;
+   }
+
+   /* Both constructors can return NULL; don't link a NULL node into
+    * the list or form a pointer into it.
+    */
+   if (surface == NULL)
+      return NULL;
+
+   insert_at_head( &tex->views[type], surface );
+   return &surface->base;
+}
+
+
+/* Tear down a surface view: unlink it from the list it sits on, drop
+ * the buffer-object and texture pointers it recorded, and free it.
+ */
+static void brw_tex_surface_destroy( struct pipe_surface *surf )
+{
+   struct brw_surface *view = brw_surface(surf);
+
+   /* Take the view off its list before anything is released:
+    */
+   remove_from_list(view);
+
+   /* Unreference shared buffer, then texture:
+    */
+   bo_reference(&view->bo, NULL);
+   pipe_texture_reference( &view->base.texture, NULL );
+
+   FREE(view);
+}
+
+
+/* Plug the texture-surface entry points into the screen's function
+ * table.
+ */
+void brw_screen_tex_surface_init( struct brw_screen *brw_screen )
+{
+   struct pipe_screen *screen = &brw_screen->base;
+
+   screen->tex_surface_destroy = brw_tex_surface_destroy;
+   screen->get_tex_surface = brw_get_tex_surface;
+}
diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c
new file mode 100644
index 00000000000..894f4bea401
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_tex_layout.c
@@ -0,0 +1,414 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+
+#include "pipe/p_format.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "brw_screen.h"
+#include "brw_debug.h"
+#include "brw_winsys.h"
+
+/* Code to layout images in a mipmap tree for i965.
+ */
+
+/* Round a pitch, given in texels, up so that the row size in bytes is
+ * a multiple of the alignment chosen for the texture's tiling mode:
+ * 512 bytes for X tiling, 128 for Y tiling, 64 otherwise.
+ *
+ * Compressed textures are passed through unchanged.  The result is
+ * again expressed in texels.
+ */
+static int
+brw_tex_pitch_align (struct brw_texture *tex,
+                     int pitch)
+{
+   int byte_align;
+
+   if (tex->compressed)
+      return pitch;
+
+   if (tex->tiling == BRW_TILING_X) {
+      byte_align = 512;
+   }
+   else if (tex->tiling == BRW_TILING_Y) {
+      byte_align = 128;
+   }
+   else {
+      /* XXX: Untiled pitch alignment of 64 bytes for now to allow
+       * render-to-texture to work in all cases.  This should
+       * probably be replaced at some point by some scheme to only
+       * do this when really necessary, for example standalone
+       * render target views.
+       */
+      byte_align = 64;
+   }
+
+   /* Align in bytes, then convert back to texels: */
+   pitch = align(pitch * tex->cpp, byte_align);
+   pitch /= tex->cpp;
+
+   return pitch;
+}
+
+
+/* Report the alignment unit used when placing images of the given
+ * format: 4x4 for the DXT formats listed below, 4x2 for everything
+ * else.
+ *
+ * \param pf  texture format
+ * \param w   receives the horizontal unit
+ * \param h   receives the vertical unit
+ */
+static void
+brw_tex_alignment_unit(enum pipe_format pf,
+                       GLuint *w, GLuint *h)
+{
+   GLuint unit_h;
+
+   switch (pf) {
+   case PIPE_FORMAT_DXT1_RGB:
+   case PIPE_FORMAT_DXT1_RGBA:
+   case PIPE_FORMAT_DXT3_RGBA:
+   case PIPE_FORMAT_DXT5_RGBA:
+   case PIPE_FORMAT_DXT1_SRGB:
+   case PIPE_FORMAT_DXT1_SRGBA:
+   case PIPE_FORMAT_DXT3_SRGBA:
+   case PIPE_FORMAT_DXT5_SRGBA:
+      unit_h = 4;
+      break;
+
+   default:
+      unit_h = 2;
+      break;
+   }
+
+   /* The horizontal unit is the same in both cases. */
+   *w = 4;
+   *h = unit_h;
+}
+
+
+/* Record the placement of one mipmap level and allocate its table of
+ * per-image offsets.
+ *
+ * level_offset[level] is set to the byte offset of texel (x, y) using
+ * the texture's current pitch and cpp, nr_images[level] is recorded,
+ * and image_offset[level] is allocated with nr_images entries, of
+ * which only entry 0 is initialised (to 0).  w, h and d are used for
+ * the debug message only.
+ *
+ * The level must not have been set up before and nr_images must be at
+ * least 1 (both asserted).
+ *
+ * NOTE(review): the MALLOC result is not checked and this function has
+ * no way to report failure; an allocation failure will crash on the
+ * store below.
+ */
+static void
+brw_tex_set_level_info(struct brw_texture *tex,
+                       GLuint level,
+                       GLuint nr_images,
+                       GLuint x, GLuint y,
+                       GLuint w, GLuint h, GLuint d)
+{
+   assert(tex->image_offset[level] == NULL);
+   assert(nr_images >= 1);
+
+   tex->level_offset[level] = (x + y * tex->pitch) * tex->cpp;
+   tex->nr_images[level] = nr_images;
+
+   /* Report after level_offset has been computed, so the message shows
+    * the value just stored rather than whatever was there before.
+    */
+   if (BRW_DEBUG & DEBUG_TEXTURE)
+      debug_printf("%s level %u size: %u,%u,%u offset %u,%u (0x%x)\n", __FUNCTION__,
+                   level, w, h, d, x, y, tex->level_offset[level]);
+
+   tex->image_offset[level] = MALLOC(nr_images * sizeof(GLuint));
+   tex->image_offset[level][0] = 0;
+}
+
+
+/* Store the byte offset of image 'img' of mipmap 'level'.
+ *
+ * The value stored is the byte offset of texel (x, y), computed with
+ * the texture's current pitch and cpp, plus 'offset' bytes.
+ *
+ * The level must already have been set up with at least img + 1
+ * images (asserted), and image 0 of level 0 must sit at (0, 0).
+ */
+static void
+brw_tex_set_image_offset(struct brw_texture *tex,
+                         GLuint level, GLuint img,
+                         GLuint x, GLuint y,
+                         GLuint offset)
+{
+   const GLuint image_offset = (x + y * tex->pitch) * tex->cpp + offset;
+
+   assert((x == 0 && y == 0) || img != 0 || level != 0);
+   assert(img < tex->nr_images[level]);
+
+   tex->image_offset[level][img] = image_offset;
+
+   /* Print the value just stored; the slot comes from MALLOC and is
+    * not initialised before this point.
+    */
+   if (BRW_DEBUG & DEBUG_TEXTURE)
+      debug_printf("%s level %u img %u pos %u,%u image_offset %x\n",
+                   __FUNCTION__, level, img, x, y,
+                   image_offset);
+}
+
+
+
+/* Lay out a texture with one image per mipmap level.
+ *
+ * Level 0 sits at the origin, level 1 directly below it, and the
+ * remaining levels are stacked to the right of level 1.  Sets
+ * tex->pitch (in texels) and tex->total_height, and records the
+ * position of every level.
+ *
+ * The surface and transfer code in this driver uses
+ * image_offset[level][0] as the byte offset of the image within the
+ * buffer, so it is filled in here for every level; otherwise it stays
+ * at the zero written by brw_tex_set_level_info() and all levels alias
+ * level 0.
+ */
+static void brw_layout_2d( struct brw_texture *tex )
+{
+   GLuint align_h = 2, align_w = 4;
+   GLuint level;
+   GLuint x = 0;
+   GLuint y = 0;
+   GLuint width = tex->base.width0;
+   GLuint height = tex->base.height0;
+
+   tex->pitch = tex->base.width0;
+   brw_tex_alignment_unit(tex->base.format, &align_w, &align_h);
+
+   if (tex->compressed) {
+      tex->pitch = align(tex->base.width0, align_w);
+   }
+
+   /* May need to adjust pitch to accommodate the placement of
+    * the 2nd mipmap.  This occurs when the alignment
+    * constraints of mipmap placement push the right edge of the
+    * 2nd mipmap out past the width of its parent.
+    */
+   if (tex->base.last_level > 0) {
+      GLuint mip1_width;
+
+      if (tex->compressed) {
+         mip1_width = (align(u_minify(tex->base.width0, 1), align_w) +
+                       align(u_minify(tex->base.width0, 2), align_w));
+      } else {
+         mip1_width = (align(u_minify(tex->base.width0, 1), align_w) +
+                       u_minify(tex->base.width0, 2));
+      }
+
+      if (mip1_width > tex->pitch) {
+         tex->pitch = mip1_width;
+      }
+   }
+
+   /* Pitch must be a whole number of dwords, even though we
+    * express it in texels.
+    */
+   tex->pitch = brw_tex_pitch_align (tex, tex->pitch);
+   tex->total_height = 0;
+
+   for ( level = 0 ; level <= tex->base.last_level ; level++ ) {
+      GLuint img_height;
+
+      brw_tex_set_level_info(tex, level, 1, x, y, width, height, 1);
+
+      /* Single image per level, located at the level's own origin:
+       */
+      brw_tex_set_image_offset(tex, level, 0, x, y, 0);
+
+      if (tex->compressed)
+         img_height = MAX2(1, height/4);
+      else
+         img_height = align(height, align_h);
+
+
+      /* Because the images are packed better, the final offset
+       * might not be the maximal one:
+       */
+      tex->total_height = MAX2(tex->total_height, y + img_height);
+
+      /* Layout_below: step right after second mipmap.
+       */
+      if (level == 1) {
+         x += align(width, align_w);
+      }
+      else {
+         y += img_height;
+      }
+
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
+   }
+}
+
+
+/* Cube map layout used when chipset.is_igdng is set.
+ *
+ * Each face is a complete mipmap chain arranged as in the 2D layout
+ * (level 1 below level 0, later levels to the right of level 1).
+ * The six chains are placed one after another, 'qpitch' bytes apart.
+ * Sets tex->pitch and tex->total_height and records the offset of
+ * every face of every level.  Always returns TRUE.
+ */
+static boolean
+brw_layout_cubemap_idgng( struct brw_texture *tex )
+{
+   GLuint align_w = 4, align_h = 2;
+   GLuint width = tex->base.width0;
+   GLuint height = tex->base.height0;
+   GLuint x = 0, y = 0;
+   GLuint y_pitch;
+   GLuint face_rows;
+   GLuint qpitch;
+   GLuint level;
+
+   brw_tex_alignment_unit(tex->base.format, &align_w, &align_h);
+   y_pitch = align(height, align_h);
+
+   if (tex->compressed)
+      tex->pitch = align(tex->base.width0, align_w);
+   else
+      tex->pitch = tex->base.width0;
+
+   /* Levels 1 and 2 sit side by side; widen the pitch if together
+    * they are wider than level 0.
+    */
+   if (tex->base.last_level != 0) {
+      GLuint mip1_width = align(u_minify(tex->base.width0, 1), align_w);
+
+      if (tex->compressed)
+         mip1_width += align(u_minify(tex->base.width0, 2), align_w);
+      else
+         mip1_width += u_minify(tex->base.width0, 2);
+
+      if (tex->pitch < mip1_width)
+         tex->pitch = mip1_width;
+   }
+
+   tex->pitch = brw_tex_pitch_align(tex, tex->pitch);
+
+   /* Rows reserved per face; divided by four for compressed formats.
+    */
+   face_rows = (y_pitch +
+                align(u_minify(y_pitch, 1), align_h) +
+                11 * align_h);
+   if (tex->compressed)
+      face_rows /= 4;
+
+   qpitch = face_rows * tex->pitch * tex->cpp;
+   tex->total_height = face_rows * 6;
+
+   for (level = 0; level <= tex->base.last_level; level++) {
+      const GLuint nr_faces = 6;
+      GLuint row_step;
+      GLuint face;
+
+      brw_tex_set_level_info(tex, level, nr_faces, x, y, width, height, 1);
+
+      /* Same (x, y) within every face, faces qpitch bytes apart: */
+      for (face = 0; face < nr_faces; face++)
+         brw_tex_set_image_offset(tex, level, face, x, y, face * qpitch);
+
+      if (tex->compressed)
+         row_step = MAX2(1, height/4);
+      else
+         row_step = align(height, align_h);
+
+      /* Step right after level 1, down after every other level: */
+      if (level == 1)
+         x += align(width, align_w);
+      else
+         y += row_step;
+
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
+   }
+
+   return TRUE;
+}
+
+
+/* Layout for 3D textures, and for cube maps when chipset.is_igdng is
+ * not set.
+ *
+ * Levels are stacked vertically.  Within a level the images (depth
+ * slices, or the six cube faces) are packed left to right,
+ * pack_x_nr per row, rows pack_y_pitch apart; the packing gets denser
+ * as the levels get smaller.  Sets tex->pitch and tex->total_height
+ * and records the offset of every image.  Always returns TRUE.
+ *
+ * Image offsets are recorded relative to the start of the buffer
+ * (the level's starting row is added to each image's row), because
+ * that is how the surface and transfer code and the IGDNG cube layout
+ * in this driver interpret image_offset[][].  Recording them relative
+ * to the level would make every level overlap level 0.
+ */
+static boolean
+brw_layout_3d_cube( struct brw_texture *tex )
+{
+   GLuint width = tex->base.width0;
+   GLuint height = tex->base.height0;
+   GLuint depth = tex->base.depth0;
+   GLuint pack_x_pitch, pack_x_nr;
+   GLuint pack_y_pitch;
+   GLuint level;
+   GLuint align_h = 2;
+   GLuint align_w = 4;
+
+   tex->total_height = 0;
+   brw_tex_alignment_unit(tex->base.format, &align_w, &align_h);
+
+   if (tex->compressed) {
+      tex->pitch = align(width, align_w);
+      pack_y_pitch = (height + 3) / 4;
+   } else {
+      tex->pitch = brw_tex_pitch_align(tex, tex->base.width0);
+      pack_y_pitch = align(tex->base.height0, align_h);
+   }
+
+   pack_x_pitch = width;
+   pack_x_nr = 1;
+
+   for (level = 0 ; level <= tex->base.last_level ; level++) {
+      const GLuint nr_images = tex->base.target == PIPE_TEXTURE_3D ? depth : 6;
+      const GLuint level_y = tex->total_height;   /* first row of this level */
+      GLuint x = 0;
+      GLuint y = 0;
+      GLuint q, j;
+
+      brw_tex_set_level_info(tex, level, nr_images,
+                             0, level_y,
+                             width, height, depth);
+
+      for (q = 0; q < nr_images;) {
+         for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
+            brw_tex_set_image_offset(tex, level, q, x, level_y + y, 0);
+            x += pack_x_pitch;
+         }
+
+         x = 0;
+         y += pack_y_pitch;
+      }
+
+
+      tex->total_height += y;
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
+
+      /* Work out the packing for the next (smaller) level:
+       */
+      if (tex->compressed) {
+         pack_y_pitch = (height + 3) / 4;
+
+         if (pack_x_pitch > align(width, align_w)) {
+            pack_x_pitch = align(width, align_w);
+            pack_x_nr <<= 1;
+         }
+      } else {
+         if (pack_x_pitch > 4) {
+            pack_x_pitch >>= 1;
+            pack_x_nr <<= 1;
+            assert(pack_x_pitch * pack_x_nr <= tex->pitch);
+         }
+
+         if (pack_y_pitch > 2) {
+            pack_y_pitch >>= 1;
+            pack_y_pitch = align(pack_y_pitch, align_h);
+         }
+      }
+   }
+
+   /* The 965's sampler lays cachelines out according to how accesses
+    * in the texture surfaces run, so they may be "vertical" through
+    * memory.  As a result, the docs say in Surface Padding Requirements:
+    * Sampling Engine Surfaces that two extra rows of padding are required.
+    */
+   if (tex->base.target == PIPE_TEXTURE_CUBE)
+      tex->total_height += 2;
+
+   return TRUE;
+}
+
+
+
+/* Compute the memory layout of a texture by dispatching on its target:
+ * cube maps use the IGDNG cube layout when chipset.is_igdng is set and
+ * the shared 3D/cube layout otherwise, 3D textures use the shared
+ * 3D/cube layout, and everything else uses the 2D layout.
+ *
+ * Always returns GL_TRUE; the results of the individual layout
+ * functions are not examined.
+ */
+GLboolean brw_texture_layout(struct brw_screen *brw_screen,
+                             struct brw_texture *tex )
+{
+   const unsigned target = tex->base.target;
+
+   if (target == PIPE_TEXTURE_CUBE && brw_screen->chipset.is_igdng) {
+      brw_layout_cubemap_idgng( tex );
+   }
+   else if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_3D) {
+      brw_layout_3d_cube( tex );
+   }
+   else {
+      brw_layout_2d( tex );
+   }
+
+   if (BRW_DEBUG & DEBUG_TEXTURE)
+      debug_printf("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+                   tex->pitch,
+                   tex->total_height,
+                   tex->cpp,
+                   tex->pitch * tex->total_height * tex->cpp );
+
+   return GL_TRUE;
+}
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
new file mode 100644
index 00000000000..ff999086c02
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -0,0 +1,572 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+
+#include "brw_screen.h"
+#include "brw_defines.h"
+#include "brw_structs.h"
+#include "brw_winsys.h"
+
+
+
+/* Map a Gallium texture target onto the matching BRW_SURFACE_* type.
+ * Unknown targets trip an assert and fall back to BRW_SURFACE_1D.
+ */
+static GLuint translate_tex_target( unsigned target )
+{
+   GLuint surface_type;
+
+   switch (target) {
+   case PIPE_TEXTURE_1D:
+      surface_type = BRW_SURFACE_1D;
+      break;
+
+   case PIPE_TEXTURE_2D:
+      surface_type = BRW_SURFACE_2D;
+      break;
+
+   case PIPE_TEXTURE_3D:
+      surface_type = BRW_SURFACE_3D;
+      break;
+
+   case PIPE_TEXTURE_CUBE:
+      surface_type = BRW_SURFACE_CUBE;
+      break;
+
+   default:
+      assert(0);
+      surface_type = BRW_SURFACE_1D;
+      break;
+   }
+
+   return surface_type;
+}
+
+
+/* Map a Gallium pipe_format onto the BRW_SURFACEFORMAT_* value used in
+ * the surface state.
+ *
+ * Returns BRW_SURFACEFORMAT_INVALID for formats that have no entry;
+ * brw_is_format_supported() relies on that to answer format queries.
+ *
+ * The Gallium names used here list components in the opposite order
+ * to the hardware names (A8R8G8B8 <-> B8G8R8A8), and X8R8G8B8 follows
+ * the same rule (B8G8R8X8).
+ */
+static GLuint translate_tex_format( enum pipe_format pf )
+{
+   switch( pf ) {
+   case PIPE_FORMAT_L8_UNORM:
+      return BRW_SURFACEFORMAT_L8_UNORM;
+
+   case PIPE_FORMAT_I8_UNORM:
+      return BRW_SURFACEFORMAT_I8_UNORM;
+
+   case PIPE_FORMAT_A8_UNORM:
+      return BRW_SURFACEFORMAT_A8_UNORM;
+
+   case PIPE_FORMAT_L16_UNORM:
+      return BRW_SURFACEFORMAT_L16_UNORM;
+
+      /* XXX: Add these to gallium
+   case PIPE_FORMAT_I16_UNORM:
+      return BRW_SURFACEFORMAT_I16_UNORM;
+
+   case PIPE_FORMAT_A16_UNORM:
+      return BRW_SURFACEFORMAT_A16_UNORM;
+      */
+
+   case PIPE_FORMAT_A8L8_UNORM:
+      return BRW_SURFACEFORMAT_L8A8_UNORM;
+
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      return BRW_SURFACEFORMAT_B5G6R5_UNORM;
+
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+      return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+      return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+      /* Same component order as A8R8G8B8 below, alpha ignored. */
+      return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
+
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+   /*
+    * Video formats
+    */
+
+   case PIPE_FORMAT_YCBCR_REV:
+      return BRW_SURFACEFORMAT_YCRCB_NORMAL;
+
+   case PIPE_FORMAT_YCBCR:
+      return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
+
+   /*
+    * Compressed formats.
+    */
+      /* XXX: Add FXT to gallium?
+   case PIPE_FORMAT_FXT1_RGBA:
+      return BRW_SURFACEFORMAT_FXT1;
+      */
+
+   case PIPE_FORMAT_DXT1_RGB:
+      return BRW_SURFACEFORMAT_DXT1_RGB;
+
+   case PIPE_FORMAT_DXT1_RGBA:
+      return BRW_SURFACEFORMAT_BC1_UNORM;
+
+   case PIPE_FORMAT_DXT3_RGBA:
+      return BRW_SURFACEFORMAT_BC2_UNORM;
+
+   case PIPE_FORMAT_DXT5_RGBA:
+      return BRW_SURFACEFORMAT_BC3_UNORM;
+
+   /*
+    * sRGB formats
+    */
+
+   case PIPE_FORMAT_R8G8B8A8_SRGB:
+      /* NOTE(review): the component order here does not follow the
+       * reversal rule used for the UNORM formats above - confirm
+       * which Gallium sRGB format is really meant.
+       */
+      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
+
+   case PIPE_FORMAT_A8L8_SRGB:
+      return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB;
+
+   case PIPE_FORMAT_L8_SRGB:
+      return BRW_SURFACEFORMAT_L8_UNORM_SRGB;
+
+   case PIPE_FORMAT_DXT1_SRGB:
+      return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
+
+   /*
+    * Depth formats
+    */
+
+   case PIPE_FORMAT_Z16_UNORM:
+      return BRW_SURFACEFORMAT_I16_UNORM;
+
+   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
+      return BRW_SURFACEFORMAT_I24X8_UNORM;
+
+   case PIPE_FORMAT_Z32_FLOAT:
+      return BRW_SURFACEFORMAT_I32_FLOAT;
+
+      /* XXX: presumably for bump mapping.  Add this to mesa state
+       * tracker?
+       *
+       * XXX: Add flipped versions of these formats to Gallium.
+       */
+   case PIPE_FORMAT_R8G8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8_SNORM;
+
+   case PIPE_FORMAT_R8G8B8A8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+   default:
+      return BRW_SURFACEFORMAT_INVALID;
+   }
+}
+
+
+
+
+
+/* Create a texture from a template.
+ *
+ * Copies the template, picks a tiling mode (none for compressed
+ * formats or when the screen has no_tiling set; otherwise Y for
+ * depth/stencil formats when chipset.is_965 is set, X for the rest),
+ * computes the mipmap layout, allocates a buffer object big enough
+ * for it and pre-computes the parts of the hardware surface state
+ * that depend only on the texture.
+ *
+ * Returns the new texture with a reference count of one, or NULL if
+ * allocation of the struct, the layout or the buffer object fails.
+ * On failure everything allocated so far, including the per-level
+ * offset tables created by the layout step, is released.
+ */
+static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
+                                                const struct pipe_texture *templ )
+
+{
+   struct brw_screen *bscreen = brw_screen(screen);
+   struct brw_texture *tex;
+   enum brw_buffer_type buffer_type;
+   enum pipe_error ret;
+   unsigned level;
+
+   tex = CALLOC_STRUCT(brw_texture);
+   if (tex == NULL)
+      return NULL;
+
+   memcpy(&tex->base, templ, sizeof *templ);
+   pipe_reference_init(&tex->base.reference, 1);
+   tex->base.screen = screen;
+
+   /* XXX: compressed textures need special treatment here
+    */
+   tex->cpp = pf_get_size(tex->base.format);
+   tex->compressed = pf_is_compressed(tex->base.format);
+
+   make_empty_list(&tex->views[0]);
+   make_empty_list(&tex->views[1]);
+
+   /* XXX: No tiling with compressed textures??
+    */
+   if (tex->compressed == 0 &&
+       !bscreen->no_tiling)
+   {
+      if (bscreen->chipset.is_965 &&
+          pf_is_depth_or_stencil(templ->format))
+         tex->tiling = BRW_TILING_Y;
+      else
+         tex->tiling = BRW_TILING_X;
+   }
+   else {
+      tex->tiling = BRW_TILING_NONE;
+   }
+
+   if (!brw_texture_layout( bscreen, tex ))
+      goto fail;
+
+   if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
+                           PIPE_TEXTURE_USAGE_PRIMARY)) {
+      buffer_type = BRW_BUFFER_TYPE_SCANOUT;
+   }
+   else {
+      buffer_type = BRW_BUFFER_TYPE_TEXTURE;
+   }
+
+   ret = bscreen->sws->bo_alloc( bscreen->sws,
+                                 buffer_type,
+                                 tex->pitch * tex->total_height * tex->cpp,
+                                 64,
+                                 &tex->bo );
+   if (ret)
+      goto fail;
+
+   tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   tex->ss.ss0.surface_type = translate_tex_target(tex->base.target);
+   tex->ss.ss0.surface_format = translate_tex_format(tex->base.format);
+   assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID);
+
+   /* This is ok for all textures with channel width 8bit or less:
+    */
+/*    tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+
+
+   /* XXX: what happens when tex->bo->offset changes???
+    */
+   tex->ss.ss1.base_addr = 0; /* reloc */
+   tex->ss.ss2.mip_count = tex->base.last_level;
+   tex->ss.ss2.width = tex->base.width0 - 1;
+   tex->ss.ss2.height = tex->base.height0 - 1;
+
+   switch (tex->tiling) {
+   case BRW_TILING_NONE:
+      tex->ss.ss3.tiled_surface = 0;
+      tex->ss.ss3.tile_walk = 0;
+      break;
+   case BRW_TILING_X:
+      tex->ss.ss3.tiled_surface = 1;
+      tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+      break;
+   case BRW_TILING_Y:
+      tex->ss.ss3.tiled_surface = 1;
+      tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR;
+      break;
+   }
+
+   /* Pitch is programmed in bytes, minus one: */
+   tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1;
+   tex->ss.ss3.depth = tex->base.depth0 - 1;
+
+   tex->ss.ss4.min_lod = 0;
+
+   if (tex->base.target == PIPE_TEXTURE_CUBE) {
+      tex->ss.ss0.cube_pos_x = 1;
+      tex->ss.ss0.cube_pos_y = 1;
+      tex->ss.ss0.cube_pos_z = 1;
+      tex->ss.ss0.cube_neg_x = 1;
+      tex->ss.ss0.cube_neg_y = 1;
+      tex->ss.ss0.cube_neg_z = 1;
+   }
+
+   return &tex->base;
+
+fail:
+   /* The layout step allocates one offset table per mipmap level;
+    * levels it never reached are still NULL from CALLOC_STRUCT.
+    */
+   for (level = 0; level <= tex->base.last_level; level++)
+      FREE(tex->image_offset[level]);
+
+   bo_reference(&tex->bo, NULL);
+   FREE(tex);
+   return NULL;
+}
+
+/* Stub for the screen's texture_blanket hook: wrapping an existing
+ * pipe_buffer as a texture is not implemented here, so the arguments
+ * are ignored and NULL is always returned.
+ */
+static struct pipe_texture *brw_texture_blanket(struct pipe_screen *screen,
+ const struct pipe_texture *templ,
+ const unsigned *stride,
+ struct pipe_buffer *buffer)
+{
+ return NULL;
+}
+
+/* Destroy a texture: free the per-level image offset tables created
+ * by the layout code, drop the buffer object and free the struct.
+ */
+static void brw_texture_destroy(struct pipe_texture *pt)
+{
+   struct brw_texture *tex = brw_texture(pt);
+   unsigned level;
+
+   /* One table per mipmap level was allocated at layout time; without
+    * this they are leaked every time a texture is destroyed.
+    */
+   for (level = 0; level <= tex->base.last_level; level++)
+      FREE(tex->image_offset[level]);
+
+   bo_reference(&tex->bo, NULL);
+   FREE(tex);
+}
+
+
+/* Report whether a format can be used for textures.
+ *
+ * The answer depends only on whether translate_tex_format() knows the
+ * format; target, usage and geometry flags are not taken into
+ * account.
+ */
+static boolean brw_is_format_supported( struct pipe_screen *screen,
+                                        enum pipe_format format,
+                                        enum pipe_texture_target target,
+                                        unsigned tex_usage,
+                                        unsigned geom_flags )
+{
+   const GLuint hw_format = translate_tex_format(format);
+
+   return hw_format != BRW_SURFACEFORMAT_INVALID;
+}
+
+
+/* Ask the winsys whether 'bo' references the storage of one face/level
+ * of a texture.
+ *
+ * The texture's own buffer object is checked first, then the buffer
+ * object of every view of that face/level which does not simply share
+ * the texture's buffer object.
+ */
+boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen,
+                                         struct pipe_texture *texture,
+                                         unsigned face,
+                                         unsigned level,
+                                         struct brw_winsys_buffer *bo )
+{
+   struct brw_texture *tex = brw_texture(texture);
+   struct brw_surface *view;
+   int kind;
+
+   /* XXX: this is subject to false positives if the underlying
+    * texture BO is referenced, we can't tell whether the sub-region
+    * we care about participates in that.
+    */
+   if (brw_screen->sws->bo_references( bo, tex->bo ))
+      return TRUE;
+
+   /* Walk both view lists looking for a view of this face/level that
+    * has storage of its own and is referenced:
+    */
+   for (kind = 0; kind < 2; kind++) {
+      foreach (view, &tex->views[kind]) {
+         if (view->bo != tex->bo &&
+             view->id.bits.face == face &&
+             view->id.bits.level == level &&
+             brw_screen->sws->bo_references( bo, view->bo)) {
+            return TRUE;
+         }
+      }
+   }
+
+   return FALSE;
+}
+
+
+/*
+ * Transfer functions
+ */
+
+/* Create a transfer object describing a rectangle of one image of a
+ * texture.
+ *
+ * The image is selected by face for cube maps and by zslice for 3D
+ * textures; for any other target face and zslice must both be zero
+ * (asserted).  The transfer takes a reference on the texture and
+ * records the image's byte offset, the texture's row stride in bytes
+ * and the requested rectangle and usage.
+ *
+ * Returns NULL if the transfer struct cannot be allocated.
+ */
+static struct pipe_transfer*
+brw_get_tex_transfer(struct pipe_screen *screen,
+                     struct pipe_texture *texture,
+                     unsigned face, unsigned level, unsigned zslice,
+                     enum pipe_transfer_usage usage, unsigned x, unsigned y,
+                     unsigned w, unsigned h)
+{
+   struct brw_texture *tex = brw_texture(texture);
+   struct brw_transfer *trans;
+   unsigned offset;  /* in bytes */
+
+   if (texture->target == PIPE_TEXTURE_CUBE) {
+      offset = tex->image_offset[level][face];
+   } else if (texture->target == PIPE_TEXTURE_3D) {
+      offset = tex->image_offset[level][zslice];
+   } else {
+      offset = tex->image_offset[level][0];
+      assert(face == 0);
+      assert(zslice == 0);
+   }
+
+   trans = CALLOC_STRUCT(brw_transfer);
+   if (trans == NULL)
+      return NULL;
+
+   pipe_texture_reference(&trans->base.texture, texture);
+
+   /* The format comes from the texture; the freshly allocated
+    * transfer has nothing meaningful in that field yet.
+    */
+   trans->base.format = texture->format;
+   trans->base.x = x;
+   trans->base.y = y;
+   trans->base.width = w;
+   trans->base.height = h;
+   trans->base.block = texture->block;
+   trans->base.nblocksx = texture->nblocksx[level];
+   trans->base.nblocksy = texture->nblocksy[level];
+   trans->base.stride = tex->pitch * tex->cpp;
+   trans->base.usage = usage;
+   trans->offset = offset;
+
+   return &trans->base;
+}
+
+/* Map the texture behind a transfer and return a pointer to the first
+ * block of the transfer's rectangle.
+ *
+ * The whole buffer object is mapped, for writing if the transfer's
+ * usage includes PIPE_TRANSFER_WRITE; the two trailing bo_map flags
+ * are always passed as FALSE.  Returns NULL if the winsys fails to
+ * map the buffer.
+ */
+static void *
+brw_transfer_map(struct pipe_screen *screen,
+                 struct pipe_transfer *transfer)
+{
+   struct brw_winsys_screen *sws = brw_screen(screen)->sws;
+   struct brw_texture *tex = brw_texture(transfer->texture);
+   const unsigned usage = transfer->usage;
+   char *ptr;
+
+   ptr = sws->bo_map(tex->bo,
+                     BRW_DATA_OTHER,
+                     0,
+                     tex->bo->size,
+                     (usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE,
+                     FALSE,
+                     FALSE);
+   if (ptr == NULL)
+      return NULL;
+
+   /* Start of the image, then down to the right block row, then
+    * across to the right block:
+    */
+   ptr += brw_transfer(transfer)->offset;
+   ptr += transfer->y / transfer->block.height * transfer->stride;
+   ptr += transfer->x / transfer->block.width * transfer->block.size;
+
+   return ptr;
+}
+
+/* Undo brw_transfer_map(): unmap the buffer object of the texture the
+ * transfer refers to.
+ */
+static void
+brw_transfer_unmap(struct pipe_screen *screen,
+                   struct pipe_transfer *transfer)
+{
+   struct brw_winsys_screen *sws = brw_screen(screen)->sws;
+   struct brw_texture *mapped = brw_texture(transfer->texture);
+
+   sws->bo_unmap(mapped->bo);
+}
+
+/* Destroy a transfer object: release its texture pointer, then free
+ * the transfer itself.
+ */
+static void
+brw_tex_transfer_destroy(struct pipe_transfer *trans)
+{
+   struct pipe_texture **held = &trans->texture;
+
+   pipe_texture_reference(held, NULL);
+   FREE(trans);
+}
+
+
+/*
+ * Functions exported to the winsys
+ */
+
+/* Hand the winsys the buffer object behind a texture.
+ *
+ * \param texture  texture to query
+ * \param buffer   receives the texture's buffer object; no additional
+ *                 reference is taken here
+ * \param stride   if not NULL, receives the row stride in bytes
+ * \return always TRUE
+ */
+boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture,
+                                      struct brw_winsys_buffer **buffer,
+                                      unsigned *stride)
+{
+   struct brw_texture *tex = brw_texture(texture);
+
+   if (stride != NULL)
+      *stride = tex->pitch * tex->cpp;
+
+   *buffer = tex->bo;
+
+   return TRUE;
+}
+
+/* Wrap an existing winsys buffer object as a texture.
+ *
+ * Only plain 2D, single-level, depth-1, uncompressed textures are
+ * accepted.  The layout is computed as for a normal texture and the
+ * supplied pitch (in bytes) must be at least as large as the computed
+ * one; the supplied pitch and tiling mode are then used for the
+ * texture.
+ *
+ * The buffer pointer is stored as-is; bo_reference() is not called
+ * on it here.
+ *
+ * Returns the new texture with a reference count of one, or NULL if
+ * the template is not acceptable, allocation fails or the pitch is
+ * too small.  On failure the offset tables created by the layout
+ * step are released along with the texture struct.
+ */
+struct pipe_texture *
+brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
+                                  const struct pipe_texture *templ,
+                                  unsigned pitch,
+                                  unsigned tiling,
+                                  struct brw_winsys_buffer *buffer)
+{
+   struct brw_screen *bscreen = brw_screen(screen);
+   struct brw_texture *tex;
+   unsigned level;
+
+   if (templ->target != PIPE_TEXTURE_2D ||
+       templ->last_level != 0 ||
+       templ->depth0 != 1)
+      return NULL;
+
+   if (pf_is_compressed(templ->format))
+      return NULL;
+
+   tex = CALLOC_STRUCT(brw_texture);
+   if (!tex)
+      return NULL;
+
+   memcpy(&tex->base, templ, sizeof *templ);
+   pipe_reference_init(&tex->base.reference, 1);
+   tex->base.screen = screen;
+
+   tex->cpp = pf_get_size(tex->base.format);
+   tex->tiling = tiling;
+
+   make_empty_list(&tex->views[0]);
+   make_empty_list(&tex->views[1]);
+
+   if (!brw_texture_layout(bscreen, tex))
+      goto fail;
+
+   /* XXX Maybe some more checks? */
+   if ((pitch / tex->cpp) < tex->pitch)
+      goto fail;
+
+   tex->pitch = pitch / tex->cpp;
+
+   tex->bo = buffer;
+
+   /* fix this warning */
+#if 0
+   if (tex->size > buffer->size)
+      goto fail;
+#endif
+
+   tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   tex->ss.ss0.surface_type = translate_tex_target(tex->base.target);
+   tex->ss.ss0.surface_format = translate_tex_format(tex->base.format);
+   assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID);
+
+   /* This is ok for all textures with channel width 8bit or less:
+    */
+/*    tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+
+
+   /* XXX: what happens when tex->bo->offset changes???
+    */
+   tex->ss.ss1.base_addr = 0; /* reloc */
+   tex->ss.ss2.mip_count = tex->base.last_level;
+   tex->ss.ss2.width = tex->base.width0 - 1;
+   tex->ss.ss2.height = tex->base.height0 - 1;
+
+   switch (tex->tiling) {
+   case BRW_TILING_NONE:
+      tex->ss.ss3.tiled_surface = 0;
+      tex->ss.ss3.tile_walk = 0;
+      break;
+   case BRW_TILING_X:
+      tex->ss.ss3.tiled_surface = 1;
+      tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+      break;
+   case BRW_TILING_Y:
+      tex->ss.ss3.tiled_surface = 1;
+      tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR;
+      break;
+   }
+
+   /* Pitch is programmed in bytes, minus one: */
+   tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1;
+   tex->ss.ss3.depth = tex->base.depth0 - 1;
+
+   tex->ss.ss4.min_lod = 0;
+
+   return &tex->base;
+
+fail:
+   /* The layout step allocates one offset table per mipmap level;
+    * levels it never reached are still NULL from CALLOC_STRUCT.
+    * tex->bo has not been set on any path that gets here.
+    */
+   for (level = 0; level <= tex->base.last_level; level++)
+      FREE(tex->image_offset[level]);
+
+   FREE(tex);
+   return NULL;
+}
+
+/* Plug the texture and texture-transfer entry points into the screen's
+ * function table.
+ */
+void brw_screen_tex_init( struct brw_screen *brw_screen )
+{
+   struct pipe_screen *screen = &brw_screen->base;
+
+   /* Format queries and texture lifetime: */
+   screen->is_format_supported = brw_is_format_supported;
+   screen->texture_create = brw_texture_create;
+   screen->texture_blanket = brw_texture_blanket;
+   screen->texture_destroy = brw_texture_destroy;
+
+   /* Transfers: */
+   screen->get_tex_transfer = brw_get_tex_transfer;
+   screen->tex_transfer_destroy = brw_tex_transfer_destroy;
+   screen->transfer_map = brw_transfer_map;
+   screen->transfer_unmap = brw_transfer_unmap;
+}
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
new file mode 100644
index 00000000000..e1986a9dbbd
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -0,0 +1,216 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_state.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_pipe_rast.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+#include "brw_state.h"
+
+/* Generate the SF program described by 'key' and hand it to the program
+ * cache.
+ *
+ * bo_out is passed straight through to brw_upload_cache().
+ *
+ * Returns PIPE_OK on success, PIPE_ERROR_BAD_INPUT for an unknown
+ * key->primitive, or whatever error brw_get_program() /
+ * brw_upload_cache() report.
+ */
+static enum pipe_error compile_sf_prog( struct brw_context *brw,
+                                        struct brw_sf_prog_key *key,
+                                        struct brw_winsys_buffer **bo_out )
+{
+   struct brw_sf_compile c;
+   const GLuint *program;
+   GLuint program_size;
+   enum pipe_error ret;
+
+   memset(&c, 0, sizeof(c));
+
+   /* Begin the compilation:
+    */
+   brw_init_compile(brw, &c.func);
+
+   c.key = *key;
+
+   /* Two attributes are packed per register, hence the round-up.
+    */
+   c.nr_attrs = c.key.nr_attrs;
+   c.nr_setup_attrs = c.key.nr_attrs;
+   c.nr_attr_regs = (c.nr_attrs + 1) / 2;
+   c.nr_setup_regs = (c.nr_setup_attrs + 1) / 2;
+
+   c.prog_data.urb_read_length = c.nr_attr_regs;
+   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+
+   /* Special case when there are no attributes to setup (only a
+    * single attribute is counted).
+    *
+    * XXX: should be able to set nr_setup_attrs to nr_attrs-1 -- but
+    * breaks vp-tris.c
+    */
+   if (c.nr_attrs == 1) {
+      c.nr_verts = 0;
+      brw_emit_null_setup( &c );
+   }
+   else {
+      /* Which primitive?  Or all three?
+       */
+      switch (key->primitive) {
+      case SF_POINTS:
+         c.nr_verts = 1;
+         if (key->do_point_sprite) {
+            brw_emit_point_sprite_setup( &c, GL_TRUE );
+         }
+         else {
+            brw_emit_point_setup( &c, GL_TRUE );
+         }
+         break;
+      case SF_LINES:
+         c.nr_verts = 2;
+         brw_emit_line_setup( &c, GL_TRUE );
+         break;
+      case SF_TRIANGLES:
+         c.nr_verts = 3;
+         brw_emit_tri_setup( &c, GL_TRUE );
+         break;
+      case SF_UNFILLED_TRIS:
+         c.nr_verts = 3;
+         brw_emit_anyprim_setup( &c );
+         break;
+      default:
+         assert(0);
+         return PIPE_ERROR_BAD_INPUT;
+      }
+   }
+
+   /* Get the generated program:
+    */
+   ret = brw_get_program(&c.func, &program, &program_size);
+   if (ret)
+      return ret;
+
+   /* Upload:
+    */
+   ret = brw_upload_cache( &brw->cache, BRW_SF_PROG,
+                           &c.key, sizeof(c.key),
+                           NULL, 0,
+                           program, program_size,
+                           &c.prog_data,
+                           &brw->sf.prog_data,
+                           bo_out);
+
+   return ret ? ret : PIPE_OK;
+}
+
+/* Calculate interpolants for triangle and line rasterization.
+ *
+ * Builds the SF program key from the current fragment shader signature,
+ * reduced primitive and rasterizer state, looks it up in the program
+ * cache and compiles a new program on a miss.
+ *
+ * Returns PIPE_OK on a cache hit or successful compile, otherwise the
+ * error reported by compile_sf_prog().
+ */
+static enum pipe_error upload_sf_prog(struct brw_context *brw)
+{
+   const struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature;
+   struct brw_sf_prog_key key;
+   enum pipe_error ret;
+   unsigned i;
+
+   /* Zero the whole key, padding included, since it is passed to the
+    * cache by address and size.
+    */
+   memset(&key, 0, sizeof(key));
+
+   /* Populate the key, noting state dependencies:
+    */
+
+   /* XXX: Add one to account for the position input.
+    */
+   /* PIPE_NEW_FRAGMENT_SIGNATURE */
+   key.nr_attrs = sig->nr_inputs + 1;
+
+
+   /* XXX: why is position required to be linear?  why do we care
+    * about it at all?
+    */
+   key.linear_attrs = 1;        /* position -- but why? */
+
+   /* Fragment input i lives in attribute slot i+1 (slot 0 is position).
+    * The shift is done on an unsigned value so that slot 31 does not
+    * shift into the sign bit of an int.
+    */
+   for (i = 0; i < sig->nr_inputs; i++) {
+      switch (sig->input[i].interp) {
+      case TGSI_INTERPOLATE_CONSTANT:
+         break;
+      case TGSI_INTERPOLATE_LINEAR:
+         key.linear_attrs |= 1u << (i+1);
+         break;
+      case TGSI_INTERPOLATE_PERSPECTIVE:
+         key.persp_attrs |= 1u << (i+1);
+         break;
+      }
+   }
+
+   /* BRW_NEW_REDUCED_PRIMITIVE
+    *
+    * Any value not listed below leaves key.primitive at the zero set by
+    * the memset above, i.e. SF_POINTS.
+    */
+   switch (brw->reduced_primitive) {
+   case PIPE_PRIM_TRIANGLES:
+      /* PIPE_NEW_RAST
+       */
+      if (brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL ||
+          brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL)
+         key.primitive = SF_UNFILLED_TRIS;
+      else
+         key.primitive = SF_TRIANGLES;
+      break;
+   case PIPE_PRIM_LINES:
+      key.primitive = SF_LINES;
+      break;
+   case PIPE_PRIM_POINTS:
+      key.primitive = SF_POINTS;
+      break;
+   }
+
+   key.do_point_sprite = brw->curr.rast->templ.point_sprite;
+   key.sprite_origin_lower_left = 0; /* XXX: ctx->Point.SpriteOrigin - fix rast state */
+   key.do_flat_shading = brw->curr.rast->templ.flatshade;
+   key.do_twoside_color = brw->curr.rast->templ.light_twoside;
+
+   /* Winding only matters to the generated program when two-sided
+    * colour is enabled; leaving it zero otherwise keeps the key stable.
+    */
+   if (key.do_twoside_color) {
+      key.frontface_ccw = (brw->curr.rast->templ.front_winding ==
+                           PIPE_WINDING_CCW);
+   }
+
+   /* Cache hit: nothing more to do.
+    */
+   if (brw_search_cache(&brw->cache, BRW_SF_PROG,
+                        &key, sizeof(key),
+                        NULL, 0,
+                        &brw->sf.prog_data,
+                        &brw->sf.prog_bo))
+      return PIPE_OK;
+
+   ret = compile_sf_prog( brw, &key, &brw->sf.prog_bo );
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+
+
+/* Tracked-state atom for the SF program: upload_sf_prog is installed as
+ * the prepare hook, with the dirty flags listed below.
+ */
+const struct brw_tracked_state brw_sf_prog = {
+ .dirty = {
+ .mesa = (PIPE_NEW_RAST | PIPE_NEW_FRAGMENT_SIGNATURE),
+ .brw = (BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = 0
+ },
+ .prepare = upload_sf_prog
+};
+
diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h
new file mode 100644
index 00000000000..a895c7d2f6a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf.h
@@ -0,0 +1,122 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_SF_H
+#define BRW_SF_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+
+/* Primitive classes stored in brw_sf_prog_key::primitive (a 2-bit
+ * field, so values must stay within 0..3).
+ */
+#define SF_POINTS 0
+#define SF_LINES 1
+#define SF_TRIANGLES 2
+#define SF_UNFILLED_TRIS 3
+
+/* Key describing one SF program variant.
+ */
+struct brw_sf_prog_key {
+
+ /* Bitmask of linear and perspective interpolated inputs, 0..nr
+ */
+ GLuint persp_attrs:32;
+ GLuint linear_attrs:32;
+ GLuint point_coord_replace_attrs:32; /* per-attribute bitmask, tested by the point sprite setup */
+
+ GLuint nr_attrs:8; /* number of attributes */
+ GLuint primitive:2; /* one of the SF_* values */
+ GLuint do_twoside_color:1;
+ GLuint do_flat_shading:1;
+ GLuint frontface_ccw:1;
+ GLuint do_point_sprite:1;
+ GLuint sprite_origin_lower_left:1;
+ GLuint pad:17; /* rounds the flag fields above up to 32 bits */
+
+ /* Attribute indices of the front (col) and back-face (bfc) colours;
+ * the emit code treats zero as "not present".
+ */
+ GLuint attr_col0:8;
+ GLuint attr_col1:8;
+ GLuint attr_bfc0:8;
+ GLuint attr_bfc1:8;
+};
+
+/* NOTE(review): not referenced in the visible part of this patch --
+ * confirm whether it is still needed.
+ */
+struct brw_sf_point_tex {
+ GLboolean CoordReplace;
+};
+
+/* Working state for generating one SF program.
+ */
+struct brw_sf_compile {
+ struct brw_compile func; /* instruction emitter state */
+ struct brw_sf_prog_key key; /* copy of the key being compiled */
+ struct brw_sf_prog_data prog_data;
+
+ /* Registers assigned by alloc_regs() in brw_sf_emit.c:
+ */
+ struct brw_reg pv;
+ struct brw_reg det;
+ struct brw_reg dx0;
+ struct brw_reg dx2;
+ struct brw_reg dy0;
+ struct brw_reg dy2;
+
+ /* z and 1/w passed in separately:
+ */
+ struct brw_reg z[3];
+ struct brw_reg inv_w[3];
+
+ /* The vertices:
+ */
+ struct brw_reg vert[3];
+
+ /* Temporaries, allocated after last vertex reg.
+ */
+ struct brw_reg inv_det;
+ struct brw_reg a1_sub_a0;
+ struct brw_reg a2_sub_a0;
+ struct brw_reg tmp;
+
+ /* Message registers holding the output coefficients:
+ */
+ struct brw_reg m1Cx;
+ struct brw_reg m2Cy;
+ struct brw_reg m3C0;
+
+ GLuint nr_verts;
+ GLuint nr_attrs;
+ GLuint nr_attr_regs; /* (nr_attrs+1)/2: two attributes per register */
+ GLuint nr_setup_attrs;
+ GLuint nr_setup_regs; /* (nr_setup_attrs+1)/2 */
+
+ GLuint point_coord_replace_mask;
+};
+
+
+/* Setup-program generators, implemented in brw_sf_emit.c.  Where an
+ * 'allocate' argument is present, a true value makes the function
+ * assign registers itself before emitting code.
+ */
+void brw_emit_null_setup( struct brw_sf_compile *c );
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_anyprim_setup( struct brw_sf_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
new file mode 100644
index 00000000000..3b85725e368
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -0,0 +1,765 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+
+
+/* Return a vec4 GRF region for attribute 'attr' of the vertex whose
+ * first register is 'vert'.  Two attributes share each register: even
+ * ones at element 0, odd ones at element 4.  'c' is not used.
+ */
+static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
+                                    struct brw_reg vert,
+                                    GLuint attr)
+{
+   return brw_vec4_grf(vert.nr + attr / 2, (attr % 2) * 4);
+}
+
+
+/***********************************************************************
+ * Twoside lighting
+ */
+/* Emit MOVs that overwrite the front colour(s) of 'vert' with the
+ * matching back-face colour(s).  A pair is skipped unless both of its
+ * attribute indices are non-zero.
+ */
+static void copy_bfc( struct brw_sf_compile *c,
+                      struct brw_reg vert )
+{
+   struct brw_compile *p = &c->func;
+   const GLuint front0 = c->key.attr_col0;
+   const GLuint back0 = c->key.attr_bfc0;
+   const GLuint front1 = c->key.attr_col1;
+   const GLuint back1 = c->key.attr_bfc1;
+
+   if (front0 && back0) {
+      brw_MOV(p,
+              get_vert_attr(c, vert, front0),
+              get_vert_attr(c, vert, back0));
+   }
+
+   if (front1 && back1) {
+      brw_MOV(p,
+              get_vert_attr(c, vert, front1),
+              get_vert_attr(c, vert, back1));
+   }
+}
+
+
+/* Emit code that replaces the front colours of every vertex with the
+ * back-face colours when the sign of the determinant (c->det), compared
+ * according to key.frontface_ccw, selects the back face.
+ */
+static void do_twoside_color( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *if_insn;
+ GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ /* XXX: What happens if BFC isn't present? This could only happen
+ * for user-supplied vertex programs, as t_vp_build.c always does
+ * the right thing.
+ */
+ if (!(c->key.attr_col0 && c->key.attr_bfc0) &&
+ !(c->key.attr_col1 && c->key.attr_bfc1))
+ return;
+
+ /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
+ * to get all channels active inside the IF. In the clipping code
+ * we run with NoMask, so it's not an option and we can use
+ * BRW_EXECUTE_1 for all comparisons.
+ */
+ brw_push_insn_state(p);
+ brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
+ if_insn = brw_IF(p, BRW_EXECUTE_4);
+ {
+ /* Deliberate fallthrough: handles vertices nr_verts-1 down to 0. */
+ switch (c->nr_verts) {
+ case 3: copy_bfc(c, c->vert[2]); /* fallthrough */
+ case 2: copy_bfc(c, c->vert[1]); /* fallthrough */
+ case 1: copy_bfc(c, c->vert[0]);
+ }
+ }
+ brw_ENDIF(p, if_insn);
+ brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Flat shading
+ */
+
+/* NOTE(review): not referenced in the visible part of this file; looks
+ * like a leftover -- confirm before relying on or removing it.
+ */
+#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
+ (1<<VERT_RESULT_COL1))
+
+/* Emit one MOV per colour attribute present in the key (non-zero
+ * attr_col0 / attr_col1), copying that attribute from vertex 'src' to
+ * vertex 'dst'.  The flat-shading jump tables rely on exactly one
+ * instruction being emitted per present colour.
+ */
+static void copy_colors( struct brw_sf_compile *c,
+                         struct brw_reg dst,
+                         struct brw_reg src)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint col0 = c->key.attr_col0;
+   const GLuint col1 = c->key.attr_col1;
+
+   if (col0) {
+      brw_MOV(p,
+              get_vert_attr(c, dst, col0),
+              get_vert_attr(c, src, col0));
+   }
+
+   if (col1) {
+      brw_MOV(p,
+              get_vert_attr(c, dst, col1),
+              get_vert_attr(c, src, col1));
+   }
+}
+
+
+
+/* Need to use a computed jump to copy flatshaded attributes as the
+ * vertices are ordered according to y-coordinate before reaching this
+ * point, so the PV could be anywhere.
+ */
+/* Emit a three-way jump table indexed by c->pv: each case copies the
+ * colours of one vertex to the other two.  The jump distances below are
+ * instruction counts and depend on copy_colors() emitting exactly 'nr'
+ * MOVs per call.
+ */
+static void do_flatshade_triangle( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ GLuint jmpi = 1; /* scale applied to every jump distance */
+ GLuint nr = 0; /* number of colour attributes present */
+
+ if (c->key.attr_col0)
+ nr++;
+
+ if (c->key.attr_col1)
+ nr++;
+
+ if (nr == 0)
+ return;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ /* NOTE(review): presumably jump distances are counted in different
+ * units on IGDNG -- confirm against the hardware docs.
+ */
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2;
+
+ brw_push_insn_state(p);
+
+ /* Each of the first two cases is nr*2 MOVs plus one JMPI long. */
+ brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
+ brw_JMPI(p, ip, ip, c->pv);
+
+ /* Case 0: vertex 0 supplies the colours; then skip cases 1 and 2
+ * (nr*2 + 1 + nr*2 instructions).
+ */
+ copy_colors(c, c->vert[1], c->vert[0]);
+ copy_colors(c, c->vert[2], c->vert[0]);
+ brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
+
+ /* Case 1: vertex 1 supplies the colours; then skip case 2 (nr*2). */
+ copy_colors(c, c->vert[0], c->vert[1]);
+ copy_colors(c, c->vert[2], c->vert[1]);
+ brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
+
+ /* Case 2: vertex 2 supplies the colours. */
+ copy_colors(c, c->vert[0], c->vert[2]);
+ copy_colors(c, c->vert[1], c->vert[2]);
+
+ brw_pop_insn_state(p);
+}
+
+
+/* Two-vertex variant of the flat-shading jump table: jump on c->pv to
+ * the case that copies the colours of one vertex to the other.  The
+ * jump distances are instruction counts and depend on copy_colors()
+ * emitting exactly 'nr' MOVs per call.
+ */
+static void do_flatshade_line( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ GLuint jmpi = 1; /* scale applied to every jump distance */
+ GLuint nr = 0; /* number of colour attributes present */
+
+ if (c->key.attr_col0)
+ nr++;
+
+ if (c->key.attr_col1)
+ nr++;
+
+ if (nr == 0)
+ return;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2;
+
+ brw_push_insn_state(p);
+
+ /* Case 0 is nr MOVs plus one JMPI long. */
+ brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
+ brw_JMPI(p, ip, ip, c->pv);
+ copy_colors(c, c->vert[1], c->vert[0]);
+
+ /* Skip case 1 (nr MOVs). */
+ brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
+ copy_colors(c, c->vert[0], c->vert[1]);
+
+ brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Triangle setup.
+ */
+
+
+/* Assign every register the setup programs use.  Reads c->nr_verts and
+ * c->nr_attr_regs, fills in the brw_reg members of 'c' and records the
+ * number of GRFs used in c->prog_data.total_grf.
+ */
+static void alloc_regs( struct brw_sf_compile *c )
+{
+   const GLuint first_vert_reg = 3;
+   GLuint next, v;
+
+   /* Values computed by fixed function unit:
+    */
+   c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
+   c->det = brw_vec1_grf(1, 2);
+   c->dx0 = brw_vec1_grf(1, 3);
+   c->dx2 = brw_vec1_grf(1, 4);
+   c->dy0 = brw_vec1_grf(1, 5);
+   c->dy2 = brw_vec1_grf(1, 6);
+
+   /* z and 1/w passed in separately, interleaved per vertex:
+    */
+   c->z[0] = brw_vec1_grf(2, 0);
+   c->inv_w[0] = brw_vec1_grf(2, 1);
+   c->z[1] = brw_vec1_grf(2, 2);
+   c->inv_w[1] = brw_vec1_grf(2, 3);
+   c->z[2] = brw_vec1_grf(2, 4);
+   c->inv_w[2] = brw_vec1_grf(2, 5);
+
+   /* The vertices, nr_attr_regs registers apiece:
+    */
+   for (v = 0; v < c->nr_verts; v++) {
+      c->vert[v] = brw_vec8_grf(first_vert_reg + v * c->nr_attr_regs, 0);
+   }
+   next = first_vert_reg + c->nr_verts * c->nr_attr_regs;
+
+   /* Temporaries, allocated after last vertex reg.
+    */
+   c->inv_det = brw_vec1_grf(next++, 0);
+   c->a1_sub_a0 = brw_vec8_grf(next++, 0);
+   c->a2_sub_a0 = brw_vec8_grf(next++, 0);
+   c->tmp = brw_vec8_grf(next++, 0);
+
+   /* Note grf allocation:
+    */
+   c->prog_data.total_grf = next;
+
+   /* Outputs of this program - interpolation coefficients for
+    * rasterization:
+    */
+   c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
+   c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
+   c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
+}
+
+
+/* For each vertex, emit one two-element MOV that copies its z and 1/w
+ * (adjacent in the payload) into its first register, starting at
+ * element 2.
+ */
+static void copy_z_inv_w( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   GLuint v;
+
+   brw_push_insn_state(p);
+
+   /* Copy both scalars with a single MOV:
+    */
+   for (v = 0; v < c->nr_verts; v++) {
+      struct brw_reg dst = vec2(suboffset(c->vert[v], 2));
+      struct brw_reg src = vec2(c->z[v]);
+
+      brw_MOV(p, dst, src);
+   }
+
+   brw_pop_insn_state(p);
+}
+
+
+/* Emit a math instruction computing c->inv_det = 1 / c->det.
+ */
+static void invert_det( struct brw_sf_compile *c)
+{
+   struct brw_compile *p = &c->func;
+
+   /* Looks like we invert all 8 elements just to get 1/det in
+    * position 2 !?!
+    */
+   brw_math(p,
+            c->inv_det,
+            BRW_MATH_FUNCTION_INV,
+            BRW_MATH_SATURATE_NONE,
+            0,
+            c->det,
+            BRW_MATH_DATA_SCALAR,
+            BRW_MATH_PRECISION_FULL);
+}
+
+
+/* Two attributes packed into a wide register. Figure out if either
+ * or both of them need linear/perspective interpolation. Constant
+ * regs are left as-is.
+ */
+/* Test bit 'attr' of a 32-bit attribute mask.  Attribute indices beyond
+ * the width of the mask are reported as not set rather than shifting
+ * out of range.
+ */
+static GLboolean attr_in_mask( GLuint mask, GLuint attr )
+{
+   return (attr < 32 && ((mask >> attr) & 1u)) ? GL_TRUE : GL_FALSE;
+}
+
+/* Compute the predicate masks for register 'reg', which holds
+ * attributes reg*2 (low nibble, 0x0f) and reg*2+1 (high nibble, 0xf0).
+ *
+ *   *pc         nibbles of the attributes that exist
+ *   *pc_persp   nibbles of the attributes in key.persp_attrs
+ *   *pc_linear  nibbles of the attributes in key.persp_attrs or
+ *               key.linear_attrs
+ *
+ * Returns true when 'reg' is the last setup register.
+ */
+static GLboolean calculate_masks( struct brw_sf_compile *c,
+                                  GLuint reg,
+                                  GLushort *pc,
+                                  GLushort *pc_persp,
+                                  GLushort *pc_linear)
+{
+   GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
+   GLuint persp_mask = c->key.persp_attrs;
+   GLuint linear_mask = (c->key.persp_attrs | c->key.linear_attrs);
+   GLuint attr0 = reg * 2;
+   GLuint attr1 = attr0 + 1;
+
+   *pc_persp = 0;
+   *pc_linear = 0;
+   *pc = 0xf;
+
+   if (attr_in_mask(persp_mask, attr0))
+      *pc_persp = 0xf;
+
+   if (attr_in_mask(linear_mask, attr0))
+      *pc_linear = 0xf;
+
+   /* Maybe only process one attribute on the final round:
+    */
+   if (attr1 < c->nr_setup_attrs) {
+      *pc |= 0xf0;
+
+      if (attr_in_mask(persp_mask, attr1))
+         *pc_persp |= 0xf0;
+
+      if (attr_in_mask(linear_mask, attr1))
+         *pc_linear |= 0xf0;
+   }
+
+   return is_last_attr;
+}
+
+
+/* Emit the whole program for the no-attribute case: a single URB write
+ * of message length 1 with the end-of-thread and writes-complete flags
+ * set.
+ */
+void brw_emit_null_setup( struct brw_sf_compile *c )
+{
+   /* m0 is implicitly copied from r0 in the send instruction:
+    */
+   brw_urb_WRITE(&c->func,
+                 brw_null_reg(),
+                 0,
+                 brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+                 0,                  /* allocate */
+                 1,                  /* used */
+                 1,                  /* msg len */
+                 0,                  /* response len */
+                 1,                  /* eot */
+                 1,                  /* writes complete */
+                 0,                  /* offset */
+                 BRW_URB_SWIZZLE_TRANSPOSE);
+}
+
+/* Emit the triangle setup program: for every register pair of
+ * attributes, compute the x and y gradients and the start value and
+ * write them to the URB.  When 'allocate' is false the caller must
+ * already have run alloc_regs().
+ */
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ c->nr_verts = 3;
+
+ if (allocate)
+ alloc_regs(c);
+
+ invert_det(c);
+ copy_z_inv_w(c);
+
+ if (c->key.do_twoside_color)
+ do_twoside_color(c);
+
+ if (c->key.do_flat_shading)
+ do_flatshade_triangle(c);
+
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ /* Pair of incoming attributes:
+ */
+ struct brw_reg a0 = offset(c->vert[0], i);
+ struct brw_reg a1 = offset(c->vert[1], i);
+ struct brw_reg a2 = offset(c->vert[2], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ /* Perspective attributes are pre-multiplied by 1/w per vertex. */
+ if (pc_persp)
+ {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ brw_MUL(p, a1, a1, c->inv_w[1]);
+ brw_MUL(p, a2, a2, c->inv_w[2]);
+ }
+
+
+ /* Calculate coefficients for interpolated values:
+ */
+ if (pc_linear)
+ {
+ brw_set_predicate_control_flag_value(p, pc_linear);
+
+ brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+ brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
+
+ /* calculate dA/dx
+ *
+ * NOTE(review): the MUL writes to the null register; presumably
+ * its result is picked up by the following MAC -- confirm.
+ */
+ brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
+ brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+
+ /* calculate dA/dy
+ */
+ brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
+ brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+ /* start point for interpolation
+ */
+ brw_MOV(p, c->m3C0, a0);
+
+ /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
+ * the send instruction:
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* offset */
+ BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
+ }
+ }
+}
+
+
+
+/* Emit the line setup program: for every register pair of attributes,
+ * compute the x and y gradients from the two endpoints plus the start
+ * value, and write them to the URB.  When 'allocate' is false the
+ * caller must already have run alloc_regs().
+ */
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+
+ c->nr_verts = 2;
+
+ if (allocate)
+ alloc_regs(c);
+
+ invert_det(c);
+ copy_z_inv_w(c);
+
+ if (c->key.do_flat_shading)
+ do_flatshade_line(c);
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ /* Pair of incoming attributes:
+ */
+ struct brw_reg a0 = offset(c->vert[0], i);
+ struct brw_reg a1 = offset(c->vert[1], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ /* Perspective attributes are pre-multiplied by 1/w per vertex. */
+ if (pc_persp)
+ {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ brw_MUL(p, a1, a1, c->inv_w[1]);
+ }
+
+ /* Calculate coefficients for position, color:
+ */
+ if (pc_linear) {
+ brw_set_predicate_control_flag_value(p, pc_linear);
+
+ brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+
+ /* x gradient: (a1 - a0) * dx0 * inv_det */
+ brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+
+ /* y gradient: (a1 - a0) * dy0 * inv_det */
+ brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+
+ /* start point for interpolation
+ */
+ brw_MOV(p, c->m3C0, a0);
+
+ /* Copy m0..m3 to URB.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+/* Emit the point sprite setup program.
+ *
+ * For each register pair of attributes: if its bit is set in
+ * key.point_coord_replace_attrs, gradients derived from 1/dx0 and 1/w
+ * are generated in place of the attribute value; otherwise the
+ * attribute is passed through with zero gradients, as for ordinary
+ * points.  When 'allocate' is false the caller must already have run
+ * alloc_regs().
+ */
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   c->nr_verts = 1;
+
+   if (allocate)
+      alloc_regs(c);
+
+   copy_z_inv_w(c);
+
+   for (i = 0; i < c->nr_setup_regs; i++)
+   {
+      /* XXX: only seems to check point_coord_replace_attrs for every
+       * second attribute?!?
+       */
+      boolean coord_replace = !!(c->key.point_coord_replace_attrs & (1<<(2*i)));
+      struct brw_reg a0 = offset(c->vert[0], i);
+      GLushort pc, pc_persp, pc_linear;
+      GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+      /* Only attributes that are passed through need the 1/w
+       * pre-multiply: they are copied from a0 into m3C0 below, exactly
+       * as in brw_emit_point_setup().  A replaced attribute never reads
+       * a0, so scaling it would be wasted work -- and skipping the
+       * scale for the passed-through ones would hand the fragment
+       * shader an unscaled value.
+       */
+      if (pc_persp && !coord_replace) {
+         brw_set_predicate_control_flag_value(p, pc_persp);
+         brw_MUL(p, a0, a0, c->inv_w[0]);
+      }
+
+      if (coord_replace) {
+         /* Calculate 1.0/PointWidth */
+         brw_math(&c->func,
+                  c->tmp,
+                  BRW_MATH_FUNCTION_INV,
+                  BRW_MATH_SATURATE_NONE,
+                  0,
+                  c->dx0,
+                  BRW_MATH_DATA_SCALAR,
+                  BRW_MATH_PRECISION_FULL);
+
+         /* The two origin conventions differ only in the sign of the
+          * y gradient.
+          */
+         brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+         brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+         brw_MUL(p, c->m2Cy, c->tmp,
+                 c->key.sprite_origin_lower_left ? negate(c->inv_w[0])
+                                                 : c->inv_w[0]);
+         brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+      }
+      else {
+         /* Constant across the point: zero gradients. */
+         brw_MOV(p, c->m1Cx, brw_imm_ud(0));
+         brw_MOV(p, c->m2Cy, brw_imm_ud(0));
+      }
+
+      {
+         brw_set_predicate_control_flag_value(p, pc);
+         if (coord_replace) {
+            if (c->key.sprite_origin_lower_left) {
+               brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
+               brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
+            }
+            else {
+               brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+            }
+         }
+         else {
+            brw_MOV(p, c->m3C0, a0); /* constant value */
+         }
+
+         /* Copy m0..m3 to URB.
+          */
+         brw_urb_WRITE(p,
+                       brw_null_reg(),
+                       0,
+                       brw_vec8_grf(0, 0),
+                       0,    /* allocate */
+                       1,    /* used */
+                       4,    /* msg len */
+                       0,    /* response len */
+                       last, /* eot */
+                       last, /* writes complete */
+                       i*4,  /* urb destination offset */
+                       BRW_URB_SWIZZLE_TRANSPOSE);
+      }
+   }
+}
+
+/* Points setup - several simplifications as all attributes are
+ * constant across the face of the point (point sprites excluded!)
+ */
+/* Emit the point setup program: zero gradients, with each attribute
+ * pair's value sent as the start value.  When 'allocate' is false the
+ * caller must already have run alloc_regs().
+ */
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ c->nr_verts = 1;
+
+ if (allocate)
+ alloc_regs(c);
+
+ copy_z_inv_w(c);
+
+ brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero gradient, emitted once before the loop */
+ brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero gradient, emitted once before the loop */
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ struct brw_reg a0 = offset(c->vert[0], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ /* This seems odd as the values are all constant, but the
+ * fragment shader will be expecting it:
+ */
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ }
+
+
+ /* The delta values are always zero, just send the starting
+ * coordinate. Again, this is to fit in with the interpolation
+ * code in the fragment shader.
+ */
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+
+ brw_MOV(p, c->m3C0, a0); /* constant value */
+
+ /* Copy m0..m3 to URB.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+/* Emit one program containing the triangle, line, point sprite and
+ * point setup code back to back, selected at run time from the
+ * payload.  A one-hot mask (1 << primitive) is ANDed against the set of
+ * triangle and then line topologies, and a sprite-enable bit is tested
+ * for points; each test is followed by a forward JMPI over the
+ * corresponding setup code.
+ *
+ * NOTE(review): presumably each JMPI is taken when the preceding AND
+ * produced zero (BRW_CONDITIONAL_Z) -- confirm against brw_JMPI.
+ */
+void brw_emit_anyprim_setup( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
+ struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
+ struct brw_reg primmask;
+ struct brw_instruction *jmp;
+ struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+
+ GLuint saveflag;
+
+ /* Allocate once for the largest case; the sub-emitters are called
+ * with allocate == GL_FALSE.
+ */
+ c->nr_verts = 3;
+ alloc_regs(c);
+
+ primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
+
+ /* primmask = 1 << primitive type */
+ brw_MOV(p, primmask, brw_imm_ud(1));
+ brw_SHL(p, primmask, primmask, payload_prim);
+
+ /* Triangles: */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
+ (1<<_3DPRIM_TRISTRIP) |
+ (1<<_3DPRIM_TRIFAN) |
+ (1<<_3DPRIM_TRISTRIP_REVERSE) |
+ (1<<_3DPRIM_POLYGON) |
+ (1<<_3DPRIM_RECTLIST) |
+ (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); /* distance patched by brw_land_fwd_jump() */
+ {
+ saveflag = p->flag_value;
+ brw_push_insn_state(p);
+ brw_emit_tri_setup( c, GL_FALSE );
+ brw_pop_insn_state(p);
+ p->flag_value = saveflag;
+ /* note - thread killed in subroutine, so must
+ * restore the flag which is changed when building
+ * the subroutine. fix #13240
+ */
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ /* Lines: */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
+ (1<<_3DPRIM_LINESTRIP) |
+ (1<<_3DPRIM_LINELOOP) |
+ (1<<_3DPRIM_LINESTRIP_CONT) |
+ (1<<_3DPRIM_LINESTRIP_BF) |
+ (1<<_3DPRIM_LINESTRIP_CONT_BF)));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ {
+ saveflag = p->flag_value;
+ brw_push_insn_state(p);
+ brw_emit_line_setup( c, GL_FALSE );
+ brw_pop_insn_state(p);
+ p->flag_value = saveflag;
+ /* note - thread killed in subroutine */
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ /* Point sprites: */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ {
+ saveflag = p->flag_value;
+ brw_push_insn_state(p);
+ brw_emit_point_sprite_setup( c, GL_FALSE );
+ brw_pop_insn_state(p);
+ p->flag_value = saveflag;
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ /* Everything else: plain points. */
+ brw_emit_point_setup( c, GL_FALSE );
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
new file mode 100644
index 00000000000..25dc2b52e07
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -0,0 +1,333 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+
+#include "pipe/p_state.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+#include "brw_pipe_rast.h"
+
+/**
+ * Build the SF viewport state (viewport scale/translate terms plus the
+ * scissor rectangle) from the currently bound state and obtain a matching
+ * buffer from the general state cache in brw->sf.vp_bo.
+ *
+ * Returns PIPE_OK on success, otherwise the error from brw_cache_data().
+ */
+static enum pipe_error upload_sf_vp(struct brw_context *brw)
+{
+   const struct pipe_viewport_state *viewport = &brw->curr.viewport;
+   const struct pipe_scissor_state *rect = &brw->curr.scissor;
+   struct brw_sf_viewport sfv;
+   enum pipe_error err;
+
+   /* brw_cache_data() hashes and compares the raw bytes of this struct,
+    * so clear it completely before filling in individual fields.
+    */
+   memset(&sfv, 0, sizeof(sfv));
+
+   /* PIPE_NEW_VIEWPORT */
+   sfv.viewport.m00 = viewport->scale[0];
+   sfv.viewport.m11 = viewport->scale[1];
+   sfv.viewport.m22 = viewport->scale[2];
+   sfv.viewport.m30 = viewport->translate[0];
+   sfv.viewport.m31 = viewport->translate[1];
+   sfv.viewport.m32 = viewport->translate[2];
+
+   /* PIPE_NEW_SCISSOR: the maxima are stored as (max - 1); the original
+    * author marked that adjustment with a "?".
+    */
+   sfv.scissor.xmin = rect->minx;
+   sfv.scissor.ymin = rect->miny;
+   sfv.scissor.xmax = rect->maxx - 1;
+   sfv.scissor.ymax = rect->maxy - 1;
+
+   err = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, NULL, 0,
+                        &brw->sf.vp_bo);
+   if (err)
+      return err;
+
+   return PIPE_OK;
+}
+
+/* Tracked-state atom for the SF viewport: re-run upload_sf_vp() whenever
+ * the viewport or the scissor rectangle changes.
+ */
+const struct brw_tracked_state brw_sf_vp = {
+   .dirty = {
+      .mesa = PIPE_NEW_VIEWPORT | PIPE_NEW_SCISSOR,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = upload_sf_vp,
+};
+
+/* Cache key for the SF unit state.  sf_unit_populate_key() zeroes the
+ * whole struct before filling it and the state cache treats it as raw
+ * bytes, so the field order and widths below are part of the key layout.
+ */
+struct brw_sf_unit_key {
+   /* From the SF program data */
+   unsigned total_grf;
+   unsigned urb_entry_read_length;
+
+   /* From the URB layout */
+   unsigned nr_urb_entries;
+   unsigned urb_size;
+   unsigned sfsize;
+
+   /* From the rasterizer state */
+   unsigned scissor:1;
+   unsigned line_smooth:1;
+   unsigned point_sprite:1;
+   unsigned point_attenuated:1;
+   unsigned front_face:2;
+   unsigned cull_mode:2;
+   unsigned flatshade_first:1;
+   unsigned gl_rasterization_rules:1;
+   unsigned line_last_pixel_enable:1;
+   float line_width;
+   float point_size;
+};
+
+/**
+ * Fill *key with every input that determines the SF unit state: values
+ * from the SF program data, the URB layout and the bound rasterizer
+ * template.
+ */
+static void
+sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
+{
+   const struct pipe_rasterizer_state *templ = &brw->curr.rast->templ;
+
+   /* Start from all-zero so that padding and unused bits compare equal. */
+   memset(key, 0, sizeof(*key));
+
+   /* CACHE_NEW_SF_PROG */
+   key->total_grf = brw->sf.prog_data->total_grf;
+   key->urb_entry_read_length = brw->sf.prog_data->urb_read_length;
+
+   /* BRW_NEW_URB_FENCE */
+   key->nr_urb_entries = brw->urb.nr_sf_entries;
+   key->urb_size = brw->urb.vsize;
+   key->sfsize = brw->urb.sfsize;
+
+   /* PIPE_NEW_RAST: culling / winding */
+   key->front_face = templ->front_winding;
+   key->cull_mode = templ->cull_mode;
+
+   /* PIPE_NEW_RAST: lines */
+   key->line_smooth = templ->line_smooth;
+   key->line_width = templ->line_width;
+   key->line_last_pixel_enable = templ->line_last_pixel;
+
+   /* PIPE_NEW_RAST: points; the size is clamped to the template's range */
+   key->point_sprite = templ->point_sprite;
+   key->point_attenuated = templ->point_size_per_vertex;
+   key->point_size = CLAMP(templ->point_size,
+                           templ->point_size_min,
+                           templ->point_size_max);
+
+   /* PIPE_NEW_RAST: miscellaneous */
+   key->scissor = templ->scissor;
+   key->flatshade_first = templ->flatshade_first;
+   key->gl_rasterization_rules = templ->gl_rasterization_rules;
+}
+
+/**
+ * Build a brw_sf_unit_state from \p key and upload it to the general
+ * state cache under BRW_SF_UNIT.
+ *
+ * \param brw     context owning the cache
+ * \param key     populated SF unit key (also used as the cache key)
+ * \param reloc   two relocations (SF program, SF viewport) prepared by
+ *                the caller
+ * \param bo_out  receives the buffer holding the uploaded state
+ * \return PIPE_OK on success, otherwise the error from brw_upload_cache()
+ */
+static enum pipe_error
+sf_unit_create_from_key(struct brw_context *brw,
+                        struct brw_sf_unit_key *key,
+                        struct brw_winsys_reloc *reloc,
+                        struct brw_winsys_buffer **bo_out)
+{
+   struct brw_sf_unit_state sf;
+   enum pipe_error ret;
+   int chipset_max_threads;
+   memset(&sf, 0, sizeof(sf));
+
+   sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+   /* reloc */
+   sf.thread0.kernel_start_pointer = 0;
+
+   sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+   sf.thread3.dispatch_grf_start_reg = 3;
+
+   if (BRW_IS_IGDNG(brw))
+      sf.thread3.urb_entry_read_offset = 3;
+   else
+      sf.thread3.urb_entry_read_offset = 1;
+
+   sf.thread3.urb_entry_read_length = key->urb_entry_read_length;
+
+   sf.thread4.nr_urb_entries = key->nr_urb_entries;
+   sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
+
+   /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or
+    * 48(IGDNG) threads
+    */
+   if (BRW_IS_IGDNG(brw))
+      chipset_max_threads = 48;
+   else
+      chipset_max_threads = 24;
+
+   sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1;
+
+   if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+      sf.thread4.max_threads = 0;
+
+   if (BRW_DEBUG & DEBUG_STATS)
+      sf.thread4.stats_enable = 1;
+
+   /* CACHE_NEW_SF_VP */
+   /* reloc */
+   sf.sf5.sf_viewport_state_offset = 0;
+
+   sf.sf5.viewport_transform = 1;
+
+   if (key->scissor)
+      sf.sf6.scissor = 1;
+
+   if (key->front_face == PIPE_WINDING_CCW)
+      sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
+   else
+      sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+
+   /* Translate "cull this winding" into front/back culling relative to
+    * the front-face winding.
+    */
+   switch (key->cull_mode) {
+   case PIPE_WINDING_CCW:
+   case PIPE_WINDING_CW:
+      sf.sf6.cull_mode = (key->front_face == key->cull_mode ?
+                          BRW_CULLMODE_FRONT :
+                          BRW_CULLMODE_BACK);
+      break;
+   case PIPE_WINDING_BOTH:
+      sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
+      break;
+   case PIPE_WINDING_NONE:
+      sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+      break;
+   default:
+      assert(0);
+      sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+      break;
+   }
+
+   /* _NEW_LINE */
+   /* XXX use ctx->Const.Min/MaxLineWidth here */
+   sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1);
+
+   sf.sf6.line_endcap_aa_region_width = 1;
+   if (key->line_smooth)
+      sf.sf6.aa_enable = 1;
+   else if (sf.sf6.line_width <= 0x2)
+      sf.sf6.line_width = 0;
+
+   /* XXX: gl_rasterization_rules? something else?
+    *
+    * NOTE(review): earlier revisions assigned BRW_RASTRULE_UPPER_RIGHT and
+    * then BRW_RASTRULE_LOWER_RIGHT here before overwriting both with the
+    * literal 1.  Only the last store ever took effect, so the dead stores
+    * have been dropped; confirm against brw_defines.h which named rule
+    * the value 1 corresponds to.
+    */
+   sf.sf6.point_rast_rule = 1;
+
+   /* XXX clamp max depends on AA vs. non-AA */
+
+   /* _NEW_POINT */
+   sf.sf7.sprite_point = key->point_sprite;
+   sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
+   sf.sf7.use_point_size_state = !key->point_attenuated;
+   sf.sf7.aa_line_distance_mode = 0;
+
+   /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
+    */
+   if (!key->flatshade_first) {
+      sf.sf7.trifan_pv = 2;
+      sf.sf7.linestrip_pv = 1;
+      sf.sf7.tristrip_pv = 2;
+   } else {
+      sf.sf7.trifan_pv = 1;
+      sf.sf7.linestrip_pv = 0;
+      sf.sf7.tristrip_pv = 0;
+   }
+
+   sf.sf7.line_last_pixel_enable = key->line_last_pixel_enable;
+
+   /* Set bias for OpenGL rasterization rules:
+    */
+   if (key->gl_rasterization_rules) {
+      sf.sf6.dest_org_vbias = 0x8;
+      sf.sf6.dest_org_hbias = 0x8;
+   }
+   else {
+      sf.sf6.dest_org_vbias = 0x0;
+      sf.sf6.dest_org_hbias = 0x0;
+   }
+
+   /* The caller always supplies exactly two relocations. */
+   ret = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
+                          key, sizeof(*key),
+                          reloc, 2,
+                          &sf, sizeof(sf),
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+
+/**
+ * Validate the SF unit state: build the key and the two relocations,
+ * reuse a cached buffer when one matches, otherwise create and upload a
+ * new one.  The result is stored in brw->sf.state_bo.
+ *
+ * Returns PIPE_OK on success, otherwise the error from
+ * sf_unit_create_from_key().
+ */
+static enum pipe_error upload_sf_unit( struct brw_context *brw )
+{
+   struct brw_sf_unit_key key;
+   struct brw_winsys_reloc reloc[2];
+   unsigned grf_blocks;
+   unsigned winding;
+   unsigned viewport_transform = 1;
+   enum pipe_error err;
+
+   sf_unit_populate_key(brw, &key);
+
+   /* XXX: cut this crap and pre calculate the key:
+    *
+    * These repeat values that sf_unit_create_from_key() also writes into
+    * thread0 and sf5; presumably they must survive in the relocated
+    * dwords -- TODO confirm against make_reloc().
+    */
+   grf_blocks = align(key.total_grf, 16) / 16 - 1;
+
+   if (key.front_face == PIPE_WINDING_CCW)
+      winding = BRW_FRONTWINDING_CCW;
+   else
+      winding = BRW_FRONTWINDING_CW;
+
+   /* Emit SF program relocation */
+   make_reloc(&reloc[0],
+              BRW_USAGE_STATE,
+              grf_blocks << 1,
+              offsetof(struct brw_sf_unit_state, thread0),
+              brw->sf.prog_bo);
+
+   /* Emit SF viewport relocation */
+   make_reloc(&reloc[1],
+              BRW_USAGE_STATE,
+              winding | (viewport_transform << 1),
+              offsetof(struct brw_sf_unit_state, sf5),
+              brw->sf.vp_bo);
+
+   /* Cache hit: brw->sf.state_bo has been updated, nothing to upload. */
+   if (brw_search_cache(&brw->cache, BRW_SF_UNIT,
+                        &key, sizeof(key),
+                        reloc, 2,
+                        NULL,
+                        &brw->sf.state_bo))
+      return PIPE_OK;
+
+   err = sf_unit_create_from_key(brw, &key, reloc, &brw->sf.state_bo);
+   if (err)
+      return err;
+
+   return PIPE_OK;
+}
+
+/* Tracked-state atom for the SF unit: re-run upload_sf_unit() when the
+ * rasterizer state, the URB fence, the SF viewport or the SF program
+ * changes.
+ */
+const struct brw_tracked_state brw_sf_unit = {
+   .dirty = {
+      .mesa = PIPE_NEW_RAST,
+      .brw = BRW_NEW_URB_FENCE,
+      .cache = CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG,
+   },
+   .prepare = upload_sf_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
new file mode 100644
index 00000000000..d2bbd0123d1
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -0,0 +1,174 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_STATE_H
+#define BRW_STATE_H
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+
+#include "brw_context.h"
+
+/**
+ * Append \p bo to the context's list of validated buffers, taking a
+ * reference on it.  A NULL \p bo is ignored.
+ *
+ * The capacity assertion is checked first, even when \p bo is NULL.
+ */
+static INLINE void
+brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo)
+{
+   struct brw_winsys_buffer **slot;
+
+   assert(brw->state.validated_bo_count < Elements(brw->state.validated_bos));
+
+   if (bo == NULL)
+      return;
+
+   slot = &brw->state.validated_bos[brw->state.validated_bo_count++];
+   bo_reference( slot, bo );
+}
+
+/* Tracked-state atoms, each defined in its own source file.
+ *
+ * These are declared 'extern' so that including this header only declares
+ * the objects.  Without it every translation unit would carry a tentative
+ * definition of each atom, which fails to link when common symbols are
+ * disabled (-fno-common).
+ */
+extern const struct brw_tracked_state brw_blend_constant_color;
+extern const struct brw_tracked_state brw_cc_unit;
+extern const struct brw_tracked_state brw_cc_vp;
+extern const struct brw_tracked_state brw_clip_prog;
+extern const struct brw_tracked_state brw_clip_unit;
+extern const struct brw_tracked_state brw_curbe_buffer;
+extern const struct brw_tracked_state brw_curbe_offsets;
+extern const struct brw_tracked_state brw_invarient_state;
+extern const struct brw_tracked_state brw_gs_prog;
+extern const struct brw_tracked_state brw_gs_unit;
+extern const struct brw_tracked_state brw_line_stipple;
+extern const struct brw_tracked_state brw_aa_line_parameters;
+extern const struct brw_tracked_state brw_pipelined_state_pointers;
+extern const struct brw_tracked_state brw_binding_table_pointers;
+extern const struct brw_tracked_state brw_depthbuffer;
+extern const struct brw_tracked_state brw_polygon_stipple;
+extern const struct brw_tracked_state brw_program_parameters;
+extern const struct brw_tracked_state brw_recalculate_urb_fence;
+extern const struct brw_tracked_state brw_sf_prog;
+extern const struct brw_tracked_state brw_sf_unit;
+extern const struct brw_tracked_state brw_sf_vp;
+extern const struct brw_tracked_state brw_state_base_address;
+extern const struct brw_tracked_state brw_urb_fence;
+extern const struct brw_tracked_state brw_vertex_state;
+extern const struct brw_tracked_state brw_vs_surfaces;
+extern const struct brw_tracked_state brw_vs_prog;
+extern const struct brw_tracked_state brw_vs_unit;
+extern const struct brw_tracked_state brw_wm_input_sizes;
+extern const struct brw_tracked_state brw_wm_prog;
+extern const struct brw_tracked_state brw_wm_samplers;
+extern const struct brw_tracked_state brw_wm_constant_surface;
+extern const struct brw_tracked_state brw_wm_surfaces;
+extern const struct brw_tracked_state brw_wm_unit;
+
+extern const struct brw_tracked_state brw_psp_urb_cbs;
+
+extern const struct brw_tracked_state brw_pipe_control;
+
+extern const struct brw_tracked_state brw_drawing_rect;
+extern const struct brw_tracked_state brw_indices;
+extern const struct brw_tracked_state brw_vertices;
+extern const struct brw_tracked_state brw_index_buffer;
+
+
+/***********************************************************************
+ * brw_state.c
+ */
+int brw_validate_state(struct brw_context *brw);
+int brw_upload_state(struct brw_context *brw);
+void brw_init_state(struct brw_context *brw);
+void brw_destroy_state(struct brw_context *brw);
+
+/***********************************************************************
+ * brw_state_cache.c
+ */
+/* Look up (or upload) state whose data doubles as its own key, using the
+ * key size registered for cache_id.
+ */
+enum pipe_error
+brw_cache_data(struct brw_cache *cache,
+               enum brw_cache_id cache_id,
+               const void *data,
+               struct brw_winsys_reloc *relocs,
+               GLuint nr_relocs,
+               struct brw_winsys_buffer **bo_out );
+
+/* As brw_cache_data(), but with an explicit data/key size. */
+enum pipe_error
+brw_cache_data_sz(struct brw_cache *cache,
+                  enum brw_cache_id cache_id,
+                  const void *data,
+                  GLuint data_size,
+                  struct brw_winsys_reloc *relocs,
+                  GLuint nr_relocs,
+                  struct brw_winsys_buffer **bo_out);
+
+/* Allocate a buffer for 'data', upload it and record it in the cache
+ * under (cache_id, key, relocs).  If aux_return is non-NULL it receives a
+ * pointer to the cache's copy of the auxiliary data.
+ */
+enum pipe_error
+brw_upload_cache( struct brw_cache *cache,
+                  enum brw_cache_id cache_id,
+                  const void *key,
+                  GLuint key_sz,
+                  struct brw_winsys_reloc *relocs,
+                  GLuint nr_relocs,
+                  const void *data,
+                  GLuint data_sz,
+                  const void *aux,
+                  void *aux_return ,
+                  struct brw_winsys_buffer **bo_out);
+
+/* Returns TRUE and references the cached buffer into *bo_out when an
+ * entry matches (cache_id, key, relocs); returns FALSE otherwise.
+ */
+boolean
+brw_search_cache( struct brw_cache *cache,
+                  enum brw_cache_id cache_id,
+                  const void *key,
+                  GLuint key_size,
+                  struct brw_winsys_reloc *relocs,
+                  GLuint nr_relocs,
+                  void *aux_return,
+                  struct brw_winsys_buffer **bo_out);
+
+/* Flush either cache once it holds more than 1000 items. */
+void brw_state_cache_check_size( struct brw_context *brw );
+
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
+
+/* Drop every cache entry for which the winsys reports that the entry's
+ * buffer references 'bo'.
+ */
+void brw_state_cache_bo_delete(struct brw_cache *cache,
+                               struct brw_winsys_buffer *bo);
+
+/***********************************************************************
+ * brw_state_batch.c
+ */
+/* Emit the struct pointed to by 's' into the batch, unconditionally
+ * (BRW_BATCH_STRUCT) or only if it differs from the cached copy of the
+ * same command (BRW_CACHED_BATCH_STRUCT).  The 'brw' argument is
+ * parenthesized so that any pointer expression can be passed.
+ */
+#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( (brw)->batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
+#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( (brw), (s), sizeof(*(s)) )
+
+/* Returns GL_TRUE if the data was written to the batch, GL_FALSE if it
+ * matched the cached copy and was skipped.
+ */
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+                                   const void *data,
+                                   GLuint sz );
+void brw_destroy_batch_cache( struct brw_context *brw );
+void brw_clear_batch_cache( struct brw_context *brw );
+
+/***********************************************************************
+ * brw_wm_surface_state.c
+ */
+
+/***********************************************************************
+ * brw_state_debug.c
+ */
+void brw_update_dirty_counts( unsigned mesa,
+ unsigned brw,
+ unsigned cache );
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c
new file mode 100644
index 00000000000..7d212e5c247
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_batch.c
@@ -0,0 +1,98 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+
+
+
+/**
+ * Emit a command into the batch unless an identical copy of the same
+ * command is already cached, to prevent identical commands being issued
+ * repeatedly.
+ *
+ * One cached copy is kept per opcode (read from the struct header at the
+ * start of \p data).  If brw->flags.always_emit_state is set the cache is
+ * bypassed entirely.
+ *
+ * If memory for the cached copy cannot be allocated the command is still
+ * emitted; any stale cached copy for that opcode is dropped so that it
+ * cannot suppress a later emission.
+ *
+ * \param brw   context owning the batch and the cache list
+ * \param data  command to emit, beginning with a struct header
+ * \param sz    size of the command in bytes
+ * \return GL_TRUE if the command was written to the batch, GL_FALSE if it
+ *         matched the cached copy and was skipped.
+ */
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+                                   const void *data,
+                                   GLuint sz )
+{
+   const struct header *newheader = (const struct header *)data;
+   struct brw_cached_batch_item **link;
+   struct brw_cached_batch_item *item;
+
+   if (brw->flags.always_emit_state) {
+      brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS);
+      return GL_TRUE;
+   }
+
+   for (link = &brw->cached_batch_items; *link; link = &(*link)->next) {
+      item = *link;
+
+      if (item->header->opcode != newheader->opcode)
+         continue;
+
+      if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
+         return GL_FALSE;
+
+      if (item->sz != sz) {
+         void *resized = MALLOC(sz);
+
+         if (resized == NULL) {
+            /* Cannot hold the new contents: unlink the stale entry. */
+            *link = item->next;
+            FREE(item->header);
+            FREE(item);
+            goto emit;
+         }
+
+         FREE(item->header);
+         item->header = resized;
+         item->sz = sz;
+      }
+      goto update;
+   }
+
+   /* No cached copy for this opcode yet: add one at the list head. */
+   item = CALLOC_STRUCT(brw_cached_batch_item);
+   if (item == NULL)
+      goto emit;
+
+   item->header = MALLOC(sz);
+   if (item->header == NULL) {
+      FREE(item);
+      goto emit;
+   }
+   item->sz = sz;
+   item->next = brw->cached_batch_items;
+   brw->cached_batch_items = item;
+
+ update:
+   memcpy(item->header, newheader, sz);
+
+ emit:
+   brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS);
+   return GL_TRUE;
+}
+
+/**
+ * Discard every cached batch command, so that the next call to
+ * brw_cached_batch_struct() for any opcode emits unconditionally.
+ *
+ * The items and their headers are allocated with CALLOC_STRUCT()/MALLOC()
+ * in brw_cached_batch_struct(), so they are released with the matching
+ * FREE() wrapper rather than libc free(); the wrappers are not guaranteed
+ * to map directly onto malloc()/free().
+ */
+void brw_clear_batch_cache( struct brw_context *brw )
+{
+   struct brw_cached_batch_item *item = brw->cached_batch_items;
+
+   while (item) {
+      struct brw_cached_batch_item *next = item->next;
+      FREE(item->header);
+      FREE(item);
+      item = next;
+   }
+
+   brw->cached_batch_items = NULL;
+}
+
+/**
+ * Release the batch command cache; currently identical to clearing it.
+ */
+void brw_destroy_batch_cache( struct brw_context *brw )
+{
+   brw_clear_batch_cache(brw);
+}
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
new file mode 100644
index 00000000000..16b643ceb28
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -0,0 +1,617 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/** @file brw_state_cache.c
+ *
+ * This file implements a simple static state cache for 965. The consumers
+ * can query the hash table of state using a cache_id, opaque key data,
+ * and list of buffers that will be used in relocations, and receive the
+ * corresponding state buffer object of state (plus associated auxiliary
+ * data) in return.
+ *
+ * The inner workings are a simple hash table based on a CRC of the key data.
+ * The cache_id and relocation target buffers associated with the state
+ * buffer are included as auxiliary key data, but are not part of the hash
+ * value (this should be fixed, but will likely be fixed instead by making
+ * consumers use structured keys).
+ *
+ * Replacement is not implemented. Instead, when the cache gets too big, at
+ * a safe point (unlock) we throw out all of the cache data and let it
+ * regenerate for the next rendering operation.
+ *
+ * The reloc structs need to be included as key data, otherwise the
+ * non-unique values stuffed in the offset in key data through
+ * brw_cache_data() may result in successful probe for state buffers
+ * even when the buffer being referenced doesn't match. The result would be
+ * that the same state cache entry is used twice for different buffers,
+ * only one of the two buffers referenced gets put into the offset, and the
+ * incorrect program is run for the other instance.
+ */
+#include "util/u_memory.h"
+
+#include "brw_debug.h"
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+
+/* XXX: Fixme - have to include these to get the sizes of the prog_key
+ * structs:
+ */
+#include "brw_wm.h"
+#include "brw_vs.h"
+#include "brw_clip.h"
+#include "brw_sf.h"
+#include "brw_gs.h"
+
+
+/* Fold nr_words 32-bit words into 'hash': xor each word in, then rotate
+ * the accumulator left by five bits.
+ */
+static GLuint
+hash_words(GLuint hash, const GLuint *words, GLuint nr_words)
+{
+   GLuint w;
+
+   for (w = 0; w < nr_words; w++) {
+      hash ^= words[w];
+      hash = (hash << 5) | (hash >> 27);
+   }
+
+   return hash;
+}
+
+/**
+ * Compute the hash for a cache entry from its key bytes followed by the
+ * raw bytes of its relocation array.
+ *
+ * key_size must be a multiple of four.
+ */
+static GLuint
+hash_key(const void *key, GLuint key_size,
+         struct brw_winsys_reloc *relocs, GLuint nr_relocs)
+{
+   GLuint relocs_size = nr_relocs * sizeof(struct brw_winsys_reloc);
+   GLuint hash;
+
+   assert(key_size % 4 == 0);
+
+   /* I'm sure this can be improved on: */
+   hash = hash_words(0, (const GLuint *)key, key_size / 4);
+
+   /* Include the BO pointers as key data as well */
+   hash = hash_words(hash, (const GLuint *)relocs, relocs_size / 4);
+
+   return hash;
+}
+
+
+/**
+ * Record 'bo' as the buffer most recently chosen for cache_id.  When it
+ * differs from the previous choice, the matching bit in the context's
+ * cache dirty flags is raised.
+ */
+static void
+update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
+                  struct brw_winsys_buffer *bo)
+{
+   if (bo != cache->last_bo[cache_id]) {
+      bo_reference( &cache->last_bo[cache_id], bo );
+      cache->brw->state.dirty.cache |= 1 << cache_id;
+   }
+}
+
+
+/**
+ * Walk the bucket selected by 'hash' and return the item whose cache id,
+ * hash, key bytes and relocation array all match, or NULL if none does.
+ */
+static struct brw_cache_item *
+search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
+             GLuint hash, const void *key, GLuint key_size,
+             struct brw_winsys_reloc *relocs, GLuint nr_relocs)
+{
+   struct brw_cache_item *c;
+
+#if 0
+   int bucketcount = 0;
+
+   for (c = cache->items[hash % cache->size]; c; c = c->next)
+      bucketcount++;
+
+   debug_printf("bucket %d/%d = %d/%d items\n", hash % cache->size,
+                cache->size, bucketcount, cache->n_items);
+#endif
+
+   for (c = cache->items[hash % cache->size]; c; c = c->next) {
+      /* Cheap scalar comparisons first, byte comparisons afterwards. */
+      if (c->cache_id != cache_id || c->hash != hash)
+         continue;
+
+      if (c->key_size != key_size ||
+          memcmp(c->key, key, key_size) != 0)
+         continue;
+
+      if (c->nr_relocs != nr_relocs ||
+          memcmp(c->relocs, relocs, nr_relocs * sizeof *relocs) != 0)
+         continue;
+
+      return c;
+   }
+
+   return NULL;
+}
+
+
+/**
+ * Grow the bucket array to three times its size and redistribute every
+ * item according to its stored hash.
+ *
+ * If the larger array cannot be allocated the table is left unchanged;
+ * the cache keeps working with longer bucket chains.
+ */
+static void
+rehash(struct brw_cache *cache)
+{
+   struct brw_cache_item **items;
+   struct brw_cache_item *c, *next;
+   GLuint size, i;
+
+   size = cache->size * 3;
+   items = (struct brw_cache_item**) CALLOC(size, sizeof(*items));
+   if (items == NULL)
+      return;
+
+   for (i = 0; i < cache->size; i++)
+      for (c = cache->items[i]; c; c = next) {
+         next = c->next;
+         c->next = items[c->hash % size];
+         items[c->hash % size] = c;
+      }
+
+   FREE(cache->items);
+   cache->items = items;
+   cache->size = size;
+}
+
+
+/**
+ * Look for a cache entry matching cache_id, key and relocs.
+ *
+ * On a hit, *bo_out is made to reference the entry's buffer, the entry
+ * becomes the "last" buffer for cache_id and, if aux_return is non-NULL,
+ * it receives a pointer to the bytes stored directly after the entry's
+ * key.
+ *
+ * \return TRUE on a hit, FALSE on a miss (outputs untouched).
+ */
+boolean
+brw_search_cache(struct brw_cache *cache,
+                 enum brw_cache_id cache_id,
+                 const void *key,
+                 GLuint key_size,
+                 struct brw_winsys_reloc *relocs,
+                 GLuint nr_relocs,
+                 void *aux_return,
+                 struct brw_winsys_buffer **bo_out)
+{
+   GLuint hash = hash_key(key, key_size, relocs, nr_relocs);
+   struct brw_cache_item *hit = search_cache(cache, cache_id, hash,
+                                             key, key_size,
+                                             relocs, nr_relocs);
+
+   if (hit == NULL)
+      return FALSE;
+
+   if (aux_return)
+      *(void **)aux_return = (void *)((char *)hit->key + hit->key_size);
+
+   update_cache_last(cache, cache_id, hit->bo);
+   bo_reference(bo_out, hit->bo);
+   return TRUE;
+}
+
+
+/**
+ * Allocate a buffer for 'data', upload it and add a cache entry for it
+ * under (cache_id, key, relocs).
+ *
+ * The entry owns one allocation laid out as key | aux data | relocs and
+ * takes a reference on every relocation target buffer and on the new
+ * state buffer.
+ *
+ * The entry is only inserted once the upload has succeeded, so a failed
+ * upload never leaves a cache entry whose buffer lacks the data.  All
+ * bookkeeping memory is released on every error path.
+ *
+ * \param aux         auxiliary data to store with the entry; its size is
+ *                    the one registered for cache_id
+ * \param aux_return  if non-NULL, receives a pointer to the cache's copy
+ *                    of the auxiliary data
+ * \param bo_out      receives the newly allocated buffer
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if bookkeeping
+ *         memory cannot be allocated, otherwise the winsys error.
+ */
+enum pipe_error
+brw_upload_cache( struct brw_cache *cache,
+                  enum brw_cache_id cache_id,
+                  const void *key,
+                  GLuint key_size,
+                  struct brw_winsys_reloc *relocs,
+                  GLuint nr_relocs,
+                  const void *data,
+                  GLuint data_size,
+                  const void *aux,
+                  void *aux_return,
+                  struct brw_winsys_buffer **bo_out)
+{
+   GLuint hash = hash_key(key, key_size, relocs, nr_relocs);
+   GLuint relocs_size = nr_relocs * sizeof relocs[0];
+   GLuint aux_size = cache->aux_size[cache_id];
+   GLuint tmp_size = key_size + aux_size + relocs_size;
+   struct brw_cache_item *item;
+   enum pipe_error ret;
+   char *tmp;
+   GLuint i;
+
+   item = CALLOC_STRUCT(brw_cache_item);
+   if (item == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* Memory containing the key, aux_data, and relocs */
+   tmp = MALLOC(tmp_size);
+   if (tmp == NULL && tmp_size != 0) {
+      FREE(item);
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   /* Create the buffer object to contain the data.  For now, use a
+    * single buffer type to describe all cached state atoms.  Later,
+    * may want to take advantage of hardware distinctions between
+    * these various entities.
+    */
+   ret = cache->sws->bo_alloc(cache->sws,
+                              cache->buffer_type,
+                              data_size, 1 << 6,
+                              bo_out);
+   if (ret)
+      goto fail;
+
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("upload %s: %d bytes to cache id %d\n",
+                   cache->name[cache_id],
+                   data_size, cache_id);
+
+   /* Copy data to the buffer */
+   ret = cache->sws->bo_subdata(*bo_out,
+                                cache_id,
+                                0, data_size, data,
+                                relocs, nr_relocs);
+   if (ret)
+      goto fail;
+
+   /* Zero-length pieces are skipped so that memcpy() is never handed a
+    * NULL pointer (aux is NULL for cache ids without aux data).
+    */
+   if (key_size)
+      memcpy(tmp, key, key_size);
+   if (aux_size)
+      memcpy(tmp + key_size, aux, aux_size);
+   if (relocs_size)
+      memcpy(tmp + key_size + aux_size, relocs, relocs_size);
+
+   /* The entry keeps the relocation targets alive. */
+   for (i = 0; i < nr_relocs; i++) {
+      p_atomic_inc(&relocs[i].bo->reference.count);
+   }
+
+   item->cache_id = cache_id;
+   item->key = tmp;
+   item->hash = hash;
+   item->key_size = key_size;
+   item->relocs = (struct brw_winsys_reloc *)(tmp + key_size + aux_size);
+   item->nr_relocs = nr_relocs;
+   bo_reference( &item->bo, *bo_out );
+   item->data_size = data_size;
+
+   if (cache->n_items > cache->size * 1.5)
+      rehash(cache);
+
+   hash %= cache->size;
+   item->next = cache->items[hash];
+   cache->items[hash] = item;
+   cache->n_items++;
+
+   if (aux_return) {
+      assert(cache->aux_size[cache_id]);
+      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+   }
+
+   update_cache_last(cache, cache_id, item->bo);
+
+   return PIPE_OK;
+
+fail:
+   if (tmp)
+      FREE(tmp);
+   FREE(item);
+   return ret;
+}
+
+
+/**
+ * Cache lookup in which the state data itself is the key: return the
+ * buffer of a matching entry, or upload 'data' as a new entry.
+ *
+ * This doesn't really work with aux data.  Use search/upload instead.
+ */
+enum pipe_error
+brw_cache_data_sz(struct brw_cache *cache,
+                  enum brw_cache_id cache_id,
+                  const void *data,
+                  GLuint data_size,
+                  struct brw_winsys_reloc *relocs,
+                  GLuint nr_relocs,
+                  struct brw_winsys_buffer **bo_out)
+{
+   GLuint hash = hash_key(data, data_size, relocs, nr_relocs);
+   struct brw_cache_item *hit = search_cache(cache, cache_id, hash,
+                                             data, data_size,
+                                             relocs, nr_relocs);
+
+   if (hit == NULL) {
+      /* Miss: the data serves as both key and payload, with no aux. */
+      return brw_upload_cache(cache, cache_id,
+                              data, data_size,
+                              relocs, nr_relocs,
+                              data, data_size,
+                              NULL, NULL,
+                              bo_out);
+   }
+
+   update_cache_last(cache, cache_id, hit->bo);
+   bo_reference(bo_out, hit->bo);
+   return PIPE_OK;
+}
+
+
+/**
+ * Convenience form of brw_cache_data_sz() that takes the data size from
+ * the key size registered for cache_id.
+ *
+ * If nr_relocs is nonzero, brw_search_cache()/brw_upload_cache() would be
+ * better to use, as the potentially changing offsets in the
+ * data-used-as-key will result in excessive cache misses.
+ *
+ * XXX: above is no longer true -- can we remove some code?
+ */
+enum pipe_error
+brw_cache_data(struct brw_cache *cache,
+               enum brw_cache_id cache_id,
+               const void *data,
+               struct brw_winsys_reloc *relocs,
+               GLuint nr_relocs,
+               struct brw_winsys_buffer **bo_out)
+{
+   GLuint canonical_size = cache->key_size[cache_id];
+
+   return brw_cache_data_sz(cache, cache_id, data, canonical_size,
+                            relocs, nr_relocs, bo_out);
+}
+
+
+/**
+ * Register the debug name, key size and auxiliary-data size for one
+ * cache id.
+ *
+ * The name is copied with MALLOC() rather than strdup() because
+ * brw_destroy_cache() releases it with FREE(); the allocation wrappers
+ * are not guaranteed to be interchangeable with libc malloc()/free().
+ * If the copy cannot be allocated the name is left NULL.
+ */
+static void
+brw_init_cache_id(struct brw_cache *cache,
+                  const char *name,
+                  enum brw_cache_id id,
+                  GLuint key_size,
+                  GLuint aux_size)
+{
+   size_t name_bytes = strlen(name) + 1;
+   char *name_copy = MALLOC(name_bytes);
+
+   if (name_copy != NULL)
+      memcpy(name_copy, name, name_bytes);
+
+   cache->name[id] = name_copy;
+   cache->key_size[id] = key_size;
+   cache->aux_size[id] = aux_size;
+}
+
+
+/**
+ * Set up brw->cache, the cache for general state: an empty seven-bucket
+ * table plus the name, key size and aux size of every cache id it serves.
+ *
+ * NOTE(review): a failed bucket allocation is not detected here and the
+ * function has no way to report it.
+ */
+static void
+brw_init_general_state_cache(struct brw_context *brw)
+{
+   struct brw_cache *cache = &brw->cache;
+
+   cache->brw = brw;
+   cache->sws = brw->sws;
+
+   cache->buffer_type = BRW_BUFFER_TYPE_GENERAL_STATE;
+
+   cache->size = 7;
+   cache->n_items = 0;
+   /* The table is an array of bucket head pointers, so size it by the
+    * pointer type rather than by the item struct.
+    */
+   cache->items = (struct brw_cache_item **)
+      CALLOC(cache->size, sizeof(*cache->items));
+
+   brw_init_cache_id(cache,
+                     "CC_VP",
+                     BRW_CC_VP,
+                     sizeof(struct brw_cc_viewport),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "CC_UNIT",
+                     BRW_CC_UNIT,
+                     sizeof(struct brw_cc_unit_state),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "WM_PROG",
+                     BRW_WM_PROG,
+                     sizeof(struct brw_wm_prog_key),
+                     sizeof(struct brw_wm_prog_data));
+
+   brw_init_cache_id(cache,
+                     "SAMPLER_DEFAULT_COLOR",
+                     BRW_SAMPLER_DEFAULT_COLOR,
+                     sizeof(struct brw_sampler_default_color),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "SAMPLER",
+                     BRW_SAMPLER,
+                     0,		/* variable key/data size */
+                     0);
+
+   brw_init_cache_id(cache,
+                     "WM_UNIT",
+                     BRW_WM_UNIT,
+                     sizeof(struct brw_wm_unit_state),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "SF_PROG",
+                     BRW_SF_PROG,
+                     sizeof(struct brw_sf_prog_key),
+                     sizeof(struct brw_sf_prog_data));
+
+   brw_init_cache_id(cache,
+                     "SF_VP",
+                     BRW_SF_VP,
+                     sizeof(struct brw_sf_viewport),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "SF_UNIT",
+                     BRW_SF_UNIT,
+                     sizeof(struct brw_sf_unit_state),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "VS_UNIT",
+                     BRW_VS_UNIT,
+                     sizeof(struct brw_vs_unit_state),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "VS_PROG",
+                     BRW_VS_PROG,
+                     sizeof(struct brw_vs_prog_key),
+                     sizeof(struct brw_vs_prog_data));
+
+   brw_init_cache_id(cache,
+                     "CLIP_UNIT",
+                     BRW_CLIP_UNIT,
+                     sizeof(struct brw_clip_unit_state),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "CLIP_PROG",
+                     BRW_CLIP_PROG,
+                     sizeof(struct brw_clip_prog_key),
+                     sizeof(struct brw_clip_prog_data));
+
+   brw_init_cache_id(cache,
+                     "GS_UNIT",
+                     BRW_GS_UNIT,
+                     sizeof(struct brw_gs_unit_state),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "GS_PROG",
+                     BRW_GS_PROG,
+                     sizeof(struct brw_gs_prog_key),
+                     sizeof(struct brw_gs_prog_data));
+}
+
+
+/**
+ * Set up brw->surface_cache, the cache for surface state: an empty
+ * seven-bucket table plus the registration of its two cache ids.
+ *
+ * NOTE(review): a failed bucket allocation is not detected here and the
+ * function has no way to report it.
+ */
+static void
+brw_init_surface_state_cache(struct brw_context *brw)
+{
+   struct brw_cache *cache = &brw->surface_cache;
+
+   cache->brw = brw;
+   cache->sws = brw->sws;
+
+   cache->buffer_type = BRW_BUFFER_TYPE_SURFACE_STATE;
+
+   cache->size = 7;
+   cache->n_items = 0;
+   /* The table is an array of bucket head pointers, so size it by the
+    * pointer type rather than by the item struct.
+    */
+   cache->items = (struct brw_cache_item **)
+      CALLOC(cache->size, sizeof(*cache->items));
+
+   brw_init_cache_id(cache,
+                     "SS_SURFACE",
+                     BRW_SS_SURFACE,
+                     sizeof(struct brw_surface_state),
+                     0);
+
+   /* Registered with key size 0. */
+   brw_init_cache_id(cache,
+                     "SS_SURF_BIND",
+                     BRW_SS_SURF_BIND,
+                     0,
+                     0);
+}
+
+
+/**
+ * Initialise both state caches of the context: general state first, then
+ * surface state.
+ */
+void
+brw_init_caches(struct brw_context *brw)
+{
+   brw_init_general_state_cache(brw);
+   brw_init_surface_state_cache(brw);
+}
+
+
+/**
+ * Empty 'cache': drop the references each item holds (relocation targets
+ * and its own buffer), free the items, then mark all state dirty so that
+ * everything is regenerated.  The bucket array itself is kept.
+ *
+ * The saved curbe buffer (brw->curbe.last_buf) is released as well.
+ */
+static void
+brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+   GLuint bucket;
+
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("%s\n", __FUNCTION__);
+
+   for (bucket = 0; bucket < cache->size; bucket++) {
+      struct brw_cache_item *c = cache->items[bucket];
+
+      while (c) {
+         struct brw_cache_item *next = c->next;
+         GLuint r;
+
+         for (r = 0; r < c->nr_relocs; r++)
+            bo_reference(&c->relocs[r].bo, NULL);
+
+         bo_reference(&c->bo, NULL);
+
+         /* Key, aux data and relocs share the allocation at c->key. */
+         FREE((void *)c->key);
+         FREE(c);
+
+         c = next;
+      }
+
+      cache->items[bucket] = NULL;
+   }
+
+   cache->n_items = 0;
+
+   if (brw->curbe.last_buf) {
+      FREE(brw->curbe.last_buf);
+      brw->curbe.last_buf = NULL;
+   }
+
+   brw->state.dirty.mesa |= ~0;
+   brw->state.dirty.brw |= ~0;
+   brw->state.dirty.cache |= ~0;
+}
+
+/* Clear all entries from the cache that point to the given bo.
+ *
+ * An entry is removed when the winsys bo_references() hook reports that
+ * the entry's buffer references 'bo'.
+ *
+ * This lets us release memory for reuse earlier for known-dead buffers,
+ * at the cost of walking the entire hash table.
+ */
+void
+brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo)
+{
+   GLuint bucket;
+
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("%s\n", __FUNCTION__);
+
+   for (bucket = 0; bucket < cache->size; bucket++) {
+      /* 'link' is the pointer that leads to the current item, so an
+       * item can be unlinked without tracking its predecessor.
+       */
+      struct brw_cache_item **link = &cache->items[bucket];
+
+      while (*link) {
+         struct brw_cache_item *c = *link;
+         GLuint r;
+
+         if (!cache->sws->bo_references(c->bo, bo)) {
+            link = &c->next;
+            continue;
+         }
+
+         *link = c->next;
+
+         for (r = 0; r < c->nr_relocs; r++)
+            bo_reference(&c->relocs[r].bo, NULL);
+
+         bo_reference(&c->bo, NULL);
+
+         FREE((void *)c->key);
+         FREE(c);
+         cache->n_items--;
+      }
+   }
+}
+
+/**
+ * Flush either state cache once it has grown past 1000 items.  There is
+ * no per-item replacement; an oversized cache is simply emptied and left
+ * to refill.
+ */
+void
+brw_state_cache_check_size(struct brw_context *brw)
+{
+   /* un-tuned guess.  We've got around 20 state objects for a total of
+    * around 32k, so 1000 of them is around 1.5MB.
+    */
+   const GLuint max_items = 1000;
+
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
+
+   if (brw->cache.n_items > max_items)
+      brw_clear_cache(brw, &brw->cache);
+
+   if (brw->surface_cache.n_items > max_items)
+      brw_clear_cache(brw, &brw->surface_cache);
+}
+
+
+/**
+ * Tear down 'cache' completely: empty it, drop the per-id "last buffer"
+ * references and names, and free the bucket array.
+ */
+static void
+brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+   GLuint id;
+
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("%s\n", __FUNCTION__);
+
+   brw_clear_cache(brw, cache);
+
+   for (id = 0; id < BRW_MAX_CACHE; id++) {
+      bo_reference(&cache->last_bo[id], NULL);
+      FREE(cache->name[id]);
+   }
+
+   FREE(cache->items);
+   cache->items = NULL;
+   cache->size = 0;
+}
+
+
+/**
+ * Destroy both state caches of the context: general state first, then
+ * surface state.
+ */
+void
+brw_destroy_caches(struct brw_context *brw)
+{
+   brw_destroy_cache(brw, &brw->cache);
+   brw_destroy_cache(brw, &brw->surface_cache);
+}
diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c
new file mode 100644
index 00000000000..049c278c93e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_debug.c
@@ -0,0 +1,153 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+
+
+/* One row of a table that pairs a dirty-state flag with a printable name.
+ * The tables built from this struct end with an all-zero entry.
+ */
+struct dirty_bit_map {
+ uint32_t bit; /* flag value */
+ char *name; /* the flag's identifier as a string */
+ /* Starts at 0 in every table entry.
+  * NOTE(review): presumably a per-flag tally -- confirm against the code
+  * that updates it.
+  */
+ uint32_t count;
+};
+
+/* Expand to a dirty_bit_map initializer for the given flag: its value,
+ * its identifier turned into a string, and a count of zero.
+ */
+#define DEFINE_BIT(name) {name, #name, 0}
+
+/* Name table for the PIPE_NEW_* dirty flags.  Despite the "mesa" in the
+ * identifier, every entry here is a PIPE_NEW_* flag.
+ */
+static struct dirty_bit_map mesa_bits[] = {
+ DEFINE_BIT(PIPE_NEW_DEPTH_STENCIL_ALPHA),
+ DEFINE_BIT(PIPE_NEW_RAST),
+ DEFINE_BIT(PIPE_NEW_BLEND),
+ DEFINE_BIT(PIPE_NEW_VIEWPORT),
+ DEFINE_BIT(PIPE_NEW_SAMPLERS),
+ DEFINE_BIT(PIPE_NEW_VERTEX_BUFFER),
+ DEFINE_BIT(PIPE_NEW_VERTEX_ELEMENT),
+ DEFINE_BIT(PIPE_NEW_FRAGMENT_SHADER),
+ DEFINE_BIT(PIPE_NEW_VERTEX_SHADER),
+ DEFINE_BIT(PIPE_NEW_FRAGMENT_CONSTANTS),
+ DEFINE_BIT(PIPE_NEW_VERTEX_CONSTANTS),
+ DEFINE_BIT(PIPE_NEW_CLIP),
+ DEFINE_BIT(PIPE_NEW_INDEX_BUFFER),
+ DEFINE_BIT(PIPE_NEW_INDEX_RANGE),
+ DEFINE_BIT(PIPE_NEW_BLEND_COLOR),
+ DEFINE_BIT(PIPE_NEW_POLYGON_STIPPLE),
+ DEFINE_BIT(PIPE_NEW_FRAMEBUFFER_DIMENSIONS),
+ DEFINE_BIT(PIPE_NEW_DEPTH_BUFFER),
+ DEFINE_BIT(PIPE_NEW_COLOR_BUFFERS),
+ DEFINE_BIT(PIPE_NEW_QUERY),
+ DEFINE_BIT(PIPE_NEW_SCISSOR),
+ DEFINE_BIT(PIPE_NEW_BOUND_TEXTURES),
+ DEFINE_BIT(PIPE_NEW_NR_CBUFS),
+ /* All-zero entry marks the end of the table. */
+ {0, 0, 0}
+};
+
+/* Name table for the BRW_NEW_* dirty flags. */
+static struct dirty_bit_map brw_bits[] = {
+ DEFINE_BIT(BRW_NEW_URB_FENCE),
+ DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
+ DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
+ DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS),
+ DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
+ DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
+ DEFINE_BIT(BRW_NEW_PRIMITIVE),
+ DEFINE_BIT(BRW_NEW_CONTEXT),
+ DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
+ DEFINE_BIT(BRW_NEW_PSP),
+ DEFINE_BIT(BRW_NEW_WM_SURFACES),
+ /* NOTE(review): BRW_NEW_xxx reads like a placeholder name -- confirm it
+  * is an intentionally defined flag.
+  */
+ DEFINE_BIT(BRW_NEW_xxx),
+ DEFINE_BIT(BRW_NEW_INDICES),
+ /* All-zero entry marks the end of the table. */
+ {0, 0, 0}
+};
+
+static struct dirty_bit_map cache_bits[] = {
+ DEFINE_BIT(CACHE_NEW_CC_VP),
+ DEFINE_BIT(CACHE_NEW_CC_UNIT),
+ DEFINE_BIT(CACHE_NEW_WM_PROG),
+ DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR),
+ DEFINE_BIT(CACHE_NEW_SAMPLER),
+ DEFINE_BIT(CACHE_NEW_WM_UNIT),
+ DEFINE_BIT(CACHE_NEW_SF_PROG),
+ DEFINE_BIT(CACHE_NEW_SF_VP),
+ DEFINE_BIT(CACHE_NEW_SF_UNIT),
+ DEFINE_BIT(CACHE_NEW_VS_UNIT),
+ DEFINE_BIT(CACHE_NEW_VS_PROG),
+ DEFINE_BIT(CACHE_NEW_GS_UNIT),
+ DEFINE_BIT(CACHE_NEW_GS_PROG),