diff options
author | Keith Whitwell <keithw@vmware.com> | 2009-12-22 09:40:39 +0000 |
---|---|---|
committer | Keith Whitwell <keithw@vmware.com> | 2009-12-22 09:40:39 +0000 |
commit | aa02683e45f1eaf61bba2ba7eeda7686efeed2ca (patch) | |
tree | 63e0ef2fa85e5d7ebd6ffc6ae9043ce0819251a2 | |
parent | ebbc73d1aed283c9bc4aa2b37bed4374bbaec5b5 (diff) | |
parent | 0fc4dd3819af252c028ed43bbd668b4f34104e32 (diff) |
Merge branch 'i965g-restart'
Conflicts:
configure.ac
129 files changed, 36360 insertions, 19 deletions
@@ -105,6 +105,7 @@ irix6-n32-static \ irix6-o32 \ irix6-o32-static \ linux \ +linux-i965 \ linux-alpha \ linux-alpha-static \ linux-cell \ diff --git a/SConstruct b/SConstruct index 122b8cf916f..8e063e28078 100644 --- a/SConstruct +++ b/SConstruct @@ -32,10 +32,10 @@ import common default_statetrackers = 'mesa' if common.default_platform in ('linux', 'freebsd', 'darwin'): - default_drivers = 'softpipe,failover,svga,i915,trace,identity,llvmpipe' + default_drivers = 'softpipe,failover,svga,i915,i965,trace,identity,llvmpipe' default_winsys = 'xlib' elif common.default_platform in ('winddk',): - default_drivers = 'softpipe,svga,i915,trace,identity' + default_drivers = 'softpipe,svga,i915,i965,trace,identity' default_winsys = 'all' else: default_drivers = 'all' @@ -46,9 +46,9 @@ common.AddOptions(opts) opts.Add(ListVariable('statetrackers', 'state trackers to build', default_statetrackers, ['mesa', 'python', 'xorg'])) opts.Add(ListVariable('drivers', 'pipe drivers to build', default_drivers, - ['softpipe', 'failover', 'svga', 'i915', 'cell', 'trace', 'r300', 'identity', 'llvmpipe'])) + ['softpipe', 'failover', 'svga', 'i915', 'i965', 'cell', 'trace', 'r300', 'identity', 'llvmpipe'])) opts.Add(ListVariable('winsys', 'winsys drivers to build', default_winsys, - ['xlib', 'vmware', 'intel', 'gdi', 'radeon'])) + ['xlib', 'vmware', 'intel', 'i965', 'gdi', 'radeon'])) opts.Add(EnumVariable('MSVS_VERSION', 'MS Visual C++ version', None, allowed_values=('7.1', '8.0', '9.0'))) diff --git a/configs/default b/configs/default index eb6123d1e0a..8a975982a8b 100644 --- a/configs/default +++ b/configs/default @@ -96,7 +96,7 @@ EGL_DRIVERS_DIRS = demo GALLIUM_DIRS = auxiliary drivers state_trackers GALLIUM_AUXILIARY_DIRS = rbug draw translate cso_cache pipebuffer tgsi sct rtasm util indices vl GALLIUM_AUXILIARIES = $(foreach DIR,$(GALLIUM_AUXILIARY_DIRS),$(TOP)/src/gallium/auxiliary/$(DIR)/lib$(DIR).a) -GALLIUM_DRIVERS_DIRS = softpipe failover svga i915 trace identity 
+GALLIUM_DRIVERS_DIRS = softpipe failover svga i915 i965 trace identity GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVERS_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a) GALLIUM_WINSYS_DIRS = xlib egl_xlib GALLIUM_WINSYS_DRM_DIRS = diff --git a/configs/linux-dri b/configs/linux-dri index 0802543347a..cf1f4e19833 100644 --- a/configs/linux-dri +++ b/configs/linux-dri @@ -60,7 +60,7 @@ EGL_DRIVERS_DIRS = demo glx DRIVER_DIRS = dri WINDOW_SYSTEM = dri GALLIUM_WINSYS_DIRS = drm -GALLIUM_WINSYS_DRM_DIRS = vmware intel +GALLIUM_WINSYS_DRM_DIRS = vmware intel i965 GALLIUM_STATE_TRACKERS_DIRS = egl DRI_DIRS = i810 i915 i965 mach64 mga r128 r200 r300 radeon \ diff --git a/configs/linux-i965 b/configs/linux-i965 new file mode 100644 index 00000000000..e66abc347bb --- /dev/null +++ b/configs/linux-i965 @@ -0,0 +1,8 @@ +# Configuration for standalone mode i965 debug + +include $(TOP)/configs/linux-debug + +CONFIG_NAME = linux-i965 + +GALLIUM_DRIVER_DIRS = i965 +GALLIUM_WINSYS_DIRS = drm/i965/xlib diff --git a/configure.ac b/configure.ac index dd0c78547c4..d83dd4315b7 100644 --- a/configure.ac +++ b/configure.ac @@ -1221,10 +1221,10 @@ AC_ARG_ENABLE([gallium-intel], [enable_gallium_intel="$enableval"], [enable_gallium_intel=auto]) if test "x$enable_gallium_intel" = xyes; then - GALLIUM_WINSYS_DRM_DIRS="$GALLIUM_WINSYS_DRM_DIRS intel" - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i915" + GALLIUM_WINSYS_DRM_DIRS="$GALLIUM_WINSYS_DRM_DIRS intel i965" + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i915 i965" elif test "x$enable_gallium_intel" = xauto; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i915" + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i915 i965" fi dnl diff --git a/progs/fp/fp-tri.c b/progs/fp/fp-tri.c index 26af66ad84e..ed29a2d683d 100644 --- a/progs/fp/fp-tri.c +++ b/progs/fp/fp-tri.c @@ -176,6 +176,17 @@ static void Init( void ) } + { + const float Ambient[4] = { 0.0, 1.0, 0.0, 0.0 }; + const float Diffuse[4] = { 1.0, 0.0, 0.0, 0.0 }; + const float 
Specular[4] = { 0.0, 0.0, 1.0, 0.0 }; + const float Emission[4] = { 0.0, 0.0, 0.0, 1.0 }; + glMaterialfv(GL_FRONT_AND_BACK, GL_AMBIENT, Ambient); + glMaterialfv(GL_FRONT_AND_BACK, GL_DIFFUSE, Diffuse); + glMaterialfv(GL_FRONT_AND_BACK, GL_SPECULAR, Specular); + glMaterialfv(GL_FRONT_AND_BACK, GL_EMISSION, Emission); + } + glClearColor(.1, .3, .5, 0); } diff --git a/progs/fp/mov-imm.txt b/progs/fp/mov-imm.txt new file mode 100644 index 00000000000..38e48079d09 --- /dev/null +++ b/progs/fp/mov-imm.txt @@ -0,0 +1,3 @@ +!!ARBfp1.0 +MOV result.color, {0.5, 0.8, 0.3, 1.0}; +END diff --git a/progs/fp/mov-param.txt b/progs/fp/mov-param.txt new file mode 100644 index 00000000000..13d82fe00b8 --- /dev/null +++ b/progs/fp/mov-param.txt @@ -0,0 +1,4 @@ +!!ARBfp1.0 +PARAM Diffuse = state.material.diffuse; +MOV result.color, Diffuse; +END diff --git a/progs/trivial/.gitignore b/progs/trivial/.gitignore index 4d6e405c500..4317eb607fe 100644 --- a/progs/trivial/.gitignore +++ b/progs/trivial/.gitignore @@ -147,6 +147,7 @@ vbo-drawarrays vbo-drawelements vbo-drawrange vbo-noninterleaved +vbo-tri vp-array vp-array-int vp-clip diff --git a/progs/trivial/Makefile b/progs/trivial/Makefile index 70728616d28..e15ec33ab59 100644 --- a/progs/trivial/Makefile +++ b/progs/trivial/Makefile @@ -153,6 +153,7 @@ SOURCES = \ tristrip-clip.c \ tristrip-flat.c \ tristrip.c \ + vbo-tri.c \ vbo-drawarrays.c \ vbo-noninterleaved.c \ vbo-drawelements.c \ diff --git a/progs/trivial/tri-orig.c b/progs/trivial/tri-orig.c index d86d34c39de..f86ac52a026 100644 --- a/progs/trivial/tri-orig.c +++ b/progs/trivial/tri-orig.c @@ -51,7 +51,7 @@ static void Reshape(int width, int height) glMatrixMode(GL_PROJECTION); glLoadIdentity(); -/* glOrtho(-1.0, 1.0, -1.0, 1.0, -0.5, 1000.0); */ + glOrtho(-1.0, 1.0, -1.0, 1.0, -0.5, 1000.0); glMatrixMode(GL_MODELVIEW); } @@ -74,11 +74,11 @@ static void Draw(void) glBegin(GL_TRIANGLES); glColor3f(0,0,.7); - glVertex3f( 0.9, -0.9, -0.0); + glVertex3f( 0.9, -0.9, -30.0); 
glColor3f(.8,0,0); - glVertex3f( 0.9, 0.9, -0.0); + glVertex3f( 0.9, 0.9, -30.0); glColor3f(0,.9,0); - glVertex3f(-0.9, 0.0, -0.0); + glVertex3f(-0.9, 0.0, -30.0); glEnd(); glFlush(); @@ -119,7 +119,7 @@ int main(int argc, char **argv) glutInitWindowPosition(0, 0); glutInitWindowSize( 250, 250); - type = GLUT_RGB | GLUT_ALPHA; + type = GLUT_RGB; type |= (doubleBuffer) ? GLUT_DOUBLE : GLUT_SINGLE; glutInitDisplayMode(type); diff --git a/progs/trivial/vbo-tri.c b/progs/trivial/vbo-tri.c new file mode 100644 index 00000000000..d4cba14414c --- /dev/null +++ b/progs/trivial/vbo-tri.c @@ -0,0 +1,131 @@ +/* Even simpler for many drivers than trivial/tri -- pass-through + * vertex shader and vertex data in a VBO. + */ + +#include <assert.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <GL/glew.h> +#include <GL/glut.h> + + +struct { + GLfloat pos[4]; + GLfloat color[4]; +} verts[] = +{ + { { -0.9, -0.9, 0.0, 1.0 }, + {.8,0,0, 1}, + }, + + { { 0.9, -0.9, 0.0, 1.0 }, + { 0, .9, 0, 1 }, + }, + + { { 0, 0.9, 0.0, 1.0 }, + {0,0,.7, 1}, + }, +}; + +GLuint arrayObj; + +static void Init( void ) +{ + GLint errno; + GLuint prognum; + + static const char *prog1 = + "!!ARBvp1.0\n" + "MOV result.color, vertex.color;\n" + "MOV result.position, vertex.position;\n" + "END\n"; + + + glGenProgramsARB(1, &prognum); + + glBindProgramARB(GL_VERTEX_PROGRAM_ARB, prognum); + glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, + strlen(prog1), (const GLubyte *) prog1); + + assert(glIsProgramARB(prognum)); + errno = glGetError(); + printf("glGetError = %d\n", errno); + if (errno != GL_NO_ERROR) + { + GLint errorpos; + + glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errorpos); + printf("errorpos: %d\n", errorpos); + printf("%s\n", (char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)); + } + + + glEnableClientState( GL_VERTEX_ARRAY ); + glEnableClientState( GL_COLOR_ARRAY ); + + glGenBuffersARB(1, &arrayObj); + 
glBindBufferARB(GL_ARRAY_BUFFER_ARB, arrayObj); + glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), verts, GL_STATIC_DRAW_ARB); + + glVertexPointer( 4, GL_FLOAT, sizeof(verts[0]), 0 ); + glColorPointer( 4, GL_FLOAT, sizeof(verts[0]), (void *)(4*sizeof(float)) ); +} + + + +static void Display( void ) +{ + glClearColor(0.3, 0.3, 0.3, 1); + glClear( GL_COLOR_BUFFER_BIT ); + + glEnable(GL_VERTEX_PROGRAM_NV); + glDrawArrays( GL_TRIANGLES, 0, 3 ); + + glutSwapBuffers(); +} + + +static void Reshape( int width, int height ) +{ + glViewport( 0, 0, width, height ); + glMatrixMode( GL_PROJECTION ); + glLoadIdentity(); + glOrtho(-1.0, 1.0, -1.0, 1.0, -0.5, 1000.0); + glMatrixMode( GL_MODELVIEW ); + glLoadIdentity(); + /*glTranslatef( 0.0, 0.0, -15.0 );*/ +} + + +static void Key( unsigned char key, int x, int y ) +{ + (void) x; + (void) y; + switch (key) { + case 27: + exit(0); + break; + } + glutPostRedisplay(); +} + + + + +int main( int argc, char *argv[] ) +{ + glutInit( &argc, argv ); + glutInitWindowPosition( 0, 0 ); + glutInitWindowSize( 250, 250 ); + glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE ); + glutCreateWindow(argv[0]); + glewInit(); + glutReshapeFunc( Reshape ); + glutKeyboardFunc( Key ); + glutDisplayFunc( Display ); + Init(); + glutMainLoop(); + return 0; +} diff --git a/progs/vp/add-param-imm.txt b/progs/vp/add-param-imm.txt new file mode 100644 index 00000000000..90bcf96528f --- /dev/null +++ b/progs/vp/add-param-imm.txt @@ -0,0 +1,7 @@ +!!ARBvp1.0 +TEMP R0; +PARAM Emission = state.material.emission; +ADD R0, vertex.color, {-0.5}.x; +ADD result.color, R0, Emission.w; +MOV result.position, vertex.position; +END diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 27e0b0d1595..4e01123fff1 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -70,6 +70,7 @@ #include "util/u_stream.h" #include "util/u_math.h" #include "util/u_tile.h" +#include "util/u_prim.h" #ifdef 
PIPE_SUBSYSTEM_WINDOWS_DISPLAY @@ -601,6 +602,32 @@ const char *pf_name( enum pipe_format format ) } + +static const struct debug_named_value pipe_prim_names[] = { +#ifdef DEBUG + DEBUG_NAMED_VALUE(PIPE_PRIM_POINTS), + DEBUG_NAMED_VALUE(PIPE_PRIM_LINES), + DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_LOOP), + DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_STRIP), + DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLES), + DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_STRIP), + DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_FAN), + DEBUG_NAMED_VALUE(PIPE_PRIM_QUADS), + DEBUG_NAMED_VALUE(PIPE_PRIM_QUAD_STRIP), + DEBUG_NAMED_VALUE(PIPE_PRIM_POLYGON), +#endif + DEBUG_NAMED_VALUE_END +}; + + +const char *u_prim_name( unsigned prim ) +{ + return debug_dump_enum(pipe_prim_names, prim); +} + + + + #ifdef DEBUG void debug_dump_image(const char *prefix, unsigned format, unsigned cpp, diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index b76592d1ec6..81aeb83cbb5 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -583,6 +583,19 @@ do { \ #endif +static INLINE uint32_t util_unsigned_fixed(float value, unsigned frac_bits) +{ + value *= (1<<frac_bits); + return value < 0 ? 
0 : value; +} + +static INLINE int32_t util_signed_fixed(float value, unsigned frac_bits) +{ + return value * (1<<frac_bits); +} + + + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index a9b533eea70..74343299623 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -135,4 +135,6 @@ static INLINE unsigned u_reduced_prim( unsigned pipe_prim ) } } +const char *u_prim_name( unsigned pipe_prim ); + #endif diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h index 745b5834af6..e158bed9d04 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.h +++ b/src/gallium/auxiliary/util/u_upload_mgr.h @@ -32,6 +32,8 @@ #ifndef U_UPLOAD_MGR_H #define U_UPLOAD_MGR_H +#include "pipe/p_defines.h" + struct pipe_screen; struct pipe_buffer; struct u_upload_mgr; diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile new file mode 100644 index 00000000000..95fd3cd69bd --- /dev/null +++ b/src/gallium/drivers/i965/Makefile @@ -0,0 +1,74 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = i965 + +C_SOURCES = \ + brw_cc.c \ + brw_clip.c \ + brw_clip_line.c \ + brw_clip_point.c \ + brw_clip_state.c \ + brw_clip_tri.c \ + brw_clip_unfilled.c \ + brw_clip_util.c \ + brw_context.c \ + brw_curbe.c \ + brw_disasm.c \ + brw_draw.c \ + brw_draw_upload.c \ + brw_eu.c \ + brw_eu_debug.c \ + brw_eu_emit.c \ + brw_eu_util.c \ + brw_gs.c \ + brw_gs_emit.c \ + brw_gs_state.c \ + brw_misc_state.c \ + brw_pipe_blend.c \ + brw_pipe_depth.c \ + brw_pipe_fb.c \ + brw_pipe_query.c \ + brw_pipe_shader.c \ + brw_pipe_flush.c \ + brw_pipe_misc.c \ + brw_pipe_sampler.c \ + brw_pipe_vertex.c \ + brw_pipe_clear.c \ + brw_pipe_rast.c \ + brw_sf.c \ + brw_sf_emit.c \ + brw_sf_state.c \ + brw_state_batch.c \ + brw_state_debug.c \ + brw_state_cache.c \ + brw_state_upload.c \ + brw_structs_dump.c \ + brw_swtnl.c \ + brw_urb.c \ + brw_util.c \ + brw_vs.c \ + brw_vs_emit.c \ + brw_vs_state.c \ + brw_vs_surface_state.c \ + brw_wm.c \ + brw_wm_debug.c \ + brw_wm_emit.c \ + brw_wm_fp.c \ + brw_wm_iz.c \ + brw_wm_pass0.c \ + brw_wm_pass1.c \ + brw_wm_pass2.c \ + brw_wm_sampler_state.c \ + brw_wm_state.c \ + brw_wm_surface_state.c \ + brw_screen.c \ + brw_screen_buffers.c \ + brw_screen_tex_layout.c \ + brw_screen_texture.c \ + brw_screen_surface.c \ + brw_batchbuffer.c \ + brw_winsys_debug.c \ + intel_decode.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript new file mode 100644 index 00000000000..9c2faaf4b49 --- /dev/null +++ b/src/gallium/drivers/i965/SConscript @@ -0,0 +1,77 @@ +Import('*') + +env = env.Clone() + +i965 = env.ConvenienceLibrary( + target = 'i965', + source = [ + 'brw_batchbuffer.c', + 'brw_cc.c', + 'brw_clip.c', + 'brw_clip_line.c', + 'brw_clip_point.c', + 'brw_clip_state.c', + 'brw_clip_tri.c', + 'brw_clip_unfilled.c', + 'brw_clip_util.c', + 'brw_context.c', + 'brw_curbe.c', + 'brw_disasm.c', + 'brw_draw.c', + 'brw_draw_upload.c', + 'brw_eu.c', + 
'brw_eu_debug.c', + 'brw_eu_emit.c', + 'brw_eu_util.c', + 'brw_gs.c', + 'brw_gs_emit.c', + 'brw_gs_state.c', + 'brw_misc_state.c', + 'brw_pipe_blend.c', + 'brw_pipe_clear.c', + 'brw_pipe_depth.c', + 'brw_pipe_fb.c', + 'brw_pipe_flush.c', + 'brw_pipe_misc.c', + 'brw_pipe_query.c', + 'brw_pipe_rast.c', + 'brw_pipe_sampler.c', + 'brw_pipe_shader.c', + 'brw_pipe_vertex.c', + 'brw_screen_buffers.c', + 'brw_screen.c', + 'brw_screen_surface.c', + 'brw_screen_tex_layout.c', + 'brw_screen_texture.c', + 'brw_structs_dump.c', + 'brw_sf.c', + 'brw_sf_emit.c', + 'brw_sf_state.c', + 'brw_state_batch.c', + 'brw_state_cache.c', +# 'brw_state_debug.c', + 'brw_state_upload.c', + 'brw_swtnl.c', + 'brw_urb.c', + 'brw_util.c', + 'brw_vs.c', + 'brw_vs_emit.c', + 'brw_vs_state.c', + 'brw_vs_surface_state.c', + 'brw_wm.c', +# 'brw_wm_constant_buffer.c', + 'brw_wm_debug.c', + 'brw_wm_emit.c', + 'brw_wm_fp.c', +# 'brw_wm_glsl.c', + 'brw_wm_iz.c', + 'brw_wm_pass0.c', + 'brw_wm_pass1.c', + 'brw_wm_pass2.c', + 'brw_wm_sampler_state.c', + 'brw_wm_state.c', + 'brw_wm_surface_state.c', + 'intel_decode.c', + ]) + +Export('i965') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c new file mode 100644 index 00000000000..22607dc6083 --- /dev/null +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -0,0 +1,202 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" + +#include "brw_batchbuffer.h" +#include "brw_reg.h" +#include "brw_winsys.h" +#include "brw_debug.h" +#include "brw_structs.h" + +#define ALWAYS_EMIT_MI_FLUSH 1 + +enum pipe_error +brw_batchbuffer_reset(struct brw_batchbuffer *batch) +{ + enum pipe_error ret; + + ret = batch->sws->bo_alloc( batch->sws, + BRW_BUFFER_TYPE_BATCH, + BRW_BATCH_SIZE, 4096, + &batch->buf ); + if (ret) + return ret; + + batch->size = BRW_BATCH_SIZE; + + /* With map_range semantics, the winsys can decide whether to + * inject a malloc'ed bounce buffer instead of mapping directly. 
+ */ + batch->map = batch->sws->bo_map(batch->buf, + BRW_DATA_BATCH_BUFFER, + 0, batch->size, + GL_TRUE, + GL_TRUE, + GL_TRUE); + + batch->ptr = batch->map; + return PIPE_OK; +} + +struct brw_batchbuffer * +brw_batchbuffer_alloc(struct brw_winsys_screen *sws, + struct brw_chipset chipset) +{ + struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer); + + batch->sws = sws; + batch->chipset = chipset; + brw_batchbuffer_reset(batch); + + return batch; +} + +void +brw_batchbuffer_free(struct brw_batchbuffer *batch) +{ + if (batch->map) { + batch->sws->bo_unmap(batch->buf); + batch->map = NULL; + } + + bo_reference(&batch->buf, NULL); + FREE(batch); +} + + +void +_brw_batchbuffer_flush(struct brw_batchbuffer *batch, + const char *file, + int line) +{ + GLuint used = batch->ptr - batch->map; + + if (used == 0) + return; + + /* Post-swap throttling done by the state tracker. + */ + + if (BRW_DEBUG & DEBUG_BATCH) + debug_printf("%s:%d: Batchbuffer flush with %db used\n", + file, line, used); + + if (ALWAYS_EMIT_MI_FLUSH) { + *(GLuint *) (batch->ptr) = MI_FLUSH | BRW_FLUSH_STATE_CACHE; + batch->ptr += 4; + used = batch->ptr - batch->map; + } + + /* Round batchbuffer usage to 2 DWORDs. + */ + if ((used & 4) == 0) { + *(GLuint *) (batch->ptr) = 0; /* noop */ + batch->ptr += 4; + used = batch->ptr - batch->map; + } + + /* Mark the end of the buffer. + */ + *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; + batch->ptr += 4; + used = batch->ptr - batch->map; + + batch->sws->bo_flush_range(batch->buf, 0, used); + batch->sws->bo_unmap(batch->buf); + batch->map = NULL; + batch->ptr = NULL; + + batch->sws->bo_exec(batch->buf, used ); + + if (BRW_DEBUG & DEBUG_SYNC) { + /* Abuse map/unmap to achieve wait-for-fence. + * + * XXX: hide this inside the winsys and export a fence + * interface. 
+ */ + debug_printf("waiting for idle\n"); + batch->sws->bo_wait_idle(batch->buf); + } + + /* Reset the buffer: + */ + brw_batchbuffer_reset(batch); +} + + +/* The OUT_RELOC() macro ends up here, generating a relocation within + * the batch buffer. + */ +enum pipe_error +brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, + struct brw_winsys_buffer *buffer, + uint32_t usage, + uint32_t delta) +{ + int ret; + + if (batch->ptr - batch->map > batch->buf->size) { + debug_printf("bad relocation ptr %p map %p offset %d size %d\n", + batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); + + return PIPE_ERROR_OUT_OF_MEMORY; + } + + ret = batch->sws->bo_emit_reloc(batch->buf, + usage, + delta, + batch->ptr - batch->map, + buffer); + if (ret != 0) + return ret; + + /* bo_emit_reloc was resposible for writing a zero into the + * batchbuffer if necessary. Just need to update our pointer. + */ + batch->ptr += 4; + + return 0; +} + +enum pipe_error +brw_batchbuffer_data(struct brw_batchbuffer *batch, + const void *data, GLuint bytes, + enum cliprect_mode cliprect_mode) +{ + enum pipe_error ret; + + assert((bytes & 3) == 0); + + ret = brw_batchbuffer_require_space(batch, bytes); + if (ret) + return ret; + + memcpy(batch->ptr, data, bytes); + batch->ptr += bytes; + return 0; +} diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h new file mode 100644 index 00000000000..7473f5bea4d --- /dev/null +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -0,0 +1,148 @@ +#ifndef BRW_BATCHBUFFER_H +#define BRW_BATCHBUFFER_H + +#include "util/u_debug.h" + +#include "brw_types.h" +#include "brw_winsys.h" +#include "brw_reg.h" + +#define BATCH_SZ 16384 +#define BATCH_RESERVED 16 + +/* All ignored: + */ +enum cliprect_mode { + IGNORE_CLIPRECTS, + LOOP_CLIPRECTS, + NO_LOOP_CLIPRECTS, + REFERENCES_CLIPRECTS +}; + + + + +struct brw_batchbuffer { + + struct brw_winsys_screen *sws; + struct brw_winsys_buffer *buf; + struct 
brw_chipset chipset; + + /** + * Values exported to speed up the writing the batchbuffer, + * instead of having to go trough a accesor function for + * each dword written. + */ + /*{@*/ + uint8_t *map; + uint8_t *ptr; + size_t size; + struct { + uint8_t *end_ptr; + } emit; + + + size_t relocs; + size_t max_relocs; + /*@}*/ +}; + +struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws, + struct brw_chipset chipset ); + +void brw_batchbuffer_free(struct brw_batchbuffer *batch); + +void _brw_batchbuffer_flush(struct brw_batchbuffer *batch, + const char *file, int line); + + +enum pipe_error +brw_batchbuffer_reset(struct brw_batchbuffer *batch); + + +/* Unlike bmBufferData, this currently requires the buffer be mapped. + * Consider it a convenience function wrapping multple + * intel_buffer_dword() calls. + */ +int brw_batchbuffer_data(struct brw_batchbuffer *batch, + const void *data, GLuint bytes, + enum cliprect_mode cliprect_mode); + + +int brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, + struct brw_winsys_buffer *buffer, + enum brw_buffer_usage usage, + uint32_t offset); + +/* Inline functions - might actually be better off with these + * non-inlined. Certainly better off switching all command packets to + * be passed as structs rather than dwords, but that's a little bit of + * work... 
+ */ +static INLINE GLint +brw_batchbuffer_space(struct brw_batchbuffer *batch) +{ + return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); +} + + +static INLINE void +brw_batchbuffer_emit_dword(struct brw_batchbuffer *batch, GLuint dword) +{ + assert(batch->map); + assert(brw_batchbuffer_space(batch) >= 4); + *(GLuint *) (batch->ptr) = dword; + batch->ptr += 4; +} + +static INLINE enum pipe_error +brw_batchbuffer_require_space(struct brw_batchbuffer *batch, + GLuint sz) +{ + assert(sz < batch->size - 8); + if (brw_batchbuffer_space(batch) < sz) { + assert(0); + return PIPE_ERROR_OUT_OF_MEMORY; + } +#ifdef DEBUG + batch->emit.end_ptr = batch->ptr + sz; +#endif + return 0; +} + +/* Here are the crusty old macros, to be removed: + */ +#define BEGIN_BATCH(n, cliprect_mode) do { \ + brw_batchbuffer_require_space(brw->batch, (n)*4); \ + } while (0) + +#define OUT_BATCH(d) brw_batchbuffer_emit_dword(brw->batch, d) + +#define OUT_RELOC(buf, usage, delta) do { \ + assert((unsigned) (delta) < buf->size); \ + brw_batchbuffer_emit_reloc(brw->batch, buf, \ + usage, delta); \ + } while (0) + +#ifdef DEBUG +#define ADVANCE_BATCH() do { \ + unsigned int _n = brw->batch->ptr - brw->batch->emit.end_ptr; \ + if (_n != 0) { \ + debug_printf("%s: %d too many bytes emitted to batch\n", \ + __FUNCTION__, _n); \ + abort(); \ + } \ + brw->batch->emit.end_ptr = NULL; \ + } while(0) +#else +#define ADVANCE_BATCH() +#endif + +static INLINE void +brw_batchbuffer_emit_mi_flush(struct brw_batchbuffer *batch) +{ + brw_batchbuffer_require_space(batch, 4); + brw_batchbuffer_emit_dword(batch, MI_FLUSH); +} + +#endif diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c new file mode 100644 index 00000000000..3e070f5591a --- /dev/null +++ b/src/gallium/drivers/i965/brw_cc.c @@ -0,0 +1,111 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + + +static enum pipe_error prepare_cc_vp( struct brw_context *brw ) +{ + return brw_cache_data( &brw->cache, + BRW_CC_VP, + &brw->curr.ccv, + NULL, 0, + &brw->cc.reloc[CC_RELOC_VP].bo ); +} + +const struct brw_tracked_state brw_cc_vp = { + .dirty = { + .mesa = PIPE_NEW_VIEWPORT, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .prepare = prepare_cc_vp +}; + + +/* A long-winded way to OR two unsigned integers together: + */ +static INLINE struct brw_cc3 +combine_cc3( struct brw_cc3 a, struct brw_cc3 b ) +{ + union { struct brw_cc3 cc3; unsigned i; } ca, cb; + ca.cc3 = a; + cb.cc3 = b; + ca.i |= cb.i; + return ca.cc3; +} + + +static int prepare_cc_unit( struct brw_context *brw ) +{ + brw->cc.cc.cc0 = brw->curr.zstencil->cc0; + brw->cc.cc.cc1 = brw->curr.zstencil->cc1; + brw->cc.cc.cc2 = brw->curr.zstencil->cc2; + brw->cc.cc.cc3 = combine_cc3( brw->curr.zstencil->cc3, brw->curr.blend->cc3 ); + + brw->cc.cc.cc5 = brw->curr.blend->cc5; + brw->cc.cc.cc6 = brw->curr.blend->cc6; + brw->cc.cc.cc7 = brw->curr.zstencil->cc7; + + return brw_cache_data_sz(&brw->cache, BRW_CC_UNIT, + &brw->cc.cc, sizeof(brw->cc.cc), + brw->cc.reloc, 1, + &brw->cc.state_bo); +} + +const struct brw_tracked_state brw_cc_unit = { + .dirty = { + .mesa = PIPE_NEW_DEPTH_STENCIL_ALPHA | PIPE_NEW_BLEND, + .brw = 0, + .cache = CACHE_NEW_CC_VP + }, + .prepare = prepare_cc_unit, +}; + + +void brw_hw_cc_init( struct brw_context *brw ) +{ + make_reloc(&brw->cc.reloc[0], + BRW_USAGE_STATE, + 0, + offsetof(struct brw_cc_unit_state, cc4), + NULL); +} + + +void brw_hw_cc_cleanup( struct brw_context *brw ) +{ + bo_reference(&brw->cc.state_bo, NULL); + bo_reference(&brw->cc.reloc[0].bo, NULL); +} diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c new file 
mode 100644 index 00000000000..58d9e56df27 --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip.c @@ -0,0 +1,224 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "pipe/p_state.h"
+
+#include "util/u_math.h"
+
+#include "brw_screen.h"
+#include "brw_batchbuffer.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_pipe_rast.h"
+#include "brw_clip.h"
+
+
+#define FRONT_UNFILLED_BIT  0x1
+#define BACK_UNFILLED_BIT   0x2
+
+/* Generate a clip program for the given key and upload it to the
+ * program cache under BRW_CLIP_PROG.
+ *
+ * The emitter is chosen from key->primitive (triangles, lines or
+ * points); any other value asserts and returns PIPE_ERROR_BAD_INPUT.
+ * bo_out is handed straight to brw_upload_cache().  Errors from
+ * brw_get_program() and brw_upload_cache() are returned unchanged,
+ * otherwise the result is PIPE_OK.
+ */
+static enum pipe_error
+compile_clip_prog( struct brw_context *brw,
+                   struct brw_clip_prog_key *key,
+                   struct brw_winsys_buffer **bo_out )
+{
+   enum pipe_error ret;
+   struct brw_clip_compile c;
+   const GLuint *program;
+   GLuint program_size;
+   GLuint delta;
+
+   memset(&c, 0, sizeof(c));
+
+   /* Begin the compilation:
+    */
+   brw_init_compile(brw, &c.func);
+
+   c.func.single_program_flow = 1;
+
+   c.chipset = brw->chipset;
+   c.key = *key;
+   c.need_ff_sync = c.chipset.is_igdng;
+
+   /* Need to locate the two positions present in vertex + header.
+    * These are currently hardcoded:
+    */
+   c.header_position_offset = ATTR_SIZE;
+   /* delta is the base byte offset added to every attribute offset below. */
+   if (c.chipset.is_igdng)
+      delta = 3 * REG_SIZE;
+   else
+      delta = REG_SIZE;
+
+   c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE;
+
+   /* Optional outputs: an index of zero leaves the offset at the zero
+    * written by the memset above.
+    */
+   if (c.key.output_color0)
+      c.offset_color0 = delta + c.key.output_color0 * ATTR_SIZE;
+
+   if (c.key.output_color1)
+      c.offset_color1 = delta + c.key.output_color1 * ATTR_SIZE;
+
+   if (c.key.output_bfc0)
+      c.offset_bfc0 = delta + c.key.output_bfc0 * ATTR_SIZE;
+
+   if (c.key.output_bfc1)
+      c.offset_bfc1 = delta + c.key.output_bfc1 * ATTR_SIZE;
+
+   if (c.key.output_edgeflag)
+      c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE;
+
+   if (BRW_IS_IGDNG(brw))
+      c.nr_regs = (c.key.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
+   else
+      c.nr_regs = (c.key.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
+
+   c.nr_bytes = c.nr_regs * REG_SIZE;
+
+   c.prog_data.clip_mode = c.key.clip_mode; /* XXX */
+
+   /* For some reason the thread is spawned with only 4 channels
+    * unmasked.
+    */
+   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+   /* Would ideally have the option of producing a program which could
+    * do all three:
+    */
+   switch (key->primitive) {
+   case PIPE_PRIM_TRIANGLES:
+      if (key->do_unfilled)
+         brw_emit_unfilled_clip( &c );
+      else
+         brw_emit_tri_clip( &c );
+      break;
+   case PIPE_PRIM_LINES:
+      brw_emit_line_clip( &c );
+      break;
+   case PIPE_PRIM_POINTS:
+      brw_emit_point_clip( &c );
+      break;
+   default:
+      assert(0);
+      return PIPE_ERROR_BAD_INPUT;
+   }
+
+
+
+   /* get the program
+    */
+   ret = brw_get_program(&c.func, &program, &program_size);
+   if (ret)
+      return ret;
+
+   /* Upload
+    */
+   ret = brw_upload_cache( &brw->cache,
+                           BRW_CLIP_PROG,
+                           &c.key, sizeof(c.key),
+                           NULL, 0,
+                           program, program_size,
+                           &c.prog_data,
+                           &brw->clip.prog_data,
+                           bo_out );
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+
+/* Assemble the clip program key from the current rasterizer, vertex
+ * shader and user clip plane state, then reuse the cached program for
+ * that key or compile a new one into brw->clip.prog_bo.
+ *
+ * Returns PIPE_OK on a cache hit or successful compile, otherwise the
+ * error reported by compile_clip_prog().
+ */
+static enum pipe_error
+upload_clip_prog(struct brw_context *brw)
+{
+   const struct brw_vertex_shader *vs = brw->curr.vertex_shader;
+   struct brw_clip_prog_key key;
+   enum pipe_error ret;
+
+   /* Populate the key, starting from the almost-complete version from
+    * the rast state.
+    */
+
+   /* PIPE_NEW_RAST */
+   key = brw->curr.rast->clip_key;
+
+   /* BRW_NEW_REDUCED_PRIMITIVE */
+   key.primitive = brw->reduced_primitive;
+
+   /* XXX: if edgeflag is moved to a proper TGSI vs output, can remove
+    * dependency on CACHE_NEW_VS_PROG
+    */
+   /* CACHE_NEW_VS_PROG */
+   key.nr_attrs = brw->vs.prog_data->nr_outputs;
+   key.output_edgeflag = brw->vs.prog_data->output_edgeflag;
+
+   /* PIPE_NEW_VS */
+   key.output_hpos = vs->output_hpos;
+   key.output_color0 = vs->output_color0;
+   key.output_color1 = vs->output_color1;
+   key.output_bfc0 = vs->output_bfc0;
+   key.output_bfc1 = vs->output_bfc1;
+
+   /* PIPE_NEW_CLIP */
+   key.nr_userclip = brw->curr.ucp.nr;
+
+   /* Already cached?
+    */
+   if (brw_search_cache(&brw->cache, BRW_CLIP_PROG,
+                        &key, sizeof(key),
+                        NULL, 0,
+                        &brw->clip.prog_data,
+                        &brw->clip.prog_bo))
+      return PIPE_OK;
+
+   /* Compile new program:
+    */
+   ret = compile_clip_prog( brw, &key, &brw->clip.prog_bo );
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+
+/* Tracked-state entry that installs upload_clip_prog as the prepare
+ * hook together with the dirty flags it declares.
+ *
+ * NOTE(review): the key above also reads fields tagged PIPE_NEW_VS,
+ * which is not listed in .mesa - confirm that CACHE_NEW_VS_PROG is
+ * always raised when those vertex shader outputs change.
+ */
+const struct brw_tracked_state brw_clip_prog = {
+   .dirty = {
+      .mesa  = (PIPE_NEW_RAST |
+                PIPE_NEW_CLIP),
+      .brw   = (BRW_NEW_REDUCED_PRIMITIVE),
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .prepare = upload_clip_prog
+};
diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
new file mode 100644
index 00000000000..80e3a11a370
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -0,0 +1,199 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#ifndef BRW_CLIP_H
+#define BRW_CLIP_H
+
+#include "pipe/p_state.h"
+#include "brw_reg.h"
+#include "brw_eu.h"
+
+#define MAX_VERTS (3+6+6)   /* size of brw_clip_compile.reg.vertex[] */
+
+/* Note that if unfilled primitives are being emitted, we have to fix
+ * up polygon offset and flatshading at this point:
+ *
+ * Layout note: the thirteen bitfields from nr_attrs to output_hpos
+ * add up to 32 bits, and the five following output_* fields plus
+ * pad1 add up to another 32 bits.
+ */
+struct brw_clip_prog_key {
+   GLuint nr_attrs:6;
+   GLuint primitive:4;
+   GLuint nr_userclip:3;
+   GLuint do_flat_shading:1;
+   GLuint do_unfilled:1;
+   GLuint fill_cw:2;            /* includes cull information */
+   GLuint fill_ccw:2;           /* includes cull information */
+   GLuint offset_cw:1;
+   GLuint offset_ccw:1;
+   GLuint copy_bfc_cw:1;
+   GLuint copy_bfc_ccw:1;
+   GLuint clip_mode:3;
+   GLuint output_hpos:6;        /* not always zero? */
+
+   GLuint output_color0:6;
+   GLuint output_color1:6;
+   GLuint output_bfc0:6;
+   GLuint output_bfc1:6;
+   GLuint output_edgeflag:6;
+   GLuint pad1:2;
+
+   GLfloat offset_factor;
+   GLfloat offset_units;
+};
+/* Values recorded while a clip program is generated: register counts,
+ * read lengths and the clip mode.
+ */
+struct brw_clip_prog_data {
+   GLuint curb_read_length;     /* user planes? */
+   GLuint clip_mode;
+   GLuint urb_read_length;
+   GLuint total_grf;
+};
+
+#define CLIP_LINE   0
+#define CLIP_POINT  1
+#define CLIP_FILL   2
+#define CLIP_CULL   3
+
+
+#define PRIM_MASK  (0x1f)
+/* Working state for generating one clip program: the instruction
+ * builder (func), a copy of the key, the prog_data being filled in,
+ * the register assignments (reg) and the derived vertex layout.
+ */
+struct brw_clip_compile {
+   struct brw_compile func;
+   struct brw_clip_prog_key key;
+   struct brw_clip_prog_data prog_data;
+
+   struct {
+      struct brw_reg R0;
+      struct brw_reg vertex[MAX_VERTS];
+
+      struct brw_reg t;
+      struct brw_reg t0, t1;
+      struct brw_reg dp0, dp1;
+
+      struct brw_reg dpPrev;
+      struct brw_reg dp;
+      struct brw_reg loopcount;
+      struct brw_reg nr_verts;
+      struct brw_reg planemask;
+
+      struct brw_reg inlist;
+      struct brw_reg outlist;
+      struct brw_reg freelist;
+
+      struct brw_reg dir;
+      struct brw_reg tmp0, tmp1;
+      struct brw_reg offset;
+
+      struct brw_reg fixed_planes;
+      struct brw_reg plane_equation;
+
+      struct brw_reg ff_sync;
+   } reg;
+
+   /* 3 different ways of expressing vertex size, including
+    * key.nr_attrs.
+    */
+   GLuint nr_regs;
+   GLuint nr_bytes;
+
+   GLuint first_tmp;
+   GLuint last_tmp;
+
+   GLboolean need_direction;
+   struct brw_chipset chipset;
+
+   GLuint last_mrf;
+
+   GLuint header_position_offset;
+   GLboolean need_ff_sync;
+
+   GLuint nr_color_attrs;
+   GLuint offset_color0;
+   GLuint offset_color1;
+   GLuint offset_bfc0;
+   GLuint offset_bfc1;
+
+   GLuint offset_hpos;
+   GLuint offset_edgeflag;
+};
+
+#define ATTR_SIZE  (4*4)   /* multiplied by an output index to get its offset */
+
+/* Points are only culled, so no need for a clip routine, however it
+ * works out easier to have a dummy one.
+ */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c );
+void brw_emit_tri_clip( struct brw_clip_compile *c );
+void brw_emit_line_clip( struct brw_clip_compile *c );
+void brw_emit_point_clip( struct brw_clip_compile *c );
+
+/* brw_clip_tri.c, for use by the unfilled clip routine:
+ */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c );
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c );
+void brw_clip_tri( struct brw_clip_compile *c );
+void brw_clip_tri_emit_polygon( struct brw_clip_compile *c );
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+                              GLuint nr_verts );
+
+
+/* Utils:
+ */
+
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+                             struct brw_indirect dest_ptr,
+                             struct brw_indirect v0_ptr, /* from */
+                             struct brw_indirect v1_ptr, /* to */
+                             struct brw_reg t0,
+                             GLboolean force_edgeflag );
+
+void brw_clip_init_planes( struct brw_clip_compile *c );
+
+void brw_clip_emit_vue(struct brw_clip_compile *c,
+                       struct brw_indirect vert,
+                       GLboolean allocate,
+                       GLboolean eot,
+                       GLuint header);
+
+void brw_clip_kill_thread(struct brw_clip_compile *c);
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c );
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c );
+
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+                           GLuint to, GLuint from );
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c );
+
+struct brw_reg get_tmp( struct brw_clip_compile *c );
+
+void brw_clip_project_position(struct brw_clip_compile *c,
+                               struct brw_reg pos );
+void brw_clip_ff_sync(struct brw_clip_compile *c);
+void brw_clip_init_ff_sync(struct brw_clip_compile *c);
+#endif
diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c
new file mode 100644
index 00000000000..54282d975ed
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_line.c
@@ -0,0 +1,271 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "util/u_debug.h"
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+/* Assign registers for the line clipper and record the resulting
+ * curb_read_length, urb_read_length and total_grf in c->prog_data.
+ *
+ * Order of allocation: R0, the user clip plane constants (only when
+ * key.nr_userclip is set), four vertex slots of c->nr_regs registers
+ * each, two registers of scalars, fixed_planes (only without user
+ * clip planes) and finally ff_sync when c->need_ff_sync is set.
+ */
+static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
+{
+   GLuint i = 0,j;
+
+   /* Register usage is static, precompute here:
+    */
+   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+   if (c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec4_grf(i, 0);
+      i += (6 + c->key.nr_userclip + 1) / 2;
+
+      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+   }
+   else
+      c->prog_data.curb_read_length = 0;
+
+
+   /* Payload vertices plus space for more generated vertices:
+    */
+   for (j = 0; j < 4; j++) {
+      c->reg.vertex[j] = brw_vec4_grf(i, 0);
+      i += c->nr_regs;
+   }
+
+   c->reg.t = brw_vec1_grf(i, 0);
+   c->reg.t0 = brw_vec1_grf(i, 1);
+   c->reg.t1 = brw_vec1_grf(i, 2);
+   c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+   c->reg.plane_equation = brw_vec4_grf(i, 4);
+   i++;
+
+   c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+   c->reg.dp1 = brw_vec1_grf(i, 4);
+   i++;
+
+   if (!c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec8_grf(i, 0);
+      i++;
+   }
+
+   if (c->need_ff_sync) {
+      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+      i++;
+   }
+
+   c->first_tmp = i;
+   c->last_tmp = i;
+
+   c->prog_data.urb_read_length = c->nr_regs; /* ? */
+   c->prog_data.total_grf = i;
+}
+
+
+
+/* Line clipping, roughly following this algorithm:
+ *
+ * for (p=0;p<MAX_PLANES;p++) {
+ *    if (clipmask & (1 << p)) {
+ *       GLfloat dp0 = DOTPROD( vtx0, plane[p] );
+ *       GLfloat dp1 = DOTPROD( vtx1, plane[p] );
+ *
+ *       if (IS_NEGATIVE(dp1)) {
+ *          GLfloat t = dp1 / (dp1 - dp0);
+ *          if (t > t1) t1 = t;
+ *       } else {
+ *          GLfloat t = dp0 / (dp0 - dp1);
+ *          if (t > t0) t0 = t;
+ *       }
+ *
+ *       if (t0 + t1 >= 1.0)
+ *          return;
+ *    }
+ * }
+ *
+ * interp( ctx, newvtx0, vtx0, vtx1, t0 );
+ * interp( ctx, newvtx1, vtx1, vtx0, t1 );
+ *
+ * In the emitted code the t0 + t1 test is made once, after the plane
+ * loop has finished, and the thread is always killed at the end
+ * whether or not the two new vertices were emitted.
+ */
+static void clip_and_emit_line( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_indirect vtx0     = brw_indirect(0, 0);
+   struct brw_indirect vtx1     = brw_indirect(1, 0);
+   struct brw_indirect newvtx0  = brw_indirect(2, 0);
+   struct brw_indirect newvtx1  = brw_indirect(3, 0);
+   struct brw_indirect plane_ptr = brw_indirect(4, 0);
+   struct brw_instruction *plane_loop;
+   struct brw_instruction *plane_active;
+   struct brw_instruction *is_negative;
+   struct brw_instruction *is_neg2 = NULL;
+   struct brw_instruction *not_culled;
+   struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
+
+   brw_MOV(p, get_addr_reg(vtx0),      brw_address(c->reg.vertex[0]));
+   brw_MOV(p, get_addr_reg(vtx1),      brw_address(c->reg.vertex[1]));
+   brw_MOV(p, get_addr_reg(newvtx0),   brw_address(c->reg.vertex[2]));
+   brw_MOV(p, get_addr_reg(newvtx1),   brw_address(c->reg.vertex[3]));
+   brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+
+   /* Note: init t0, t1 together:
+    */
+   brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));
+
+   brw_clip_init_planes(c);
+   brw_clip_init_clipmask(c);
+
+   /* -ve rhw workaround */
+   if (c->chipset.is_965) {
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+              brw_imm_ud(1<<20));
+      brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
+   }
+
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   plane_loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      /* if (planemask & 1)
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
+
+      plane_active = brw_IF(p, BRW_EXECUTE_1);
+      {
+         if (c->key.nr_userclip)
+            brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+         else
+            brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+         /* dp = DP4(vtx->position, plane)
+          */
+         brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset_hpos), c->reg.plane_equation);
+
+         /* if (IS_NEGATIVE(dp1))
+          */
+         brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+         brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset_hpos), c->reg.plane_equation);
+         is_negative = brw_IF(p, BRW_EXECUTE_1);
+         {
+            /*
+             * Both can be negative on GM965/G965 due to RHW workaround
+             * if so, this object should be rejected.
+             */
+            if (c->chipset.is_965) {
+               brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
+               is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+               {
+                  brw_clip_kill_thread(c);
+               }
+               brw_ENDIF(p, is_neg2);
+            }
+
+            /* t = dp1 / (dp1 - dp0); if (t > t1) t1 = t; */
+            brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
+            brw_math_invert(p, c->reg.t, c->reg.t);
+            brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
+
+            brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
+            brw_MOV(p, c->reg.t1, c->reg.t);
+            brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+         }
+         is_negative = brw_ELSE(p, is_negative);
+         {
+            /* Coming back in.  We know that both cannot be negative
+             * because the line would have been culled in that case.
+             */
+
+            /* If both are positive, do nothing */
+            /* Only on GM965/G965 */
+            if (c->chipset.is_965) {
+               brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
+               is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+            }
+
+            /* t = dp0 / (dp0 - dp1); if (t > t0) t0 = t; */
+            {
+               brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
+               brw_math_invert(p, c->reg.t, c->reg.t);
+               brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
+
+               brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
+               brw_MOV(p, c->reg.t0, c->reg.t);
+               brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+            }
+
+            if (c->chipset.is_965) {
+               brw_ENDIF(p, is_neg2);
+            }
+         }
+         brw_ENDIF(p, is_negative);
+      }
+      brw_ENDIF(p, plane_active);
+
+      /* plane_ptr++;
+       */
+      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+      /* while (planemask>>=1) != 0
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+   }
+   brw_WHILE(p, plane_loop);
+
+   /* Emit the clipped line only if t0 + t1 < 1.0 */
+   brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
+   not_culled = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE);
+      brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE);
+
+      brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+      brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+   }
+   brw_ENDIF(p, not_culled);
+   brw_clip_kill_thread(c);
+}
+
+
+/* Emit the complete line clip program: allocate registers, set up
+ * ff_sync, copy colors between the two vertices when flat shading is
+ * requested in the key, then clip and emit the line.
+ */
+void brw_emit_line_clip( struct brw_clip_compile *c )
+{
+   brw_clip_line_alloc_regs(c);
+   brw_clip_init_ff_sync(c);
+
+   if (c->key.do_flat_shading)
+      brw_clip_copy_colors(c, 0, 1);
+
+   clip_and_emit_line(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c
new file mode 100644
index 00000000000..e0a5330556d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_point.c
@@ -0,0 +1,48 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+/* Point clipping, nothing to do?
+ *
+ * The emitted program only allocates registers (with no vertex
+ * slots), sets up ff_sync and kills the thread.
+ */
+void brw_emit_point_clip( struct brw_clip_compile *c )
+{
+   /* Send an empty message to kill the thread:
+    */
+   brw_clip_tri_alloc_regs(c, 0);
+   brw_clip_init_ff_sync(c);
+
+   brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
new file mode 100644
index 00000000000..5c3ccfd8d0d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -0,0 +1,209 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "util/u_math.h"
+
+#include "brw_context.h"
+#include "brw_clip.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+/* Cache key for the clip unit state: everything that
+ * clip_unit_create_from_key() reads when filling in the state.
+ */
+struct brw_clip_unit_key {
+   unsigned int total_grf;
+   unsigned int urb_entry_read_length;
+   unsigned int curb_entry_read_length;
+   unsigned int clip_mode;
+
+   unsigned int curbe_offset;
+
+   unsigned int nr_urb_entries, urb_size;
+
+   GLboolean depth_clamp;
+};
+/* Zero the key and fill it from the current clip program data, CURBE
+ * layout and URB allocation.
+ */
+static void
+clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
+{
+   memset(key, 0, sizeof(*key));
+
+   /* CACHE_NEW_CLIP_PROG */
+   key->total_grf = brw->clip.prog_data->total_grf;
+   key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+   key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
+   key->clip_mode = brw->clip.prog_data->clip_mode;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   key->curbe_offset = brw->curbe.clip_start;
+
+   /* BRW_NEW_URB_FENCE */
+   key->nr_urb_entries = brw->urb.nr_clip_entries;
+   key->urb_size = brw->urb.vsize;
+
+   /* No state dependency yet: depth clamp is hard-wired off. */
+   key->depth_clamp = 0; /* XXX: add this to gallium: ctx->Transform.DepthClamp; */
+}
+/* Build a brw_clip_unit_state from the key and upload it to the cache
+ * under BRW_CLIP_UNIT together with one relocation.
+ *
+ * bo_out is handed straight to brw_upload_cache().  Returns the error
+ * from brw_upload_cache(), or PIPE_OK.
+ */
+static enum pipe_error
+clip_unit_create_from_key(struct brw_context *brw,
+                          struct brw_clip_unit_key *key,
+                          struct brw_winsys_reloc *reloc,
+                          struct brw_winsys_buffer **bo_out)
+{
+   struct brw_clip_unit_state clip;
+   enum pipe_error ret;
+
+   memset(&clip, 0, sizeof(clip));
+
+   clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+   /* reloc */
+   clip.thread0.kernel_start_pointer = 0;
+
+   clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   clip.thread1.single_program_flow = 1;
+
+   clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+   clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+   clip.thread3.dispatch_grf_start_reg = 1;
+   clip.thread3.urb_entry_read_offset = 0;
+
+   clip.thread4.nr_urb_entries = key->nr_urb_entries;
+   clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
+   /* If we have enough clip URB entries to run two threads, do so.
+    */
+   if (key->nr_urb_entries >= 10) {
+      /* Half of the URB entries go to each thread, and it has to be an
+       * even number.
+       */
+      assert(key->nr_urb_entries % 2 == 0);
+
+      /* Although up to 16 concurrent Clip threads are allowed on IGDNG,
+       * only 2 threads can output VUEs at a time.
+       */
+      if (BRW_IS_IGDNG(brw))
+         clip.thread4.max_threads = 16 - 1;
+      else
+         clip.thread4.max_threads = 2 - 1;
+   } else {
+      assert(key->nr_urb_entries >= 5);
+      clip.thread4.max_threads = 1 - 1;
+   }
+
+   if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+      clip.thread4.max_threads = 0;
+
+   if (BRW_DEBUG & DEBUG_STATS)
+      clip.thread4.stats_enable = 1;
+
+   clip.clip5.userclip_enable_flags = 0x7f;
+   clip.clip5.userclip_must_clip = 1;
+   clip.clip5.guard_band_enable = 0;
+   if (!key->depth_clamp)
+      clip.clip5.viewport_z_clip_enable = 1;
+   clip.clip5.viewport_xy_clip_enable = 1;
+   clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+   clip.clip5.api_mode = BRW_CLIP_API_OGL;
+   clip.clip5.clip_mode = key->clip_mode;
+
+   if (BRW_IS_G4X(brw))
+      clip.clip5.negative_w_clip_test = 1;
+
+   clip.clip6.clipper_viewport_state_ptr = 0;
+   clip.viewport_xmin = -1;
+   clip.viewport_xmax = 1;
+   clip.viewport_ymin = -1;
+   clip.viewport_ymax = 1;
+
+   ret = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
+                          key, sizeof(*key),
+                          reloc, 1,
+                          &clip, sizeof(clip),
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+/* Look up the clip unit state for the current key and program
+ * relocation in the cache, creating and uploading it on a miss.  The
+ * result is stored in brw->clip.state_bo.
+ *
+ * Returns PIPE_OK or the error from clip_unit_create_from_key().
+ */
+static int upload_clip_unit( struct brw_context *brw )
+{
+   struct brw_clip_unit_key key;
+   struct brw_winsys_reloc reloc[1];
+   unsigned grf_reg_count;
+   enum pipe_error ret;
+
+   clip_unit_populate_key(brw, &key);
+
+   /* Same expression as thread0.grf_reg_count in
+    * clip_unit_create_from_key(); it is used as the reloc delta below.
+    */
+   grf_reg_count = align(key.total_grf, 16) / 16 - 1;
+
+   /* clip program relocation
+    *
+    * XXX: these reloc structs are long lived and only need to be
+    * updated when the bound BO changes.  Hopefully the stuff mixed
+    * into the deltas is non-orthogonal.
+    */
+   assert(brw->clip.prog_bo);
+   make_reloc(&reloc[0],
+              BRW_USAGE_STATE,
+              grf_reg_count << 1,
+              offsetof(struct brw_clip_unit_state, thread0),
+              brw->clip.prog_bo);
+
+
+   if (brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
+                        &key, sizeof(key),
+                        reloc, 1,
+                        NULL,
+                        &brw->clip.state_bo))
+      return PIPE_OK;
+
+   /* Create new:
+    */
+   ret = clip_unit_create_from_key(brw, &key,
+                                   reloc,
+                                   &brw->clip.state_bo);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
+}
+/* Tracked-state entry that installs upload_clip_unit as the prepare
+ * hook together with the dirty flags it declares.
+ */
+const struct brw_tracked_state brw_clip_unit = {
+   .dirty = {
+      .mesa  = 0,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+                BRW_NEW_URB_FENCE),
+      .cache = CACHE_NEW_CLIP_PROG
+   },
+   .prepare = upload_clip_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c
new file mode 100644
index 00000000000..4cde7294ea0
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_tri.c
@@ -0,0 +1,595 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+/* Reset the temporary register cursor back to first_tmp. */
+static void release_tmps( struct brw_clip_compile *c )
+{
+   c->last_tmp = c->first_tmp;
+}
+
+/* Assign registers for the triangle clipper (also used by the point
+ * path with nr_verts == 0) and record curb_read_length,
+ * urb_read_length and total_grf in c->prog_data.
+ *
+ * nr_verts is the number of vertex slots to reserve, each c->nr_regs
+ * registers long.  Unlike the line allocator this function also emits
+ * instructions: when key.nr_attrs is odd it writes 0.0 at a computed
+ * byte offset into each of the first three vertex registers.
+ */
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+                              GLuint nr_verts )
+{
+   GLuint i = 0,j;
+
+   /* Register usage is static, precompute here:
+    */
+   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+   if (c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec4_grf(i, 0);
+      i += (6 + c->key.nr_userclip + 1) / 2;
+
+      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+   }
+   else
+      c->prog_data.curb_read_length = 0;
+
+
+   /* Payload vertices plus space for more generated vertices:
+    */
+   for (j = 0; j < nr_verts; j++) {
+      c->reg.vertex[j] = brw_vec4_grf(i, 0);
+      i += c->nr_regs;
+   }
+
+   /* NOTE(review): this loop always touches vertex[0..2], even when
+    * nr_verts is smaller - confirm that is intended for the point path.
+    */
+   if (c->key.nr_attrs & 1) {
+      for (j = 0; j < 3; j++) {
+         GLuint delta = c->key.nr_attrs*16 + 32;
+
+         if (c->chipset.is_igdng)
+            delta = c->key.nr_attrs * 16 + 32 * 3;
+
+         brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
+      }
+   }
+
+   c->reg.t = brw_vec1_grf(i, 0);
+   c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
+   c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
+   c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+   c->reg.plane_equation = brw_vec4_grf(i, 4);
+   i++;
+
+   c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+   c->reg.dp = brw_vec1_grf(i, 4);
+   i++;
+
+   c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+   i++;
+
+   c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+   i++;
+
+   c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+   i++;
+
+   if (!c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec8_grf(i, 0);
+      i++;
+   }
+
+   if (c->key.do_unfilled) {
+      c->reg.dir = brw_vec4_grf(i, 0);
+      c->reg.offset = brw_vec4_grf(i, 4);
+      i++;
+      c->reg.tmp0 = brw_vec4_grf(i, 0);
+      c->reg.tmp1 = brw_vec4_grf(i, 4);
+      i++;
+   }
+
+   if (c->need_ff_sync) {
+      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+      i++;
+   }
+
+   c->first_tmp = i;
+   c->last_tmp = i;
+
+   c->prog_data.urb_read_length = c->nr_regs; /* ? */
+   c->prog_data.total_grf = i;
+}
+
+
+/* Emit code that fills inlist with the addresses of vertex[0..2],
+ * zeroes the outlist register and sets nr_verts to 3.
+ *
+ * When the primitive type read from R0.2 (masked with PRIM_MASK)
+ * equals _3DPRIM_TRISTRIP_REVERSE the first two entries are swapped.
+ * If c->need_direction is set, reg.dir is loaded with -1 for the
+ * reversed case and 1 otherwise.
+ */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+   struct brw_instruction *is_rev;
+
+   /* Initial list of indices for incoming vertexes:
+    */
+   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+   brw_CMP(p,
+           vec1(brw_null_reg()),
+           BRW_CONDITIONAL_EQ,
+           tmp0,
+           brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
+
+   /* XXX: Is there an easier way to do this?  Need to reverse every
+    * second tristrip element:  Can ignore sometimes?
+    */
+   is_rev = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_MOV(p, get_element(c->reg.inlist, 0),  brw_address(c->reg.vertex[1]) );
+      brw_MOV(p, get_element(c->reg.inlist, 1),  brw_address(c->reg.vertex[0]) );
+      if (c->need_direction)
+         brw_MOV(p, c->reg.dir, brw_imm_f(-1));
+   }
+   is_rev = brw_ELSE(p, is_rev);
+   {
+      brw_MOV(p, get_element(c->reg.inlist, 0),  brw_address(c->reg.vertex[0]) );
+      brw_MOV(p, get_element(c->reg.inlist, 1),  brw_address(c->reg.vertex[1]) );
+      if (c->need_direction)
+         brw_MOV(p, c->reg.dir, brw_imm_f(1));
+   }
+   brw_ENDIF(p, is_rev);
+
+   brw_MOV(p, get_element(c->reg.inlist, 2),  brw_address(c->reg.vertex[2]) );
+   brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0));
+   brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
+}
+
+
+/* Emit the flat-shading color copies.  For _3DPRIM_POLYGON the colors
+ * of vertex 0 are copied to vertices 1 and 2; for every other
+ * primitive type the colors of vertex 2 are copied to vertices 0
+ * and 1.
+ */
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *is_poly;
+   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+   brw_CMP(p,
+           vec1(brw_null_reg()),
+           BRW_CONDITIONAL_EQ,
+           tmp0,
+           brw_imm_ud(_3DPRIM_POLYGON));
+
+   is_poly = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_copy_colors(c, 1, 0);
+      brw_clip_copy_colors(c, 2, 0);
+   }
+   is_poly = brw_ELSE(p, is_poly);
+   {
+      brw_clip_copy_colors(c, 0, 2);
+      brw_clip_copy_colors(c, 1, 2);
+   }
+   brw_ENDIF(p, is_poly);
+}
+
+
+
+/* Use mesa's clipping algorithms, translated to GEN4 assembly.
+ */
+void brw_clip_tri( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_indirect vtx = brw_indirect(0, 0);
+   struct brw_indirect vtxPrev = brw_indirect(1, 0);
+   struct brw_indirect vtxOut = brw_indirect(2, 0);
+   struct brw_indirect plane_ptr = brw_indirect(3, 0);
+   struct brw_indirect inlist_ptr = brw_indirect(4, 0);
+   struct brw_indirect outlist_ptr = brw_indirect(5, 0);
+   struct brw_indirect freelist_ptr = brw_indirect(6, 0);
+   struct brw_instruction *plane_loop;
+   struct brw_instruction *plane_active;
+   struct brw_instruction *vertex_loop;
+   struct brw_instruction *next_test;
+   struct brw_instruction *prev_test;
+   /* vtxPrev starts at vertex[2]; the free list starts at vertex[3]
+    * and supplies storage for newly generated vertices.
+    */
+   brw_MOV(p, get_addr_reg(vtxPrev),     brw_address(c->reg.vertex[2]) );
+   brw_MOV(p, get_addr_reg(plane_ptr),   brw_clip_plane0_address(c));
+   brw_MOV(p, get_addr_reg(inlist_ptr),  brw_address(c->reg.inlist));
+   brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+
+   brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );
+
+   plane_loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      /* if (planemask & 1)
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
+
+      plane_active = brw_IF(p, BRW_EXECUTE_1);
+      {
+         /* vtxOut = freelist_ptr++
+          */
+         brw_MOV(p, get_addr_reg(vtxOut),       get_addr_reg(freelist_ptr) );
+         brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));
+
+         if (c->key.nr_userclip)
+            brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+         else
+            brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+         /* loopcount = nr_verts; nr_verts = 0 (recounted below) */
+         brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+         brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));
+
+         vertex_loop = brw_DO(p, BRW_EXECUTE_1);
+         {
+            /* vtx = *input_ptr;
+             */
+            brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));
+
+            /* IS_NEGATIVE(prev) */
+            brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+            brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset_hpos), c->reg.plane_equation);
+            prev_test = brw_IF(p, BRW_EXECUTE_1);
+            {
+               /* IS_POSITIVE(next)
+                */
+               brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
+               brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation);
+               next_test = brw_IF(p, BRW_EXECUTE_1);
+               {
+
+                  /* Coming back in.
+                   */
+                  brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
+                  brw_math_invert(p, c->reg.t, c->reg.t);
+                  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);
+
+                  /* If (vtxOut == 0) vtxOut = vtxPrev
+                   */
+                  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+                  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) );
+                  brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+                  brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE);
+
+                  /* *outlist_ptr++ = vtxOut;
+                   * nr_verts++;
+                   * vtxOut = 0;
+                   */
+                  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+                  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+                  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+                  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+               }
+               brw_ENDIF(p, next_test);
+
+            }
+            prev_test = brw_ELSE(p, prev_test);
+            {
+               /* *outlist_ptr++ = vtxPrev;
+                * nr_verts++;
+                */
+               brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
+               brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+               brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+
+               /* IS_NEGATIVE(next)
+                */
+               brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+               brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation);
+               next_test = brw_IF(p, BRW_EXECUTE_1);
+               {
+                  /* Going out of bounds.  Avoid division by zero as we
+                   * know dp != dpPrev from DIFFERENT_SIGNS, above.
+                   */
+                  brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
+                  brw_math_invert(p, c->reg.t, c->reg.t);
+                  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);
+
+                  /* If (vtxOut == 0) vtxOut = vtx
+                   */
+                  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+                  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) );
+                  brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+                  brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE);
+
+                  /* *outlist_ptr++ = vtxOut;
+                   * nr_verts++;
+                   * vtxOut = 0;
+                   */
+                  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+                  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+                  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+                  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+               }
+               brw_ENDIF(p, next_test);
+            }
+            brw_ENDIF(p, prev_test);
+
+            /* vtxPrev = vtx;
+             * inlist_ptr++;
+             */
+            brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
+            brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));
+
+            /* while (--loopcount != 0)
+             */
+            brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+            brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+         }
+         brw_WHILE(p, vertex_loop);
+
+         /* vtxPrev = *(outlist_ptr-1)  OR: outlist[nr_verts-1]
+          * inlist = outlist
+          * inlist_ptr = &inlist[0]
+          * outlist_ptr = &outlist[0]
+          */
+         brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));
+         brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
+         brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
+         brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+         brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+      }
+      brw_ENDIF(p, plane_active);
+
+      /* plane_ptr++;
+       */
+      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+      /* nr_verts >= 3
+       */
+      brw_CMP(p,
+              vec1(brw_null_reg()),
+              BRW_CONDITIONAL_GE,
+              c->reg.nr_verts,
+              brw_imm_ud(3));
+
+      /* && (planemask>>=1) != 0
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+   }
+   brw_WHILE(p, plane_loop);
+}
+
+
+/* Emit the vertices listed in inlist as a _3DPRIM_TRIFAN.  Nothing is
+ * emitted unless nr_verts - 2 is greater than zero.  The first vertex
+ * carries R02_PRIM_START; the last carries R02_PRIM_END and is sent
+ * with the eot argument of brw_clip_emit_vue() set.
+ */
+void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *loop, *if_insn;
+
+   /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
+    */
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+   brw_ADD(p,
+           c->reg.loopcount,
+           c->reg.nr_verts,
+           brw_imm_d(-2));
+
+   if_insn = brw_IF(p, BRW_EXECUTE_1);
+   {
+      struct brw_indirect v0 = brw_indirect(0, 0);
+      struct brw_indirect vptr = brw_indirect(1, 0);
+
+      brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist));
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+      brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START));
+
+      brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+      loop = brw_DO(p, BRW_EXECUTE_1);
+      {
+         brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2));
+
+         brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+         brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+         brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+      }
+      brw_WHILE(p, loop);
+
+      brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END));
+   }
+   brw_ENDIF(p, if_insn);
+}
+/* Initialise the clip planes, then emit the triangle clipper. */
+static void do_clip_tri( struct brw_clip_compile *c )
+{
+   brw_clip_init_planes(c);
+
+   brw_clip_tri(c);
+}
+
+/* Emit the triangle clipper inside a test so that it only runs when
+ * planemask is non-zero.
+ */
+static void maybe_do_clip_tri( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *do_clip;
+
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+   do_clip = brw_IF(p, BRW_EXECUTE_1);
+   {
+      do_clip_tri(c);
+   }
+   brw_ENDIF(p, do_clip);
+}
+
+static void brw_clip_test( struct brw_clip_compile *c )
+{
+    struct brw_reg t =
retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + + struct brw_reg v0 = get_tmp(c); + struct brw_reg v1 = get_tmp(c); + struct brw_reg v2 = get_tmp(c); + + struct brw_indirect vt0 = brw_indirect(0, 0); + struct brw_indirect vt1 = brw_indirect(1, 0); + struct brw_indirect vt2 = brw_indirect(2, 0); + + struct brw_compile *p = &c->func; + struct brw_instruction *is_outside; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + + brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); + brw_MOV(p, v0, deref_4f(vt0, c->offset_hpos)); + brw_MOV(p, v1, deref_4f(vt1, c->offset_hpos)); + brw_MOV(p, v2, deref_4f(vt2, c->offset_hpos)); + brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); + + /* test nearz, xmin, ymin plane */ + /* clip.xyz < -clip.w */ + brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* All vertices are outside of a plane, rejected */ + brw_AND(p, t, t1, t2); + brw_AND(p, t, t, t3); + brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); + brw_OR(p, tmp0, tmp0, get_element(t, 2)); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); + is_outside = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, is_outside); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* some vertices are inside a plane, some are outside,need to clip 
*/ + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + brw_AND(p, t, t, brw_imm_ud(0x1)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* test farz, xmax, ymax plane */ + /* clip.xyz > clip.w */ + brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* All vertices are outside of a plane, rejected */ + brw_AND(p, t, t1, t2); + brw_AND(p, t, t, t3); + brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); + brw_OR(p, tmp0, tmp0, get_element(t, 2)); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); + is_outside = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, is_outside); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* some vertices are inside a plane, some are outside,need to clip */ + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + brw_AND(p, t, t, brw_imm_ud(0x1)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + 
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + release_tmps(c); +} + + +void brw_emit_tri_clip( struct brw_clip_compile *c ) +{ + struct brw_instruction *neg_rhw; + struct brw_compile *p = &c->func; + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + brw_clip_init_clipmask(c); + brw_clip_init_ff_sync(c); + + /* if -ve rhw workaround bit is set, + do cliptest */ + if (c->chipset.is_965) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + neg_rhw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_test(c); + } + brw_ENDIF(p, neg_rhw); + } + /* Can't push into do_clip_tri because with polygon (or quad) + * flatshading, need to apply the flatshade here because we don't + * respect the PV when converting to trifan for emit: + */ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) || + (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP)) + do_clip_tri(c); + else + maybe_do_clip_tri(c); + + brw_clip_tri_emit_polygon(c); + + /* Send an empty message to kill the thread: + */ + brw_clip_kill_thread(c); +} diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c new file mode 100644 index 00000000000..aec835b8cec --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip_unfilled.c @@ -0,0 +1,497 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +/* This is performed against the original triangles, so no indirection + * required: +BZZZT! + */ +static void compute_tri_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg e = c->reg.tmp0; + struct brw_reg f = c->reg.tmp1; + struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset_hpos); + struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset_hpos); + struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset_hpos); + + + struct brw_reg v0n = get_tmp(c); + struct brw_reg v1n = get_tmp(c); + struct brw_reg v2n = get_tmp(c); + + /* Convert to NDC. 
+ * NOTE: We can't modify the original vertex coordinates, + * as it may impact further operations. + * So, we have to keep normalized coordinates in temp registers. + * + * TBD-KC + * Try to optimize unnecessary MOV's. + */ + brw_MOV(p, v0n, v0); + brw_MOV(p, v1n, v1); + brw_MOV(p, v2n, v2); + + brw_clip_project_position(c, v0n); + brw_clip_project_position(c, v1n); + brw_clip_project_position(c, v2n); + + /* Calculate the vectors of two edges of the triangle: + */ + brw_ADD(p, e, v0n, negate(v2n)); + brw_ADD(p, f, v1n, negate(v2n)); + + /* Take their crossproduct: + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3)); + brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3)); + brw_set_access_mode(p, BRW_ALIGN_1); + + brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); +} + + +static void cull_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + GLuint conditional; + + assert (!(c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL)); + + if (c->key.fill_ccw == CLIP_CULL) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, ccw); +} + + + +static void copy_bfc( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + GLuint conditional; + + /* Do we have any colors to copy? + */ + if ((c->offset_color0 == 0 || c->offset_bfc0 == 0) && + (c->offset_color1 == 0 || c->offset_bfc1 == 0)) + return; + + /* In some wierd degnerate cases we can end up testing the + * direction twice, once for culling and once for bfc copying. Oh + * well, that's what you get for setting wierd GL state. 
+ */ + if (c->key.copy_bfc_ccw) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + GLuint i; + + for (i = 0; i < 3; i++) { + if (c->offset_color0 && c->offset_bfc0) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset_color0), + byte_offset(c->reg.vertex[i], c->offset_bfc0)); + + if (c->offset_color1 && c->offset_bfc1) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset_color0), + byte_offset(c->reg.vertex[i], c->offset_bfc0)); + } + } + brw_ENDIF(p, ccw); +} + + + + +/* + GLfloat iz = 1.0 / dir.z; + GLfloat ac = dir.x * iz; + GLfloat bc = dir.y * iz; + offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE; + offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor; + offset *= MRD; +*/ +static void compute_offset( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg off = c->reg.offset; + struct brw_reg dir = c->reg.dir; + + brw_math_invert(p, get_element(off, 2), get_element(dir, 2)); + brw_MUL(p, vec2(off), dir, get_element(off, 2)); + + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + brw_abs(get_element(off, 0)), + brw_abs(get_element(off, 1))); + + brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor)); + brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units)); +} + + +static void merge_edgeflags( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *is_poly; + struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0); + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + /* Get away with using reg.vertex because we know that this is not + * a 
_3DPRIM_TRISTRIP_REVERSE: + */ + is_poly = brw_IF(p, BRW_EXECUTE_1); + { + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); + brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset_edgeflag), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); + brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset_edgeflag), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_poly); +} + + + +static void apply_one_offset( struct brw_clip_compile *c, + struct brw_indirect vert ) +{ + struct brw_compile *p = &c->func; + struct brw_reg z = deref_1f(vert, c->header_position_offset + + 2 * type_sz(BRW_REGISTER_TYPE_F)); + + brw_ADD(p, z, z, vec1(c->reg.offset)); +} + + + +/*********************************************************************** + * Output clipped polygon as an unfilled primitive: + */ +static void emit_lines(struct brw_clip_compile *c, + GLboolean do_offset) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_edge; + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v1 = brw_indirect(1, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + struct brw_indirect v1ptr = brw_indirect(3, 0); + + /* Need a seperate loop for offset: + */ + if (do_offset) { + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + apply_one_offset(c, v0); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_G); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); + } + + /* v1ptr = &inlist[nr_verts] + * *v1ptr = v0 + */ 
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw edge if edgeflag != 0 */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset_edgeflag), + brw_imm_f(0)); + draw_edge = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + } + brw_ENDIF(p, draw_edge); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + +static void emit_points(struct brw_clip_compile *c, + GLboolean do_offset ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_point; + + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw if edgeflag != 0 + */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset_edgeflag), + brw_imm_f(0)); + draw_point = brw_IF(p, BRW_EXECUTE_1); + { + if (do_offset) + apply_one_offset(c, v0); + + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | 
R02_PRIM_END); + } + brw_ENDIF(p, draw_point); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + + + + + +static void emit_primitives( struct brw_clip_compile *c, + GLuint mode, + GLboolean do_offset ) +{ + switch (mode) { + case CLIP_FILL: + brw_clip_tri_emit_polygon(c); + break; + + case CLIP_LINE: + emit_lines(c, do_offset); + break; + + case CLIP_POINT: + emit_points(c, do_offset); + break; + + case CLIP_CULL: + assert(0); + break; + } +} + + + +static void emit_unfilled_primitives( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + + /* Direction culling has already been done. + */ + if (c->key.fill_ccw != c->key.fill_cw && + c->key.fill_ccw != CLIP_CULL && + c->key.fill_cw != CLIP_CULL) + { + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } + ccw = brw_ELSE(p, ccw); + { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + brw_ENDIF(p, ccw); + } + else if (c->key.fill_cw != CLIP_CULL) { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + else if (c->key.fill_ccw != CLIP_CULL) { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } +} + + + + +static void check_nr_verts( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *if_insn; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3)); + if_insn = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, if_insn); +} + + +void brw_emit_unfilled_clip( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *do_clip; + + + c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) || + (c->key.fill_ccw != c->key.fill_cw) || + c->key.fill_ccw == CLIP_CULL 
|| + c->key.fill_cw == CLIP_CULL || + c->key.copy_bfc_cw || + c->key.copy_bfc_ccw); + + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + brw_clip_init_ff_sync(c); + + assert(c->offset_edgeflag); + + if (c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL) { + brw_clip_kill_thread(c); + return; + } + + merge_edgeflags(c); + + /* Need to use the inlist indirection here: + */ + if (c->need_direction) + compute_tri_direction(c); + + if (c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL) + cull_direction(c); + + if (c->key.offset_ccw || + c->key.offset_cw) + compute_offset(c); + + if (c->key.copy_bfc_ccw || + c->key.copy_bfc_cw) + copy_bfc(c); + + /* Need to do this whether we clip or not: + */ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + brw_clip_init_clipmask(c); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + do_clip = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_init_planes(c); + brw_clip_tri(c); + check_nr_verts(c); + } + brw_ENDIF(p, do_clip); + + emit_unfilled_primitives(c); + brw_clip_kill_thread(c); +} + + + diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c new file mode 100644 index 00000000000..97a57103105 --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -0,0 +1,388 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_defines.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + + +struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + + +static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w) +{ + return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x); +} + + +void brw_clip_init_planes( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + + if (!c->key.nr_userclip) { + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1)); + } +} + + + +#define W 3 + +/* Project 'pos' to screen space (or back again), overwrite with results: + */ +void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) +{ + struct brw_compile *p = &c->func; + + /* calc rhw + */ + brw_math_invert(p, get_element(pos, W), get_element(pos, W)); + + /* value.xyz *= value.rhw + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, brw_writemask(pos, BRW_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); + brw_set_access_mode(p, BRW_ALIGN_1); +} + + +static void brw_clip_project_vertex( struct brw_clip_compile *c, + struct brw_indirect vert_addr ) +{ + struct 
brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + + /* Fixup position. Extract from the original vertex and re-project + * to screen space: + */ + brw_MOV(p, tmp, deref_4f(vert_addr, c->offset_hpos)); + brw_clip_project_position(c, tmp); + brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp); + + release_tmp(c, tmp); +} + + + + +/* Interpolate between two vertices and put the result into a0.0. + * Increment a0.0 accordingly. + */ +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + GLboolean force_edgeflag) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + GLuint i; + + /* Just copy the vertex header: + */ + /* + * After CLIP stage, only first 256 bits of the VUE are read + * back on IGDNG, so needn't change it + */ + brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); + + /* Iterate over each attribute (could be done in pairs?) 
+ */ + for (i = 0; i < c->key.nr_attrs; i++) { + GLuint delta = i*16 + 32; + + if (c->chipset.is_igdng) + delta = i * 16 + 32 * 3; + + if (delta == c->offset_edgeflag) { + if (force_edgeflag) + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); + else + brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); + } + else { + /* Interpolate: + * + * New = attr0 + t*attr1 - t*attr0 + */ + brw_MUL(p, + vec4(brw_null_reg()), + deref_4f(v1_ptr, delta), + t0); + + brw_MAC(p, + tmp, + negate(deref_4f(v0_ptr, delta)), + t0); + + brw_ADD(p, + deref_4f(dest_ptr, delta), + deref_4f(v0_ptr, delta), + tmp); + } + } + + if (i & 1) { + GLuint delta = i*16 + 32; + + if (c->chipset.is_igdng) + delta = i * 16 + 32 * 3; + + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); + } + + release_tmp(c, tmp); + + /* Recreate the projected (NDC) coordinate in the new vertex + * header: + */ + brw_clip_project_vertex(c, dest_ptr ); +} + + + + +#define MAX_MRF 16 + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + GLboolean allocate, + GLboolean eot, + GLuint header) +{ + struct brw_compile *p = &c->func; + GLuint start = c->last_mrf; + + brw_clip_ff_sync(c); + + assert(!(allocate && eot)); + + /* Cycle through mrf regs - probably futile as we have to wait for + * the allocation response anyway. Also, the order this function + * is invoked doesn't correspond to the order the instructions will + * be executed, so it won't have any effect in many cases. + */ +#if 0 + if (start + c->nr_regs + 1 >= MAX_MRF) + start = 0; + + c->last_mrf = start + c->nr_regs + 1; +#endif + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs); + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + + /* Send each vertex as a seperate write to the urb. 
This + * is different to the concept in brw_sf_emit.c, where + * subsequent writes are used to build up a single urb + * entry. Each of these writes instantiates a seperate + * urb entry - (I think... what about 'allocate'?) + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + start, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response_length */ + eot, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_clip_kill_thread(struct brw_clip_compile *c) +{ + struct brw_compile *p = &c->func; + + brw_clip_ff_sync(c); + /* Send an empty message to kill the thread and release any + * allocated urb entry: + */ + brw_urb_WRITE(p, + retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + 0, /* allocate */ + 0, /* used */ + 1, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, + BRW_URB_SWIZZLE_NONE); +} + + + + +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) +{ + return brw_address(c->reg.fixed_planes); +} + + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ) +{ + if (c->key.nr_userclip) { + return brw_imm_uw(16); + } + else { + return brw_imm_uw(4); + } +} + + +/* If flatshading, distribute color from provoking vertex prior to + * clipping. 
+ */ +void brw_clip_copy_colors( struct brw_clip_compile *c, + GLuint to, GLuint from ) +{ + struct brw_compile *p = &c->func; + + if (c->offset_color0) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset_color0), + byte_offset(c->reg.vertex[from], c->offset_color0)); + + if (c->offset_color1) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset_color1), + byte_offset(c->reg.vertex[from], c->offset_color1)); + + if (c->offset_bfc0) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset_bfc0), + byte_offset(c->reg.vertex[from], c->offset_bfc0)); + + if (c->offset_bfc1) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset_bfc1), + byte_offset(c->reg.vertex[from], c->offset_bfc1)); +} + + + +void brw_clip_init_clipmask( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg incoming = get_element_ud(c->reg.R0, 2); + + /* Shift so that lowest outcode bit is rightmost: + */ + brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); + + if (c->key.nr_userclip) { + struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); + + /* Rearrange userclip outcodes so that they come directly after + * the fixed plane bits. 
+ */ + brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14)); + brw_SHR(p, tmp, tmp, brw_imm_ud(8)); + brw_OR(p, c->reg.planemask, c->reg.planemask, tmp); + + release_tmp(c, tmp); + } +} + +void brw_clip_ff_sync(struct brw_clip_compile *c) +{ + if (c->need_ff_sync) { + struct brw_compile *p = &c->func; + struct brw_instruction *need_ff_sync; + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1)); + need_ff_sync = brw_IF(p, BRW_EXECUTE_1); + { + brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1)); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, + 1, /* used */ + 1, /* msg length */ + 1, /* response length */ + 0, /* eot */ + 1, /* write compelete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); + } + brw_ENDIF(p, need_ff_sync); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } +} + +void brw_clip_init_ff_sync(struct brw_clip_compile *c) +{ + if (c->need_ff_sync) { + struct brw_compile *p = &c->func; + + brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0)); + } +} diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c new file mode 100644 index 00000000000..e67551882dc --- /dev/null +++ b/src/gallium/drivers/i965/brw_context.c @@ -0,0 +1,154 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#include "pipe/p_context.h"
+#include "util/u_simple_list.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_draw.h"
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+#include "brw_winsys.h"
+#include "brw_screen.h"
+
+
+/**
+ * Release everything a context owns apart from its batchbuffer, then
+ * free the context structure itself.
+ *
+ * Shared by brw_destroy_context() and by the failure path of
+ * brw_create_context().  None of the calls below may rely on
+ * brw->batch: on the destroy path the batchbuffer has already been
+ * freed, and on the create-failure path it was never allocated.
+ *
+ * \param brw  context to tear down; must not be used afterwards.
+ */
+static void brw_context_teardown( struct brw_context *brw )
+{
+   unsigned i;
+
+   brw_destroy_state(brw);
+
+   brw_draw_cleanup( brw );
+
+   brw_pipe_blend_cleanup( brw );
+   brw_pipe_depth_stencil_cleanup( brw );
+   brw_pipe_framebuffer_cleanup( brw );
+   brw_pipe_flush_cleanup( brw );
+   brw_pipe_misc_cleanup( brw );
+   brw_pipe_query_cleanup( brw );
+   brw_pipe_rast_cleanup( brw );
+   brw_pipe_sampler_cleanup( brw );
+   brw_pipe_shader_cleanup( brw );
+   brw_pipe_vertex_cleanup( brw );
+   brw_pipe_clear_cleanup( brw );
+
+   brw_hw_cc_cleanup( brw );
+
+
+   FREE(brw->wm.compile_data);
+
+   /* Drop the framebuffer surface references held in curr.fb. */
+   for (i = 0; i < brw->curr.fb.nr_cbufs; i++)
+      pipe_surface_reference(&brw->curr.fb.cbufs[i], NULL);
+   brw->curr.fb.nr_cbufs = 0;
+   pipe_surface_reference(&brw->curr.fb.zsbuf, NULL);
+
+   /* Drop the per-unit buffer references. */
+   bo_reference(&brw->curbe.curbe_bo, NULL);
+   bo_reference(&brw->vs.prog_bo, NULL);
+   bo_reference(&brw->vs.state_bo, NULL);
+   bo_reference(&brw->vs.bind_bo, NULL);
+   bo_reference(&brw->gs.prog_bo, NULL);
+   bo_reference(&brw->gs.state_bo, NULL);
+   bo_reference(&brw->clip.prog_bo, NULL);
+   bo_reference(&brw->clip.state_bo, NULL);
+   bo_reference(&brw->clip.vp_bo, NULL);
+   bo_reference(&brw->sf.prog_bo, NULL);
+   bo_reference(&brw->sf.state_bo, NULL);
+   bo_reference(&brw->sf.vp_bo, NULL);
+
+   for (i = 0; i < Elements(brw->wm.sdc_bo); i++)
+      bo_reference(&brw->wm.sdc_bo[i], NULL);
+
+   bo_reference(&brw->wm.bind_bo, NULL);
+
+   for (i = 0; i < Elements(brw->wm.surf_bo); i++)
+      bo_reference(&brw->wm.surf_bo[i], NULL);
+
+   bo_reference(&brw->wm.sampler_bo, NULL);
+   bo_reference(&brw->wm.prog_bo, NULL);
+   bo_reference(&brw->wm.state_bo, NULL);
+
+   /* The structure came from CALLOC_STRUCT() in brw_create_context()
+    * and nothing else releases it.
+    */
+   FREE(brw);
+}
+
+
+/**
+ * pipe_context::destroy hook: flush outstanding work, free the
+ * batchbuffer, then release all remaining state and the context.
+ */
+static void brw_destroy_context( struct pipe_context *pipe )
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   brw_context_flush( brw );
+   brw_batchbuffer_free( brw->batch );
+
+   brw_context_teardown( brw );
+}
+
+
+/**
+ * Create an i965 context on the given screen.
+ *
+ * \param screen  screen supplying the winsys and chipset description.
+ * \return the new context's pipe_context base, or NULL if either the
+ *         context structure or its batchbuffer cannot be allocated.
+ *         Nothing is leaked on failure.
+ */
+struct pipe_context *brw_create_context(struct pipe_screen *screen)
+{
+   struct brw_context *brw = CALLOC_STRUCT(brw_context);
+
+   if (!brw) {
+      debug_printf("%s: failed to alloc context\n", __FUNCTION__);
+      return NULL;
+   }
+
+   brw->base.screen = screen;
+   brw->base.destroy = brw_destroy_context;
+   brw->sws = brw_screen(screen)->sws;
+   brw->chipset = brw_screen(screen)->chipset;
+
+   brw_pipe_blend_init( brw );
+   brw_pipe_depth_stencil_init( brw );
+   brw_pipe_framebuffer_init( brw );
+   brw_pipe_flush_init( brw );
+   brw_pipe_misc_init( brw );
+   brw_pipe_query_init( brw );
+   brw_pipe_rast_init( brw );
+   brw_pipe_sampler_init( brw );
+   brw_pipe_shader_init( brw );
+   brw_pipe_vertex_init( brw );
+   brw_pipe_clear_init( brw );
+
+   brw_hw_cc_init( brw );
+
+   brw_init_state( brw );
+   brw_draw_init( brw );
+
+   /* Force every piece of state to be re-validated on first use. */
+   brw->state.dirty.mesa = ~0;
+   brw->state.dirty.brw = ~0;
+
+   brw->flags.always_emit_state = 0;
+
+   make_empty_list(&brw->query.active_head);
+
+   brw->batch = brw_batchbuffer_alloc( brw->sws, brw->chipset );
+   if (brw->batch == NULL)
+      goto fail;
+
+   return &brw->base;
+
+fail:
+   /* Only reached with brw->batch == NULL, so there is no batchbuffer
+    * to free and nothing to flush; undo the init calls above and free
+    * the context.
+    */
+   brw_context_teardown( brw );
+   return NULL;
+}
+
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
new file mode 100644
index 00000000000..56e78074000
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -0,0 +1,853 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRWCONTEXT_INC +#define BRWCONTEXT_INC + +#include "brw_structs.h" +#include "brw_winsys.h" +#include "brw_reg.h" +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "tgsi/tgsi_scan.h" + + +/* Glossary: + * + * URB - uniform resource buffer. A mid-sized buffer which is + * partitioned between the fixed function units and used for passing + * values (vertices, primitives, constants) between them. + * + * CURBE - constant URB entry. An urb region (entry) used to hold + * constant values which the fixed function units can be instructed to + * preload into the GRF when spawning a thread. + * + * VUE - vertex URB entry. An urb entry holding a vertex and usually + * a vertex header. 
The header contains control information and + * things like primitive type, Begin/end flags and clip codes. + * + * PUE - primitive URB entry. An urb entry produced by the setup (SF) + * unit holding rasterization and interpolation parameters. + * + * GRF - general register file. One of several register files + * addressable by programmed threads. The inputs (r0, payload, curbe, + * urb) of the thread are preloaded to this area before the thread is + * spawned. The registers are individually 8 dwords wide and suitable + * for general usage. Registers holding thread input values are not + * special and may be overwritten. + * + * MRF - message register file. Threads communicate (and terminate) + * by sending messages. Message parameters are placed in contiguous + * MRF registers. All program output is via these messages. URB + * entries are populated by sending a message to the shared URB + * function containing the new data, together with a control word, + * often an unmodified copy of R0. + * + * R0 - GRF register 0. Typically holds control information used when + * sending messages to other threads. + * + * EU or GEN4 EU: The name of the programmable subsystem of the + * i965 hardware. Threads are executed by the EU, the registers + * described above are part of the EU architecture. + * + * Fixed function units: + * + * CS - Command streamer. Notional first unit, little software + * interaction. Holds the URB entries used for constant data, ie the + * CURBEs. + * + * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of + * this unit is responsible for pulling vertices out of vertex buffers + * in vram and injecting them into the processing pipe as VUEs. If + * enabled, it first passes them to a VS thread which is a good place + * for the driver to implement any active vertex shader. + * + * GS - Geometry Shader. This corresponds to a new DX10 concept. 
If + * enabled, incoming strips etc are passed to GS threads in individual + * line/triangle/point units. The GS thread may perform arbitrary + * computation and emit whatever primitives with whatever vertices it + * chooses. This makes GS an excellent place to implement GL's + * unfilled polygon modes, though of course it is capable of much + * more. Additionally, GS is used to translate away primitives not + * handled by later units, including Quads and Lineloops. + * + * CLIP - Clipper. Mesa's clipping algorithms are imported to run on + * this unit. The fixed function part performs cliptesting against + * the 6 fixed clipplanes and makes decisions on whether or not the + * incoming primitive needs to be passed to a thread for clipping. + * User clip planes are handled via cooperation with the VS thread. + * + * SF - Strips Fans or Setup: Triangles are prepared for + * rasterization. Interpolation coefficients are calculated. + * Flatshading and two-side lighting usually performed here. + * + * WM - Windower. Interpolation of vertex attributes performed here. + * Fragment shader implemented here. SIMD aspects of EU taken full + * advantage of, as pixels are processed in blocks of 16. + * + * CC - Color Calculator. No EU threads associated with this unit. + * Handles blending and (presumably) depth and stencil testing. 
+ */
+
+/* Array length of brw_wm_prog_data::param. */
+#define BRW_MAX_CURBE    (32*16)
+
+struct brw_context;
+
+/* Depth/stencil state object; brw_context::curr.zstencil points at one
+ * of these.
+ */
+struct brw_depth_stencil_state {
+   /* Precalculated hardware state:
+    */
+   struct brw_cc0 cc0;
+   struct brw_cc1 cc1;
+   struct brw_cc2 cc2;
+   struct brw_cc3 cc3;
+   struct brw_cc7 cc7;
+
+   unsigned iz_lookup;
+};
+
+
+/* Blend state object; brw_context::curr.blend points at one of these.
+ */
+struct brw_blend_state {
+   /* Precalculated hardware state:
+    */
+   struct brw_cc2 cc2;
+   struct brw_cc3 cc3;
+   struct brw_cc5 cc5;
+   struct brw_cc6 cc6;
+
+   struct brw_surf_ss0 ss0;
+};
+
+
+/* Only used through pointers in this header (brw_context::curr.rast);
+ * defined elsewhere.
+ */
+struct brw_rasterizer_state;
+
+/* Immediate constants belonging to a shader: 'nr' entries of four
+ * floats each, stored at 'data'.
+ */
+struct brw_immediate_data {
+   unsigned nr;
+   float (*data)[4];
+};
+
+/* Driver-side vertex shader object; brw_context::curr.vertex_shader
+ * points at one of these.
+ */
+struct brw_vertex_shader {
+   const struct tgsi_token *tokens;
+   struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
+
+   struct tgsi_shader_info info;
+   struct brw_immediate_data immediates;
+
+   GLuint has_flow_control:1;
+   GLuint use_const_buffer:1;
+
+   /* Offsets of special vertex shader outputs required for clipping.
+    * Each is a 6-bit field, so values are limited to 0..63.
+    */
+   GLuint output_hpos:6;        /* not always zero? */
+   GLuint output_color0:6;
+   GLuint output_color1:6;
+   GLuint output_bfc0:6;
+   GLuint output_bfc1:6;
+   GLuint output_edgeflag:6;
+
+   unsigned id;
+};
+
+/* Per-input interpolation and semantic information for a fragment
+ * shader.  Only the first nr_inputs entries of input[] are meaningful;
+ * see brw_fs_signature_size().
+ */
+struct brw_fs_signature {
+   GLuint nr_inputs;
+   struct {
+      GLuint interp:3;          /* TGSI_INTERPOLATE_x */
+      GLuint semantic:5;        /* TGSI_SEMANTIC_x */
+      GLuint semantic_index:24;
+   } input[PIPE_MAX_SHADER_INPUTS];
+};
+
+/* Size in bytes of the used prefix of a signature: everything up to
+ * input[] plus nr_inputs array elements.  Evaluates (s) more than once.
+ */
+#define brw_fs_signature_size(s) (offsetof(struct brw_fs_signature, input) + \
+                                  ((s)->nr_inputs * sizeof (s)->input[0]))
+
+
+/* Driver-side fragment shader object; brw_context::curr.fragment_shader
+ * points at one of these.
+ */
+struct brw_fragment_shader {
+   const struct tgsi_token *tokens;
+   struct tgsi_shader_info info;
+
+   struct brw_fs_signature signature;
+   struct brw_immediate_data immediates;
+
+   unsigned iz_lookup;
+   /*unsigned wm_lookup;*/
+
+   unsigned uses_depth:1;
+   unsigned has_flow_control:1;
+
+   unsigned id;
+   struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
+   GLboolean use_const_buffer;
+};
+
+
+/* Sampler state object; brw_context::curr.sampler[] points at these.
+ */
+struct brw_sampler {
+   struct brw_ss0 ss0;
+   struct brw_ss1 ss1;
+   float border_color[4];
+   struct brw_ss3 ss3;
+};
+
+
+
+/* PIPE_NEW_*: bits for brw_state_flags::mesa.  Each value is a distinct
+ * single bit.
+ */
+#define PIPE_NEW_DEPTH_STENCIL_ALPHA    0x1
+#define PIPE_NEW_RAST                   0x2
+#define PIPE_NEW_BLEND                  0x4
+#define PIPE_NEW_VIEWPORT               0x8
+#define PIPE_NEW_SAMPLERS               0x10
+#define PIPE_NEW_VERTEX_BUFFER          0x20
+#define PIPE_NEW_VERTEX_ELEMENT         0x40
+#define PIPE_NEW_FRAGMENT_SHADER        0x80
+#define PIPE_NEW_VERTEX_SHADER          0x100
+#define PIPE_NEW_FRAGMENT_CONSTANTS     0x200
+#define PIPE_NEW_VERTEX_CONSTANTS       0x400
+#define PIPE_NEW_CLIP                   0x800
+#define PIPE_NEW_INDEX_BUFFER           0x1000
+#define PIPE_NEW_INDEX_RANGE            0x2000
+#define PIPE_NEW_BLEND_COLOR            0x4000
+#define PIPE_NEW_POLYGON_STIPPLE        0x8000
+#define PIPE_NEW_FRAMEBUFFER_DIMENSIONS 0x10000
+#define PIPE_NEW_DEPTH_BUFFER           0x20000
+#define PIPE_NEW_COLOR_BUFFERS          0x40000
+#define PIPE_NEW_QUERY                  0x80000
+#define PIPE_NEW_SCISSOR                0x100000
+#define PIPE_NEW_BOUND_TEXTURES         0x200000
+#define PIPE_NEW_NR_CBUFS               0x400000
+#define PIPE_NEW_FRAGMENT_SIGNATURE     0x800000
+
+
+
+/* BRW_NEW_*: bits for brw_state_flags::brw.  Bits 0x200 and 0x400 are
+ * not assigned in this list.
+ */
+#define BRW_NEW_URB_FENCE               0x1
+#define BRW_NEW_FRAGMENT_PROGRAM        0x2
+#define BRW_NEW_VERTEX_PROGRAM          0x4
+#define BRW_NEW_INPUT_DIMENSIONS        0x8
+#define BRW_NEW_CURBE_OFFSETS           0x10
+#define BRW_NEW_REDUCED_PRIMITIVE       0x20
+#define BRW_NEW_PRIMITIVE               0x40
+#define BRW_NEW_CONTEXT                 0x80
+#define BRW_NEW_WM_INPUT_DIMENSIONS     0x100
+#define BRW_NEW_PSP                     0x800
+#define BRW_NEW_WM_SURFACES             0x1000
+#define BRW_NEW_xxx                     0x2000 /* was FENCE */
+#define BRW_NEW_INDICES                 0x4000
+
+/**
+ * Used for any batch entry with a relocated pointer that will be used
+ * by any 3D rendering.  Need to re-emit these fresh in each
+ * batchbuffer as the referenced buffers may be relocated in the
+ * meantime.
+ */
+#define BRW_NEW_BATCH                   0x10000
+#define BRW_NEW_NR_WM_SURFACES          0x40000
+#define BRW_NEW_NR_VS_SURFACES          0x80000
+#define BRW_NEW_INDEX_BUFFER            0x100000
+
+/* The three independent sets of dirty bits tracked by the driver.  The
+ * same structure is used both for the context's accumulated dirty
+ * state and for the bits each brw_tracked_state depends on.
+ */
+struct brw_state_flags {
+   /** State update flags using the PIPE_NEW_* bits.  The field name is
+    * inherited; presumably set by the brw_pipe_*.c state setters --
+    * confirm.
+    */
+   GLuint mesa;
+   /**
+    * State update flags signalled as the result of brw_tracked_state updates
+    * (BRW_NEW_* bits).
+    */
+   GLuint brw;
+   /** State update flags signalled by brw_state_cache.c searches
+    * (CACHE_NEW_* bits).
+    */
+   GLuint cache;
+};
+
+
+
+/* Data about a particular attempt to compile a program.  Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+struct brw_wm_prog_data {
+   GLuint curb_read_length;
+   GLuint urb_read_length;
+
+   GLuint first_curbe_grf;
+   GLuint total_grf;
+   GLuint total_scratch;
+
+   GLuint nr_params;       /**< number of float params/constants */
+   GLboolean error;
+
+   /* Pointer to tracked values (only valid once
+    * _mesa_load_state_parameters has been called at runtime).
+    */
+   const GLfloat *param[BRW_MAX_CURBE];
+};
+
+struct brw_sf_prog_data {
+   GLuint urb_read_length;
+   GLuint total_grf;
+
+   /* Each vertex may have up to 12 attributes, 4 components each,
+    * except WPOS which requires only 2.  (11*4 + 2) == 44 ==> 11
+    * rows.
+    *
+    * Actually we use 4 for each, so call it 12 rows.
+    *
+    * NOTE(review): 11*4 + 2 is 46, not 44; the "12 rows" conclusion
+    * does not depend on that figure.
+    */
+   GLuint urb_entry_size;
+};
+
+
+/* Only used through a pointer in this header (brw_context::clip);
+ * defined elsewhere.
+ */
+struct brw_clip_prog_data;
+
+struct brw_gs_prog_data {
+   GLuint urb_read_length;
+   GLuint total_grf;
+};
+
+struct brw_vs_prog_data {
+   GLuint curb_read_length;
+   GLuint urb_read_length;
+   GLuint total_grf;
+
+   GLuint nr_outputs;
+   GLuint nr_inputs;
+
+   GLuint nr_params;       /**< number of TGSI_FILE_CONSTANT's */
+
+   GLuint output_edgeflag;
+
+   GLboolean writes_psiz;
+
+   /* Used for calculating urb partitions:
+    */
+   GLuint urb_entry_size;
+};
+
+
+/* Size == 0 if output either not written, or always [0,0,0,1]
+ *
+ * (The struct tag is spelled "ouput"; the spelling is part of the
+ * interface and is kept.)
+ */
+struct brw_vs_ouput_sizes {
+   GLubyte output_size[PIPE_MAX_SHADER_OUTPUTS];
+};
+
+
+/** Number of texture sampler units */
+#define BRW_MAX_TEX_UNIT 16
+
+/** Max number of render targets in a shader */
+#define BRW_MAX_DRAW_BUFFERS 4
+
+/**
+ * Size of our surface binding table for the WM.
+ * This contains pointers to the drawing surfaces and current texture
+ * objects and one shader constant buffer (+1).
+ */
+#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
+
+/**
+ * Helpers to convert drawing buffers, textures and constant buffers
+ * to surface binding table indexes, for WM.
+ *
+ * Layout: draw buffers first, then the fragment constant buffer, then
+ * the textures.
+ */
+#define BTI_COLOR_BUF(d)          (d)
+#define BTI_FRAGMENT_CONSTANTS    (BRW_MAX_DRAW_BUFFERS)
+#define BTI_TEXTURE(t)            (BRW_MAX_DRAW_BUFFERS + 1 + (t))
+
+/**
+ * Size of surface binding table for the VS.
+ * Only one constant buffer for now.
+ */
+#define BRW_VS_MAX_SURF 1
+
+/**
+ * Only a VS constant buffer
+ */
+#define SURF_INDEX_VERT_CONST_BUFFER 0
+
+
+/* Bit of a hack to align these with the winsys buffer_data_type enum.
+ */
+enum brw_cache_id {
+   BRW_CC_VP                 = BRW_DATA_GS_CC_VP,
+   BRW_CC_UNIT               = BRW_DATA_GS_CC_UNIT,
+   BRW_WM_PROG               = BRW_DATA_GS_WM_PROG,
+   BRW_SAMPLER_DEFAULT_COLOR = BRW_DATA_GS_SAMPLER_DEFAULT_COLOR,
+   BRW_SAMPLER               = BRW_DATA_GS_SAMPLER,
+   BRW_WM_UNIT               = BRW_DATA_GS_WM_UNIT,
+   BRW_SF_PROG               = BRW_DATA_GS_SF_PROG,
+   BRW_SF_VP                 = BRW_DATA_GS_SF_VP,
+   BRW_SF_UNIT               = BRW_DATA_GS_SF_UNIT,
+   BRW_VS_UNIT               = BRW_DATA_GS_VS_UNIT,
+   BRW_VS_PROG               = BRW_DATA_GS_VS_PROG,
+   BRW_GS_UNIT               = BRW_DATA_GS_GS_UNIT,
+   BRW_GS_PROG               = BRW_DATA_GS_GS_PROG,
+   BRW_CLIP_VP               = BRW_DATA_GS_CLIP_VP,
+   BRW_CLIP_UNIT             = BRW_DATA_GS_CLIP_UNIT,
+   BRW_CLIP_PROG             = BRW_DATA_GS_CLIP_PROG,
+   BRW_SS_SURFACE            = BRW_DATA_SS_SURFACE,
+   BRW_SS_SURF_BIND          = BRW_DATA_SS_SURF_BIND,
+
+   /* Used as the array length of the per-id tables in struct brw_cache.
+    * NOTE(review): that is only a valid bound if the BRW_DATA_* values
+    * above are small and BRW_DATA_SS_SURF_BIND is the largest of
+    * them -- confirm against the winsys enum.
+    */
+   BRW_MAX_CACHE
+};
+
+/* One cached state buffer, chained to others through 'next'. */
+struct brw_cache_item {
+   /**
+    * Effectively part of the key, cache_id identifies what kind of state
+    * buffer is involved, and also which brw->state.dirty.cache flag should
+    * be set when this cache item is chosen.
+    */
+   enum brw_cache_id cache_id;
+   /** 32-bit hash of the key data */
+   GLuint hash;
+   GLuint key_size;             /* for variable-sized keys */
+   const void *key;
+   struct brw_winsys_reloc *relocs;
+   GLuint nr_relocs;
+
+   struct brw_winsys_buffer *bo;
+   GLuint data_size;
+
+   struct brw_cache_item *next;
+};
+
+
+
+/* A cache of state buffers.  brw_context holds two of these: 'cache'
+ * and 'surface_cache'.
+ */
+struct brw_cache {
+   struct brw_context *brw;
+   struct brw_winsys_screen *sws;
+
+   struct brw_cache_item **items;
+   GLuint size, n_items;
+
+   enum brw_buffer_type buffer_type;
+
+   /* Per-cache_id tables, indexed by enum brw_cache_id: */
+   GLuint key_size[BRW_MAX_CACHE];      /* for fixed-size keys */
+   GLuint aux_size[BRW_MAX_CACHE];
+   char *name[BRW_MAX_CACHE];
+
+
+   /* Record of the last BOs chosen for each cache_id.  Used to set
+    * brw->state.dirty.cache when a new cache item is chosen.
+    */
+   struct brw_winsys_buffer *last_bo[BRW_MAX_CACHE];
+};
+
+
+/* A unit of derived state: 'dirty' lists the flags it depends on, and
+ * 'prepare' / 'emit' are its callbacks.  Either callback pointer may be
+ * left unset (brw_curbe_offsets in brw_curbe.c provides only
+ * 'prepare').
+ */
+struct brw_tracked_state {
+   struct brw_state_flags dirty;
+   int (*prepare)( struct brw_context *brw );
+   int (*emit)( struct brw_context *brw );
+};
+
+/* Flags for brw->state.dirty.cache.
+ */
+/* One bit per enum brw_cache_id value. */
+#define CACHE_NEW_CC_VP                  (1<<BRW_CC_VP)
+#define CACHE_NEW_CC_UNIT                (1<<BRW_CC_UNIT)
+#define CACHE_NEW_WM_PROG                (1<<BRW_WM_PROG)
+#define CACHE_NEW_SAMPLER_DEFAULT_COLOR  (1<<BRW_SAMPLER_DEFAULT_COLOR)
+#define CACHE_NEW_SAMPLER                (1<<BRW_SAMPLER)
+#define CACHE_NEW_WM_UNIT                (1<<BRW_WM_UNIT)
+#define CACHE_NEW_SF_PROG                (1<<BRW_SF_PROG)
+#define CACHE_NEW_SF_VP                  (1<<BRW_SF_VP)
+#define CACHE_NEW_SF_UNIT                (1<<BRW_SF_UNIT)
+#define CACHE_NEW_VS_UNIT                (1<<BRW_VS_UNIT)
+#define CACHE_NEW_VS_PROG                (1<<BRW_VS_PROG)
+#define CACHE_NEW_GS_UNIT                (1<<BRW_GS_UNIT)
+#define CACHE_NEW_GS_PROG                (1<<BRW_GS_PROG)
+#define CACHE_NEW_CLIP_VP                (1<<BRW_CLIP_VP)
+#define CACHE_NEW_CLIP_UNIT              (1<<BRW_CLIP_UNIT)
+#define CACHE_NEW_CLIP_PROG              (1<<BRW_CLIP_PROG)
+#define CACHE_NEW_SURFACE                (1<<BRW_SS_SURFACE)
+#define CACHE_NEW_SURF_BIND              (1<<BRW_SS_SURF_BIND)
+
+/* Singly linked list node; brw_context::cached_batch_items is the list
+ * head.
+ */
+struct brw_cached_batch_item {
+   struct header *header;
+   GLuint sz;
+   struct brw_cached_batch_item *next;
+};
+
+
+
+/* Protect against a future where VERT_ATTRIB_MAX > 32.  Wouldn't life
+ * be easier if C allowed arrays of packed elements?
+ */
+#define VS_INPUT_BITMASK_DWORDS  ((PIPE_MAX_SHADER_INPUTS+31)/32)
+
+
+
+
+struct brw_vertex_info {
+   GLuint sizes[VS_INPUT_BITMASK_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
+};
+
+
+struct brw_query_object {
+   /** Doubly linked list of active query objects in the context. */
+   struct brw_query_object *prev, *next;
+
+   /** Last query BO associated with this query. */
+   struct brw_winsys_buffer *bo;
+   /** First index in bo with query data for this object. */
+   int first_index;
+   /** Last index in bo with query data for this object. */
+   int last_index;
+
+   /* Total count of pixels from previous BOs */
+   uint64_t result;
+};
+
+#define CC_RELOC_VP 0
+
+
+/**
+ * brw_context is derived from pipe_context
+ *
+ * 'base' must remain the first member: brw_context() below converts a
+ * pipe_context pointer to a brw_context pointer with a plain cast.
+ */
+struct brw_context
+{
+   struct pipe_context base;
+   struct brw_chipset chipset;
+
+   struct brw_winsys_screen *sws;
+
+   struct brw_batchbuffer *batch;
+
+   GLuint primitive;
+   GLuint reduced_primitive;
+
+   /* Active state from the state tracker:
+    */
+   struct {
+      struct brw_vertex_shader *vertex_shader;
+      struct brw_fragment_shader *fragment_shader;
+      const struct brw_blend_state *blend;
+      const struct brw_rasterizer_state *rast;
+      const struct brw_depth_stencil_state *zstencil;
+
+      const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS];
+      unsigned num_samplers;
+
+      struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+      struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+      struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+      unsigned num_vertex_elements;
+      unsigned num_textures;
+      unsigned num_vertex_buffers;
+
+      struct pipe_scissor_state scissor;
+      struct pipe_viewport_state viewport;
+      struct pipe_framebuffer_state fb;
+      struct pipe_clip_state ucp;
+      struct pipe_buffer *vertex_constants;
+      struct pipe_buffer *fragment_constants;
+
+      struct brw_blend_constant_color bcc;
+      struct brw_polygon_stipple bps;
+      struct brw_cc_viewport ccv;
+
+      /**
+       * Index buffer for this draw_prims call.
+       *
+       * Updates are signaled by PIPE_NEW_INDEX_BUFFER.
+       */
+      struct pipe_buffer *index_buffer;
+      unsigned index_size;
+
+      /* Updates are signalled by PIPE_NEW_INDEX_RANGE:
+       */
+      unsigned min_index;
+      unsigned max_index;
+
+   } curr;
+
+   struct {
+      /* Accumulated dirty bits; brw_create_context() starts with every
+       * 'mesa' and 'brw' bit set.
+       */
+      struct brw_state_flags dirty;
+
+      /**
+       * List of buffers accumulated in brw_validate_state to receive
+       * dri_bo_check_aperture treatment before exec, so we can know if we
+       * should flush the batch and try again before emitting primitives.
+       *
+       * This can be a fixed number as we only have a limited number of
+       * objects referenced from the batchbuffer in a primitive emit,
+       * consisting of the vertex buffers, pipelined state pointers,
+       * the CURBE, the depth buffer, and a query BO.
+       */
+      struct brw_winsys_buffer *validated_bos[PIPE_MAX_SHADER_INPUTS + 16];
+      int validated_bo_count;
+   } state;
+
+   struct brw_cache cache;  /** non-surface items */
+   struct brw_cache surface_cache;  /* surface items */
+   struct brw_cached_batch_item *cached_batch_items;
+
+   struct {
+      struct u_upload_mgr *upload_vertex;
+      struct u_upload_mgr *upload_index;
+
+      /* Information on uploaded vertex buffers:
+       */
+      struct {
+         unsigned stride;       /* in bytes between successive vertices */
+         unsigned offset;       /* in bytes, of first vertex in bo */
+         unsigned vertex_count; /* count of valid vertices which may be accessed */
+         struct brw_winsys_buffer *bo;
+      } vb[PIPE_MAX_ATTRIBS];
+
+      unsigned nr_vb;           /* currently the same as curr.num_vertex_buffers */
+   } vb;
+
+   struct {
+      /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
+      struct brw_winsys_buffer *bo;
+      unsigned int offset;
+      unsigned int size;
+      /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
+       * avoid re-uploading the IB packet over and over if we're actually
+       * referencing the same index buffer.
+       */
+      unsigned int start_vertex_offset;
+   } ib;
+
+
+   /* BRW_NEW_URB_ALLOCATIONS:
+    *
+    * NOTE(review): no flag of that name is defined in this header;
+    * brw_curbe.c annotates its reads of nr_cs_entries and csize with
+    * BRW_NEW_URB_FENCE, which is presumably the flag meant here.
+    */
+   struct {
+      GLuint vsize;             /* vertex size plus header in urb registers */
+      GLuint csize;             /* constant buffer size in urb registers */
+      GLuint sfsize;            /* setup data size in urb registers */
+
+      GLboolean constrained;
+
+      GLuint nr_vs_entries;
+      GLuint nr_gs_entries;
+      GLuint nr_clip_entries;
+      GLuint nr_sf_entries;
+      GLuint nr_cs_entries;
+
+      GLuint vs_start;
+      GLuint gs_start;
+      GLuint clip_start;
+      GLuint sf_start;
+      GLuint cs_start;
+   } urb;
+
+
+   /* BRW_NEW_CURBE_OFFSETS:
+    *
+    * The *_start / *_size / total_size fields are written by
+    * calculate_curbe_offsets() in brw_curbe.c, which lays the sections
+    * out in the order wm, clip, vs.
+    */
+   struct {
+      GLuint wm_start;  /**< pos of first wm const in CURBE buffer */
+      GLuint wm_size;   /**< number of float[4] consts, multiple of 16 */
+      GLuint clip_start;
+      GLuint clip_size;
+      GLuint vs_start;
+      GLuint vs_size;
+      GLuint total_size;
+
+      struct brw_winsys_buffer *curbe_bo;
+      /** Offset within curbe_bo of space for current curbe entry */
+      GLuint curbe_offset;
+      /** Offset within curbe_bo of space for next curbe entry */
+      GLuint curbe_next_offset;
+
+      GLfloat *last_buf;
+      GLuint last_bufsz;
+      /**
+       * Whether we should create a new bo instead of reusing the old one
+       * (if we just dispatch the batch pointing at the old one).
+       */
+      GLboolean need_new_bo;
+   } curbe;
+
+   struct {
+      struct brw_vs_prog_data *prog_data;
+
+      struct brw_winsys_buffer *prog_bo;
+      struct brw_winsys_buffer *state_bo;
+
+      /** Binding table of pointers to surf_bo entries */
+      struct brw_winsys_buffer *bind_bo;
+      struct brw_winsys_buffer *surf_bo[BRW_VS_MAX_SURF];
+      GLuint nr_surfaces;
+   } vs;
+
+   struct {
+      struct brw_gs_prog_data *prog_data;
+
+      GLboolean prog_active;
+      struct brw_winsys_buffer *prog_bo;
+      struct brw_winsys_buffer *state_bo;
+   } gs;
+
+   struct {
+      struct brw_clip_prog_data *prog_data;
+
+      struct brw_winsys_buffer *prog_bo;
+      struct brw_winsys_buffer *state_bo;
+      struct brw_winsys_buffer *vp_bo;
+   } clip;
+
+
+   struct {
+      struct brw_sf_prog_data *prog_data;
+
+      struct brw_winsys_buffer *prog_bo;
+      struct brw_winsys_buffer *state_bo;
+      struct brw_winsys_buffer *vp_bo;
+   } sf;
+
+   struct {
+      struct brw_wm_prog_data *prog_data;
+      struct brw_wm_compile *compile_data;
+
+      /** Input sizes, calculated from active vertex program.
+       * One bit per fragment program input attribute.
+       */
+      /*GLbitfield input_size_masks[4];*/
+
+      /** Array of surface default colors (texture border color) */
+      struct brw_winsys_buffer *sdc_bo[BRW_MAX_TEX_UNIT];
+
+      GLuint render_surf;
+      GLuint nr_surfaces;
+
+      GLuint max_threads;
+      struct brw_winsys_buffer *scratch_bo;
+
+      GLuint sampler_count;
+      struct brw_winsys_buffer *sampler_bo;
+
+      /** Binding table of pointers to surf_bo entries */
+      struct brw_winsys_buffer *bind_bo;
+      struct brw_winsys_buffer *surf_bo[BRW_WM_MAX_SURF];
+
+      struct brw_winsys_buffer *prog_bo;
+      struct brw_winsys_buffer *state_bo;
+   } wm;
+
+
+   struct {
+      struct brw_winsys_buffer *state_bo;
+
+      struct brw_cc_unit_state cc;
+      struct brw_winsys_reloc reloc[1];
+   } cc;
+
+   struct {
+      /* List head; initialised with make_empty_list() in
+       * brw_create_context().
+       */
+      struct brw_query_object active_head;
+      struct brw_winsys_buffer *bo;
+      int index;
+      GLboolean active;
+      int stats_wm;
+   } query;
+
+   struct {
+      unsigned always_emit_state:1;
+      unsigned always_flush_batch:1;
+      unsigned force_swtnl:1;
+      unsigned no_swtnl:1;
+   } flags;
+
+   /* Used to give every program string a unique id
+    */
+   GLuint program_id;
+};
+
+
+
+/*======================================================================
+ * brw_queryobj.c
+ */
+void brw_init_query(struct brw_context *brw);
+enum pipe_error brw_prepare_query_begin(struct brw_context *brw);
+void brw_emit_query_begin(struct brw_context *brw);
+void brw_emit_query_end(struct brw_context *brw);
+
+/*======================================================================
+ * brw_state_dump.c
+ */
+void brw_debug_batch(struct brw_context *intel);
+
+
+/*======================================================================
+ * brw_pipe_*.c
+ *
+ * Every _init below has a matching _cleanup; brw_context.c calls all
+ * the _init functions when creating a context and all the _cleanup
+ * functions when destroying one.
+ */
+void brw_pipe_blend_init( struct brw_context *brw );
+void brw_pipe_depth_stencil_init( struct brw_context *brw );
+void brw_pipe_framebuffer_init( struct brw_context *brw );
+void brw_pipe_flush_init( struct brw_context *brw );
+void brw_pipe_misc_init( struct brw_context *brw );
+void brw_pipe_query_init( struct brw_context *brw );
+void brw_pipe_rast_init( struct brw_context *brw );
+void brw_pipe_sampler_init( struct brw_context *brw );
+void brw_pipe_shader_init( struct brw_context *brw );
+void brw_pipe_vertex_init( struct brw_context *brw );
+void brw_pipe_clear_init( struct brw_context *brw );
+
+
+void brw_pipe_blend_cleanup( struct brw_context *brw );
+void brw_pipe_depth_stencil_cleanup( struct brw_context *brw );
+void brw_pipe_framebuffer_cleanup( struct brw_context *brw );
+void brw_pipe_flush_cleanup( struct brw_context *brw );
+void brw_pipe_misc_cleanup( struct brw_context *brw );
+void brw_pipe_query_cleanup( struct brw_context *brw );
+void brw_pipe_rast_cleanup( struct brw_context *brw );
+void brw_pipe_sampler_cleanup( struct brw_context *brw );
+void brw_pipe_shader_cleanup( struct brw_context *brw );
+void brw_pipe_vertex_cleanup( struct brw_context *brw );
+void brw_pipe_clear_cleanup( struct brw_context *brw );
+
+void brw_hw_cc_init( struct brw_context *brw );
+void brw_hw_cc_cleanup( struct brw_context *brw );
+
+
+
+void brw_context_flush( struct brw_context *brw );
+
+
+/* brw_urb.c
+ */
+int brw_upload_urb_fence(struct brw_context *brw);
+
+/* brw_curbe.c
+ */
+int brw_upload_cs_urb_state(struct brw_context *brw);
+
+
+/*======================================================================
+ * Inline conversion functions.  These are better-typed than the
+ * macros used previously:
+ */
+/* Downcast from the pipe_context base to the driver context.  A plain
+ * cast suffices because 'base' is the first member of struct
+ * brw_context.
+ */
+static INLINE struct brw_context *
+brw_context( struct pipe_context *ctx )
+{
+   return (struct brw_context *)ctx;
+}
+
+
+/* Chipset family tests; each reads one field of brw->chipset. */
+#define BRW_IS_965(brw)    ((brw)->chipset.is_965)
+#define BRW_IS_IGDNG(brw)  ((brw)->chipset.is_igdng)
+#define BRW_IS_G4X(brw)    ((brw)->chipset.is_g4x)
+
+
+#endif
+
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
new file mode 100644
index 00000000000..3f031577d5a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -0,0 +1,390 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+#include "brw_util.h"
+#include "brw_debug.h"
+#include "brw_screen.h"
+
+
+/**
+ * Partition the CURBE between the various users of constant values:
+ * Note that vertex and fragment shaders can now fetch constants out
+ * of constant buffers.  We no longer allocate a block of the GRF for
+ * constants.  That greatly reduces the demand for space in the CURBE.
+ * Some of the comments within are dated...
+ *
+ * Sections are laid out in the order WM, CLIP, VS.  The layout is only
+ * recomputed when a section has grown, the clip section has changed
+ * size, or the total has fallen below a quarter of the current
+ * allocation; BRW_NEW_CURBE_OFFSETS is raised whenever it is.
+ *
+ * \return always 0.
+ */
+static int calculate_curbe_offsets( struct brw_context *brw )
+{
+   /* CACHE_NEW_WM_PROG */
+   const GLuint nr_fp_regs = brw->wm.prog_data->curb_read_length;
+
+   /* BRW_NEW_VERTEX_PROGRAM */
+   const GLuint nr_vp_regs = brw->vs.prog_data->curb_read_length;
+   GLuint nr_clip_regs = 0;
+   GLuint total_regs;
+
+   /* PIPE_NEW_CLIP */
+   if (brw->curr.ucp.nr) {
+      /* Six fixed planes plus the user planes, four floats each,
+       * rounded up to whole 16-float registers.
+       */
+      GLuint nr_planes = 6 + brw->curr.ucp.nr;
+      nr_clip_regs = (nr_planes * 4 + 15) / 16;
+   }
+
+
+   total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
+
+   /* When this is > 32, want to use a true constant buffer to hold
+    * the extra constants.
+    */
+   assert(total_regs <= 32);
+
+   /* Lazy resize:
+    */
+   if (nr_fp_regs > brw->curbe.wm_size ||
+       nr_vp_regs > brw->curbe.vs_size ||
+       nr_clip_regs != brw->curbe.clip_size ||
+       (total_regs < brw->curbe.total_size / 4 &&
+        brw->curbe.total_size > 16)) {
+
+      GLuint reg = 0;
+
+      /* Calculate a new layout:
+       */
+      brw->curbe.wm_start = reg;
+      brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
+      brw->curbe.clip_start = reg;
+      brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
+      brw->curbe.vs_start = reg;
+      brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
+      brw->curbe.total_size = reg;
+
+      /* All six values are GLuint, hence %u. */
+      if (BRW_DEBUG & DEBUG_CURBE)
+         debug_printf("curbe wm %u+%u clip %u+%u vs %u+%u\n",
+                      brw->curbe.wm_start,
+                      brw->curbe.wm_size,
+                      brw->curbe.clip_start,
+                      brw->curbe.clip_size,
+                      brw->curbe.vs_start,
+                      brw->curbe.vs_size );
+
+      brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
+   }
+
+   return 0;
+}
+
+
+/* Tracked-state entry for the CURBE layout: re-run when the clip state,
+ * the vertex program or the cached WM program changes.  It has a
+ * prepare hook only; no emit hook is set.
+ */
+const struct brw_tracked_state brw_curbe_offsets = {
+   .dirty = {
+      .mesa = PIPE_NEW_CLIP,
+      .brw  = BRW_NEW_VERTEX_PROGRAM,
+      .cache = CACHE_NEW_WM_PROG
+   },
+   .prepare = calculate_curbe_offsets
+};
+
+
+
+
+/* Define the number of curbes within CS's urb allocation.  Multiple
+ * urb entries -> multiple curbes.  These will be used by
+ * fixed-function hardware in a double-buffering scheme to avoid a
+ * pipeline stall each time the contents of the curbe is changed.
+ */
+int brw_upload_cs_urb_state(struct brw_context *brw)
+{
+   struct brw_cs_urb_state cs_urb;
+   memset(&cs_urb, 0, sizeof(cs_urb));
+
+   /* It appears that this is the state packet for the CS unit, ie. the
+    * urb entries detailed here are housed in the CS range from the
+    * URB_FENCE command.
+    */
+   cs_urb.header.opcode = CMD_CS_URB_STATE;
+   cs_urb.header.length = sizeof(cs_urb)/4 - 2;
+
+   /* BRW_NEW_URB_FENCE */
+   cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
+   cs_urb.bits0.urb_entry_size = brw->urb.csize - 1;
+
+   assert(brw->urb.nr_cs_entries);
+   BRW_CACHED_BATCH_STRUCT(brw, &cs_urb);
+   return 0;
+}
+
+/* The six fixed planes that are always written ahead of any user clip
+ * planes.  Read-only.
+ */
+static const GLfloat fixed_plane[6][4] = {
+   { 0,    0,   -1, 1 },
+   { 0,    0,    1, 1 },
+   { 0,   -1,    0, 1 },
+   { 0,    1,    0, 1 },
+   {-1,    0,    0, 1 },
+   { 1,    0,    0, 1 }
+};
+
+/* Upload a new set of constants.  Too much variability to go into the
+ * cache mechanism, but maybe would benefit from a comparison against
+ * the current uploaded set of constants.
+ *
+ * Builds a CPU-side image of the whole CURBE (fragment shader
+ * immediates and constants, clip planes, vertex shader immediates and
+ * constants) using the section offsets in brw->curbe, each section
+ * start being scaled by 16 floats.  If the image differs from the one
+ * kept in brw->curbe.last_buf, it is copied into the curbe buffer
+ * object at a fresh offset and becomes the new last_buf.
+ *
+ * Returns 0 on success, PIPE_ERROR_OUT_OF_MEMORY when the staging
+ * image cannot be allocated or a constant buffer cannot be mapped, or
+ * whatever error bo_alloc reports.
+ */
+static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
+{
+   struct pipe_screen *screen = brw->base.screen;
+   const GLuint sz = brw->curbe.total_size;
+   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
+   enum pipe_error ret;
+   GLfloat *buf;
+   GLuint i;
+
+   if (sz == 0) {
+      /* Nothing to upload: drop the cached image.  It was allocated
+       * with CALLOC, so it is released with the matching FREE.
+       */
+      if (brw->curbe.last_buf) {
+         FREE(brw->curbe.last_buf);
+         brw->curbe.last_buf = NULL;
+         brw->curbe.last_bufsz = 0;
+      }
+      return 0;
+   }
+
+   buf = (GLfloat *) CALLOC(bufsz, 1);
+   if (buf == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* fragment shader constants */
+   if (brw->curbe.wm_size) {
+      const struct brw_fragment_shader *fs = brw->curr.fragment_shader;
+      GLuint offset = brw->curbe.wm_start * 16;
+      GLuint nr_immediate, nr_const;
+
+      /* Immediates come first, four floats each. */
+      nr_immediate = fs->immediates.nr;
+      if (nr_immediate) {
+         memcpy(&buf[offset],
+                fs->immediates.data,
+                nr_immediate * 4 * sizeof(float));
+
+         offset += nr_immediate * 4;
+      }
+
+      nr_const = fs->info.file_max[TGSI_FILE_CONSTANT] + 1;
+/*      nr_const = brw->wm.prog_data->nr_params; */
+      if (nr_const) {
+         const GLfloat *value = screen->buffer_map( screen,
+                                                    brw->curr.fragment_constants,
+                                                    PIPE_BUFFER_USAGE_CPU_READ);
+
+         /* Do not copy from a failed mapping. */
+         if (value == NULL) {
+            FREE(buf);
+            return PIPE_ERROR_OUT_OF_MEMORY;
+         }
+
+         memcpy(&buf[offset], value,
+                nr_const * 4 * sizeof(float));
+
+         screen->buffer_unmap( screen,
+                               brw->curr.fragment_constants );
+      }
+   }
+
+
+   /* The clipplanes are actually delivered to both CLIP and VS units.
+    * VS uses them to calculate the outcode bitmasks.
+    */
+   if (brw->curbe.clip_size) {
+      GLuint offset = brw->curbe.clip_start * 16;
+      GLuint j;
+
+      /* If any planes are going this way, send them all this way:
+       */
+      for (i = 0; i < 6; i++) {
+         buf[offset + i * 4 + 0] = fixed_plane[i][0];
+         buf[offset + i * 4 + 1] = fixed_plane[i][1];
+         buf[offset + i * 4 + 2] = fixed_plane[i][2];
+         buf[offset + i * 4 + 3] = fixed_plane[i][3];
+      }
+
+      /* Clip planes: i carries on from 6, so the user planes land
+       * directly after the fixed ones.
+       */
+      assert(brw->curr.ucp.nr <= 6);
+      for (j = 0; j < brw->curr.ucp.nr; j++) {
+         buf[offset + i * 4 + 0] = brw->curr.ucp.ucp[j][0];
+         buf[offset + i * 4 + 1] = brw->curr.ucp.ucp[j][1];
+         buf[offset + i * 4 + 2] = brw->curr.ucp.ucp[j][2];
+         buf[offset + i * 4 + 3] = brw->curr.ucp.ucp[j][3];
+         i++;
+      }
+   }
+
+   /* vertex shader constants */
+   if (brw->curbe.vs_size) {
+      GLuint offset = brw->curbe.vs_start * 16;
+      const struct brw_vertex_shader *vs = brw->curr.vertex_shader;
+      GLuint nr_immediate, nr_const;
+
+      nr_immediate = vs->immediates.nr;
+      if (nr_immediate) {
+         memcpy(&buf[offset],
+                vs->immediates.data,
+                nr_immediate * 4 * sizeof(float));
+
+         offset += nr_immediate * 4;
+      }
+
+      nr_const = vs->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      if (nr_const) {
+         /* XXX: note that constant buffers are currently *already* in
+          * buffer objects.  If we want to keep on putting them into the
+          * curbe, makes sense to treat constbuf's specially with malloc.
+          */
+         const GLfloat *value = screen->buffer_map( screen,
+                                                    brw->curr.vertex_constants,
+                                                    PIPE_BUFFER_USAGE_CPU_READ);
+
+         /* Do not copy from a failed mapping. */
+         if (value == NULL) {
+            FREE(buf);
+            return PIPE_ERROR_OUT_OF_MEMORY;
+         }
+
+         /* XXX: what if user's constant buffer is too small?
+          */
+         memcpy(&buf[offset], value, nr_const * 4 * sizeof(float));
+
+         screen->buffer_unmap( screen, brw->curr.vertex_constants );
+      }
+   }
+
+   if (BRW_DEBUG & DEBUG_CURBE) {
+      for (i = 0; i < sz*16; i+=4)
+         debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+                      buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
+
+      /* Only compare when both images have the same size; otherwise
+       * memcmp would read beyond the end of the smaller one.
+       */
+      debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+                   (void *)brw->curbe.last_buf, (void *)buf,
+                   bufsz, brw->curbe.last_bufsz,
+                   (brw->curbe.last_buf &&
+                    bufsz == brw->curbe.last_bufsz)
+                   ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
+   }
+
+   if (brw->curbe.curbe_bo != NULL &&
+       brw->curbe.last_buf &&
+       bufsz == brw->curbe.last_bufsz &&
+       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
+      /* constants have not changed */
+      FREE(buf);
+   }
+   else {
+      /* constants have changed: the new image replaces the cached one */
+      FREE(brw->curbe.last_buf);
+
+      brw->curbe.last_buf = buf;
+      brw->curbe.last_bufsz = bufsz;
+
+      /* Drop the current buffer object if a new one was requested or
+       * the image no longer fits behind the data already in it.
+       */
+      if (brw->curbe.curbe_bo != NULL &&
+          (brw->curbe.need_new_bo ||
+           brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size))
+      {
+         bo_reference(&brw->curbe.curbe_bo, NULL);
+      }
+
+      if (brw->curbe.curbe_bo == NULL) {
+         /* Allocate a single page for CURBE entries for this
+          * batchbuffer.  They're generally around 64b.  We will
+          * discard the curbe buffer after the batch is flushed to
+          * avoid synchronous updates.
+          */
+         ret = brw->sws->bo_alloc(brw->sws,
+                                  BRW_BUFFER_TYPE_CURBE,
+                                  4096, 1 << 6,
+                                  &brw->curbe.curbe_bo);
+         if (ret)
+            return ret;
+
+         brw->curbe.curbe_next_offset = 0;
+      }
+
+      /* Claim the next slot and keep following slots 64-byte aligned. */
+      brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
+      brw->curbe.curbe_next_offset += bufsz;
+      brw->curbe.curbe_next_offset = align(brw->curbe.curbe_next_offset, 64);
+
+      /* Copy data to the buffer:
+       */
+      brw->sws->bo_subdata(brw->curbe.curbe_bo,
+                           BRW_DATA_CONSTANT_BUFFER,
+                           brw->curbe.curbe_offset,
+                           bufsz,
+                           buf,
+                           NULL, 0);
+   }
+
+   brw_add_validated_bo(brw, brw->curbe.curbe_bo);
+
+   /* Because this provokes an action (ie copy the constants into the
+    * URB), it shouldn't be shortcircuited if identical to the
+    * previous time - because eg. the urb destination may have
+    * changed, or the urb contents different to last time.
+    *
+    * Note that the data referred to is actually copied internally,
+    * not just used in place according to passed pointer.
+    *
+    * It appears that the CS unit takes care of using each available
+    * URB entry (Const URB Entry == CURBE) in turn, and issuing
+    * flushes as necessary when doublebuffering of CURBEs isn't
+    * possible.
+    */
+
+   return 0;
+}
+
+/* Emit the two-dword CONST_BUFFER packet.  With an empty CURBE the
+ * second dword is zero; otherwise it is a relocation to the range of
+ * curbe_bo chosen by prepare_curbe_buffer, with (sz - 1) added to the
+ * offset.
+ * NOTE(review): (1 << 8) is presumably the "buffer valid" bit and
+ * (sz - 1) the length field of the packet - confirm against the PRM.
+ */
+static enum pipe_error emit_curbe_buffer(struct brw_context *brw)
+{
+   GLuint sz = brw->curbe.total_size;
+
+   BEGIN_BATCH(2, IGNORE_CLIPRECTS);
+   if (sz == 0) {
+      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
+      OUT_BATCH(0);
+   } else {
+      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
+      OUT_RELOC(brw->curbe.curbe_bo,
+                BRW_USAGE_STATE,
+                (sz - 1) + brw->curbe.curbe_offset);
+   }
+   ADVANCE_BATCH();
+   return 0;
+}
+
+const struct brw_tracked_state brw_curbe_buffer = {
+   .dirty = {
+      .mesa = (PIPE_NEW_FRAGMENT_CONSTANTS |
+               PIPE_NEW_VERTEX_CONSTANTS |
+               PIPE_NEW_CLIP),
+      .brw  = (BRW_NEW_FRAGMENT_PROGRAM |
+               BRW_NEW_VERTEX_PROGRAM |
+               BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
+               BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
+               BRW_NEW_CURBE_OFFSETS |
+               BRW_NEW_BATCH),
+      .cache = (CACHE_NEW_WM_PROG)
+   },
+   .prepare = prepare_curbe_buffer,
+   .emit = emit_curbe_buffer,
+};
+
diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h
new file mode 100644
index 00000000000..ae8e9254a68
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_debug.h
@@ -0,0 +1,43 @@
+#ifndef BRW_DEBUG_H
+#define BRW_DEBUG_H
+
+/* ================================================================
+ * Debugging:
+ */
+
+#define DEBUG_TEXTURE	        0x1
+#define DEBUG_STATE	        0x2
+#define DEBUG_IOCTL	        0x4
+#define DEBUG_BLIT	        0x8
+#define DEBUG_CURBE             0x10
+#define DEBUG_FALLBACKS	        0x20
+#define DEBUG_VERBOSE	        0x40
+#define DEBUG_BATCH             0x80
+#define DEBUG_PIXEL             0x100
+#define DEBUG_WINSYS            0x200
+#define DEBUG_MIN_URB           0x400
+#define DEBUG_DISASSEM          0x800
+#define DEBUG_unused3           0x1000
+#define DEBUG_SYNC	        0x2000
+#define DEBUG_PRIMS	        0x4000
+#define DEBUG_VERTS	        0x8000
+#define DEBUG_unused4           0x10000
+#define DEBUG_DMA               0x20000
+#define DEBUG_SANITY            0x40000
+#define DEBUG_SLEEP             0x80000
+#define DEBUG_STATS             0x100000
+#define DEBUG_unused5           0x200000
+#define DEBUG_SINGLE_THREAD     0x400000
+#define DEBUG_WM                0x800000
+#define DEBUG_URB               0x1000000
+#define DEBUG_VS                0x2000000
+
+#ifdef DEBUG
+extern int BRW_DEBUG;
+#else
+#define BRW_DEBUG 0
+#endif
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
new file mode 100644
index 00000000000..e201ce4d7ce
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -0,0 +1,847 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_DEFINES_H +#define BRW_DEFINES_H + +/* 3D state: + */ +#define _3DOP_3DSTATE_PIPELINED 0x0 +#define _3DOP_3DSTATE_NONPIPELINED 0x1 +#define _3DOP_3DCONTROL 0x2 +#define _3DOP_3DPRIMITIVE 0x3 + +#define _3DSTATE_PIPELINED_POINTERS 0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 +#define _3DSTATE_VERTEX_BUFFERS 0x08 +#define _3DSTATE_VERTEX_ELEMENTS 0x09 +#define _3DSTATE_INDEX_BUFFER 0x0A +#define _3DSTATE_VF_STATISTICS 0x0B +#define _3DSTATE_DRAWING_RECTANGLE 0x00 +#define _3DSTATE_CONSTANT_COLOR 0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 +#define _3DSTATE_CHROMA_KEY 0x04 +#define _3DSTATE_DEPTH_BUFFER 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 +#define _3DSTATE_LINE_STIPPLE 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 +#define _3DCONTROL 0x00 + +#define PIPE_CONTROL_NOWRITE 0x00 +#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 +#define PIPE_CONTROL_WRITEDEPTH 0x02 +#define PIPE_CONTROL_WRITETIMESTAMP 0x03 + +#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 +#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 + +#define BRW_ANISORATIO_2 0 +#define BRW_ANISORATIO_4 1 +#define BRW_ANISORATIO_6 2 +#define BRW_ANISORATIO_8 3 +#define BRW_ANISORATIO_10 4 +#define BRW_ANISORATIO_12 5 +#define BRW_ANISORATIO_14 6 +#define BRW_ANISORATIO_16 7 + +#define BRW_BLENDFACTOR_ONE 0x1 +#define BRW_BLENDFACTOR_SRC_COLOR 0x2 +#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 +#define BRW_BLENDFACTOR_DST_ALPHA 0x4 +#define BRW_BLENDFACTOR_DST_COLOR 0x5 +#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define BRW_BLENDFACTOR_CONST_COLOR 0x7 +#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 +#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 +#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A +#define BRW_BLENDFACTOR_ZERO 0x11 +#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 +#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define BRW_BLENDFUNCTION_ADD 0 +#define BRW_BLENDFUNCTION_SUBTRACT 1 +#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BRW_BLENDFUNCTION_MIN 3 +#define BRW_BLENDFUNCTION_MAX 4 + +#define BRW_ALPHATEST_FORMAT_UNORM8 0 +#define BRW_ALPHATEST_FORMAT_FLOAT32 1 + +#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define BRW_CHROMAKEY_REPLACE_BLACK 1 + +#define BRW_CLIP_API_OGL 0 +#define BRW_CLIP_API_DX 1 + +#define BRW_CLIPMODE_NORMAL 0 +#define BRW_CLIPMODE_CLIP_ALL 1 +#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 +#define BRW_CLIPMODE_REJECT_ALL 3 +#define BRW_CLIPMODE_ACCEPT_ALL 4 +#define BRW_CLIPMODE_KERNEL_CLIP 5 + +#define BRW_CLIP_NDCSPACE 0 +#define BRW_CLIP_SCREENSPACE 1 + +#define BRW_COMPAREFUNCTION_ALWAYS 0 +#define BRW_COMPAREFUNCTION_NEVER 1 +#define BRW_COMPAREFUNCTION_LESS 2 +#define BRW_COMPAREFUNCTION_EQUAL 3 +#define BRW_COMPAREFUNCTION_LEQUAL 4 +#define BRW_COMPAREFUNCTION_GREATER 5 +#define 
BRW_COMPAREFUNCTION_NOTEQUAL 6 +#define BRW_COMPAREFUNCTION_GEQUAL 7 + +#define BRW_COVERAGE_PIXELS_HALF 0 +#define BRW_COVERAGE_PIXELS_1 1 +#define BRW_COVERAGE_PIXELS_2 2 +#define BRW_COVERAGE_PIXELS_4 3 + +#define BRW_CULLMODE_BOTH 0 +#define BRW_CULLMODE_NONE 1 +#define BRW_CULLMODE_FRONT 2 +#define BRW_CULLMODE_BACK 3 + +#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define BRW_DEPTHFORMAT_D32_FLOAT 1 +#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define BRW_DEPTHFORMAT_D16_UNORM 5 + +#define BRW_FLOATING_POINT_IEEE_754 0 +#define BRW_FLOATING_POINT_NON_IEEE_754 1 + +#define BRW_FRONTWINDING_CW 0 +#define BRW_FRONTWINDING_CCW 1 + +#define BRW_SPRITE_POINT_ENABLE 16 + +#define BRW_INDEX_BYTE 0 +#define BRW_INDEX_WORD 1 +#define BRW_INDEX_DWORD 2 + +#define BRW_LOGICOPFUNCTION_CLEAR 0 +#define BRW_LOGICOPFUNCTION_NOR 1 +#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 +#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 +#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 +#define BRW_LOGICOPFUNCTION_INVERT 5 +#define BRW_LOGICOPFUNCTION_XOR 6 +#define BRW_LOGICOPFUNCTION_NAND 7 +#define BRW_LOGICOPFUNCTION_AND 8 +#define BRW_LOGICOPFUNCTION_EQUIV 9 +#define BRW_LOGICOPFUNCTION_NOOP 10 +#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 +#define BRW_LOGICOPFUNCTION_COPY 12 +#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 +#define BRW_LOGICOPFUNCTION_OR 14 +#define BRW_LOGICOPFUNCTION_SET 15 + +#define BRW_MAPFILTER_NEAREST 0x0 +#define BRW_MAPFILTER_LINEAR 0x1 +#define BRW_MAPFILTER_ANISOTROPIC 0x2 + +#define BRW_MIPFILTER_NONE 0 +#define BRW_MIPFILTER_NEAREST 1 +#define BRW_MIPFILTER_LINEAR 3 + +#define BRW_POLYGON_FRONT_FACING 0 +#define BRW_POLYGON_BACK_FACING 1 + +#define BRW_PREFILTER_ALWAYS 0x0 +#define BRW_PREFILTER_NEVER 0x1 +#define BRW_PREFILTER_LESS 0x2 +#define BRW_PREFILTER_EQUAL 0x3 +#define BRW_PREFILTER_LEQUAL 0x4 +#define BRW_PREFILTER_GREATER 0x5 +#define BRW_PREFILTER_NOTEQUAL 0x6 
+#define BRW_PREFILTER_GEQUAL 0x7 + +#define BRW_PROVOKING_VERTEX_0 0 +#define BRW_PROVOKING_VERTEX_1 1 +#define BRW_PROVOKING_VERTEX_2 2 + +#define BRW_RASTRULE_UPPER_LEFT 0 +#define BRW_RASTRULE_UPPER_RIGHT 1 +/* These are listed as "Reserved, but not seen as useful" + * in Intel documentation (page 212, "Point Rasterization Rule", + * section 7.4 "SF Pipeline State Summary", of document + * "Intel® 965 Express Chipset Family and Intel® G35 Express + * Chipset Graphics Controller Programmer's Reference Manual, + * Volume 2: 3D/Media", Revision 1.0b as of January 2008, + * available at + * http://intellinuxgraphics.org/documentation.html + * at the time of this writing). + * + * These appear to be supported on at least some + * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT + * is useful when using OpenGL to render to a FBO + * (which has the pixel coordinate Y orientation inverted + * with respect to the normal OpenGL pixel coordinate system). + */ +#define BRW_RASTRULE_LOWER_LEFT 2 +#define BRW_RASTRULE_LOWER_RIGHT 3 + +#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define BRW_STENCILOP_KEEP 0 +#define BRW_STENCILOP_ZERO 1 +#define BRW_STENCILOP_REPLACE 2 +#define BRW_STENCILOP_INCRSAT 3 +#define BRW_STENCILOP_DECRSAT 4 +#define BRW_STENCILOP_INCR 5 +#define BRW_STENCILOP_DECR 6 +#define BRW_STENCILOP_INVERT 7 + +#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define 
BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 +#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 +#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B +#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C +#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D +#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define 
BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE +#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF +#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 +#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 +#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 +#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 +#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define 
BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 +#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 +#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 +#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 +#define BRW_SURFACEFORMAT_R16_UNORM 0x10A +#define BRW_SURFACEFORMAT_R16_SNORM 0x10B +#define BRW_SURFACEFORMAT_R16_SINT 0x10C +#define BRW_SURFACEFORMAT_R16_UINT 0x10D +#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E +#define BRW_SURFACEFORMAT_I16_UNORM 0x111 +#define BRW_SURFACEFORMAT_L16_UNORM 0x112 +#define BRW_SURFACEFORMAT_A16_UNORM 0x113 +#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 +#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 +#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D +#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E +#define BRW_SURFACEFORMAT_R16_USCALED 0x11F +#define BRW_SURFACEFORMAT_R8_UNORM 0x140 +#define BRW_SURFACEFORMAT_R8_SNORM 0x141 +#define BRW_SURFACEFORMAT_R8_SINT 0x142 +#define BRW_SURFACEFORMAT_R8_UINT 0x143 +#define BRW_SURFACEFORMAT_A8_UNORM 0x144 +#define BRW_SURFACEFORMAT_I8_UNORM 0x145 +#define BRW_SURFACEFORMAT_L8_UNORM 0x146 +#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 +#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 +#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 +#define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C +#define BRW_SURFACEFORMAT_R1_UINT 0x181 +#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 +#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 +#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 +#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 +#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A +#define 
BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define BRW_SURFACEFORMAT_MONO8 0x18E +#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 +#define BRW_SURFACEFORMAT_FXT1 0x192 +#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 +#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A +#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F +#define BRW_SURFACEFORMAT_INVALID 0xFFF + +#define BRW_SURFACERETURNFORMAT_FLOAT32 0 +#define BRW_SURFACERETURNFORMAT_S1 1 + +#define BRW_SURFACE_1D 0 +#define BRW_SURFACE_2D 1 +#define BRW_SURFACE_3D 2 +#define BRW_SURFACE_CUBE 3 +#define BRW_SURFACE_BUFFER 4 +#define BRW_SURFACE_NULL 7 + +#define BRW_TEXCOORDMODE_WRAP 0 +#define BRW_TEXCOORDMODE_MIRROR 1 +#define BRW_TEXCOORDMODE_CLAMP 2 +#define BRW_TEXCOORDMODE_CUBE 3 +#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 +#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 + +#define BRW_THREAD_PRIORITY_NORMAL 0 +#define BRW_THREAD_PRIORITY_HIGH 1 + +#define BRW_TILEWALK_XMAJOR 0 +#define BRW_TILEWALK_YMAJOR 1 + +#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +/* Execution Unit (EU) defines + */ + +#define BRW_ALIGN_1 0 +#define BRW_ALIGN_16 1 + +#define BRW_ADDRESS_DIRECT 0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define BRW_CHANNEL_X 0 +#define BRW_CHANNEL_Y 1 +#define BRW_CHANNEL_Z 2 +#define BRW_CHANNEL_W 3 + +#define BRW_COMPRESSION_NONE 0 +#define 
BRW_COMPRESSION_2NDHALF 1 +#define BRW_COMPRESSION_COMPRESSED 2 + +#define BRW_CONDITIONAL_NONE 0 +#define BRW_CONDITIONAL_Z 1 +#define BRW_CONDITIONAL_NZ 2 +#define BRW_CONDITIONAL_EQ 1 /* Z */ +#define BRW_CONDITIONAL_NEQ 2 /* NZ */ +#define BRW_CONDITIONAL_G 3 +#define BRW_CONDITIONAL_GE 4 +#define BRW_CONDITIONAL_L 5 +#define BRW_CONDITIONAL_LE 6 +#define BRW_CONDITIONAL_R 7 +#define BRW_CONDITIONAL_O 8 +#define BRW_CONDITIONAL_U 9 + +#define BRW_DEBUG_NONE 0 +#define BRW_DEBUG_BREAKPOINT 1 + +#define BRW_DEPENDENCY_NORMAL 0 +#define BRW_DEPENDENCY_NOTCLEARED 1 +#define BRW_DEPENDENCY_NOTCHECKED 2 +#define BRW_DEPENDENCY_DISABLE 3 + +#define BRW_EXECUTE_1 0 +#define BRW_EXECUTE_2 1 +#define BRW_EXECUTE_4 2 +#define BRW_EXECUTE_8 3 +#define BRW_EXECUTE_16 4 +#define BRW_EXECUTE_32 5 + +#define BRW_HORIZONTAL_STRIDE_0 0 +#define BRW_HORIZONTAL_STRIDE_1 1 +#define BRW_HORIZONTAL_STRIDE_2 2 +#define BRW_HORIZONTAL_STRIDE_4 3 + +#define BRW_INSTRUCTION_NORMAL 0 +#define BRW_INSTRUCTION_SATURATE 1 + +#define BRW_MASK_ENABLE 0 +#define BRW_MASK_DISABLE 1 + +#define BRW_OPCODE_MOV 1 +#define BRW_OPCODE_SEL 2 +#define BRW_OPCODE_NOT 4 +#define BRW_OPCODE_AND 5 +#define BRW_OPCODE_OR 6 +#define BRW_OPCODE_XOR 7 +#define BRW_OPCODE_SHR 8 +#define BRW_OPCODE_SHL 9 +#define BRW_OPCODE_RSR 10 +#define BRW_OPCODE_RSL 11 +#define BRW_OPCODE_ASR 12 +#define BRW_OPCODE_CMP 16 +#define BRW_OPCODE_CMPN 17 +#define BRW_OPCODE_JMPI 32 +#define BRW_OPCODE_IF 34 +#define BRW_OPCODE_IFF 35 +#define BRW_OPCODE_ELSE 36 +#define BRW_OPCODE_ENDIF 37 +#define BRW_OPCODE_DO 38 +#define BRW_OPCODE_WHILE 39 +#define BRW_OPCODE_BREAK 40 +#define BRW_OPCODE_CONTINUE 41 +#define BRW_OPCODE_HALT 42 +#define BRW_OPCODE_MSAVE 44 +#define BRW_OPCODE_MRESTORE 45 +#define BRW_OPCODE_PUSH 46 +#define BRW_OPCODE_POP 47 +#define BRW_OPCODE_WAIT 48 +#define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_ADD 64 +#define BRW_OPCODE_MUL 65 +#define BRW_OPCODE_AVG 66 +#define BRW_OPCODE_FRC 67 +#define BRW_OPCODE_RNDU 
68 +#define BRW_OPCODE_RNDD 69 +#define BRW_OPCODE_RNDE 70 +#define BRW_OPCODE_RNDZ 71 +#define BRW_OPCODE_MAC 72 +#define BRW_OPCODE_MACH 73 +#define BRW_OPCODE_LZD 74 +#define BRW_OPCODE_SAD2 80 +#define BRW_OPCODE_SADA2 81 +#define BRW_OPCODE_DP4 84 +#define BRW_OPCODE_DPH 85 +#define BRW_OPCODE_DP3 86 +#define BRW_OPCODE_DP2 87 +#define BRW_OPCODE_DPA2 88 +#define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_NOP 126 + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 +#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? 
*/ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + + + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + +#define BRW_MESSAGE_TARGET_NULL 0 +#define BRW_MESSAGE_TARGET_MATH 1 +#define BRW_MESSAGE_TARGET_SAMPLER 2 +#define BRW_MESSAGE_TARGET_GATEWAY 3 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 +#define 
BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_URB 6 +#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3 + +/* for IGDNG only */ +#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 +#define BRW_SAMPLER_SIMD_MODE_SIMD8 1 +#define BRW_SAMPLER_SIMD_MODE_SIMD16 2 +#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 + +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define 
BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_POW 10 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define 
BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define BRW_MATH_SATURATE_SATURATE 1 + +#define BRW_MATH_DATA_VECTOR 0 +#define BRW_MATH_DATA_SCALAR 1 + +#define BRW_URB_OPCODE_WRITE 0 + +#define BRW_URB_SWIZZLE_NONE 0 +#define BRW_URB_SWIZZLE_INTERLEAVE 1 +#define BRW_URB_SWIZZLE_TRANSPOSE 2 + +#define BRW_SCRATCH_SPACE_SIZE_1K 0 +#define BRW_SCRATCH_SPACE_SIZE_2K 1 +#define BRW_SCRATCH_SPACE_SIZE_4K 2 +#define BRW_SCRATCH_SPACE_SIZE_8K 3 +#define BRW_SCRATCH_SPACE_SIZE_16K 4 +#define BRW_SCRATCH_SPACE_SIZE_32K 5 +#define BRW_SCRATCH_SPACE_SIZE_64K 6 +#define BRW_SCRATCH_SPACE_SIZE_128K 7 +#define BRW_SCRATCH_SPACE_SIZE_256K 8 +#define BRW_SCRATCH_SPACE_SIZE_512K 9 +#define BRW_SCRATCH_SPACE_SIZE_1M 10 +#define BRW_SCRATCH_SPACE_SIZE_2M 11 + + + + +#define CMD_URB_FENCE 0x6000 +#define CMD_CS_URB_STATE 0x6001 +#define CMD_CONST_BUFFER 0x6002 + +#define CMD_STATE_BASE_ADDRESS 0x6101 +#define CMD_STATE_INSN_POINTER 0x6102 +#define CMD_PIPELINE_SELECT_965 0x6104 +#define CMD_PIPELINE_SELECT_GM45 0x6904 + +#define CMD_PIPELINED_STATE_POINTERS 0x7800 +#define CMD_BINDING_TABLE_PTRS 0x7801 + +#define CMD_VERTEX_BUFFER 0x7808 +# define BRW_VB0_INDEX_SHIFT 27 +# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) +# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) +# define BRW_VB0_PITCH_SHIFT 0 + +#define CMD_VERTEX_ELEMENT 0x7809 +# define BRW_VE0_INDEX_SHIFT 27 +# define BRW_VE0_FORMAT_SHIFT 16 +# define BRW_VE0_VALID (1 << 26) +# define BRW_VE0_SRC_OFFSET_SHIFT 0 +# define BRW_VE1_COMPONENT_NOSTORE 0 +# define BRW_VE1_COMPONENT_STORE_SRC 1 +# define BRW_VE1_COMPONENT_STORE_0 2 +# define BRW_VE1_COMPONENT_STORE_1_FLT 3 +# define BRW_VE1_COMPONENT_STORE_1_INT 4 +# define BRW_VE1_COMPONENT_STORE_VID 5 +# define BRW_VE1_COMPONENT_STORE_IID 6 +# define BRW_VE1_COMPONENT_STORE_PID 7 +# define 
BRW_VE1_COMPONENT_0_SHIFT 28 +# define BRW_VE1_COMPONENT_1_SHIFT 24 +# define BRW_VE1_COMPONENT_2_SHIFT 20 +# define BRW_VE1_COMPONENT_3_SHIFT 16 +# define BRW_VE1_DST_OFFSET_SHIFT 0 + +#define CMD_INDEX_BUFFER 0x780a +#define CMD_VF_STATISTICS_965 0x780b +#define CMD_VF_STATISTICS_GM45 0x680b + +#define CMD_DRAW_RECT 0x7900 +#define CMD_BLEND_CONSTANT_COLOR 0x7901 +#define CMD_CHROMA_KEY 0x7904 +#define CMD_DEPTH_BUFFER 0x7905 +#define CMD_POLY_STIPPLE_OFFSET 0x7906 +#define CMD_POLY_STIPPLE_PATTERN 0x7907 +#define CMD_LINE_STIPPLE_PATTERN 0x7908 +#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 +#define CMD_AA_LINE_PARAMETERS 0x790a + +#define CMD_PIPE_CONTROL 0x7a00 + +#define CMD_3D_PRIM 0x7b00 + +#define CMD_MI_FLUSH 0x0200 + + +/* Various values from the R0 vertex header: + */ +#define R02_PRIM_END 0x1 +#define R02_PRIM_START 0x2 + +#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \ + (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */ + + + +#endif diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c new file mode 100644 index 00000000000..65db27248b1 --- /dev/null +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -0,0 +1,922 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. 
+ * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <stdarg.h> + +#include "brw_disasm.h" +#include "brw_structs.h" +#include "brw_reg.h" +#include "brw_defines.h" + +struct { + char *name; + int nsrc; + int ndst; +} opcode[128] = { + [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, + + [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, + 
/*
 * Lookup tables mapping raw instruction field values to the text the
 * disassembler prints.  All of them are consumed through control(): an
 * empty string prints nothing, and a NULL slot (any index without an
 * initializer) is reported as an invalid encoding.
 */

/* Source modifiers. */
char *negate[2] = {
    [0] = "",
    [1] = "-",
};

char *_abs[2] = {
    [0] = "",
    [1] = "(abs)",
};

/* Region description: the field holds an encoding, the table the stride. */
char *vert_stride[16] = {
    [0]  = "0",
    [1]  = "1",
    [2]  = "2",
    [3]  = "4",
    [4]  = "8",
    [5]  = "16",
    [6]  = "32",
    [15] = "VxH",
};

char *width[8] = {
    [0] = "1",
    [1] = "2",
    [2] = "4",
    [3] = "8",
    [4] = "16",
};

char *horiz_stride[4] = {
    [0] = "0",
    [1] = "1",
    [2] = "2",
    [3] = "4"
};

/* Align16 swizzle channel names. */
char *chan_sel[4] = {
    [0] = "x",
    [1] = "y",
    [2] = "z",
    [3] = "w",
};

/* Every slot is NULL; not referenced by the code visible in this file. */
char *dest_condmod[16] = {
    [0] = NULL
};

char *debug_ctrl[2] = {
    [0] = "",
    [1] = ".breakpoint"
};

char *saturate[2] = {
    [0] = "",
    [1] = ".sat"
};

char *exec_size[8] = {
    [0] = "1",
    [1] = "2",
    [2] = "4",
    [3] = "8",
    [4] = "16",
    [5] = "32"
};

/* Predication. */
char *pred_inv[2] = {
    [0] = "+",
    [1] = "-"
};

char *pred_ctrl_align16[16] = {
    [1] = "",
    [2] = ".x",
    [3] = ".y",
    [4] = ".z",
    [5] = ".w",
    [6] = ".any4h",
    [7] = ".all4h",
};

char *pred_ctrl_align1[16] = {
    [1]  = "",
    [2]  = ".anyv",
    [3]  = ".allv",
    [4]  = ".any2h",
    [5]  = ".all2h",
    [6]  = ".any4h",
    [7]  = ".all4h",
    [8]  = ".any8h",
    [9]  = ".all8h",
    [10] = ".any16h",
    [11] = ".all16h",
};

/* Instruction option fields printed inside the trailing "{ ... }". */
/* NOTE(review): slot 1 (BRW_THREAD_ATOMIC in brw_defines.h) has no name and
 * is therefore reported as invalid -- confirm that is intended. */
char *thread_ctrl[4] = {
    [0] = "",
    [2] = "switch"
};

char *compr_ctrl[4] = {
    [0] = "",
    [1] = "sechalf",
    [2] = "compr",
};

char *dep_ctrl[4] = {
    [0] = "",
    [1] = "NoDDClr",
    [2] = "NoDDChk",
    [3] = "NoDDClr,NoDDChk",
};

char *mask_ctrl[4] = {
    [0] = "",
    [1] = "nomask",
};

char *access_mode[2] = {
    [0] = "align1",
    [1] = "align16",
};

/* Register data-type suffixes, indexed by BRW_REGISTER_TYPE_*. */
char *reg_encoding[8] = {
    [0] = "UD",
    [1] = "D",
    [2] = "UW",
    [3] = "W",
    [4] = "UB",
    [5] = "B",
    [7] = "F"
};

/*
 * Immediate data-type suffixes.  The original initialised index 5 twice
 * ("VF" then "V"), so the later designator silently replaced "VF" and
 * index 6 stayed NULL.  brw_defines.h gives BRW_REGISTER_TYPE_V the value
 * 6, so the packed-int-vector name belongs in slot 6.
 */
char *imm_encoding[8] = {
    [0] = "UD",
    [1] = "D",
    [2] = "UW",
    [3] = "W",
    [5] = "VF",
    [6] = "V",
    [7] = "F"
};

/* Register-file prefixes used by reg() for non-architecture registers. */
char *reg_file[4] = {
    [0] = "A",
    [1] = "g",
    [2] = "m",
    [3] = "imm",
};
*writemask[16] = { + [0x0] = ".", + [0x1] = ".x", + [0x2] = ".y", + [0x3] = ".xy", + [0x4] = ".z", + [0x5] = ".xz", + [0x6] = ".yz", + [0x7] = ".xyz", + [0x8] = ".w", + [0x9] = ".xw", + [0xa] = ".yw", + [0xb] = ".xyw", + [0xc] = ".zw", + [0xd] = ".xzw", + [0xe] = ".yzw", + [0xf] = "", +}; + +char *end_of_thread[2] = { + [0] = "", + [1] = "EOT" +}; + +char *target_function[16] = { + [BRW_MESSAGE_TARGET_NULL] = "null", + [BRW_MESSAGE_TARGET_MATH] = "math", + [BRW_MESSAGE_TARGET_SAMPLER] = "sampler", + [BRW_MESSAGE_TARGET_GATEWAY] = "gateway", + [BRW_MESSAGE_TARGET_DATAPORT_READ] = "read", + [BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write", + [BRW_MESSAGE_TARGET_URB] = "urb", + [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" +}; + +char *math_function[16] = { + [BRW_MATH_FUNCTION_INV] = "inv", + [BRW_MATH_FUNCTION_LOG] = "log", + [BRW_MATH_FUNCTION_EXP] = "exp", + [BRW_MATH_FUNCTION_SQRT] = "sqrt", + [BRW_MATH_FUNCTION_RSQ] = "rsq", + [BRW_MATH_FUNCTION_SIN] = "sin", + [BRW_MATH_FUNCTION_COS] = "cos", + [BRW_MATH_FUNCTION_SINCOS] = "sincos", + [BRW_MATH_FUNCTION_TAN] = "tan", + [BRW_MATH_FUNCTION_POW] = "pow", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intmod", + [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intdiv", +}; + +char *math_saturate[2] = { + [0] = "", + [1] = "sat" +}; + +char *math_signed[2] = { + [0] = "", + [1] = "signed" +}; + +char *math_scalar[2] = { + [0] = "", + [1] = "scalar" +}; + +char *math_precision[2] = { + [0] = "", + [1] = "partial_precision" +}; + +char *urb_swizzle[4] = { + [BRW_URB_SWIZZLE_NONE] = "", + [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", + [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose", +}; + +char *urb_allocate[2] = { + [0] = "", + [1] = "allocate" +}; + +char *urb_used[2] = { + [0] = "", + [1] = "used" +}; + +char *urb_complete[2] = { + [0] = "", + [1] = "complete" +}; + +char *sampler_target_format[4] = { + [0] = "F", + [2] = "UD", + [3] = "D" +}; + + +static int 
column; + +static int string (FILE *file, char *string) +{ + fputs (string, file); + column += strlen (string); + return 0; +} + +static int format (FILE *f, char *format, ...) +{ + char buf[1024]; + va_list args; + va_start (args, format); + + vsnprintf (buf, sizeof (buf) - 1, format, args); + string (f, buf); + return 0; +} + +static int newline (FILE *f) +{ + putc ('\n', f); + column = 0; + return 0; +} + +static int pad (FILE *f, int c) +{ + do + string (f, " "); + while (column < c); + return 0; +} + +static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space) +{ + if (!ctrl[id]) { + fprintf (file, "*** invalid %s value %d ", + name, id); + return 1; + } + if (ctrl[id][0]) + { + if (space && *space) + string (file, " "); + string (file, ctrl[id]); + if (space) + *space = 1; + } + return 0; +} + +static int print_opcode (FILE *file, int id) +{ + if (!opcode[id].name) { + format (file, "*** invalid opcode value %d ", id); + return 1; + } + string (file, opcode[id].name); + return 0; +} + +static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) +{ + int err = 0; + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (_reg_nr & 0xf0) { + case BRW_ARF_NULL: + string (file, "null"); + return -1; + case BRW_ARF_ADDRESS: + format (file, "a%d", _reg_nr & 0x0f); + break; + case BRW_ARF_ACCUMULATOR: + format (file, "acc%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK: + format (file, "mask%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK_STACK: + format (file, "msd%d", _reg_nr & 0x0f); + break; + case BRW_ARF_STATE: + format (file, "sr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_CONTROL: + format (file, "cr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_NOTIFICATION_COUNT: + format (file, "n%d", _reg_nr & 0x0f); + break; + case BRW_ARF_IP: + string (file, "ip"); + return -1; + break; + default: + format (file, "ARF%d", _reg_nr); + break; + } + } else { + err |= control (file, "src reg file", reg_file, _reg_file, NULL); + format (file, 
"%d", _reg_nr); + } + return err; +} + +static int dest (FILE *file, const struct brw_instruction *inst) +{ + int err = 0; + + if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da1.dest_subreg_nr) + format (file, ".%d", inst->bits1.da1.dest_subreg_nr); + format (file, "<%d>", inst->bits1.da1.dest_horiz_stride); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); + } + else + { + string (file, "g[a0"); + if (inst->bits1.ia1.dest_subreg_nr) + format (file, ".%d", inst->bits1.ia1.dest_subreg_nr); + if (inst->bits1.ia1.dest_indirect_offset) + format (file, " %d", inst->bits1.ia1.dest_indirect_offset); + string (file, "]"); + format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL); + } + } + else + { + if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da16.dest_subreg_nr) + format (file, ".%d", inst->bits1.da16.dest_subreg_nr); + string (file, "<1>"); + err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); + } + else + { + err = 1; + string (file, "Indirect align16 address mode not supported"); + } + } + + return 0; +} + +static int src_align1_region (FILE *file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride) +{ + int err = 0; + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ","); + err |= control (file, "width", width, _width, NULL); + string (file, ","); + err |= control (file, "horiz_stride", 
horiz_stride, _horiz_stride, NULL); + string (file, ">"); + return err; +} + +static int src_da1 (FILE *file, GLuint type, GLuint _reg_file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride, + GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, reg_num); + if (err == -1) + return 0; + if (sub_reg_num) + format (file, ".%d", sub_reg_num); + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_ia1 (FILE *file, + GLuint type, + GLuint _reg_file, + GLint _addr_imm, + GLuint _addr_subreg_nr, + GLuint _negate, + GLuint __abs, + GLuint _addr_mode, + GLuint _horiz_stride, + GLuint _width, + GLuint _vert_stride) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + string (file, "g[a0"); + if (_addr_subreg_nr) + format (file, ".%d", _addr_subreg_nr); + if (_addr_imm) + format (file, " %d", _addr_imm); + string (file, "]"); + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_da16 (FILE *file, + GLuint _reg_type, + GLuint _reg_file, + GLuint _vert_stride, + GLuint _reg_nr, + GLuint _subreg_nr, + GLuint __abs, + GLuint _negate, + GLuint swz_x, + GLuint swz_y, + GLuint swz_z, + GLuint swz_w) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, _reg_nr); + if (err == -1) + return 0; + if (_subreg_nr) + format (file, ".%d", _subreg_nr); + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ",1,1>"); + err |= control (file, "src da16 
reg type", reg_encoding, _reg_type, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + + +static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) { + switch (type) { + case BRW_REGISTER_TYPE_UD: + format (file, "0x%08xUD", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_D: + format (file, "%dD", inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UW: + format (file, "0x%04xUW", (uint16_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_W: + format (file, "%dW", (int16_t) inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UB: + format (file, "0x%02xUB", (int8_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_VF: + format (file, "Vector Float"); + break; + case BRW_REGISTER_TYPE_V: + format (file, "0x%08xV", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_F: + format (file, "%-gF", inst->bits3.f); + } + return 0; +} + +static int src0 (FILE *file, const struct brw_instruction *inst) +{ + if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src0_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src0_reg_type, + inst->bits1.da1.src0_reg_file, + 
inst->bits2.da1.src0_vert_stride, + inst->bits2.da1.src0_width, + inst->bits2.da1.src0_horiz_stride, + inst->bits2.da1.src0_reg_nr, + inst->bits2.da1.src0_subreg_nr, + inst->bits2.da1.src0_abs, + inst->bits2.da1.src0_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src0_reg_type, + inst->bits1.ia1.src0_reg_file, + inst->bits2.ia1.src0_indirect_offset, + inst->bits2.ia1.src0_subreg_nr, + inst->bits2.ia1.src0_negate, + inst->bits2.ia1.src0_abs, + inst->bits2.ia1.src0_address_mode, + inst->bits2.ia1.src0_horiz_stride, + inst->bits2.ia1.src0_width, + inst->bits2.ia1.src0_vert_stride); + } + } + else + { + if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src0_reg_type, + inst->bits1.da16.src0_reg_file, + inst->bits2.da16.src0_vert_stride, + inst->bits2.da16.src0_reg_nr, + inst->bits2.da16.src0_subreg_nr, + inst->bits2.da16.src0_abs, + inst->bits2.da16.src0_negate, + inst->bits2.da16.src0_swz_x, + inst->bits2.da16.src0_swz_y, + inst->bits2.da16.src0_swz_z, + inst->bits2.da16.src0_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +static int src1 (FILE *file, const struct brw_instruction *inst) +{ + if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src1_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src1_reg_type, + inst->bits1.da1.src1_reg_file, + inst->bits3.da1.src1_vert_stride, + inst->bits3.da1.src1_width, + inst->bits3.da1.src1_horiz_stride, + inst->bits3.da1.src1_reg_nr, + inst->bits3.da1.src1_subreg_nr, + inst->bits3.da1.src1_abs, + inst->bits3.da1.src1_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src1_reg_type, + inst->bits1.ia1.src1_reg_file, + inst->bits3.ia1.src1_indirect_offset, + inst->bits3.ia1.src1_subreg_nr, + 
inst->bits3.ia1.src1_negate, + inst->bits3.ia1.src1_abs, + inst->bits3.ia1.src1_address_mode, + inst->bits3.ia1.src1_horiz_stride, + inst->bits3.ia1.src1_width, + inst->bits3.ia1.src1_vert_stride); + } + } + else + { + if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src1_reg_type, + inst->bits1.da16.src1_reg_file, + inst->bits3.da16.src1_vert_stride, + inst->bits3.da16.src1_reg_nr, + inst->bits3.da16.src1_subreg_nr, + inst->bits3.da16.src1_abs, + inst->bits3.da16.src1_negate, + inst->bits3.da16.src1_swz_x, + inst->bits3.da16.src1_swz_y, + inst->bits3.da16.src1_swz_z, + inst->bits3.da16.src1_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) +{ + int err = 0; + int space = 0; + + if (inst->header.predicate_control) { + string (file, "("); + err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL); + string (file, "f0"); + if (inst->bits2.da1.flag_reg_nr) + format (file, ".%d", inst->bits2.da1.flag_reg_nr); + if (inst->header.access_mode == BRW_ALIGN_1) + err |= control (file, "predicate control align1", pred_ctrl_align1, + inst->header.predicate_control, NULL); + else + err |= control (file, "predicate control align16", pred_ctrl_align16, + inst->header.predicate_control, NULL); + string (file, ") "); + } + + err |= print_opcode (file, inst->header.opcode); + err |= control (file, "saturate", saturate, inst->header.saturate, NULL); + err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL); + + if (inst->header.opcode != BRW_OPCODE_SEND) + err |= control (file, "conditional modifier", conditional_modifier, + inst->header.destreg__conditionalmod, NULL); + + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "("); + err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL); + 
string (file, ")"); + } + + if (inst->header.opcode == BRW_OPCODE_SEND) + format (file, " %d", inst->header.destreg__conditionalmod); + + if (opcode[inst->header.opcode].ndst > 0) { + pad (file, 16); + err |= dest (file, inst); + } + if (opcode[inst->header.opcode].nsrc > 0) { + pad (file, 32); + err |= src0 (file, inst); + } + if (opcode[inst->header.opcode].nsrc > 1) { + pad (file, 48); + err |= src1 (file, inst); + } + + if (inst->header.opcode == BRW_OPCODE_SEND) { + newline (file); + pad (file, 16); + space = 0; + err |= control (file, "target function", target_function, + inst->bits3.generic.msg_target, &space); + switch (inst->bits3.generic.msg_target) { + case BRW_MESSAGE_TARGET_MATH: + err |= control (file, "math function", math_function, + inst->bits3.math.function, &space); + err |= control (file, "math saturate", math_saturate, + inst->bits3.math.saturate, &space); + err |= control (file, "math signed", math_signed, + inst->bits3.math.int_type, &space); + err |= control (file, "math scalar", math_scalar, + inst->bits3.math.data_type, &space); + err |= control (file, "math precision", math_precision, + inst->bits3.math.precision, &space); + break; + case BRW_MESSAGE_TARGET_SAMPLER: + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + err |= control (file, "sampler target format", sampler_target_format, + inst->bits3.sampler.return_format, NULL); + string (file, ")"); + break; + case BRW_MESSAGE_TARGET_DATAPORT_WRITE: + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.pixel_scoreboard_clear << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + break; + case BRW_MESSAGE_TARGET_URB: + format (file, " %d", inst->bits3.urb.offset); + space = 1; + err |= control (file, "urb swizzle", urb_swizzle, + inst->bits3.urb.swizzle_control, &space); + err |= control (file, "urb allocate", 
urb_allocate, + inst->bits3.urb.allocate, &space); + err |= control (file, "urb used", urb_used, + inst->bits3.urb.used, &space); + err |= control (file, "urb complete", urb_complete, + inst->bits3.urb.complete, &space); + break; + case BRW_MESSAGE_TARGET_THREAD_SPAWNER: + break; + default: + format (file, "unsupported target %d", inst->bits3.generic.msg_target); + break; + } + if (space) + string (file, " "); + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } + pad (file, 64); + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "{"); + space = 1; + err |= control(file, "access mode", access_mode, inst->header.access_mode, &space); + err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space); + err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); + err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space); + err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space); + if (inst->header.opcode == BRW_OPCODE_SEND) + err |= control (file, "end of thread", end_of_thread, + inst->bits3.generic.end_of_thread, &space); + if (space) + string (file, " "); + string (file, "}"); + } + string (file, ";"); + newline (file); + return err; +} + + +int brw_disasm (FILE *file, + const struct brw_instruction *inst, + unsigned count) +{ + int i, err; + + for (i = 0; i < count; i++) { + err = brw_disasm_insn(stderr, &inst[i]); + if (err) + return err; + } + + fprintf(file, "\n"); + return 0; +} + diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h new file mode 100644 index 00000000000..77d402d35e6 --- /dev/null +++ b/src/gallium/drivers/i965/brw_disasm.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for 
#ifndef BRW_DISASM_H
#define BRW_DISASM_H

/* FILE is used in the prototypes below; include its definition so this
 * header can be included on its own. */
#include <stdio.h>

struct brw_instruction;

/*
 * Disassemble a single instruction to file.
 * Returns 0 on success, non-zero if any field held an invalid encoding.
 */
int brw_disasm_insn (FILE *file, const struct brw_instruction *inst);

/*
 * Disassemble count consecutive instructions starting at inst.
 * Stops at the first instruction that reports an error and returns that
 * error; returns 0 otherwise.
 */
int brw_disasm (FILE *file,
                const struct brw_instruction *inst,
                unsigned count);

#endif
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_prim.h" +#include "util/u_upload_mgr.h" + +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_debug.h" +#include "brw_screen.h" + +#include "brw_batchbuffer.h" + + +static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = { + _3DPRIM_POINTLIST, + _3DPRIM_LINELIST, + _3DPRIM_LINELOOP, + _3DPRIM_LINESTRIP, + _3DPRIM_TRILIST, + _3DPRIM_TRISTRIP, + _3DPRIM_TRIFAN, + _3DPRIM_QUADLIST, + _3DPRIM_QUADSTRIP, + _3DPRIM_POLYGON +}; + + + +/* When the primitive changes, set a state bit and re-validate. Not + * the nicest and would rather deal with this by having all the + * programs be immune to the active primitive (ie. cope with all + * possibilities). That may not be realistic however. 
+ */ +static int brw_set_prim(struct brw_context *brw, unsigned prim ) +{ + + if (BRW_DEBUG & DEBUG_PRIMS) + debug_printf("PRIM: %s\n", u_prim_name(prim)); + + if (prim != brw->primitive) { + unsigned reduced_prim; + + brw->primitive = prim; + brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; + + reduced_prim = u_reduced_prim(prim); + if (reduced_prim != brw->reduced_primitive) { + brw->reduced_primitive = reduced_prim; + brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; + } + } + + return prim_to_hw_prim[prim]; +} + + + +static int brw_emit_prim(struct brw_context *brw, + unsigned start, + unsigned count, + boolean indexed, + uint32_t hw_prim) +{ + struct brw_3d_primitive prim_packet; + int ret; + + if (BRW_DEBUG & DEBUG_PRIMS) + debug_printf("%s start %d count %d indexed %d hw_prim %d\n", + __FUNCTION__, start, count, indexed, hw_prim); + + prim_packet.header.opcode = CMD_3D_PRIM; + prim_packet.header.length = sizeof(prim_packet)/4 - 2; + prim_packet.header.pad = 0; + prim_packet.header.topology = hw_prim; + prim_packet.header.indexed = indexed; + + prim_packet.verts_per_instance = count; + prim_packet.start_vert_location = start; + if (indexed) + prim_packet.start_vert_location += brw->ib.start_vertex_offset; + prim_packet.instance_count = 1; + prim_packet.start_instance_location = 0; + prim_packet.base_vert_location = 0; /* prim->basevertex; XXX: add this to gallium */ + + + /* If we're set to always flush, do it before and after the primitive emit. + * We want to catch both missed flushes that hurt instruction/state cache + * and missed flushes of the render cache as it heads to other parts of + * the besides the draw code. 
+ */ + if (0) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); + ADVANCE_BATCH(); + } + if (prim_packet.verts_per_instance) { + ret = brw_batchbuffer_data( brw->batch, &prim_packet, + sizeof(prim_packet), LOOP_CLIPRECTS); + if (ret) + return ret; + } + if (0) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); + ADVANCE_BATCH(); + } + + return 0; +} + + +/* May fail if out of video memory for texture or vbo upload, or on + * fallback conditions. + */ +static int +try_draw_range_elements(struct brw_context *brw, + struct pipe_buffer *index_buffer, + unsigned hw_prim, + unsigned start, unsigned count) +{ + int ret; + + ret = brw_validate_state(brw); + if (ret) + return ret; + + /* Check that we can fit our state in with our existing batchbuffer, or + * flush otherwise. + */ + ret = brw->sws->check_aperture_space(brw->sws, + brw->state.validated_bos, + brw->state.validated_bo_count); + if (ret) + return ret; + + ret = brw_upload_state(brw); + if (ret) + return ret; + + ret = brw_emit_prim(brw, start, count, index_buffer != NULL, hw_prim); + if (ret) + return ret; + + if (brw->flags.always_flush_batch) + brw_context_flush( brw ); + + return 0; +} + + +static boolean +brw_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) +{ + struct brw_context *brw = brw_context(pipe); + int ret; + uint32_t hw_prim; + + hw_prim = brw_set_prim(brw, mode); + + if (BRW_DEBUG & DEBUG_PRIMS) + debug_printf("PRIM: %s start %d count %d index_buffer %p\n", + u_prim_name(mode), start, count, (void *)index_buffer); + + /* Potentially trigger upload of new index buffer. + * + * XXX: do we need to go through state validation to achieve this? + * Could just call upload code directly. 
+ */ + if (brw->curr.index_buffer != index_buffer || + brw->curr.index_size != index_size) { + pipe_buffer_reference( &brw->curr.index_buffer, index_buffer ); + brw->curr.index_size = index_size; + brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER; + } + + /* XXX: do we really care? + */ + if (brw->curr.min_index != min_index || + brw->curr.max_index != max_index) + { + brw->curr.min_index = min_index; + brw->curr.max_index = max_index; + brw->state.dirty.mesa |= PIPE_NEW_INDEX_RANGE; + } + + + /* Make a first attempt at drawing: + */ + ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); + + /* Otherwise, flush and retry: + */ + if (ret != 0) { + brw_context_flush( brw ); + ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); + assert(ret == 0); + } + + return TRUE; +} + +static boolean +brw_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned mode, + unsigned start, unsigned count) +{ + return brw_draw_range_elements( pipe, index_buffer, + index_size, + 0, 0xffffffff, + mode, + start, count ); +} + +static boolean +brw_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return brw_draw_elements(pipe, NULL, 0, mode, start, count); +} + + + +boolean brw_draw_init( struct brw_context *brw ) +{ + /* Register our drawing function: + */ + brw->base.draw_arrays = brw_draw_arrays; + brw->base.draw_elements = brw_draw_elements; + brw->base.draw_range_elements = brw_draw_range_elements; + + /* Create helpers for uploading data in user buffers: + */ + brw->vb.upload_vertex = u_upload_create( brw->base.screen, + 128 * 1024, + 64, + PIPE_BUFFER_USAGE_VERTEX ); + if (brw->vb.upload_vertex == NULL) + return FALSE; + + brw->vb.upload_index = u_upload_create( brw->base.screen, + 32 * 1024, + 64, + PIPE_BUFFER_USAGE_INDEX ); + if (brw->vb.upload_index == NULL) + return FALSE; + + return TRUE; +} + +void brw_draw_cleanup( struct brw_context *brw 
) +{ + u_upload_destroy( brw->vb.upload_vertex ); + u_upload_destroy( brw->vb.upload_index ); + + bo_reference(&brw->ib.bo, NULL); +} diff --git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h new file mode 100644 index 00000000000..8dc5dbce622 --- /dev/null +++ b/src/gallium/drivers/i965/brw_draw.h @@ -0,0 +1,39 @@ + /************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef BRW_DRAW_H +#define BRW_DRAW_H + +#include "brw_types.h" + +struct brw_context; + +boolean brw_draw_init( struct brw_context *brw ); +void brw_draw_cleanup( struct brw_context *brw ); + + +#endif diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c new file mode 100644 index 00000000000..a27da5f1c17 --- /dev/null +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -0,0 +1,542 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "pipe/p_context.h" + +#include "util/u_upload_mgr.h" +#include "util/u_math.h" + +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_screen.h" +#include "brw_batchbuffer.h" +#include "brw_debug.h" + + + + +static unsigned brw_translate_surface_format( unsigned id ) +{ + switch (id) { + case PIPE_FORMAT_R64_FLOAT: + return BRW_SURFACEFORMAT_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return BRW_SURFACEFORMAT_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return BRW_SURFACEFORMAT_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return BRW_SURFACEFORMAT_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return BRW_SURFACEFORMAT_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return BRW_SURFACEFORMAT_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return BRW_SURFACEFORMAT_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return BRW_SURFACEFORMAT_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return BRW_SURFACEFORMAT_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return BRW_SURFACEFORMAT_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return 
BRW_SURFACEFORMAT_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return BRW_SURFACEFORMAT_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return BRW_SURFACEFORMAT_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return BRW_SURFACEFORMAT_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return BRW_SURFACEFORMAT_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return BRW_SURFACEFORMAT_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return BRW_SURFACEFORMAT_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return BRW_SURFACEFORMAT_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return BRW_SURFACEFORMAT_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return BRW_SURFACEFORMAT_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return BRW_SURFACEFORMAT_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return BRW_SURFACEFORMAT_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return BRW_SURFACEFORMAT_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return BRW_SURFACEFORMAT_R8_USCALED; + 
case PIPE_FORMAT_R8G8_USCALED: + return BRW_SURFACEFORMAT_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return BRW_SURFACEFORMAT_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return BRW_SURFACEFORMAT_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return BRW_SURFACEFORMAT_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return BRW_SURFACEFORMAT_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; + + default: + assert(0); + return 0; + } +} + +static unsigned get_index_type(int type) +{ + switch (type) { + case 1: return BRW_INDEX_BYTE; + case 2: return BRW_INDEX_WORD; + case 4: return BRW_INDEX_DWORD; + default: assert(0); return 0; + } +} + + +static int brw_prepare_vertices(struct brw_context *brw) +{ + unsigned int min_index = brw->curr.min_index; + unsigned int max_index = brw->curr.max_index; + GLuint i; + int ret; + + if (BRW_DEBUG & DEBUG_VERTS) + debug_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); + + + for (i = 0; i < brw->curr.num_vertex_buffers; i++) { + struct pipe_vertex_buffer *vb = &brw->curr.vertex_buffer[i]; + struct brw_winsys_buffer *bo; + struct pipe_buffer *upload_buf = NULL; + unsigned offset; + + if (BRW_DEBUG & DEBUG_VERTS) + debug_printf("%s vb[%d] user:%d offset:0x%x sz:0x%x stride:0x%x\n", + __FUNCTION__, i, + brw_buffer_is_user_buffer(vb->buffer), + vb->buffer_offset, + vb->buffer->size, + vb->stride); + + if (brw_buffer_is_user_buffer(vb->buffer)) { + + /* XXX: simplify this. 
Stop the state trackers from generating + * zero-stride buffers & have them use additional constants (or + * add support for >1 constant buffer) instead. + */ + unsigned size = (vb->stride == 0 ? + vb->buffer->size - vb->buffer_offset : + MAX2(vb->buffer->size - vb->buffer_offset, + vb->stride * (max_index + 1 - min_index))); + + ret = u_upload_buffer( brw->vb.upload_vertex, + vb->buffer_offset + min_index * vb->stride, + size, + vb->buffer, + &offset, + &upload_buf ); + if (ret) + return ret; + + bo = brw_buffer(upload_buf)->bo; + + assert(offset + size <= bo->size); + } + else + { + offset = vb->buffer_offset; + bo = brw_buffer(vb->buffer)->bo; + } + + assert(offset < bo->size); + + /* Set up post-upload info about this vertex buffer: + */ + brw->vb.vb[i].offset = offset; + brw->vb.vb[i].stride = vb->stride; + brw->vb.vb[i].vertex_count = (vb->stride == 0 ? + 1 : + (bo->size - offset) / vb->stride); + + bo_reference( &brw->vb.vb[i].bo, bo ); + + /* Don't need to retain this reference. We have a reference on + * the underlying winsys buffer: + */ + pipe_buffer_reference( &upload_buf, NULL ); + } + + brw->vb.nr_vb = i; + brw_prepare_query_begin(brw); + + for (i = 0; i < brw->vb.nr_vb; i++) { + brw_add_validated_bo(brw, brw->vb.vb[i].bo); + } + + return 0; +} + +static int brw_emit_vertex_buffers( struct brw_context *brw ) +{ + int i; + + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), just bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. + */ + if (brw->vb.nr_vb == 0) { + if (BRW_DEBUG & DEBUG_VERTS) + debug_printf("%s: no active vertex buffers\n", __FUNCTION__); + + return 0; + } + + /* Emit VB state packets. 
+ */ + BEGIN_BATCH(1 + brw->vb.nr_vb * 4, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_BUFFER << 16) | + ((1 + brw->vb.nr_vb * 4) - 2)); + + for (i = 0; i < brw->vb.nr_vb; i++) { + OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | + BRW_VB0_ACCESS_VERTEXDATA | + (brw->vb.vb[i].stride << BRW_VB0_PITCH_SHIFT)); + OUT_RELOC(brw->vb.vb[i].bo, + BRW_USAGE_VERTEX, + brw->vb.vb[i].offset); + if (BRW_IS_IGDNG(brw)) { + OUT_RELOC(brw->vb.vb[i].bo, + BRW_USAGE_VERTEX, + brw->vb.vb[i].bo->size - 1); + } else + OUT_BATCH(brw->vb.vb[i].stride ? brw->vb.vb[i].vertex_count : 0); + OUT_BATCH(0); /* Instance data step rate */ + } + ADVANCE_BATCH(); + return 0; +} + + + + +static int brw_emit_vertex_elements(struct brw_context *brw) +{ + GLuint nr = brw->curr.num_vertex_elements; + GLuint i; + + brw_emit_query_begin(brw); + + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), emit a single pad + * VERTEX_ELEMENT struct and bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. + */ + if (nr == 0) { + BEGIN_BATCH(3, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); + OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | + (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); + ADVANCE_BATCH(); + return 0; + } + + /* Now emit vertex element (VEP) state packets. 
+ * + */ + BEGIN_BATCH(1 + nr * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr * 2) - 2)); + for (i = 0; i < nr; i++) { + const struct pipe_vertex_element *input = &brw->curr.vertex_element[i]; + uint32_t format = brw_translate_surface_format( input->src_format ); + uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; + + switch (input->nr_components) { + case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; + case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; + case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; + case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; + break; + } + + OUT_BATCH((input->vertex_buffer_index << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (format << BRW_VE0_FORMAT_SHIFT) | + (input->src_offset << BRW_VE0_SRC_OFFSET_SHIFT)); + + if (BRW_IS_IGDNG(brw)) + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); + else + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | + ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); + } + ADVANCE_BATCH(); + return 0; +} + + +static int brw_emit_vertices( struct brw_context *brw ) +{ + int ret; + + ret = brw_emit_vertex_buffers( brw ); + if (ret) + return ret; + + ret = brw_emit_vertex_elements( brw ); + if (ret) + return ret; + + return 0; +} + + +const struct brw_tracked_state brw_vertices = { + .dirty = { + .mesa = (PIPE_NEW_INDEX_RANGE | + PIPE_NEW_VERTEX_BUFFER), + .brw = BRW_NEW_BATCH, + .cache = 0, + }, + .prepare = brw_prepare_vertices, + .emit = brw_emit_vertices, +}; + + +static int brw_prepare_indices(struct brw_context *brw) +{ + struct pipe_buffer *index_buffer = brw->curr.index_buffer; + struct pipe_buffer *upload_buf = NULL; + struct 
brw_winsys_buffer *bo = NULL; + GLuint offset; + GLuint index_size; + GLuint ib_size; + int ret; + + if (index_buffer == NULL) + return 0; + + if (BRW_DEBUG & DEBUG_VERTS) + debug_printf("%s: index_size:%d index_buffer->size:%d\n", + __FUNCTION__, + brw->curr.index_size, + brw->curr.index_buffer->size); + + ib_size = index_buffer->size; + index_size = brw->curr.index_size; + + /* Turn userbuffer into a proper hardware buffer? + */ + if (brw_buffer_is_user_buffer(index_buffer)) { + + ret = u_upload_buffer( brw->vb.upload_index, + 0, + ib_size, + index_buffer, + &offset, + &upload_buf ); + if (ret) + return ret; + + bo = brw_buffer(upload_buf)->bo; + + /* XXX: annotate the userbuffer with the upload information so + * that successive calls don't get re-uploaded. + */ + } + else { + bo = brw_buffer(index_buffer)->bo; + ib_size = bo->size; + offset = 0; + } + + /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading the + * index buffer state when we're just moving the start index of our + * drawing. + * + * In gallium this will happen in the case where successive draw + * calls are made with (distinct?) userbuffers, but the upload_mgr + * places the data into a single winsys buffer. 
+ * + * This statechange doesn't raise any state flags and is always + * just merged into the final draw packet: + */ + if (1) { + assert((offset & (index_size - 1)) == 0); + brw->ib.start_vertex_offset = offset / index_size; + } + + /* These statechanges trigger a new CMD_INDEX_BUFFER packet: + */ + if (brw->ib.bo != bo || + brw->ib.size != ib_size) + { + bo_reference(&brw->ib.bo, bo); + brw->ib.size = ib_size; + brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER; + } + + pipe_buffer_reference( &upload_buf, NULL ); + brw_add_validated_bo(brw, brw->ib.bo); + return 0; +} + +const struct brw_tracked_state brw_indices = { + .dirty = { + .mesa = PIPE_NEW_INDEX_BUFFER, + .brw = 0, + .cache = 0, + }, + .prepare = brw_prepare_indices, +}; + +static int brw_emit_index_buffer(struct brw_context *brw) +{ + /* Emit the indexbuffer packet: + */ + if (brw->ib.bo) + { + struct brw_indexbuffer ib; + + memset(&ib, 0, sizeof(ib)); + + ib.header.bits.opcode = CMD_INDEX_BUFFER; + ib.header.bits.length = sizeof(ib)/4 - 2; + ib.header.bits.index_format = get_index_type(brw->ib.size); + ib.header.bits.cut_index_enable = 0; + + BEGIN_BATCH(4, IGNORE_CLIPRECTS); + OUT_BATCH( ib.header.dword ); + OUT_RELOC(brw->ib.bo, + BRW_USAGE_VERTEX, + brw->ib.offset); + OUT_RELOC(brw->ib.bo, + BRW_USAGE_VERTEX, + brw->ib.offset + brw->ib.size - 1); + OUT_BATCH( 0 ); + ADVANCE_BATCH(); + } + + return 0; +} + +const struct brw_tracked_state brw_index_buffer = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER, + .cache = 0, + }, + .emit = brw_emit_index_buffer, +}; diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c new file mode 100644 index 00000000000..a8fcb5f97eb --- /dev/null +++ b/src/gallium/drivers/i965/brw_eu.c @@ -0,0 +1,262 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_memory.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + +/* How does predicate control work when execution_size != 8? Do I + * need to test/set for 0xffff when execution_size is 16? 
+ */ +void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value ) +{ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + if (value != 0xff) { + if (value != p->flag_value) { + brw_push_insn_state(p); + brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); + p->flag_value = value; + brw_pop_insn_state(p); + } + + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } +} + +void brw_set_predicate_control( struct brw_compile *p, GLuint pc ) +{ + p->current->header.predicate_control = pc; +} + +void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ) +{ + p->current->header.destreg__conditionalmod = conditional; +} + +void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ) +{ + p->current->header.access_mode = access_mode; +} + +void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control ) +{ + p->current->header.compression_control = compression_control; +} + +void brw_set_mask_control( struct brw_compile *p, GLuint value ) +{ + p->current->header.mask_control = value; +} + +void brw_set_saturate( struct brw_compile *p, GLuint value ) +{ + p->current->header.saturate = value; +} + +void brw_push_insn_state( struct brw_compile *p ) +{ + assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); + memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->current++; +} + +void brw_pop_insn_state( struct brw_compile *p ) +{ + assert(p->current != p->stack); + p->current--; +} + + +/*********************************************************************** + */ +void brw_init_compile( struct brw_context *brw, struct brw_compile *p ) +{ + p->brw = brw; + p->nr_insn = 0; + p->current = p->stack; + memset(p->current, 0, sizeof(p->current[0])); + + /* Some defaults? + */ + brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? 
*/ + brw_set_saturate(p, 0); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_predicate_control_flag_value(p, 0xff); +} + + +enum pipe_error brw_get_program( struct brw_compile *p, + const GLuint **data, + GLuint *sz ) +{ + GLuint i; + + for (i = 0; i < 8; i++) + brw_NOP(p); + + /* Is the generated program malformed for some reason? + */ + if (p->error) + return PIPE_ERROR_BAD_INPUT; + + *sz = p->nr_insn * sizeof(struct brw_instruction); + *data = (const GLuint *)p->store; + return PIPE_OK; +} + + + +/** + * Subroutine calls require special attention. + * Mesa instructions may be expanded into multiple hardware instructions + * so the prog_instruction::BranchTarget field can't be used as an index + * into the hardware instructions. + * + * The BranchTarget field isn't needed, however. Mesa's GLSL compiler + * emits CAL and BGNSUB instructions with labels that can be used to map + * subroutine calls to actual subroutine code blocks. + * + * The structures and function here implement patching of CAL instructions + * so they jump to the right subroutine code... + */ + + +/** + * For each OPCODE_BGNSUB we create one of these. + */ +struct brw_eu_label +{ + GLuint label; /**< the label number */ + GLuint position; /**< the position of the brw instruction for this label */ + struct brw_eu_label *next; /**< next in linked list */ +}; + + +/** + * For each OPCODE_CAL we create one of these. + */ +struct brw_eu_call +{ + GLuint call_inst_pos; /**< location of the CAL instruction */ + GLuint label; + struct brw_eu_call *next; /**< next in linked list */ +}; + + +/** + * Called for each OPCODE_BGNSUB. + */ +void +brw_save_label(struct brw_compile *c, unsigned l, GLuint position) +{ + struct brw_eu_label *label = CALLOC_STRUCT(brw_eu_label); + label->label = l; + label->position = position; + label->next = c->first_label; + c->first_label = label; +} + + +/** + * Called for each OPCODE_CAL. 
+ */ +void +brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos) +{ + struct brw_eu_call *call = CALLOC_STRUCT(brw_eu_call); + call->call_inst_pos = call_pos; + call->label = label; + call->next = c->first_call; + c->first_call = call; +} + + +/** + * Lookup a label, return label's position/offset. + */ +static GLuint +brw_lookup_label(struct brw_compile *c, unsigned l) +{ + const struct brw_eu_label *label; + for (label = c->first_label; label; label = label->next) { + if (l == label->label) { + return label->position; + } + } + abort(); /* should never happen */ + return ~0; +} + + +/** + * When we're done generating code, this function is called to resolve + * subroutine calls. + */ +void +brw_resolve_cals(struct brw_compile *c) +{ + const struct brw_eu_call *call; + + for (call = c->first_call; call; call = call->next) { + const GLuint sub_loc = brw_lookup_label(c, call->label); + struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos]; + struct brw_instruction *brw_sub_inst = &c->store[sub_loc]; + GLint offset = brw_sub_inst - brw_call_inst; + + /* patch brw_inst1 to point to brw_inst2 */ + brw_set_src1(brw_call_inst, brw_imm_d(offset * 16)); + } + + /* free linked list of calls */ + { + struct brw_eu_call *call, *next; + for (call = c->first_call; call; call = next) { + next = call->next; + FREE(call); + } + c->first_call = NULL; + } + + /* free linked list of labels */ + { + struct brw_eu_label *label, *next; + for (label = c->first_label; label; label = next) { + next = label->next; + FREE(label); + } + c->first_label = NULL; + } +} diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h new file mode 100644 index 00000000000..af509b2e5f4 --- /dev/null +++ b/src/gallium/drivers/i965/brw_eu.h @@ -0,0 +1,992 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_EU_H +#define BRW_EU_H + +#include "util/u_debug.h" +#include "pipe/p_defines.h" + +#include "brw_structs.h" +#include "brw_defines.h" + +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) + +#define BRW_WRITEMASK_NONE 0x00 +#define BRW_WRITEMASK_X 0x01 +#define BRW_WRITEMASK_Y 0x02 +#define BRW_WRITEMASK_XY 0x03 +#define BRW_WRITEMASK_Z 0x04 +#define BRW_WRITEMASK_XZ 0x05 +#define BRW_WRITEMASK_YZ 0x06 +#define BRW_WRITEMASK_XYZ 0x07 +#define BRW_WRITEMASK_W 0x08 +#define BRW_WRITEMASK_XW 0x09 +#define BRW_WRITEMASK_YW 0x0A +#define BRW_WRITEMASK_XYW 0x0B +#define BRW_WRITEMASK_ZW 0x0C +#define BRW_WRITEMASK_XZW 0x0D +#define BRW_WRITEMASK_YZW 0x0E +#define BRW_WRITEMASK_XYZW 0x0F + + +#define REG_SIZE (8*4) + + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg +{ + GLuint type:4; + GLuint file:2; + GLuint nr:8; + GLuint subnr:5; /* :1 in align16 */ + GLuint negate:1; /* source only */ + GLuint abs:1; /* source only */ + GLuint vstride:4; /* source only */ + GLuint width:3; /* src only, align1 only */ + GLuint hstride:2; /* align1 only */ + GLuint address_mode:1; /* relative addressing, hopefully! 
*/ + GLuint pad0:1; + + union { + struct { + GLuint swizzle:8; /* src only, align16 only */ + GLuint writemask:4; /* dest only, align16 only */ + GLint indirect_offset:10; /* relative addressing offset */ + GLuint pad1:10; /* two dwords total */ + } bits; + + GLfloat f; + GLint d; + GLuint ud; + } dw1; +}; + + +struct brw_indirect { + GLuint addr_subnr:4; + GLint addr_offset:10; + GLuint pad:18; +}; + + +struct brw_eu_label; +struct brw_eu_call; + + + +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN 10000 + +struct brw_compile { + struct brw_instruction store[BRW_EU_MAX_INSN]; + GLuint nr_insn; + + /* Allow clients to push/pop instruction state: + */ + struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + struct brw_instruction *current; + + GLuint flag_value; + GLboolean single_program_flow; + struct brw_context *brw; + + struct brw_eu_label *first_label; /**< linked list of labels */ + struct brw_eu_call *first_call; /**< linked list of CALs */ + + boolean error; +}; + + +void +brw_save_label(struct brw_compile *c, unsigned label, GLuint position); + +void +brw_save_call(struct brw_compile *c, unsigned label, GLuint call_pos); + +void +brw_resolve_cals(struct brw_compile *c); + + + +static INLINE int type_sz( GLuint type ) +{ + switch( type ) { + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_F: + return 4; + case BRW_REGISTER_TYPE_HF: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + return 2; + case BRW_REGISTER_TYPE_UB: + case BRW_REGISTER_TYPE_B: + return 1; + default: + return 0; + } +} + +/** + * Construct a brw_reg. 
+ * \param file one of the BRW_x_REGISTER_FILE values + * \param nr register number/index + * \param subnr register sub number + * \param type one of BRW_REGISTER_TYPE_x + * \param vstride one of BRW_VERTICAL_STRIDE_x + * \param width one of BRW_WIDTH_x + * \param hstride one of BRW_HORIZONTAL_STRIDE_x + * \param swizzle one of BRW_SWIZZLE_x + * \param writemask BRW_WRITEMASK_X/Y/Z/W bitfield + */ +static INLINE struct brw_reg brw_reg( GLuint file, + GLuint nr, + GLuint subnr, + GLuint type, + GLuint vstride, + GLuint width, + GLuint hstride, + GLuint swizzle, + GLuint writemask ) +{ + struct brw_reg reg; + if (type == BRW_GENERAL_REGISTER_FILE) + assert(nr < BRW_MAX_GRF); + else if (type == BRW_MESSAGE_REGISTER_FILE) + assert(nr < BRW_MAX_MRF); + else if (type == BRW_ARCHITECTURE_REGISTER_FILE) + assert(nr <= BRW_ARF_IP); + + reg.type = type; + reg.file = file; + reg.nr = nr; + reg.subnr = subnr * type_sz(type); + reg.negate = 0; + reg.abs = 0; + reg.vstride = vstride; + reg.width = width; + reg.hstride = hstride; + reg.address_mode = BRW_ADDRESS_DIRECT; + reg.pad0 = 0; + + /* Could do better: If the reg is r5.3<0;1,0>, we probably want to + * set swizzle and writemask to W, as the lower bits of subnr will + * be lost when converted to align16. This is probably too much to + * keep track of as you'd want it adjusted by suboffset(), etc. + * Perhaps fix up when converting to align16? 
+ */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + return reg; +} + +/** Construct float[16] register */ +static INLINE struct brw_reg brw_vec16_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + BRW_WRITEMASK_XYZW); +} + +/** Construct float[8] register */ +static INLINE struct brw_reg brw_vec8_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + BRW_WRITEMASK_XYZW); +} + +/** Construct float[4] register */ +static INLINE struct brw_reg brw_vec4_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + BRW_WRITEMASK_XYZW); +} + +/** Construct float[2] register */ +static INLINE struct brw_reg brw_vec2_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + BRW_WRITEMASK_XY); +} + +/** Construct float[1] register */ +static INLINE struct brw_reg brw_vec1_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + BRW_WRITEMASK_X); +} + + +static INLINE struct brw_reg retype( struct brw_reg reg, + GLuint type ) +{ + reg.type = type; + return reg; +} + +static INLINE struct brw_reg suboffset( struct brw_reg reg, + GLuint delta ) +{ + reg.subnr += delta * type_sz(reg.type); + return reg; +} + + +static INLINE struct brw_reg offset( struct brw_reg reg, + GLuint delta 
) +{ + reg.nr += delta; + return reg; +} + + +static INLINE struct brw_reg byte_offset( struct brw_reg reg, + GLuint bytes ) +{ + GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + + +/** Construct unsigned word[16] register */ +static INLINE struct brw_reg brw_uw16_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[8] register */ +static INLINE struct brw_reg brw_uw8_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[1] register */ +static INLINE struct brw_reg brw_uw1_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static INLINE struct brw_reg brw_imm_reg( GLuint type ) +{ + return brw_reg( BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +/** Construct float immediate register */ +static INLINE struct brw_reg brw_imm_f( GLfloat f ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +/** Construct integer immediate register */ +static INLINE struct brw_reg brw_imm_d( GLint d ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d = d; + return imm; +} + +/** Construct uint immediate register */ +static INLINE struct brw_reg brw_imm_ud( GLuint ud ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +/** Construct ushort immediate register */ +static INLINE struct brw_reg brw_imm_uw( GLushort uw ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); + imm.dw1.ud = uw | (uw << 16); + return imm; +} + +/** Construct short immediate register */ +static INLINE 
struct brw_reg brw_imm_w( GLshort w ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w | (w << 16); + return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/** Construct vector of eight signed half-byte values */ +static INLINE struct brw_reg brw_imm_v( GLuint v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/** Construct vector of four 8-bit float values */ +static INLINE struct brw_reg brw_imm_vf( GLuint v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static INLINE struct brw_reg brw_imm_vf4( GLuint v0, + GLuint v1, + GLuint v2, + GLuint v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | + (v1 << 8) | + (v2 << 16) | + (v3 << 24)); + return imm; +} + + +static INLINE struct brw_reg brw_address( struct brw_reg reg ) +{ + return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); +} + +/** Construct float[1] general-purpose register */ +static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr ) +{ + return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[2] general-purpose register */ +static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr ) +{ + return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[4] general-purpose register */ +static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr ) +{ + return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 
+} + +/** Construct float[8] general-purpose register */ +static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr ) +{ + return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr ) +{ + return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr ) +{ + return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +/** Construct null register (usually used for setting condition codes) */ +static INLINE struct brw_reg brw_null_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, + 0); +} + +static INLINE struct brw_reg brw_address_reg( GLuint subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, + subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static INLINE struct brw_reg brw_ip_reg( void ) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, /* NOTE! */ + BRW_WRITEMASK_XYZW); /* NOTE! 
*/ +} + +static INLINE struct brw_reg brw_acc_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ACCUMULATOR, + 0); +} + + +static INLINE struct brw_reg brw_flag_reg( void ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG, + 0); +} + + +static INLINE struct brw_reg brw_mask_reg( GLuint subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK, + subnr); +} + +static INLINE struct brw_reg brw_message_reg( GLuint nr ) +{ + assert(nr < BRW_MAX_MRF); + return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, + nr, + 0); +} + + + + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static INLINE GLuint cvt( GLuint val ) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static INLINE struct brw_reg stride( struct brw_reg reg, + GLuint vstride, + GLuint width, + GLuint hstride ) +{ + reg.vstride = cvt(vstride); + reg.width = cvt(width) - 1; + reg.hstride = cvt(hstride); + return reg; +} + + +static INLINE struct brw_reg vec16( struct brw_reg reg ) +{ + return stride(reg, 16,16,1); +} + +static INLINE struct brw_reg vec8( struct brw_reg reg ) +{ + return stride(reg, 8,8,1); +} + +static INLINE struct brw_reg vec4( struct brw_reg reg ) +{ + return stride(reg, 4,4,1); +} + +static INLINE struct brw_reg vec2( struct brw_reg reg ) +{ + return stride(reg, 2,2,1); +} + +static INLINE struct brw_reg vec1( struct brw_reg reg ) +{ + return stride(reg, 0,1,0); +} + + +static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt ) +{ + return vec1(suboffset(reg, elt)); +} + +static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt ) +{ + return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); +} + + +static INLINE struct brw_reg brw_swizzle( struct brw_reg reg, + GLuint x, + 
GLuint y, + GLuint z, + GLuint w) +{ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), + BRW_GET_SWZ(reg.dw1.bits.swizzle, y), + BRW_GET_SWZ(reg.dw1.bits.swizzle, z), + BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); + return reg; +} + + +static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg, + GLuint x ) +{ + return brw_swizzle(reg, x, x, x, x); +} + +static INLINE struct brw_reg brw_writemask( struct brw_reg reg, + GLuint mask ) +{ + reg.dw1.bits.writemask &= mask; + return reg; +} + +static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg, + GLuint mask ) +{ + reg.dw1.bits.writemask = mask; + return reg; +} + +static INLINE struct brw_reg negate( struct brw_reg reg ) +{ + reg.negate ^= 1; + return reg; +} + +static INLINE struct brw_reg brw_abs( struct brw_reg reg ) +{ + reg.abs = 1; + return reg; +} + +/*********************************************************************** + */ +static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr, + GLint offset ) +{ + struct brw_reg reg = brw_vec4_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr, + GLint offset ) +{ + struct brw_reg reg = brw_vec1_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset) +{ + return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset) +{ + return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); +} + +static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint 
offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); +} + +static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr) +{ + return brw_address_reg(ptr.addr_subnr); +} + +static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset ) +{ + ptr.addr_offset += offset; + return ptr; +} + +static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset ) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + +/** Do two brw_regs refer to the same register? */ +static INLINE GLboolean +brw_same_reg(struct brw_reg r1, struct brw_reg r2) +{ + return r1.file == r2.file && r1.nr == r2.nr; +} + +static INLINE struct brw_instruction *current_insn( struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} + +void brw_pop_insn_state( struct brw_compile *p ); +void brw_push_insn_state( struct brw_compile *p ); +void brw_set_mask_control( struct brw_compile *p, GLuint value ); +void brw_set_saturate( struct brw_compile *p, GLuint value ); +void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ); +void brw_set_compression_control( struct brw_compile *p, GLboolean control ); +void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value ); +void brw_set_predicate_control( struct brw_compile *p, GLuint pc ); +void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ); + +void brw_init_compile( struct brw_context *, struct brw_compile *p ); + +enum pipe_error brw_get_program( struct brw_compile *p, + const GLuint **program, + GLuint *sz ); + + +/* Helpers for regular instructions: + */ +#define ALU1(OP) \ 
+struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0); + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1); + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(JMPI) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU1(RNDZ) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + +#undef ALU1 +#undef ALU2 + + + +/* Helpers for SEND instruction: + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle); + +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle); + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint msg_length, + GLuint response_length, + GLboolean eot); + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint sampler, + GLuint writemask, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot, + GLuint header_present, + GLuint simd_mode); + +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint saturate, + GLuint msg_reg_nr, + struct brw_reg src, + GLuint precision ); + +void brw_math( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint saturate, + GLuint msg_reg_nr, + struct brw_reg 
src, + GLuint data_type, + GLuint precision ); + +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + GLuint scratch_offset ); + +void brw_dp_READ_4( struct brw_compile *p, + struct brw_reg dest, + GLboolean relAddr, + GLuint location, + GLuint bind_table_index ); + +void brw_dp_READ_4_vs( struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index ); + +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + GLuint scratch_offset ); + +/* If/else/endif. Works by manipulating the execution flags on each + * channel. + */ +struct brw_instruction *brw_IF(struct brw_compile *p, + GLuint execute_size); + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn); + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *if_or_else_insn); + + +/* DO/WHILE loops: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, + GLuint execute_size); + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *patch_insn); + +struct brw_instruction *brw_BREAK(struct brw_compile *p); +struct brw_instruction *brw_CONT(struct brw_compile *p); +/* Forward jumps: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn); + + + +void brw_NOP(struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + GLuint conditional, + struct brw_reg src0, + struct brw_reg src1); + +void brw_print_reg( struct brw_reg reg ); + + +/*********************************************************************** + * brw_eu_util.c: + */ + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + GLuint count); + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct 
brw_indirect ptr, + GLuint count); + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count); + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count); + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src); + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ); +#endif diff --git a/src/gallium/drivers/i965/brw_eu_debug.c b/src/gallium/drivers/i965/brw_eu_debug.c new file mode 100644 index 00000000000..5989f5a04ee --- /dev/null +++ b/src/gallium/drivers/i965/brw_eu_debug.c @@ -0,0 +1,94 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_debug.h" + +#include "brw_eu.h" + +void brw_print_reg( struct brw_reg hwreg ) +{ + static const char *file[] = { + "arf", + "grf", + "msg", + "imm" + }; + + static const char *type[] = { + "ud", + "d", + "uw", + "w", + "ub", + "vf", + "hf", + "f" + }; + + debug_printf("%s%s", + hwreg.abs ? "abs/" : "", + hwreg.negate ? "-" : ""); + + if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.nr % 2 == 0 && + hwreg.subnr == 0 && + hwreg.vstride == BRW_VERTICAL_STRIDE_8 && + hwreg.width == BRW_WIDTH_8 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && + hwreg.type == BRW_REGISTER_TYPE_F) { + /* vector register */ + debug_printf("vec%d", hwreg.nr); + } + else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.vstride == BRW_VERTICAL_STRIDE_0 && + hwreg.width == BRW_WIDTH_1 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && + hwreg.type == BRW_REGISTER_TYPE_F) { + /* "scalar" register */ + debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + } + else if (hwreg.file == BRW_IMMEDIATE_VALUE) { + debug_printf("imm %f", hwreg.dw1.f); + } + else { + debug_printf("%s%d.%d<%d;%d,%d>:%s", + file[hwreg.file], + hwreg.nr, + hwreg.subnr / type_sz(hwreg.type), + hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0, + 1<<hwreg.width, + hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0, + type[hwreg.type]); + } +} + + + diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c new file mode 100644 index 00000000000..4fe7b6acc16 --- /dev/null +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -0,0 +1,1433 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" +#include "brw_debug.h" +#include "brw_disasm.h" + + + + +/*********************************************************************** + * Internal helper for constructing instructions + */ + +static void guess_execution_size( struct brw_instruction *insn, + struct brw_reg reg ) +{ + if (reg.width == BRW_WIDTH_8 && + insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) + insn->header.execution_size = BRW_EXECUTE_16; + else + insn->header.execution_size = reg.width; /* note - definitions are compatible */ +} + + +static void brw_set_dest( struct brw_instruction *insn, + struct brw_reg dest ) +{ + if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(dest.nr < 128); + + insn->bits1.da1.dest_reg_file = dest.file; + insn->bits1.da1.dest_reg_type = dest.type; + insn->bits1.da1.dest_address_mode = dest.address_mode; + + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + insn->bits1.da1.dest_reg_nr = dest.nr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.da1.dest_subreg_nr = dest.subnr; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.da1.dest_horiz_stride = dest.hstride; + } + else { + insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; + insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + } + } + else { + insn->bits1.ia1.dest_subreg_nr = dest.subnr; + + /* These are different sizes in align1 vs align16: + */ + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.ia1.dest_horiz_stride = dest.hstride; + } + else { + insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + } + } + + 
/* NEW: Set the execution size based on dest.width and + * insn->compression_control: + */ + guess_execution_size(insn, dest); +} + +static void brw_set_src0( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(reg.nr < 128); + + insn->bits1.da1.src0_reg_file = reg.file; + insn->bits1.da1.src0_reg_type = reg.type; + insn->bits2.da1.src0_abs = reg.abs; + insn->bits2.da1.src0_negate = reg.negate; + insn->bits2.da1.src0_address_mode = reg.address_mode; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + + /* Required to set some fields in src1 as well: + */ + insn->bits1.da1.src1_reg_file = 0; /* arf */ + insn->bits1.da1.src1_reg_type = reg.type; + } + else + { + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.da1.src0_subreg_nr = reg.subnr; + insn->bits2.da1.src0_reg_nr = reg.nr; + } + else { + insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; + insn->bits2.da16.src0_reg_nr = reg.nr; + } + } + else { + insn->bits2.ia1.src0_subreg_nr = reg.subnr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; + } + else { + insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; + } + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits2.da1.src0_width = BRW_WIDTH_1; + insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits2.da1.src0_horiz_stride = reg.hstride; + insn->bits2.da1.src0_width = reg.width; + insn->bits2.da1.src0_vert_stride = reg.vstride; + } + } + else { + insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, 
BRW_CHANNEL_Y); + insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits2.da16.src0_vert_stride = reg.vstride; + } + } +} + + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + assert(reg.nr < 128); + + insn->bits1.da1.src1_reg_file = reg.file; + insn->bits1.da1.src1_reg_type = reg.type; + insn->bits3.da1.src1_abs = reg.abs; + insn->bits3.da1.src1_negate = reg.negate; + + /* Only src1 can be immediate in two-argument instructions. + */ + assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + } + else { + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + /*assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits3.da1.src1_subreg_nr = reg.subnr; + insn->bits3.da1.src1_reg_nr = reg.nr; + } + else { + insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; + insn->bits3.da16.src1_reg_nr = reg.nr; + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits3.da1.src1_width = BRW_WIDTH_1; + insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits3.da1.src1_horiz_stride = reg.hstride; + insn->bits3.da1.src1_width = reg.width; + insn->bits3.da1.src1_vert_stride = reg.vstride; + } + } + else { + insn->bits3.da16.src1_swz_x = 
BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits3.da16.src1_vert_stride = reg.vstride; + } + } +} + + + +static void brw_set_math_message( struct brw_context *brw, + struct brw_instruction *insn, + GLuint msg_length, + GLuint response_length, + GLuint function, + GLuint integer_type, + GLboolean low_precision, + GLboolean saturate, + GLuint dataType ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.math_igdng.function = function; + insn->bits3.math_igdng.int_type = integer_type; + insn->bits3.math_igdng.precision = low_precision; + insn->bits3.math_igdng.saturate = saturate; + insn->bits3.math_igdng.data_type = dataType; + insn->bits3.math_igdng.snapshot = 0; + insn->bits3.math_igdng.header_present = 0; + insn->bits3.math_igdng.response_length = response_length; + insn->bits3.math_igdng.msg_length = msg_length; + insn->bits3.math_igdng.end_of_thread = 0; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH; + insn->bits2.send_igdng.end_of_thread = 0; + } else { + insn->bits3.math.function = function; + insn->bits3.math.int_type = integer_type; + insn->bits3.math.precision = low_precision; + insn->bits3.math.saturate = saturate; + insn->bits3.math.data_type = dataType; + insn->bits3.math.response_length = response_length; + insn->bits3.math.msg_length = msg_length; + insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; + insn->bits3.math.end_of_thread = 0; + } +} + + +static void brw_set_ff_sync_message( struct brw_context *brw, + struct 
brw_instruction *insn, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean end_of_thread, + GLboolean complete, + GLuint offset, + GLuint swizzle_control ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.urb_igdng.opcode = 1; + insn->bits3.urb_igdng.offset = offset; + insn->bits3.urb_igdng.swizzle_control = swizzle_control; + insn->bits3.urb_igdng.allocate = allocate; + insn->bits3.urb_igdng.used = used; + insn->bits3.urb_igdng.complete = complete; + insn->bits3.urb_igdng.header_present = 1; + insn->bits3.urb_igdng.response_length = response_length; + insn->bits3.urb_igdng.msg_length = msg_length; + insn->bits3.urb_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; +} + +static void brw_set_urb_message( struct brw_context *brw, + struct brw_instruction *insn, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean end_of_thread, + GLboolean complete, + GLuint offset, + GLuint swizzle_control ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.urb_igdng.opcode = 0; /* ? */ + insn->bits3.urb_igdng.offset = offset; + insn->bits3.urb_igdng.swizzle_control = swizzle_control; + insn->bits3.urb_igdng.allocate = allocate; + insn->bits3.urb_igdng.used = used; /* ? */ + insn->bits3.urb_igdng.complete = complete; + insn->bits3.urb_igdng.header_present = 1; + insn->bits3.urb_igdng.response_length = response_length; + insn->bits3.urb_igdng.msg_length = msg_length; + insn->bits3.urb_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.urb.opcode = 0; /* ? */ + insn->bits3.urb.offset = offset; + insn->bits3.urb.swizzle_control = swizzle_control; + insn->bits3.urb.allocate = allocate; + insn->bits3.urb.used = used; /* ? 
*/ + insn->bits3.urb.complete = complete; + insn->bits3.urb.response_length = response_length; + insn->bits3.urb.msg_length = msg_length; + insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; + insn->bits3.urb.end_of_thread = end_of_thread; + } +} + +static void brw_set_dp_write_message( struct brw_context *brw, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.dp_write_igdng.binding_table_index = binding_table_index; + insn->bits3.dp_write_igdng.msg_control = msg_control; + insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write_igdng.msg_type = msg_type; + insn->bits3.dp_write_igdng.send_commit_msg = 0; + insn->bits3.dp_write_igdng.header_present = 1; + insn->bits3.dp_write_igdng.response_length = response_length; + insn->bits3.dp_write_igdng.msg_length = msg_length; + insn->bits3.dp_write_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.dp_write.binding_table_index = binding_table_index; + insn->bits3.dp_write.msg_control = msg_control; + insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write.msg_type = msg_type; + insn->bits3.dp_write.send_commit_msg = 0; + insn->bits3.dp_write.response_length = response_length; + insn->bits3.dp_write.msg_length = msg_length; + insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits3.dp_write.end_of_thread = end_of_thread; + } +} + +static void brw_set_dp_read_message( struct brw_context *brw, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint target_cache, + GLuint msg_length, + GLuint 
response_length, + GLuint end_of_thread ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.dp_read_igdng.binding_table_index = binding_table_index; + insn->bits3.dp_read_igdng.msg_control = msg_control; + insn->bits3.dp_read_igdng.msg_type = msg_type; + insn->bits3.dp_read_igdng.target_cache = target_cache; + insn->bits3.dp_read_igdng.header_present = 1; + insn->bits3.dp_read_igdng.response_length = response_length; + insn->bits3.dp_read_igdng.msg_length = msg_length; + insn->bits3.dp_read_igdng.pad1 = 0; + insn->bits3.dp_read_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ + insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ + insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ + insn->bits3.dp_read.response_length = response_length; /*16:19*/ + insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ + insn->bits3.dp_read.pad1 = 0; /*28:30*/ + insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ + } +} + +static void brw_set_sampler_message(struct brw_context *brw, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint sampler, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot, + GLuint header_present, + GLuint simd_mode) +{ + assert(eot == 0); + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.sampler_igdng.binding_table_index = binding_table_index; + insn->bits3.sampler_igdng.sampler = sampler; + insn->bits3.sampler_igdng.msg_type = msg_type; + insn->bits3.sampler_igdng.simd_mode = simd_mode; + insn->bits3.sampler_igdng.header_present = header_present; + insn->bits3.sampler_igdng.response_length = response_length; + 
insn->bits3.sampler_igdng.msg_length = msg_length; + insn->bits3.sampler_igdng.end_of_thread = eot; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER; + insn->bits2.send_igdng.end_of_thread = eot; + } else if (BRW_IS_G4X(brw)) { + insn->bits3.sampler_g4x.binding_table_index = binding_table_index; + insn->bits3.sampler_g4x.sampler = sampler; + insn->bits3.sampler_g4x.msg_type = msg_type; + insn->bits3.sampler_g4x.response_length = response_length; + insn->bits3.sampler_g4x.msg_length = msg_length; + insn->bits3.sampler_g4x.end_of_thread = eot; + insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + } else { + insn->bits3.sampler.binding_table_index = binding_table_index; + insn->bits3.sampler.sampler = sampler; + insn->bits3.sampler.msg_type = msg_type; + insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; + insn->bits3.sampler.response_length = response_length; + insn->bits3.sampler.msg_length = msg_length; + insn->bits3.sampler.end_of_thread = eot; + insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + } +} + + + +static struct brw_instruction *next_insn( struct brw_compile *p, + GLuint opcode ) +{ + struct brw_instruction *insn; + + if (0 && (BRW_DEBUG & DEBUG_DISASSEM)) + { + if (p->nr_insn) + brw_disasm_insn(stderr, &p->store[p->nr_insn-1]); + } + + assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + + insn = &p->store[p->nr_insn++]; + memcpy(insn, p->current, sizeof(*insn)); + + /* Reset this one-shot flag: + */ + + if (p->current->header.destreg__conditionalmod) { + p->current->header.destreg__conditionalmod = 0; + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } + + insn->header.opcode = opcode; + return insn; +} + + +static struct brw_instruction *brw_alu1( struct brw_compile *p, + GLuint opcode, + struct brw_reg dest, + struct brw_reg src ) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + return insn; +} + +static struct 
brw_instruction *brw_alu2(struct brw_compile *p, + GLuint opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); + return insn; +} + + +/*********************************************************************** + * Convenience routines. + */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0) \ +{ \ + return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ +} + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1) \ +{ \ + return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ +} + + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU1(RNDZ) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + + + + +void brw_NOP(struct brw_compile *p) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_ud(0x0)); +} + + + + + +/*********************************************************************** + * Comparisons, if/else/endif + */ + +struct brw_instruction *brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + + insn->header.execution_size = 1; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_DISABLE; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + +/* EU takes the value from the flag register and 
pushes it onto some + * sort of a stack (presumably merging with any flag value already on + * the stack). Within an if block, the flags at the top of the stack + * control execution on each channel of the unit, eg. on each of the + * 16 pixel values in our wm programs. + * + * When the matching 'else' instruction is reached (presumably by + * countdown of the instruction count patched in by our ELSE/ENDIF + * functions), the relevent flags are inverted. + * + * When the matching 'endif' instruction is reached, the flags are + * popped off. If the stack is now empty, normal execution resumes. + * + * No attempt is made to deal with stack overflow (14 elements?). + */ +struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + assert(execute_size == BRW_EXECUTE_1); + + insn = next_insn(p, BRW_OPCODE_ADD); + insn->header.predicate_inverse = 1; + } else { + insn = next_insn(p, BRW_OPCODE_IF); + } + + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.execution_size = execute_size; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.predicate_control = BRW_PREDICATE_NORMAL; + insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn) +{ + struct brw_instruction *insn; + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; + + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); + } else { + insn = next_insn(p, BRW_OPCODE_ELSE); + } + + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + 
insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = if_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + /* Patch the if instruction to point at this instruction. + */ + if (p->single_program_flow) { + assert(if_insn->header.opcode == BRW_OPCODE_ADD); + + if_insn->bits3.ud = (insn - if_insn + 1) * 16; + } else { + assert(if_insn->header.opcode == BRW_OPCODE_IF); + + if_insn->bits3.if_else.jump_count = br * (insn - if_insn); + if_insn->bits3.if_else.pop_count = 0; + if_insn->bits3.if_else.pad0 = 0; + } + + return insn; +} + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *patch_insn) +{ + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; + + if (p->single_program_flow) { + /* In single program flow mode, there's no need to execute an ENDIF, + * since we don't need to do any stack operations, and if we're executing + * currently, we want to just continue executing. + */ + struct brw_instruction *next = &p->store[p->nr_insn]; + + assert(patch_insn->header.opcode == BRW_OPCODE_ADD); + + patch_insn->bits3.ud = (next - patch_insn) * 16; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); + + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = patch_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + insn->header.thread_control = BRW_THREAD_SWITCH; + + assert(patch_insn->bits3.if_else.jump_count == 0); + + /* Patch the if or else instructions to point at this or the next + * instruction respectively. 
+ */ + if (patch_insn->header.opcode == BRW_OPCODE_IF) { + /* Automagically turn it into an IFF: + */ + patch_insn->header.opcode = BRW_OPCODE_IFF; + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); + patch_insn->bits3.if_else.pop_count = 0; + patch_insn->bits3.if_else.pad0 = 0; + } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); + patch_insn->bits3.if_else.pop_count = 1; + patch_insn->bits3.if_else.pad0 = 0; + } else { + assert(0); + } + + /* Also pop item off the stack in the endif instruction: + */ + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } +} + +struct brw_instruction *brw_BREAK(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + return insn; +} + +/* DO/WHILE loop: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) +{ + if (p->single_program_flow) { + return &p->store[p->nr_insn]; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); + + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, brw_null_reg()); + brw_set_src0(insn, 
brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; + /* insn->header.mask_control = BRW_MASK_ENABLE; */ + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + + return insn; + } +} + + + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; + + if (p->single_program_flow) + insn = next_insn(p, BRW_OPCODE_ADD); + else + insn = next_insn(p, BRW_OPCODE_WHILE); + + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->single_program_flow) { + insn->header.execution_size = BRW_EXECUTE_1; + + insn->bits3.d = (do_insn - insn) * 16; + } else { + insn->header.execution_size = do_insn->header.execution_size; + + assert(do_insn->header.opcode == BRW_OPCODE_DO); + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + +/* insn->header.mask_control = BRW_MASK_ENABLE; */ + + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + return insn; +} + + +/* FORWARD JUMPS: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn) +{ + struct brw_instruction *landing = &p->store[p->nr_insn]; + GLuint jmpi = 1; + + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; + + assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); + assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); + + jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); +} + + + +/* To integrate with the above, it makes sense that the comparison + * instruction should populate the flag register. 
It might be simpler + * just to use the flag reg for most WM tasks? + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + GLuint conditional, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); + + insn->header.destreg__conditionalmod = conditional; + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); + +/* guess_execution_size(insn, src0); */ + + + /* Make it so that future instructions will use the computed flag + * value until brw_set_predicate_control_flag_value() is called + * again. + */ + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && + dest.nr == 0) { + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + p->flag_value = 0xff; + } +} + + + +/*********************************************************************** + * Helpers for the various SEND message types: + */ + +/** Extended math function, float[8]. + */ +void brw_math( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint saturate, + GLuint msg_reg_nr, + struct brw_reg src, + GLuint data_type, + GLuint precision ) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + + /* Example code doesn't set predicate_control for send + * instructions. + */ + insn->header.predicate_control = 0; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(p->brw, + insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + data_type); +} + +/** + * Extended math function, float[16]. + * Use 2 send instructions. 
+ */ +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint saturate, + GLuint msg_reg_nr, + struct brw_reg src, + GLuint precision ) +{ + struct brw_instruction *insn; + GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + + /* First instruction: + */ + brw_push_insn_state(p); + brw_set_predicate_control_flag_value(p, 0xff); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(p->brw, + insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + /* Second instruction: + */ + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.compression_control = BRW_COMPRESSION_2NDHALF; + insn->header.destreg__conditionalmod = msg_reg_nr+1; + + brw_set_dest(insn, offset(dest,1)); + brw_set_src0(insn, src); + brw_set_math_message(p->brw, + insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + brw_pop_insn_state(p); +} + + +/** + * Write block of 16 dwords/floats to the data port Render Cache scratch buffer. + * Scratch offset should be a multiple of 64. + * Used for register spilling. 
+ */ +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + GLuint scratch_offset ) +{ + GLuint msg_reg_nr = 1; + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d(scratch_offset)); + + brw_pop_insn_state(p); + } + + { + GLuint msg_length = 3; + struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + + brw_set_dp_write_message(p->brw, + insn, + 255, /* binding table index (255=stateless) */ + BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ + msg_length, + 0, /* pixel scoreboard */ + 0, /* response_length */ + 0); /* eot */ + } +} + + +/** + * Read block of 16 dwords/floats from the data port Render Cache scratch buffer. + * Scratch offset should be a multiple of 64. + * Used for register spilling. 
+ */ +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + GLuint scratch_offset ) +{ + GLuint msg_reg_nr = 1; + { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d(scratch_offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); /* UW? */ + brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + + brw_set_dp_read_message(p->brw, + insn, + 255, /* binding table index (255=stateless) */ + 3, /* msg_control (3 means 4 Owords) */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 1, /* target cache (render/scratch) */ + 1, /* msg_length */ + 2, /* response_length */ + 0); /* eot */ + } +} + + +/** + * Read a float[4] vector from the data port Data Cache (const buffer). + * Location (in buffer) should be a multiple of 16. + * Used for fetching shader constants. + * If relAddr is true, we'll do an indirect fetch using the address register. + */ +void brw_dp_READ_4( struct brw_compile *p, + struct brw_reg dest, + GLboolean relAddr, + GLuint location, + GLuint bind_table_index ) +{ + /* XXX: relAddr not implemented */ + GLuint msg_reg_nr = 1; + { + struct brw_reg b; + brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + /* Setup MRF[1] with location/offset into const buffer */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. 
+ * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + brw_MOV(p, b, brw_imm_ud(location)); + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + + /* cast dest to a uword[8] vector */ + dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(p->brw, + insn, + bind_table_index, + 0, /* msg_control (0 means 1 Oword) */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + +/** + * Read float[4] constant(s) from VS constant buffer. + * For relative addressing, two float[4] constants will be read into 'dest'. + * Otherwise, one float[4] constant will be read into the lower half of 'dest'. + */ +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index) +{ + GLuint msg_reg_nr = 1; + + assert(oword < 2); + /* + printf("vs const read msg, location %u, msg_reg_nr %d\n", + location, msg_reg_nr); + */ + + /* Setup MRF[1] with location/offset into const buffer */ + { + struct brw_reg b; + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /*brw_set_access_mode(p, BRW_ALIGN_16);*/ + + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. 
+ */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /*b = get_element_ud(b, 2);*/ + if (relAddr) { + brw_ADD(p, b, addrReg, brw_imm_ud(location)); + } + else { + brw_MOV(p, b, brw_imm_ud(location)); + } + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + /*insn->header.access_mode = BRW_ALIGN_16;*/ + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(p->brw, + insn, + bind_table_index, + oword, /* 0 = lower Oword, 1 = upper Oword */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint msg_length, + GLuint response_length, + GLboolean eot) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_dp_write_message(p->brw, + insn, + binding_table_index, + BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ + msg_length, + 1, /* pixel scoreboard */ + response_length, + eot); +} + + +/** + * Texture sample instruction. + * Note: the msg_type plus msg_length values determine exactly what kind + * of sampling operation is performed. See volume 4, page 161 of docs. 
+ */ +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint sampler, + GLuint writemask, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot, + GLuint header_present, + GLuint simd_mode) +{ + GLboolean need_stall = 0; + + if (writemask == 0) { + /*debug_printf("%s: zero writemask??\n", __FUNCTION__); */ + return; + } + + /* Hardware doesn't do destination dependency checking on send + * instructions properly. Add a workaround which generates the + * dependency by other means. In practice it seems like this bug + * only crops up for texture samples, and only where registers are + * written by the send and then written again later without being + * read in between. Luckily for us, we already track that + * information and use it to modify the writemask for the + * instruction, so that is a guide for whether a workaround is + * needed. + */ + if (writemask != BRW_WRITEMASK_XYZW) { + GLuint dst_offset = 0; + GLuint i, newmask = 0, len = 0; + + for (i = 0; i < 4; i++) { /* skip leading unwritten channels; each one advances dst_offset by 2 */ + if (writemask & (1<<i)) + break; + dst_offset += 2; + } + for (; i < 4; i++) { /* collect the first contiguous run of written channels into newmask/len */ + if (!(writemask & (1<<i))) + break; + newmask |= 1<<i; + len++; + } + + if (newmask != writemask) { /* written channels are not one contiguous run: fall back to the MOV emitted after the send */ + need_stall = 1; + /* debug_printf("need stall %x %x\n", newmask , writemask); */ + } + else { /* contiguous run: copy r0 into m1, store the inverted mask << 12 in its dword 2, and trim dest/response_length to the run */ + struct brw_reg m1 = brw_message_reg(msg_reg_nr); + + newmask = ~newmask & BRW_WRITEMASK_XYZW; + + brw_push_insn_state(p); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, m1, brw_vec8_grf(0,0)); + brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); + + brw_pop_insn_state(p); + + src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + dest = offset(dest, dst_offset); + response_length = len * 2; + } + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + 
insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_sampler_message(p->brw, insn, + binding_table_index, + sampler, + msg_type, + response_length, + msg_length, + eot, + header_present, + simd_mode); + } + + if (need_stall) { + struct brw_reg reg = vec8(offset(dest, response_length-1)); + + /* Self-MOV of the last response register, emitted only when the + * writemask could not be folded into the message header above: + * mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } + */ + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, reg, reg); + brw_pop_insn_state(p); + } + +} + +/* All these variables are pretty confusing - we might be better off + * using bitmasks and macros for this, in the old style. Or perhaps + * just having the caller instantiate the fields in dword3 itself. + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < BRW_MAX_MRF); + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, brw_imm_d(0)); + + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_urb_message(p->brw, + insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} + +/* Emit a SEND whose message descriptor is filled in by brw_set_ff_sync_message(); the operand setup is the same as in brw_urb_WRITE. */ void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < 16); /* NOTE(review): brw_urb_WRITE checks against BRW_MAX_MRF; confirm whether 16 is meant to be the same limit */ + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, brw_imm_d(0)); + + 
insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_ff_sync_message(p->brw, + insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} diff --git a/src/gallium/drivers/i965/brw_eu_util.c b/src/gallium/drivers/i965/brw_eu_util.c new file mode 100644 index 00000000000..5405cf17a4e --- /dev/null +++ b/src/gallium/drivers/i965/brw_eu_util.c @@ -0,0 +1,126 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + +/* Emit a brw_math() instruction with BRW_MATH_FUNCTION_INV on src into dst: no saturate, full precision, vector data. */ void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math( p, + dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + src, + BRW_MATH_PRECISION_FULL, + BRW_MATH_DATA_VECTOR ); +} + + + +/* Emit MOVs copying `count` 32-byte chunks from src to dst, two vec4 MOVs per chunk (byte offsets delta and delta+16). */ void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count) +{ + GLuint i; + + dst = vec4(dst); + src = vec4(src); + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16)); + } +} + + +/* Same copy as brw_copy4 but with vec8 operands, so one MOV per 32-byte chunk. */ void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count) +{ + GLuint i; + + dst = vec8(dst); + src = vec8(src); + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + } +} + + +/* Copy `count` 32-byte chunks where both operands are addressed through deref_4f() on an indirect pointer; two MOVs per chunk. */ void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + GLuint count) +{ + GLuint i; + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta)); + brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16)); + } +} + + +/* Copy `count` 32-byte chunks from an indirect source (deref_4f(ptr, ...)) into the directly addressed dst; two vec4 MOVs per chunk. */ void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + GLuint count) +{ + GLuint i; + + dst = vec4(dst); + + for (i = 0; i < count; i++) + { + GLuint delta = i*32; + brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta)); + brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16)); + } +} + + + + diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c new file mode 100644 index 00000000000..921b201bae2 --- /dev/null +++ 
b/src/gallium/drivers/i965/brw_gs.c @@ -0,0 +1,216 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_gs.h" + + + +/* Generate the GS kernel for key->primitive and upload it to the program cache, returning the buffer through bo_out. The early `return PIPE_OK` paths in the switch emit nothing and leave *bo_out untouched. */ static enum pipe_error compile_gs_prog( struct brw_context *brw, + struct brw_gs_prog_key *key, + struct brw_winsys_buffer **bo_out ) +{ + struct brw_gs_compile c; + enum pipe_error ret; + const GLuint *program; + GLuint program_size; + + memset(&c, 0, sizeof(c)); + + c.key = *key; + c.need_ff_sync = BRW_IS_IGDNG(brw); + /* Need to locate the two positions present in vertex + header. + * These are currently hardcoded: + */ + c.nr_attrs = c.key.nr_attrs; + + if (BRW_IS_IGDNG(brw)) + c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ + else + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + + c.nr_bytes = c.nr_regs * REG_SIZE; + + + /* Begin the compilation: + */ + brw_init_compile(brw, &c.func); + + c.func.single_program_flow = 1; + + /* For some reason the thread is spawned with only 4 channels + * unmasked. 
+ */ + brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + + /* Note that primitives which don't require a GS program have + * already been weeded out by this stage: + */ + switch (key->primitive) { + case PIPE_PRIM_QUADS: + brw_gs_quads( &c ); + break; + case PIPE_PRIM_QUAD_STRIP: + brw_gs_quad_strip( &c ); + break; + case PIPE_PRIM_LINE_LOOP: + brw_gs_lines( &c ); + break; + case PIPE_PRIM_LINES: + if (key->hint_gs_always) + brw_gs_lines( &c ); + else { + return PIPE_OK; + } + break; + case PIPE_PRIM_TRIANGLES: + if (key->hint_gs_always) + brw_gs_tris( &c ); + else { + return PIPE_OK; + } + break; + case PIPE_PRIM_POINTS: + if (key->hint_gs_always) + brw_gs_points( &c ); + else { + return PIPE_OK; + } + break; + default: + assert(0); + return PIPE_ERROR_BAD_INPUT; + } + + /* get the program + */ + ret = brw_get_program(&c.func, &program, &program_size); + if (ret) + return ret; + + /* Upload + */ + ret = brw_upload_cache( &brw->cache, BRW_GS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->gs.prog_data, + bo_out ); + if (ret) + return ret; + + return PIPE_OK; +} + +/* Indexed by brw->primitive in populate_key(); yields the primitive class stored in the GS key. Presumably laid out in PIPE_PRIM_* enum order - TODO confirm against p_defines.h. */ static const unsigned gs_prim[PIPE_PRIM_MAX] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINE_LOOP, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_QUADS, + PIPE_PRIM_QUAD_STRIP, + PIPE_PRIM_TRIANGLES +}; + +/* Zero the key, then fill it from the bound fragment shader signature and the current primitive. */ static void populate_key( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + const struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature; + + memset(key, 0, sizeof(*key)); + + /* PIPE_NEW_FRAGMENT_SIGNATURE */ + key->nr_attrs = sig->nr_inputs + 1; + + /* BRW_NEW_PRIMITIVE */ + key->primitive = gs_prim[brw->primitive]; + + key->hint_gs_always = 0; /* debug code? 
*/ + + key->need_gs_prog = (key->hint_gs_always || + brw->primitive == PIPE_PRIM_QUADS || + brw->primitive == PIPE_PRIM_QUAD_STRIP || + brw->primitive == PIPE_PRIM_LINE_LOOP); +} + +/* Find the GS program for the current key in the cache, compiling and + * uploading it on a miss. When the key says no GS program is needed, only + * gs.prog_active (and CACHE_NEW_GS_PROG on a change) is updated. + */ +static int prepare_gs_prog(struct brw_context *brw) +{ + struct brw_gs_prog_key key; + enum pipe_error ret; + + /* Populate the key: + */ + populate_key(brw, &key); + + if (brw->gs.prog_active != key.need_gs_prog) { + brw->state.dirty.cache |= CACHE_NEW_GS_PROG; + brw->gs.prog_active = key.need_gs_prog; + } + + if (!brw->gs.prog_active) + return PIPE_OK; + + if (brw_search_cache(&brw->cache, BRW_GS_PROG, + &key, sizeof(key), + NULL, 0, + &brw->gs.prog_data, + &brw->gs.prog_bo)) + return PIPE_OK; + + ret = compile_gs_prog( brw, &key, &brw->gs.prog_bo ); + if (ret) + return ret; + + return PIPE_OK; +} + + +const struct brw_tracked_state brw_gs_prog = { + .dirty = { + .mesa = PIPE_NEW_FRAGMENT_SIGNATURE, + .brw = BRW_NEW_PRIMITIVE, + .cache = 0, + }, + .prepare = prepare_gs_prog +}; diff --git a/src/gallium/drivers/i965/brw_gs.h b/src/gallium/drivers/i965/brw_gs.h new file mode 100644 index 00000000000..6e616dcb875 --- /dev/null +++ b/src/gallium/drivers/i965/brw_gs.h @@ -0,0 +1,76 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_GS_H +#define BRW_GS_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_GS_VERTS (4) + +/* Program-cache key for GS kernels; the bitfields, including pad, add up to 32 bits. */ struct brw_gs_prog_key { + GLuint nr_attrs:8; + GLuint primitive:4; + GLuint hint_gs_always:1; + GLuint need_gs_prog:1; + GLuint pad:18; +}; + +/* Working state for one GS kernel compilation, shared by the brw_gs_* generators declared below. */ struct brw_gs_compile { + struct brw_compile func; + struct brw_gs_prog_key key; + struct brw_gs_prog_data prog_data; + + struct { + struct brw_reg R0; + struct brw_reg vertex[MAX_GS_VERTS]; + } reg; + + /* 3 different ways of expressing vertex size: + */ + GLuint nr_attrs; + GLuint nr_regs; + GLuint nr_bytes; + GLboolean need_ff_sync; +}; + +#define ATTR_SIZE (4*4) + +/* Kernel generators, one per primitive class. */ void brw_gs_quads( struct brw_gs_compile *c ); +void brw_gs_quad_strip( struct brw_gs_compile *c ); +void brw_gs_tris( struct brw_gs_compile *c ); +void brw_gs_lines( struct brw_gs_compile *c ); +void brw_gs_points( struct brw_gs_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965/brw_gs_emit.c b/src/gallium/drivers/i965/brw_gs_emit.c new file mode 100644 index 00000000000..fd8e2accedd --- /dev/null +++ b/src/gallium/drivers/i965/brw_gs_emit.c @@ -0,0 +1,181 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_gs.h" + +/* Assign GRF registers: R0 at grf 0, then nr_verts vertices spaced c->nr_regs apart; records urb_read_length and total_grf in prog_data. */ static void brw_gs_alloc_regs( struct brw_gs_compile *c, + GLuint nr_verts ) +{ + GLuint i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->prog_data.urb_read_length = c->nr_regs; + c->prog_data.total_grf = i; +} + + +/* Write one vertex to the URB. `header` goes into dword 2 of R0; when `last` is set the write requests no new allocation and carries eot. */ static void brw_gs_emit_vue(struct brw_gs_compile *c, + struct brw_reg vert, + GLboolean last, + GLuint header) +{ + struct brw_compile *p = &c->func; + GLboolean allocate = !last; + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy8(p, brw_message_reg(1), vert, c->nr_regs); + + /* Send each vertex as a separate write to the urb. This is + * different to the concept in brw_sf_emit.c, where subsequent + * writes are used to build up a single urb entry. Each of these + * writes instantiates a separate urb entry, and a new one must be + * allocated each time. + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response length */ + allocate ? 
0 : 1, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + +/* Store num_prim in dword 1 of R0 and emit an FF_SYNC message with R0 as both source and destination. */ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) +{ + struct brw_compile *p = &c->func; + brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim)); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, /* allocate */ + 1, /* used */ + 1, /* msg length */ + 1, /* response length */ + 0, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + +void brw_gs_quads( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 4); + + /* Use polygons for correct edgeflag behaviour. Note that vertex 3 + * is the PV for quads, but vertex 0 for polygons: + */ + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); + brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +/* Same polygon output as brw_gs_quads, with the vertices emitted in the order 2, 3, 0, 1. */ void brw_gs_quad_strip( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 4); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); + brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +/* Pass three vertices through unchanged as a _3DPRIM_TRILIST primitive. */ void brw_gs_tris( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 3); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END)); +} + +/* Pass two vertices through as a _3DPRIM_LINESTRIP primitive. */ void brw_gs_lines( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 2); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); + brw_gs_emit_vue(c, 
c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); +} + +/* Pass a single vertex through as a _3DPRIM_POINTLIST primitive carrying both the start and end flags. */ void brw_gs_points( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 1); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); + brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); +} + + + + + + + + diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c new file mode 100644 index 00000000000..b64ec286cea --- /dev/null +++ b/src/gallium/drivers/i965/brw_gs_state.c @@ -0,0 +1,169 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_math.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_debug.h" + +/* Cache key for the GS unit state; curbe_offset takes part in the key but is not read by gs_unit_create_from_key(). */ struct brw_gs_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + + unsigned int curbe_offset; + + unsigned int nr_urb_entries, urb_size; + GLboolean prog_active; +}; + +/* Zero the key and fill it from the context; total_grf and urb_entry_read_length fall back to 1 when no GS program is active. */ static void +gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key) +{ + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_GS_PROG */ + key->prog_active = brw->gs.prog_active; + if (key->prog_active) { + key->total_grf = brw->gs.prog_data->total_grf; + key->urb_entry_read_length = brw->gs.prog_data->urb_read_length; + } else { + key->total_grf = 1; + key->urb_entry_read_length = 1; + } + + /* BRW_NEW_CURBE_OFFSETS */ + key->curbe_offset = brw->curbe.clip_start; /* NOTE(review): this is the clip unit's CURBE start - confirm it is intended for the GS key */ + + /* BRW_NEW_URB_FENCE */ + key->nr_urb_entries = brw->urb.nr_gs_entries; + key->urb_size = brw->urb.vsize; +} + +/* Build a brw_gs_unit_state from the key and upload it, together with the caller's relocations, to the state cache. */ static enum pipe_error +gs_unit_create_from_key(struct brw_context *brw, + struct brw_gs_unit_key *key, + struct brw_winsys_reloc *reloc, + unsigned nr_reloc, + struct brw_winsys_buffer **bo_out) +{ + struct brw_gs_unit_state gs; + enum pipe_error ret; + + + memset(&gs, 0, sizeof(gs)); + + /* reloc */ + gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; + gs.thread0.kernel_start_pointer = 0; + + gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + gs.thread1.single_program_flow = 1; + + gs.thread3.dispatch_grf_start_reg = 1; + gs.thread3.const_urb_entry_read_offset = 0; + gs.thread3.const_urb_entry_read_length = 0; + gs.thread3.urb_entry_read_offset = 0; + gs.thread3.urb_entry_read_length = key->urb_entry_read_length; + + gs.thread4.nr_urb_entries = key->nr_urb_entries; + gs.thread4.urb_entry_allocation_size = key->urb_size - 1; + + if (key->nr_urb_entries >= 8) + 
gs.thread4.max_threads = 1; + else + gs.thread4.max_threads = 0; + + if (BRW_IS_IGDNG(brw)) + gs.thread4.rendering_enable = 1; + + if (BRW_DEBUG & DEBUG_STATS) + gs.thread4.stats_enable = 1; + + ret = brw_upload_cache(&brw->cache, BRW_GS_UNIT, + key, sizeof(*key), + reloc, nr_reloc, + &gs, sizeof(gs), + NULL, NULL, + bo_out); + if (ret) + return ret; + + return PIPE_OK; +} + +/* Look up the GS unit state for the current key and relocation list in the cache, creating it on a miss. */ static enum pipe_error prepare_gs_unit(struct brw_context *brw) +{ + struct brw_gs_unit_key key; + enum pipe_error ret; + struct brw_winsys_reloc reloc[1]; + unsigned nr_reloc = 0; + unsigned grf_reg_count; + + gs_unit_populate_key(brw, &key); + + grf_reg_count = (align(key.total_grf, 16) / 16 - 1); + + /* GS program relocation: targets the thread0 dword; the delta is + * grf_reg_count << 1, presumably because that field shares the dword + * with the kernel pointer - TODO confirm the bit layout. + */ + if (key.prog_active) { + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_STATE, + grf_reg_count << 1, + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_bo); + } + + if (brw_search_cache(&brw->cache, BRW_GS_UNIT, + &key, sizeof(key), + reloc, nr_reloc, + NULL, + &brw->gs.state_bo)) + return PIPE_OK; + + ret = gs_unit_create_from_key(brw, &key, + reloc, nr_reloc, + &brw->gs.state_bo); + if (ret) + return ret; + + return PIPE_OK; +} + +const struct brw_tracked_state brw_gs_unit = { + .dirty = { + .mesa = 0, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_GS_PROG + }, + .prepare = prepare_gs_unit, +}; diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c new file mode 100644 index 00000000000..e4b24229db3 --- /dev/null +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -0,0 +1,513 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + + +#include "brw_debug.h" +#include "brw_batchbuffer.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_screen.h" +#include "brw_pipe_rast.h" + + + + + +/*********************************************************************** + * Blend color + * + * Emits the pre-built packet kept in brw->curr.bcc. + */ + +static int upload_blend_constant_color(struct brw_context *brw) +{ + BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bcc); + return 0; +} + + +const struct brw_tracked_state brw_blend_constant_color = { + .dirty = { + .mesa = PIPE_NEW_BLEND_COLOR, + .brw = 0, + .cache = 0 + }, + .emit = upload_blend_constant_color +}; + +/*********************************************************************** + * Drawing rectangle - framebuffer dimensions + * + * Four dwords: the opcode, 0, then (width - 1) in the low 16 bits combined + * with (height - 1) shifted up by 16, then 0. + */ +static int upload_drawing_rect(struct brw_context *brw) +{ + BEGIN_BATCH(4, NO_LOOP_CLIPRECTS); + OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965); + OUT_BATCH(0); + OUT_BATCH(((brw->curr.fb.width - 1) & 0xffff) | + ((brw->curr.fb.height - 1) << 16)); + OUT_BATCH(0); + ADVANCE_BATCH(); + return 0; +} + +const struct brw_tracked_state brw_drawing_rect = { + .dirty = { + .mesa = PIPE_NEW_FRAMEBUFFER_DIMENSIONS, + .brw = 0, + .cache = 0 + }, + .emit = upload_drawing_rect +}; + + +/*********************************************************************** + * Binding table pointers + */ + +/* Register the VS and WM binding-table buffers with brw_add_validated_bo() ahead of emission. */ static int prepare_binding_table_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->vs.bind_bo); + brw_add_validated_bo(brw, brw->wm.bind_bo); + return 0; +} + +/** + * Upload the binding table pointers, which point to each stage's array of + * surface state pointers. + * + * The binding table pointers are relative to the surface state base address, + * which is 0. 
+ */ +static int upload_binding_table_pointers(struct brw_context *brw) +{ + BEGIN_BATCH(6, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); + if (brw->vs.bind_bo != NULL) + OUT_RELOC(brw->vs.bind_bo, + BRW_USAGE_SAMPLER, + 0); /* vs */ + else + OUT_BATCH(0); + OUT_BATCH(0); /* gs */ + OUT_BATCH(0); /* clip */ + OUT_BATCH(0); /* sf */ + OUT_RELOC(brw->wm.bind_bo, + BRW_USAGE_SAMPLER, + 0); /* wm/ps; unlike the vs entry this is emitted without a NULL check */ + ADVANCE_BATCH(); + return 0; +} + +const struct brw_tracked_state brw_binding_table_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SURF_BIND, + }, + .prepare = prepare_binding_table_pointers, + .emit = upload_binding_table_pointers, +}; + + +/********************************************************************** + * Upload pointers to the per-stage state. + * + * The state pointers in this packet are all relative to the general state + * base address set by CMD_STATE_BASE_ADDRESS, which is 0. + * + * The GS (when a GS program is active) and clip relocations use a delta of + * 1, the other stages 0 - presumably the low bit is the unit-enable flag; + * TODO confirm against the hardware documentation. + */ +static int upload_pipelined_state_pointers(struct brw_context *brw ) +{ + BEGIN_BATCH(7, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); + OUT_RELOC(brw->vs.state_bo, + BRW_USAGE_STATE, + 0); + if (brw->gs.prog_active) + OUT_RELOC(brw->gs.state_bo, + BRW_USAGE_STATE, + 1); + else + OUT_BATCH(0); + OUT_RELOC(brw->clip.state_bo, + BRW_USAGE_STATE, + 1); + OUT_RELOC(brw->sf.state_bo, + BRW_USAGE_STATE, + 0); + OUT_RELOC(brw->wm.state_bo, + BRW_USAGE_STATE, + 0); + OUT_RELOC(brw->cc.state_bo, + BRW_USAGE_STATE, + 0); + ADVANCE_BATCH(); + + brw->state.dirty.brw |= BRW_NEW_PSP; + return 0; +} + + +/* Register every per-stage unit state buffer with brw_add_validated_bo() ahead of emission. */ static int prepare_psp_urb_cbs(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->vs.state_bo); + brw_add_validated_bo(brw, brw->gs.state_bo); + brw_add_validated_bo(brw, brw->clip.state_bo); + brw_add_validated_bo(brw, brw->sf.state_bo); + brw_add_validated_bo(brw, brw->wm.state_bo); + brw_add_validated_bo(brw, brw->cc.state_bo); + return 0; +} + +/* Emit, in this order, the pipelined state pointers, the URB fence and the CS URB state; stops at the first failure. */ static int upload_psp_urb_cbs(struct 
brw_context *brw ) +{ + int ret; + + ret = upload_pipelined_state_pointers(brw); + if (ret) + return ret; + + ret = brw_upload_urb_fence(brw); + if (ret) + return ret; + + ret = brw_upload_cs_urb_state(brw); + if (ret) + return ret; + + return 0; +} + +const struct brw_tracked_state brw_psp_urb_cbs = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_URB_FENCE | BRW_NEW_BATCH, + .cache = (CACHE_NEW_VS_UNIT | + CACHE_NEW_GS_UNIT | + CACHE_NEW_GS_PROG | + CACHE_NEW_CLIP_UNIT | + CACHE_NEW_SF_UNIT | + CACHE_NEW_WM_UNIT | + CACHE_NEW_CC_UNIT) + }, + .prepare = prepare_psp_urb_cbs, + .emit = upload_psp_urb_cbs, +}; + + +/*********************************************************************** + * Depth buffer + * + * The packet is 6 dwords on G4X/IGDNG and 5 otherwise. With no zsbuf bound + * a BRW_SURFACE_NULL / D32_FLOAT placeholder is emitted instead. + */ + +static int prepare_depthbuffer(struct brw_context *brw) +{ + struct pipe_surface *zsbuf = brw->curr.fb.zsbuf; + + if (zsbuf) + brw_add_validated_bo(brw, brw_surface(zsbuf)->bo); + + return 0; +} + +static int emit_depthbuffer(struct brw_context *brw) +{ + struct pipe_surface *surface = brw->curr.fb.zsbuf; + unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 
6 : 5; + + if (surface == NULL) { + BEGIN_BATCH(len, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); + OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | + (BRW_SURFACE_NULL << 29)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + OUT_BATCH(0); + + ADVANCE_BATCH(); + } else { + struct brw_winsys_buffer *bo; + unsigned int format; + unsigned int pitch; + unsigned int cpp; + + /* Translate the gallium format into the hardware depth format and its bytes per pixel; unsupported formats are rejected. */ + switch (surface->format) { + case PIPE_FORMAT_Z16_UNORM: + format = BRW_DEPTHFORMAT_D16_UNORM; + cpp = 2; + break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + cpp = 4; + break; + case PIPE_FORMAT_Z32_FLOAT: + format = BRW_DEPTHFORMAT_D32_FLOAT; + cpp = 4; + break; + default: + assert(0); + return PIPE_ERROR_BAD_INPUT; + } + + bo = brw_surface(surface)->bo; + pitch = brw_surface(surface)->pitch; + + BEGIN_BATCH(len, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); + OUT_BATCH(((pitch * cpp) - 1) | + (format << 18) | + (BRW_TILEWALK_YMAJOR << 26) | + ((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) | + (BRW_SURFACE_2D << 29)); + OUT_RELOC(bo, + BRW_USAGE_DEPTH_BUFFER, + surface->offset); + OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | + ((pitch - 1) << 6) | + ((surface->height - 1) << 19)); + OUT_BATCH(0); + + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + OUT_BATCH(0); + + ADVANCE_BATCH(); + } + + return 0; +} + +const struct brw_tracked_state brw_depthbuffer = { + .dirty = { + .mesa = PIPE_NEW_DEPTH_BUFFER, + .brw = BRW_NEW_BATCH, + .cache = 0, + }, + .prepare = prepare_depthbuffer, + .emit = emit_depthbuffer, +}; + + + +/*********************************************************************** + * Polygon stipple packet + */ + +static int upload_polygon_stipple(struct brw_context *brw) +{ + BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bps); + return 0; +} + +const struct brw_tracked_state brw_polygon_stipple = { + .dirty = { + .mesa = 
PIPE_NEW_POLYGON_STIPPLE, + .brw = 0, + .cache = 0 + }, + .emit = upload_polygon_stipple +}; + + +/*********************************************************************** + * Line stipple packet + * + * Emitted only when the rasterizer state's packet has a non-zero opcode. + */ + +static int upload_line_stipple(struct brw_context *brw) +{ + const struct brw_line_stipple *bls = &brw->curr.rast->bls; + if (bls->header.opcode) { + BRW_CACHED_BATCH_STRUCT(brw, bls); + } + return 0; +} + +const struct brw_tracked_state brw_line_stipple = { + .dirty = { + .mesa = PIPE_NEW_RAST, + .brw = 0, + .cache = 0 + }, + .emit = upload_line_stipple +}; + + +/*********************************************************************** + * Misc invariant state packets + */ + +static int upload_invarient_state( struct brw_context *brw ) +{ + { + /* 0x61040000 Pipeline Select */ + /* PipelineSelect : 0 */ + struct brw_pipeline_select ps; + + memset(&ps, 0, sizeof(ps)); + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + ps.header.opcode = CMD_PIPELINE_SELECT_GM45; + else + ps.header.opcode = CMD_PIPELINE_SELECT_965; + ps.header.pipeline_select = 0; + BRW_BATCH_STRUCT(brw, &ps); + } + + { + struct brw_global_depth_offset_clamp gdo; + memset(&gdo, 0, sizeof(gdo)); + + /* Disable depth offset clamping. 
+ */ + gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP; + gdo.header.length = sizeof(gdo)/4 - 2; + gdo.depth_offset_clamp = 0.0; + + BRW_BATCH_STRUCT(brw, &gdo); + } + + + /* 0x61020000 State Instruction Pointer */ + { + struct brw_system_instruction_pointer sip; + memset(&sip, 0, sizeof(sip)); + + sip.header.opcode = CMD_STATE_INSN_POINTER; + sip.header.length = 0; + sip.bits0.pad = 0; + sip.bits0.system_instruction_pointer = 0; + BRW_BATCH_STRUCT(brw, &sip); + } + + /* VF Statistics */ + { + struct brw_vf_statistics vfs; + memset(&vfs, 0, sizeof(vfs)); + + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + vfs.opcode = CMD_VF_STATISTICS_GM45; + else + vfs.opcode = CMD_VF_STATISTICS_965; + + if (BRW_DEBUG & DEBUG_STATS) + vfs.statistics_enable = 1; + + BRW_BATCH_STRUCT(brw, &vfs); + } + + if (!BRW_IS_965(brw)) + { + struct brw_aa_line_parameters balp; + + /* use legacy aa line coverage computation */ + memset(&balp, 0, sizeof(balp)); + balp.header.opcode = CMD_AA_LINE_PARAMETERS; + balp.header.length = sizeof(balp) / 4 - 2; + + BRW_BATCH_STRUCT(brw, &balp); + } + + { + struct brw_polygon_stipple_offset bpso; + + /* This is invariant state in gallium: + */ + memset(&bpso, 0, sizeof(bpso)); + bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; + bpso.header.length = sizeof(bpso)/4-2; + bpso.bits0.y_offset = 0; + bpso.bits0.x_offset = 0; + + BRW_BATCH_STRUCT(brw, &bpso); + } + + return 0; +} + +const struct brw_tracked_state brw_invarient_state = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .emit = upload_invarient_state +}; + + +/*********************************************************************** + * State base address + */ + +/** + * Define the base addresses which some state is referenced from. + * + * This allows us to avoid having to emit relocations in many places for + * cached state, and instead emit pointers inside of large, mostly-static + * state pools. This comes at the expense of memory, and more expensive cache + * misses. 
+ */ +static int upload_state_base_address( struct brw_context *brw ) +{ + /* Output the structure (brw_state_base_address) directly to the + * batchbuffer, so we can emit relocations inline. + */ + if (BRW_IS_IGDNG(brw)) { + BEGIN_BATCH(8, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* Instruction base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + OUT_BATCH(1); /* Instruction access upper bound */ + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(6, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + ADVANCE_BATCH(); + } + return 0; +} + +const struct brw_tracked_state brw_state_base_address = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = 0, + }, + .emit = upload_state_base_address +}; diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c new file mode 100644 index 00000000000..b759a910b63 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -0,0 +1,208 @@ + +#include "util/u_memory.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_debug.h" + +static int translate_logicop(unsigned logicop) +{ + switch (logicop) { + case PIPE_LOGICOP_CLEAR: + return BRW_LOGICOPFUNCTION_CLEAR; + case PIPE_LOGICOP_AND: + return BRW_LOGICOPFUNCTION_AND; + case PIPE_LOGICOP_AND_REVERSE: + return BRW_LOGICOPFUNCTION_AND_REVERSE; + case PIPE_LOGICOP_COPY: + return BRW_LOGICOPFUNCTION_COPY; + case PIPE_LOGICOP_COPY_INVERTED: + 
return BRW_LOGICOPFUNCTION_COPY_INVERTED; + case PIPE_LOGICOP_AND_INVERTED: + return BRW_LOGICOPFUNCTION_AND_INVERTED; + case PIPE_LOGICOP_NOOP: + return BRW_LOGICOPFUNCTION_NOOP; + case PIPE_LOGICOP_XOR: + return BRW_LOGICOPFUNCTION_XOR; + case PIPE_LOGICOP_OR: + return BRW_LOGICOPFUNCTION_OR; + case PIPE_LOGICOP_OR_INVERTED: + return BRW_LOGICOPFUNCTION_OR_INVERTED; + case PIPE_LOGICOP_NOR: + return BRW_LOGICOPFUNCTION_NOR; + case PIPE_LOGICOP_EQUIV: + return BRW_LOGICOPFUNCTION_EQUIV; + case PIPE_LOGICOP_INVERT: + return BRW_LOGICOPFUNCTION_INVERT; + case PIPE_LOGICOP_OR_REVERSE: + return BRW_LOGICOPFUNCTION_OR_REVERSE; + case PIPE_LOGICOP_NAND: + return BRW_LOGICOPFUNCTION_NAND; + case PIPE_LOGICOP_SET: + return BRW_LOGICOPFUNCTION_SET; + default: + assert(0); + return BRW_LOGICOPFUNCTION_SET; + } +} + + +static unsigned translate_blend_equation( unsigned mode ) +{ + switch (mode) { + case PIPE_BLEND_ADD: + return BRW_BLENDFUNCTION_ADD; + case PIPE_BLEND_MIN: + return BRW_BLENDFUNCTION_MIN; + case PIPE_BLEND_MAX: + return BRW_BLENDFUNCTION_MAX; + case PIPE_BLEND_SUBTRACT: + return BRW_BLENDFUNCTION_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; + default: + assert(0); + return BRW_BLENDFUNCTION_ADD; + } +} + +static unsigned translate_blend_factor( unsigned factor ) +{ + switch(factor) { + case PIPE_BLENDFACTOR_ZERO: + return BRW_BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return BRW_BLENDFACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_ONE: + return BRW_BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return BRW_BLENDFACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return BRW_BLENDFACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_DST_COLOR: + return BRW_BLENDFACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return BRW_BLENDFACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return BRW_BLENDFACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return BRW_BLENDFACTOR_DST_ALPHA; + 
case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return BRW_BLENDFACTOR_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return BRW_BLENDFACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return BRW_BLENDFACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return BRW_BLENDFACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return BRW_BLENDFACTOR_INV_CONST_ALPHA; + default: + assert(0); + return BRW_BLENDFACTOR_ZERO; + } +} + +static void *brw_create_blend_state( struct pipe_context *pipe, + const struct pipe_blend_state *templ ) +{ + struct brw_blend_state *blend = CALLOC_STRUCT(brw_blend_state); + if (blend == NULL) + return NULL; + + if (templ->logicop_enable) { + blend->cc2.logicop_enable = 1; + blend->cc5.logicop_func = translate_logicop(templ->logicop_func); + } + else if (templ->blend_enable) { + blend->cc6.dest_blend_factor = translate_blend_factor(templ->rgb_dst_factor); + blend->cc6.src_blend_factor = translate_blend_factor(templ->rgb_src_factor); + blend->cc6.blend_function = translate_blend_equation(templ->rgb_func); + + blend->cc5.ia_dest_blend_factor = translate_blend_factor(templ->alpha_dst_factor); + blend->cc5.ia_src_blend_factor = translate_blend_factor(templ->alpha_src_factor); + blend->cc5.ia_blend_function = translate_blend_equation(templ->alpha_func); + + blend->cc3.blend_enable = 1; + blend->cc3.ia_blend_enable = + (blend->cc6.dest_blend_factor != blend->cc5.ia_dest_blend_factor || + blend->cc6.src_blend_factor != blend->cc5.ia_src_blend_factor || + blend->cc6.blend_function != blend->cc5.ia_blend_function); + + /* Per-surface blend enables, currently just follow global + * state: + */ + blend->ss0.color_blend = 1; + } + + blend->cc5.dither_enable = templ->dither; + + if (BRW_DEBUG & DEBUG_STATS) + blend->cc5.statistics_enable = 1; + + /* Per-surface color mask -- just follow global state: + */ + blend->ss0.writedisable_red = 
(templ->colormask & PIPE_MASK_R) ? 0 : 1; + blend->ss0.writedisable_green = (templ->colormask & PIPE_MASK_G) ? 0 : 1; + blend->ss0.writedisable_blue = (templ->colormask & PIPE_MASK_B) ? 0 : 1; + blend->ss0.writedisable_alpha = (templ->colormask & PIPE_MASK_A) ? 0 : 1; + + return (void *)blend; +} + +static void brw_bind_blend_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + brw->curr.blend = (const struct brw_blend_state *)cso; + brw->state.dirty.mesa |= PIPE_NEW_BLEND; +} + +static void brw_delete_blend_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + assert((const void *)cso != (const void *)brw->curr.blend); + FREE(cso); +} + + +static void brw_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_blend_constant_color *bcc = &brw->curr.bcc; + + bcc->blend_constant_color[0] = blend_color->color[0]; + bcc->blend_constant_color[1] = blend_color->color[1]; + bcc->blend_constant_color[2] = blend_color->color[2]; + bcc->blend_constant_color[3] = blend_color->color[3]; + + brw->state.dirty.mesa |= PIPE_NEW_BLEND_COLOR; +} + + +void brw_pipe_blend_init( struct brw_context *brw ) +{ + brw->base.set_blend_color = brw_set_blend_color; + brw->base.create_blend_state = brw_create_blend_state; + brw->base.bind_blend_state = brw_bind_blend_state; + brw->base.delete_blend_state = brw_delete_blend_state; + + { + struct brw_blend_constant_color *bcc = &brw->curr.bcc; + + memset(bcc, 0, sizeof(*bcc)); + bcc->header.opcode = CMD_BLEND_CONSTANT_COLOR; + bcc->header.length = sizeof(*bcc)/4-2; + } + +} + +void brw_pipe_blend_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c new file mode 100644 index 00000000000..211be881789 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_clear.c @@ -0,0 +1,218 @@ 
+/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_pack_color.h" + +#include "pipe/p_state.h" + +#include "brw_batchbuffer.h" +#include "brw_screen.h" +#include "brw_context.h" + +#define MASK16 0xffff +#define MASK24 0xffffff + + +/** + * Emit a 6-dword XY_COLOR_BLT_CMD packet into the batch that covers the + * surface rectangle (0,0)-(base.width, base.height) and carries 'value' + * as its final dword. Nothing is emitted for a zero-sized surface. 
+ * For BRW_TILING_Y surfaces x2 is replaced by the pitch (see the XXX below). + * \param value packed fill value (see color_clear / zstencil_clear) + * NOTE(review): every path here returns 0, so the callers' flush-and-retry + * looks unreachable -- confirm whether BEGIN_BATCH can fail. + */ +static enum pipe_error +try_clear( struct brw_context *brw, + struct brw_surface *surface, + unsigned value ) +{ + uint32_t BR13, CMD; + int x1 = 0; + int y1 = 0; + int x2 = surface->base.width; + int y2 = surface->base.height; + int pitch = surface->pitch; + int cpp = surface->cpp; + + if (x2 == 0 || y2 == 0) + return 0; + + debug_printf("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + (void *)surface->bo, pitch * cpp, + surface->base.offset, + x1, y1, x2 - x1, y2 - y1); + + BR13 = 0xf0 << 16; + CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_RGB | XY_BLT_WRITE_ALPHA; + + /* Setup the blit command */ + if (cpp == 4) { + BR13 |= BR13_8888; + CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + } + else { + assert(cpp == 2); + BR13 |= BR13_565; + } + + /* XXX: nasty hack for clearing depth buffers + */ + if (surface->tiling == BRW_TILING_Y) { + x2 = pitch; + } + + if (surface->tiling == BRW_TILING_X) { + CMD |= XY_DST_TILED; + pitch /= 4; + } + + BR13 |= (pitch * cpp); + + BEGIN_BATCH(6, 0); + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((y1 << 16) | x1); + OUT_BATCH((y2 << 16) | x2); + OUT_RELOC(surface->bo, + BRW_USAGE_BLIT_DEST, + surface->base.offset); + OUT_BATCH(value); + ADVANCE_BATCH(); + + return 0; +} + + + + +static void color_clear(struct brw_context *brw, + struct brw_surface *bsurface, + const float *rgba ) +{ + enum pipe_error ret; + unsigned value; + + util_pack_color( rgba, bsurface->base.format, &value ); + + if (bsurface->cpp == 2) + value |= value << 16; + + ret = try_clear( brw, bsurface, value ); + + if (ret != 0) { + brw_context_flush( brw ); + ret = try_clear( brw, bsurface, value ); + assert( ret == 0 ); + } +} + +static void zstencil_clear(struct brw_context *brw, + struct 
brw_surface *bsurface, + double depth, + unsigned stencil ) +{ + enum pipe_error ret; + unsigned value; + + switch (bsurface->base.format) { + case PIPE_FORMAT_X8Z24_UNORM: + case 
PIPE_FORMAT_S8Z24_UNORM: + value = ((unsigned)(depth * MASK24) & MASK24); + break; + case PIPE_FORMAT_Z16_UNORM: + value = ((unsigned)(depth * MASK16) & MASK16); + break; + default: + assert(0); + return; + } + + switch (bsurface->base.format) { + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + value = value | (stencil << 24); + break; + + case PIPE_FORMAT_Z16_UNORM: + value = value | (value << 16); + break; + + default: + break; + } + + ret = try_clear( brw, bsurface, value ); + + if (ret != 0) { + brw_context_flush( brw ); + ret = try_clear( brw, bsurface, value ); + assert( ret == 0 ); + } +} + + + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). + */ +static void brw_clear(struct pipe_context *pipe, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil) +{ + struct brw_context *brw = brw_context( pipe ); + int i; + + if (buffers & PIPE_CLEAR_COLOR) { + for (i = 0; i < brw->curr.fb.nr_cbufs; i++) { + color_clear( brw, + brw_surface(brw->curr.fb.cbufs[i]), + rgba ); + } + } + + if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { + if (brw->curr.fb.zsbuf) { + zstencil_clear( brw, + brw_surface(brw->curr.fb.zsbuf), + depth, stencil ); + } + } +} + + +void brw_pipe_clear_init( struct brw_context *brw ) +{ + brw->base.clear = brw_clear; +} + + +void brw_pipe_clear_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c new file mode 100644 index 00000000000..e010d76e0d3 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_depth.c @@ -0,0 +1,172 @@ + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "brw_context.h" +#include "brw_defines.h" + +/* XXX: Fixme - include this to get IZ_ defines + */ +#include "brw_wm.h" + +static unsigned brw_translate_compare_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: + return BRW_COMPAREFUNCTION_NEVER; + case PIPE_FUNC_LESS: + 
return BRW_COMPAREFUNCTION_LESS; + case PIPE_FUNC_LEQUAL: + return BRW_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_GREATER: + return BRW_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_GEQUAL: + return BRW_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_NOTEQUAL: + return BRW_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_EQUAL: + return BRW_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_ALWAYS: + return BRW_COMPAREFUNCTION_ALWAYS; + default: + assert(0); + return BRW_COMPAREFUNCTION_ALWAYS; + } +} + +static unsigned translate_stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: + return BRW_STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return BRW_STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return BRW_STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return BRW_STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: + return BRW_STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return BRW_STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: + return BRW_STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: + return BRW_STENCILOP_INVERT; + default: + assert(0); + return BRW_STENCILOP_ZERO; + } +} + +static void create_bcc_state( struct brw_depth_stencil_state *zstencil, + const struct pipe_depth_stencil_alpha_state *templ ) +{ + if (templ->stencil[0].enabled) { + zstencil->cc0.stencil_enable = 1; + zstencil->cc0.stencil_func = + brw_translate_compare_func(templ->stencil[0].func); + zstencil->cc0.stencil_fail_op = + translate_stencil_op(templ->stencil[0].fail_op); + zstencil->cc0.stencil_pass_depth_fail_op = + translate_stencil_op(templ->stencil[0].zfail_op); + zstencil->cc0.stencil_pass_depth_pass_op = + translate_stencil_op(templ->stencil[0].zpass_op); + zstencil->cc1.stencil_ref = templ->stencil[0].ref_value; + zstencil->cc1.stencil_write_mask = templ->stencil[0].writemask; + zstencil->cc1.stencil_test_mask = templ->stencil[0].valuemask; + + if (templ->stencil[1].enabled) { + zstencil->cc0.bf_stencil_enable = 1; + zstencil->cc0.bf_stencil_func = + 
brw_translate_compare_func(templ->stencil[1].func); + zstencil->cc0.bf_stencil_fail_op = + translate_stencil_op(templ->stencil[1].fail_op); + zstencil->cc0.bf_stencil_pass_depth_fail_op = + translate_stencil_op(templ->stencil[1].zfail_op); + zstencil->cc0.bf_stencil_pass_depth_pass_op = + translate_stencil_op(templ->stencil[1].zpass_op); + zstencil->cc1.bf_stencil_ref = templ->stencil[1].ref_value; + zstencil->cc2.bf_stencil_write_mask = templ->stencil[1].writemask; + zstencil->cc2.bf_stencil_test_mask = templ->stencil[1].valuemask; + } + + zstencil->cc0.stencil_write_enable = (zstencil->cc1.stencil_write_mask || + zstencil->cc2.bf_stencil_write_mask); + } + + + if (templ->alpha.enabled) { + zstencil->cc3.alpha_test = 1; + zstencil->cc3.alpha_test_func = brw_translate_compare_func(templ->alpha.func); + zstencil->cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + zstencil->cc7.alpha_ref.ub[0] = float_to_ubyte(templ->alpha.ref_value); + } + + if (templ->depth.enabled) { + zstencil->cc2.depth_test = 1; + zstencil->cc2.depth_test_function = brw_translate_compare_func(templ->depth.func); + zstencil->cc2.depth_write_enable = templ->depth.writemask; + } +} + +static void create_wm_iz_state( struct brw_depth_stencil_state *zstencil ) +{ + if (zstencil->cc3.alpha_test) + zstencil->iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (zstencil->cc2.depth_test) + zstencil->iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (zstencil->cc2.depth_write_enable) + zstencil->iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + if (zstencil->cc0.stencil_enable) + zstencil->iz_lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (zstencil->cc0.stencil_write_enable) + zstencil->iz_lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + +} + + +/* Translate the gallium depth/stencil/alpha template into a driver CSO: + * create_bcc_state fills the cc* fields, create_wm_iz_state derives + * iz_lookup from them. Returns the new object, or NULL if the + * allocation fails. 
+ */ +static void * +brw_create_depth_stencil_state( struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *templ ) +{ + struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state); + + /* Both helpers dereference zstencil, so bail out on allocation + * failure (same pattern as brw_create_blend_state). + */ + if (zstencil == NULL) + return NULL; + + create_bcc_state( zstencil, templ ); + create_wm_iz_state( 
zstencil ); + + return (void *)zstencil; +} + + +static void brw_bind_depth_stencil_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + brw->curr.zstencil = (const struct brw_depth_stencil_state *)cso; + brw->state.dirty.mesa |= PIPE_NEW_DEPTH_STENCIL_ALPHA; +} + +static void brw_delete_depth_stencil_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + assert((const void *)cso != (const void *)brw->curr.zstencil); + FREE(cso); +} + + +void brw_pipe_depth_stencil_init( struct brw_context *brw ) +{ + brw->base.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; + brw->base.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; + brw->base.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; +} + +void brw_pipe_depth_stencil_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c new file mode 100644 index 00000000000..6b03094f502 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -0,0 +1,77 @@ +#include "util/u_math.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "brw_context.h" + +/** + * called from intelDrawBuffer() + */ +static void brw_set_framebuffer_state( struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb ) +{ + struct brw_context *brw = brw_context(pipe); + unsigned i; + + /* Dimensions: + */ + if (brw->curr.fb.width != fb->width || + brw->curr.fb.height != fb->height) { + brw->curr.fb.width = fb->width; + brw->curr.fb.height = fb->height; + brw->state.dirty.mesa |= PIPE_NEW_FRAMEBUFFER_DIMENSIONS; + } + + /* Z/Stencil + */ + if (brw->curr.fb.zsbuf != fb->zsbuf) { + pipe_surface_reference(&brw->curr.fb.zsbuf, fb->zsbuf); + brw->state.dirty.mesa |= PIPE_NEW_DEPTH_BUFFER; + } + + /* Color buffers: + */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (brw->curr.fb.cbufs[i] != fb->cbufs[i]) { + 
brw->state.dirty.mesa |= PIPE_NEW_COLOR_BUFFERS; + pipe_surface_reference(&brw->curr.fb.cbufs[i], fb->cbufs[i]); + } + } + + if (brw->curr.fb.nr_cbufs != fb->nr_cbufs) { + brw->curr.fb.nr_cbufs = MIN2(BRW_MAX_DRAW_BUFFERS, fb->nr_cbufs); + brw->state.dirty.mesa |= PIPE_NEW_NR_CBUFS; + } +} + + +static void brw_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->curr.viewport = *viewport; + brw->curr.ccv.min_depth = 0.0; /* XXX: near */ + brw->curr.ccv.max_depth = 1.0; /* XXX: far */ + + brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT; +} + + +void brw_pipe_framebuffer_init( struct brw_context *brw ) +{ + brw->base.set_framebuffer_state = brw_set_framebuffer_state; + brw->base.set_viewport_state = brw_set_viewport_state; +} + +void brw_pipe_framebuffer_cleanup( struct brw_context *brw ) +{ + struct pipe_framebuffer_state *fb = &brw->curr.fb; + int i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&fb->cbufs[i], NULL); + } + + pipe_surface_reference(&fb->zsbuf, NULL); +} diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c new file mode 100644 index 00000000000..fdc4814b221 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -0,0 +1,83 @@ + +#include "util/u_upload_mgr.h" + +#include "brw_context.h" +#include "brw_screen.h" +#include "brw_batchbuffer.h" + + + +/* All batchbuffer flushes must go through this function. + */ +void brw_context_flush( struct brw_context *brw ) +{ + /* + * + */ + brw_emit_query_end(brw); + + /* Move to the end of the current upload buffer so that we'll force choosing + * a new buffer next time. + */ + u_upload_flush( brw->vb.upload_vertex ); + u_upload_flush( brw->vb.upload_index ); + + _brw_batchbuffer_flush( brw->batch, __FILE__, __LINE__ ); + + /* Mark all context state as needing to be re-emitted. 
+ * This is probably not as severe as on 915, since almost all of our state + * is just in referenced buffers. + */ + brw->state.dirty.brw |= BRW_NEW_CONTEXT; + brw->state.dirty.mesa |= ~0; + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; + + brw->curbe.need_new_bo = GL_TRUE; +} + +static void +brw_flush( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence ) +{ + brw_context_flush( brw_context( pipe ) ); + if (fence) + *fence = NULL; +} + +static unsigned brw_is_buffer_referenced(struct pipe_context *pipe, + struct pipe_buffer *buffer) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_screen *bscreen = brw_screen(brw->base.screen); + + return brw_is_buffer_referenced_by_bo( bscreen, + buffer, + brw->batch->buf ); +} + +static unsigned brw_is_texture_referenced(struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, + unsigned level) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_screen *bscreen = brw_screen(brw->base.screen); + + return brw_is_texture_referenced_by_bo( bscreen, + texture, face, level, + brw->batch->buf ); +} + +void brw_pipe_flush_init( struct brw_context *brw ) +{ + brw->base.flush = brw_flush; + brw->base.is_buffer_referenced = brw_is_buffer_referenced; + brw->base.is_texture_referenced = brw_is_texture_referenced; +} + + +void brw_pipe_flush_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_misc.c b/src/gallium/drivers/i965/brw_pipe_misc.c new file mode 100644 index 00000000000..30359078079 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_misc.c @@ -0,0 +1,54 @@ + +#include "brw_context.h" +#include "brw_structs.h" +#include "brw_defines.h" + +static void brw_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stip ) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_polygon_stipple *bps = &brw->curr.bps; + GLuint i; + + memset(bps, 0, sizeof *bps); + bps->header.opcode = 
CMD_POLY_STIPPLE_PATTERN; + bps->header.length = sizeof *bps/4-2; + + for (i = 0; i < 32; i++) + bps->stipple[i] = stip->stipple[i]; /* don't invert */ + + brw->state.dirty.mesa |= PIPE_NEW_POLYGON_STIPPLE; +} + + +static void brw_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->curr.scissor = *scissor; + brw->state.dirty.mesa |= PIPE_NEW_SCISSOR; +} + + +static void brw_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->curr.ucp = *clip; + brw->state.dirty.mesa |= PIPE_NEW_CLIP; +} + + +void brw_pipe_misc_init( struct brw_context *brw ) +{ + brw->base.set_polygon_stipple = brw_set_polygon_stipple; + brw->base.set_scissor_state = brw_set_scissor_state; + brw->base.set_clip_state = brw_set_clip_state; +} + + +void brw_pipe_misc_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c new file mode 100644 index 00000000000..2eb862635cc --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_query.c @@ -0,0 +1,263 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +/** @file support for ARB_query_object + * + * ARB_query_object is implemented by using the PIPE_CONTROL command to stall + * execution on the completion of previous depth tests, and write the + * current PS_DEPTH_COUNT to a buffer object. + * + * We use before and after counts when drawing during a query so that + * we don't pick up other clients' query data in ours. To reduce overhead, + * a single BO is used to record the query data for all active queries at + * once. This also gives us a simple bound on how much batchbuffer space is + * required for handling queries, so that we can be sure that we won't + * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT. 
+ */ +#include "util/u_simple_list.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_batchbuffer.h" +#include "brw_reg.h" + +/** Waits on the query object's BO and totals the results for this query */ +static boolean +brw_query_get_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + uint64_t *result) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_query_object *query = (struct brw_query_object *)q; + + /* Map and count the pixels from the current query BO */ + if (query->bo) { + int i; + uint64_t *map; + + if (brw->sws->bo_is_busy(query->bo) && !wait) + return FALSE; + + map = bo_map_read(brw->sws, query->bo); + if (map == NULL) + return FALSE; + + for (i = query->first_index; i <= query->last_index; i++) { + query->result += map[i * 2 + 1] - map[i * 2]; + } + + brw->sws->bo_unmap(query->bo); + bo_reference(&query->bo, NULL); + } + + *result = query->result; + return TRUE; +} + +static struct pipe_query * +brw_query_create(struct pipe_context *pipe, unsigned type ) +{ + struct brw_query_object *query; + + switch (type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + query = CALLOC_STRUCT( brw_query_object ); + if (query == NULL) + return NULL; + return (struct pipe_query *)query; + + default: + return NULL; + } +} + +static void +brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q) +{ + struct brw_query_object *query = (struct brw_query_object *)q; + + bo_reference(&query->bo, NULL); + FREE(query); +} + +static void +brw_query_begin(struct pipe_context *pipe, struct pipe_query *q) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_query_object *query = (struct brw_query_object *)q; + + /* Reset our driver's tracking of query state. 
*/ + bo_reference(&query->bo, NULL); + query->result = 0; + query->first_index = -1; + query->last_index = -1; + + insert_at_head(&brw->query.active_head, query); + brw->query.stats_wm++; + brw->state.dirty.mesa |= PIPE_NEW_QUERY; +} + +static void +brw_query_end(struct pipe_context *pipe, struct pipe_query *q) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_query_object *query = (struct brw_query_object *)q; + + /* Flush the batchbuffer in case it has writes to our query BO. + * Have later queries write to a new query BO so that further rendering + * doesn't delay the collection of our results. + */ + if (query->bo) { + brw_emit_query_end(brw); + brw_context_flush( brw ); + + bo_reference(&brw->query.bo, NULL); + } + + remove_from_list(query); + brw->query.stats_wm--; + brw->state.dirty.mesa |= PIPE_NEW_QUERY; +} + +/*********************************************************************** + * Internal functions and callbacks to implement queries + */ + +/** Called to set up the query BO and account for its aperture space */ +enum pipe_error +brw_prepare_query_begin(struct brw_context *brw) +{ + enum pipe_error ret; + + /* Skip if we're not doing any queries. */ + if (is_empty_list(&brw->query.active_head)) + return PIPE_OK; + + /* Get a new query BO if we're going to need it. */ + if (brw->query.bo == NULL || + brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) { + + ret = brw->sws->bo_alloc(brw->sws, BRW_BUFFER_TYPE_QUERY, 4096, 1, + &brw->query.bo); + if (ret) + return ret; + + brw->query.index = 0; + } + + brw_add_validated_bo(brw, brw->query.bo); + + return PIPE_OK; +} + +/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */ +void +brw_emit_query_begin(struct brw_context *brw) +{ + struct brw_query_object *query; + + /* Skip if we're not doing any queries, or we've emitted the start. 
*/ + if (brw->query.active || is_empty_list(&brw->query.active_head)) + return; + + BEGIN_BATCH(4, IGNORE_CLIPRECTS); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_WRITE_DEPTH_COUNT); + /* This object could be mapped cacheable, but we don't have an exposed + * mechanism to support that. Since it's going uncached, tell GEM that + * we're writing to it. The usual clflush should be all that's required + * to pick up the results. + */ + OUT_RELOC(brw->query.bo, + BRW_USAGE_QUERY_RESULT, + PIPE_CONTROL_GLOBAL_GTT_WRITE | + ((brw->query.index * 2) * sizeof(uint64_t))); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + foreach(query, &brw->query.active_head) { + if (query->bo != brw->query.bo) { + uint64_t tmp; + + /* Propagate the results from this buffer to all of the + * active queries, as the bo is going away. This must wait: + * a non-waiting brw_query_get_result returns early while the + * old bo is busy, and its counts would be lost as soon as + * the reference is replaced below. + */ + if (query->bo != NULL) + brw_query_get_result( &brw->base, + (struct pipe_query *)query, + TRUE, + &tmp ); + + bo_reference( &query->bo, brw->query.bo ); + query->first_index = brw->query.index; + } + query->last_index = brw->query.index; + } + brw->query.active = GL_TRUE; +} + +/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */ +void +brw_emit_query_end(struct brw_context *brw) +{ + if (!brw->query.active) + return; + + BEGIN_BATCH(4, IGNORE_CLIPRECTS); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_WRITE_DEPTH_COUNT); + OUT_RELOC(brw->query.bo, + BRW_USAGE_QUERY_RESULT, + PIPE_CONTROL_GLOBAL_GTT_WRITE | + ((brw->query.index * 2 + 1) * sizeof(uint64_t))); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + brw->query.active = GL_FALSE; + brw->query.index++; +} + +void brw_pipe_query_init( struct brw_context *brw ) +{ + brw->base.create_query = brw_query_create; + brw->base.destroy_query = 
brw_query_destroy; + brw->base.begin_query = brw_query_begin; + brw->base.end_query = brw_query_end; + brw->base.get_query_result = brw_query_get_result; +} + + +void 
brw_pipe_query_cleanup( struct brw_context *brw ) +{ + /* Unreference brw->query.bo ?? + */ +} diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c new file mode 100644 index 00000000000..2117e91a9e4 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_rast.c @@ -0,0 +1,161 @@ + +#include "util/u_memory.h" +#include "pipe/p_defines.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_pipe_rast.h" +#include "brw_wm.h" + + +static unsigned translate_fill( unsigned fill ) +{ + switch (fill) { + case PIPE_POLYGON_MODE_FILL: + return CLIP_FILL; + case PIPE_POLYGON_MODE_LINE: + return CLIP_LINE; + case PIPE_POLYGON_MODE_POINT: + return CLIP_POINT; + default: + assert(0); + return CLIP_FILL; + } +} + + +/* Calculates the key for triangle-mode clipping. Non-triangle + * clipping keys use much less information and are computed on the + * fly. + */ +static void +calculate_clip_key_rast( const struct brw_context *brw, + const struct pipe_rasterizer_state *templ, + const struct brw_rasterizer_state *rast, + struct brw_clip_prog_key *key) +{ + memset(key, 0, sizeof *key); + + if (brw->chipset.is_igdng) + key->clip_mode = BRW_CLIPMODE_KERNEL_CLIP; + else + key->clip_mode = BRW_CLIPMODE_NORMAL; + + key->do_flat_shading = templ->flatshade; + + if (templ->cull_mode == PIPE_WINDING_BOTH) { + key->clip_mode = BRW_CLIPMODE_REJECT_ALL; + return; + } + + key->fill_ccw = CLIP_CULL; + key->fill_cw = CLIP_CULL; + + if (!(templ->cull_mode & PIPE_WINDING_CCW)) { + key->fill_ccw = translate_fill(templ->fill_ccw); + } + + if (!(templ->cull_mode & PIPE_WINDING_CW)) { + key->fill_cw = translate_fill(templ->fill_cw); + } + + if (key->fill_cw == CLIP_LINE || + key->fill_ccw == CLIP_LINE || + key->fill_cw == CLIP_POINT || + key->fill_ccw == CLIP_POINT) { + key->do_unfilled = 1; + key->clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + } + + key->offset_ccw = templ->offset_ccw; + key->offset_cw = templ->offset_cw; + + if (templ->light_twoside 
&& key->fill_cw != CLIP_CULL) + key->copy_bfc_cw = 1; + + if (templ->light_twoside && key->fill_ccw != CLIP_CULL) + key->copy_bfc_ccw = 1; +} + + +static void +calculate_line_stipple_rast( const struct pipe_rasterizer_state *templ, + struct brw_line_stipple *bls ) +{ + GLfloat tmp = 1.0f / (templ->line_stipple_factor + 1); + GLint tmpi = tmp * (1<<13); + + bls->header.opcode = CMD_LINE_STIPPLE_PATTERN; + bls->header.length = sizeof(*bls)/4 - 2; + bls->bits0.pattern = templ->line_stipple_pattern; + bls->bits1.repeat_count = templ->line_stipple_factor + 1; + bls->bits1.inverse_repeat_count = tmpi; +} + +static void *brw_create_rasterizer_state( struct pipe_context *pipe, + const struct pipe_rasterizer_state *templ ) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_rasterizer_state *rast; + + rast = CALLOC_STRUCT(brw_rasterizer_state); + if (rast == NULL) + return NULL; + + rast->templ = *templ; + + calculate_clip_key_rast( brw, templ, rast, &rast->clip_key ); + + if (templ->line_stipple_enable) + calculate_line_stipple_rast( templ, &rast->bls ); + + /* Calculate lookup value for WM IZ table.
+ */ + if (templ->line_smooth) { + if (templ->fill_cw == PIPE_POLYGON_MODE_LINE && + templ->fill_ccw == PIPE_POLYGON_MODE_LINE) { + rast->unfilled_aa_line = AA_ALWAYS; + } + else if (templ->fill_cw == PIPE_POLYGON_MODE_LINE || + templ->fill_ccw == PIPE_POLYGON_MODE_LINE) { + rast->unfilled_aa_line = AA_SOMETIMES; + } + else { + rast->unfilled_aa_line = AA_NEVER; + } + } + else { + rast->unfilled_aa_line = AA_NEVER; + } + + return (void *)rast; +} + + +static void brw_bind_rasterizer_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + brw->curr.rast = (const struct brw_rasterizer_state *)cso; + brw->state.dirty.mesa |= PIPE_NEW_RAST; +} + +static void brw_delete_rasterizer_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + assert((const void *)cso != (const void *)brw->curr.rast); + FREE(cso); +} + + + +void brw_pipe_rast_init( struct brw_context *brw ) +{ + brw->base.create_rasterizer_state = brw_create_rasterizer_state; + brw->base.bind_rasterizer_state = brw_bind_rasterizer_state; + brw->base.delete_rasterizer_state = brw_delete_rasterizer_state; +} + +void brw_pipe_rast_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_rast.h b/src/gallium/drivers/i965/brw_pipe_rast.h new file mode 100644 index 00000000000..9354f01e18a --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_rast.h @@ -0,0 +1,16 @@ +#ifndef BRW_PIPE_RAST_H +#define BRW_PIPE_RAST_H + +#include "brw_clip.h" + +struct brw_rasterizer_state { + struct pipe_rasterizer_state templ; /* for draw module */ + + /* Precalculated hardware state: + */ + struct brw_clip_prog_key clip_key; + struct brw_line_stipple bls; + unsigned unfilled_aa_line; +}; + +#endif diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c new file mode 100644 index 00000000000..5ddc63f57ec --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -0,0 
+1,233 @@ + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_debug.h" + + + +/* The brw (and related graphics cores) do not support GL_CLAMP. The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. + */ +static GLuint translate_wrap_mode( unsigned wrap ) +{ + switch( wrap ) { + case PIPE_TEX_WRAP_REPEAT: + return BRW_TEXCOORDMODE_WRAP; + + case PIPE_TEX_WRAP_CLAMP: + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return BRW_TEXCOORDMODE_CLAMP; + + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return BRW_TEXCOORDMODE_CLAMP_BORDER; + + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return BRW_TEXCOORDMODE_MIRROR; + + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return BRW_TEXCOORDMODE_MIRROR_ONCE; + + default: + return BRW_TEXCOORDMODE_WRAP; + } +} + +static GLuint translate_img_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return BRW_MAPFILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + return BRW_MAPFILTER_LINEAR; + case PIPE_TEX_FILTER_ANISO: + return BRW_MAPFILTER_ANISOTROPIC; + default: + assert(0); + return BRW_MAPFILTER_NEAREST; + } +} + +static GLuint translate_mip_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NONE: + return BRW_MIPFILTER_NONE; + case PIPE_TEX_MIPFILTER_NEAREST: + return BRW_MIPFILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: + return BRW_MIPFILTER_LINEAR; + default: + assert(0); + return BRW_MIPFILTER_NONE; + } +} + +/* XXX: not sure why there are special translations for the shadow tex + * compare functions. In particular ALWAYS is translated to NEVER. + * Is this a hardware issue? Does i965 really suffer from this? 
+ */ +static GLuint translate_shadow_compare_func( unsigned func ) +{ + switch (func) { + case PIPE_FUNC_NEVER: + return BRW_COMPAREFUNCTION_ALWAYS; + case PIPE_FUNC_LESS: + return BRW_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_LEQUAL: + return BRW_COMPAREFUNCTION_LESS; + case PIPE_FUNC_GREATER: + return BRW_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_GEQUAL: + return BRW_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_NOTEQUAL: + return BRW_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_EQUAL: + return BRW_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_ALWAYS: + return BRW_COMPAREFUNCTION_NEVER; + default: + assert(0); + return BRW_COMPAREFUNCTION_NEVER; + } +} + + + + +static void * +brw_create_sampler_state( struct pipe_context *pipe, + const struct pipe_sampler_state *template ) +{ + struct brw_sampler *sampler = CALLOC_STRUCT(brw_sampler); + + /* CALLOC_STRUCT can fail; report that to the caller as NULL, the same + * way the rasterizer and shader create hooks in this driver do. + */ + if (sampler == NULL) + return NULL; + + sampler->ss0.min_filter = translate_img_filter( template->min_img_filter ); + sampler->ss0.mag_filter = translate_img_filter( template->mag_img_filter ); + sampler->ss0.mip_filter = translate_mip_filter( template->min_mip_filter ); + + + /* XXX: anisotropy logic slightly changed: + */ + if (template->max_anisotropy > 1.0) { + sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; + sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; + + if (template->max_anisotropy > 2.0) { + sampler->ss3.max_aniso = MIN2((template->max_anisotropy - 2) / 2, + BRW_ANISORATIO_16); + } + } + + sampler->ss1.r_wrap_mode = translate_wrap_mode(template->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(template->wrap_s); + sampler->ss1.t_wrap_mode = translate_wrap_mode(template->wrap_t); + + /* Set LOD bias: + */ + sampler->ss0.lod_bias = + util_signed_fixed(CLAMP(template->lod_bias, -16, 15), 6); + + + sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ + sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ + + /* Set shadow function: + */ + if (template->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + + /* Shadowing is "enabled" by emitting a
particular sampler + * message (sample_c). So need to recompile WM program when + * shadow comparison is enabled on each/any texture unit. + */ + sampler->ss0.shadow_function = + translate_shadow_compare_func(template->compare_func); + } + + /* Set BaseMipLevel, MaxLOD, MinLOD: + */ + sampler->ss0.base_level = + util_unsigned_fixed(0, 1); + + sampler->ss1.max_lod = + util_unsigned_fixed(CLAMP(template->max_lod, 0, 13), 6); + + sampler->ss1.min_lod = + util_unsigned_fixed(CLAMP(template->min_lod, 0, 13), 6); + + return (void *)sampler; +} + +static void brw_bind_sampler_state(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct brw_context *brw = brw_context(pipe); + int i; + + for (i = 0; i < num; i++) + brw->curr.sampler[i] = sampler[i]; + + for (i = num; i < brw->curr.num_samplers; i++) + brw->curr.sampler[i] = NULL; + + brw->curr.num_samplers = num; + brw->state.dirty.mesa |= PIPE_NEW_SAMPLERS; +} + +static void brw_delete_sampler_state(struct pipe_context *pipe, + void *cso) +{ + FREE(cso); +} + +static void brw_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct brw_context *brw = brw_context(pipe); + int i; + + for (i = 0; i < num; i++) + pipe_texture_reference(&brw->curr.texture[i], texture[i]); + + for (i = num; i < brw->curr.num_textures; i++) + pipe_texture_reference(&brw->curr.texture[i], NULL); + + brw->curr.num_textures = num; + brw->state.dirty.mesa |= PIPE_NEW_BOUND_TEXTURES; +} + +static void brw_set_vertex_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ +} + +static void brw_bind_vertex_sampler_state(struct pipe_context *pipe, + unsigned num, void **sampler) +{ +} + + +void brw_pipe_sampler_init( struct brw_context *brw ) +{ + brw->base.create_sampler_state = brw_create_sampler_state; + brw->base.delete_sampler_state = brw_delete_sampler_state; + + brw->base.set_fragment_sampler_textures = brw_set_sampler_textures; + 
brw->base.bind_fragment_sampler_states = brw_bind_sampler_state; + + brw->base.set_vertex_sampler_textures = brw_set_vertex_sampler_textures; + brw->base.bind_vertex_sampler_states = brw_bind_vertex_sampler_state; + +} +void brw_pipe_sampler_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c new file mode 100644 index 00000000000..31a715ab655 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -0,0 +1,299 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_memory.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" + +#include "brw_context.h" +#include "brw_util.h" +#include "brw_wm.h" + + +/** + * Determine if the given shader uses complex features such as flow + * conditionals, loops, subroutines. + */ +static GLboolean has_flow_control(const struct tgsi_shader_info *info) +{ + return (info->opcode_count[TGSI_OPCODE_ARL] > 0 || + info->opcode_count[TGSI_OPCODE_IF] > 0 || + info->opcode_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */ + info->opcode_count[TGSI_OPCODE_CAL] > 0 || + info->opcode_count[TGSI_OPCODE_BRK] > 0 || /* redundant - BGNLOOP */ + info->opcode_count[TGSI_OPCODE_RET] > 0 || /* redundant - CAL */ + info->opcode_count[TGSI_OPCODE_BGNLOOP] > 0); +} + + +static void scan_immediates(const struct tgsi_token *tokens, + const struct tgsi_shader_info *info, + struct brw_immediate_data *imm) +{ + struct tgsi_parse_context parse; + boolean done = FALSE; + + imm->nr = 0; + imm->data = MALLOC(info->immediate_count * 4 * sizeof(float)); + + tgsi_parse_init( &parse, tokens ); + while (!tgsi_parse_end_of_tokens( &parse ) && !done) { + tgsi_parse_token( &parse ); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: { + static const float id[4] = {0,0,0,1}; + const float *value = &parse.FullToken.FullImmediate.u[0].Float; + unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + unsigned i; + + for (i = 0; i < size; i++) + imm->data[imm->nr][i] = value[i]; + + for (; i < 4; i++) + imm->data[imm->nr][i] = id[i]; + + imm->nr++; + break; + } + + case TGSI_TOKEN_TYPE_INSTRUCTION: + done = 1; + break; + } + } +} + + +static void brw_bind_fs_state( struct pipe_context *pipe, void *prog ) +{ + struct brw_fragment_shader *fs = (struct brw_fragment_shader 
*)prog; + struct brw_context *brw = brw_context(pipe); + + if (brw->curr.fragment_shader == fs) + return; + + if (brw->curr.fragment_shader == NULL || + fs == NULL || + memcmp(&brw->curr.fragment_shader->signature, &fs->signature, + brw_fs_signature_size(&fs->signature)) != 0) { + brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SIGNATURE; + } + + brw->curr.fragment_shader = fs; + brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SHADER; +} + +static void brw_bind_vs_state( struct pipe_context *pipe, void *prog ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->curr.vertex_shader = (struct brw_vertex_shader *)prog; + brw->state.dirty.mesa |= PIPE_NEW_VERTEX_SHADER; +} + + + +static void *brw_create_fs_state( struct pipe_context *pipe, + const struct pipe_shader_state *shader ) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_fragment_shader *fs; + int i; + + fs = CALLOC_STRUCT(brw_fragment_shader); + if (fs == NULL) + return NULL; + + /* Duplicate tokens, scan shader + */ + fs->tokens = tgsi_dup_tokens(shader->tokens); + if (fs->tokens == NULL) + goto fail; + + tgsi_scan_shader(fs->tokens, &fs->info); + scan_immediates(fs->tokens, &fs->info, &fs->immediates); + + /* has_flow_control() reads the opcode counts that tgsi_scan_shader() + * stores in fs->info, so it has to run after the scan (same order as + * brw_create_vs_state). Before the scan fs->info is still all zeroes. + */ + fs->id = brw->program_id++; + fs->has_flow_control = has_flow_control(&fs->info); + + fs->signature.nr_inputs = fs->info.num_inputs; + for (i = 0; i < fs->info.num_inputs; i++) { + fs->signature.input[i].interp = fs->info.input_interpolate[i]; + fs->signature.input[i].semantic = fs->info.input_semantic_name[i]; + fs->signature.input[i].semantic_index = fs->info.input_semantic_index[i]; + } + + for (i = 0; i < fs->info.num_inputs; i++) + if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION) + fs->uses_depth = 1; + + if (fs->info.uses_kill) + fs->iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fs->info.writes_z) + fs->iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + return (void *)fs; + +fail: + FREE(fs); + return NULL; +} + + +static void *brw_create_vs_state( struct pipe_context *pipe, + const
struct pipe_shader_state *shader ) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_vertex_shader *vs; + unsigned i; + + vs = CALLOC_STRUCT(brw_vertex_shader); + if (vs == NULL) + return NULL; + + /* Duplicate tokens, scan shader + */ + vs->tokens = tgsi_dup_tokens(shader->tokens); + if (vs->tokens == NULL) + goto fail; + + tgsi_scan_shader(vs->tokens, &vs->info); + scan_immediates(vs->tokens, &vs->info, &vs->immediates); + + vs->id = brw->program_id++; + vs->has_flow_control = has_flow_control(&vs->info); + + for (i = 0; i < vs->info.num_outputs; i++) { + int index = vs->info.output_semantic_index[i]; + switch (vs->info.output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + vs->output_hpos = i; + break; + case TGSI_SEMANTIC_COLOR: + if (index == 0) + vs->output_color0 = i; + else + vs->output_color1 = i; + break; + case TGSI_SEMANTIC_BCOLOR: + if (index == 0) + vs->output_bfc0 = i; + else + vs->output_bfc1 = i; + break; +#if 0 + case TGSI_SEMANTIC_EDGEFLAG: + vs->output_edgeflag = i; + break; +#endif + } + } + + + + /* Done: + */ + return (void *)vs; + +fail: + FREE(vs); + return NULL; +} + + +static void brw_delete_fs_state( struct pipe_context *pipe, void *prog ) +{ + struct brw_fragment_shader *fs = (struct brw_fragment_shader *)prog; + + bo_reference(&fs->const_buffer, NULL); + FREE( (void *)fs->tokens ); + FREE( fs ); +} + + +static void brw_delete_vs_state( struct pipe_context *pipe, void *prog ) +{ + /* prog was allocated by brw_create_vs_state(), so it is a + * brw_vertex_shader and must not be accessed through the + * brw_fragment_shader layout. + */ + struct brw_vertex_shader *vs = (struct brw_vertex_shader *)prog; + + /* Delete draw shader + */ + FREE( (void *)vs->tokens ); + FREE( vs ); +} + + +static void brw_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct brw_context *brw = brw_context(pipe); + + assert(index == 0); + + if (shader == PIPE_SHADER_FRAGMENT) { + pipe_buffer_reference( &brw->curr.fragment_constants, + buf->buffer ); + + brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_CONSTANTS; + } + else {
pipe_buffer_reference( &brw->curr.vertex_constants, + buf->buffer ); + + brw->state.dirty.mesa |= PIPE_NEW_VERTEX_CONSTANTS; + } +} + + +void brw_pipe_shader_init( struct brw_context *brw ) +{ + brw->base.set_constant_buffer = brw_set_constant_buffer; + + brw->base.create_vs_state = brw_create_vs_state; + brw->base.bind_vs_state = brw_bind_vs_state; + brw->base.delete_vs_state = brw_delete_vs_state; + + brw->base.create_fs_state = brw_create_fs_state; + brw->base.bind_fs_state = brw_bind_fs_state; + brw->base.delete_fs_state = brw_delete_fs_state; +} + +void brw_pipe_shader_cleanup( struct brw_context *brw ) +{ + pipe_buffer_reference( &brw->curr.fragment_constants, NULL ); + pipe_buffer_reference( &brw->curr.vertex_constants, NULL ); +} diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c new file mode 100644 index 00000000000..3d87a2853f7 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -0,0 +1,78 @@ +#include "brw_context.h" + + +static void brw_set_vertex_elements( struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements ) +{ + struct brw_context *brw = brw_context(pipe); + + memcpy(brw->curr.vertex_element, elements, count * sizeof(elements[0])); + brw->curr.num_vertex_elements = count; + + brw->state.dirty.mesa |= PIPE_NEW_VERTEX_ELEMENT; +} + + +static void brw_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct brw_context *brw = brw_context(pipe); + unsigned i; + + /* Check for no change */ + if (count == brw->curr.num_vertex_buffers && + memcmp(brw->curr.vertex_buffer, + buffers, + count * sizeof buffers[0]) == 0) + return; + + /* Adjust refcounts */ + for (i = 0; i < count; i++) + pipe_buffer_reference(&brw->curr.vertex_buffer[i].buffer, + buffers[i].buffer); + + for ( ; i < brw->curr.num_vertex_buffers; i++) + pipe_buffer_reference(&brw->curr.vertex_buffer[i].buffer, + NULL); + + /* Copy 
remaining data */ + memcpy(brw->curr.vertex_buffer, buffers, count * sizeof buffers[0]); + brw->curr.num_vertex_buffers = count; + + brw->state.dirty.mesa |= PIPE_NEW_VERTEX_BUFFER; +} + +static void brw_set_edgeflags( struct pipe_context *pipe, + const unsigned *bitfield ) +{ + /* XXX */ +} + + +void +brw_pipe_vertex_init( struct brw_context *brw ) +{ + brw->base.set_vertex_buffers = brw_set_vertex_buffers; + brw->base.set_vertex_elements = brw_set_vertex_elements; + brw->base.set_edgeflags = brw_set_edgeflags; +} + + +void +brw_pipe_vertex_cleanup( struct brw_context *brw ) +{ + + /* Release bound pipe vertex_buffers + */ + + /* Release some other stuff + */ +#if 0 + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + bo_reference(&brw->vb.inputs[i].bo, NULL); + brw->vb.inputs[i].bo = NULL; + } +#endif +} diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h new file mode 100644 index 00000000000..a63403b6afd --- /dev/null +++ b/src/gallium/drivers/i965/brw_reg.h @@ -0,0 +1,115 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_REG_H +#define BRW_REG_H + +#define CMD_MI (0x0 << 29) +#define CMD_2D (0x2 << 29) +#define CMD_3D (0x3 << 29) + +#define MI_NOOP (CMD_MI | 0) +#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) +#define MI_FLUSH (CMD_MI | (4 << 23)) + +#define _3DSTATE_DRAWRECT_INFO_I965 (CMD_3D | (3 << 27) | (1 << 24) | 0x2) + +/** @{ + * + * PIPE_CONTROL operation, a combination MI_FLUSH and register write with + * additional flushing control. 
+ */ +#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | 2) +#define PIPE_CONTROL_NO_WRITE (0 << 14) +#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14) +#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14) +#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14) +#define PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define PIPE_CONTROL_WRITE_FLUSH (1 << 12) +#define PIPE_CONTROL_INSTRUCTION_FLUSH (1 << 11) +#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8) +#define PIPE_CONTROL_PPGTT_WRITE (0 << 2) +#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) + +/** @} */ + +#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6) +#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4) +#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6) + +/* BR00 */ +#define XY_BLT_WRITE_ALPHA (1 << 21) +#define XY_BLT_WRITE_RGB (1 << 20) +#define XY_SRC_TILED (1 << 15) +#define XY_DST_TILED (1 << 11) + +/* BR13 */ +#define BR13_565 (0x1 << 24) +#define BR13_8888 (0x3 << 24) + +#define FENCE_LINEAR 0 +#define FENCE_XMAJOR 1 +#define FENCE_YMAJOR 2 + + + +/* PCI IDs + */ +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I946_GZ 0x2972 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + +#define PCI_CHIP_GM45_GM 0x2A42 + +#define PCI_CHIP_IGD_E_G 0x2E02 +#define PCI_CHIP_Q45_G 0x2E12 +#define PCI_CHIP_G45_G 0x2E22 +#define PCI_CHIP_G41_G 0x2E32 +#define PCI_CHIP_B43_G 0x2E42 + +#define PCI_CHIP_ILD_G 0x0042 +#define PCI_CHIP_ILM_G 0x0046 + +struct brw_chipset { + unsigned pci_id:16; + unsigned is_965:1; + unsigned is_igdng:1; + unsigned is_g4x:1; + unsigned pad:13; +}; + + +/* XXX: hacks + */ +#define VERT_RESULT_HPOS 0 /* not always true */ +#define VERT_RESULT_PSIZ 10000 /* disabled */ + + +#endif diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c new file mode 100644 index 00000000000..0ecacac9a3a --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen.c @@ -0,0 +1,403 @@ 
+/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "util/u_string.h" + +#include "brw_reg.h" +#include "brw_context.h" +#include "brw_screen.h" +#include "brw_winsys.h" +#include "brw_debug.h" + +#ifdef DEBUG +static const struct debug_named_value debug_names[] = { + { "tex", DEBUG_TEXTURE}, + { "state", DEBUG_STATE}, + { "ioctl", DEBUG_IOCTL}, + { "blit", DEBUG_BLIT}, + { "curbe", DEBUG_CURBE}, + { "fall", DEBUG_FALLBACKS}, + { "verb", DEBUG_VERBOSE}, + { "bat", DEBUG_BATCH}, + { "pix", DEBUG_PIXEL}, + { "wins", DEBUG_WINSYS}, + { "min", DEBUG_MIN_URB}, + { "dis", DEBUG_DISASSEM}, + { "sync", DEBUG_SYNC}, + { "prim", DEBUG_PRIMS }, + { "vert", DEBUG_VERTS }, + { "dma", DEBUG_DMA }, + { "san", DEBUG_SANITY }, + { "sleep", DEBUG_SLEEP }, + { "stats", DEBUG_STATS }, + { "sing", DEBUG_SINGLE_THREAD }, + { "thre", DEBUG_SINGLE_THREAD }, + { "wm", DEBUG_WM }, + { "urb", DEBUG_URB }, + { "vs", DEBUG_VS }, + { NULL, 0 } +}; + +static const struct debug_named_value dump_names[] = { + { "asm", DUMP_ASM}, + { "state", DUMP_STATE}, + { "batch", DUMP_BATCH}, + { NULL, 0 } +}; + +int BRW_DEBUG = 0; +int BRW_DUMP = 0; + +#endif + + +/* + * Probe functions + */ + + +static const char * +brw_get_vendor(struct pipe_screen *screen) +{ + return "VMware, Inc."; +} + +static const char * +brw_get_name(struct pipe_screen *screen) +{ + static char buffer[128]; + /* The switch below has no default case, so give chipset a defined + * fallback instead of formatting an indeterminate pointer. + */ + const char *chipset = "unknown"; + + switch (brw_screen(screen)->chipset.pci_id) { + case PCI_CHIP_I965_G: + chipset = "I965_G"; + break; + case PCI_CHIP_I965_Q: + chipset = "I965_Q"; + break; + case PCI_CHIP_I965_G_1: + chipset = "I965_G_1"; + break; + case PCI_CHIP_I946_GZ: + chipset = "I946_GZ"; + break; + case PCI_CHIP_I965_GM: + chipset = "I965_GM"; + break; + case PCI_CHIP_I965_GME: + chipset = "I965_GME"; + break; + case PCI_CHIP_GM45_GM: + chipset = "GM45_GM"; + break; + case PCI_CHIP_IGD_E_G: + chipset = "IGD_E_G"; + break; + case
PCI_CHIP_Q45_G: + chipset = "Q45_G"; + break; + case PCI_CHIP_G45_G: + chipset = "G45_G"; + break; + case PCI_CHIP_G41_G: + chipset = "G41_G"; + break; + case PCI_CHIP_B43_G: + chipset = "B43_G"; + break; + case PCI_CHIP_ILD_G: + chipset = "ILD_G"; + break; + case PCI_CHIP_ILM_G: + chipset = "ILM_G"; + break; + } + + util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset); + return buffer; +} + +static int +brw_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + +static float +brw_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + +static boolean +brw_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) +{ + static const enum pipe_format tex_supported[] = { + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_L16_UNORM, + /*PIPE_FORMAT_I16_UNORM,*/ + /*PIPE_FORMAT_A16_UNORM,*/ + 
PIPE_FORMAT_A8L8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_A1R5G5B5_UNORM, + PIPE_FORMAT_A4R4G4B4_UNORM, + PIPE_FORMAT_X8R8G8B8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + /* video */ + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + /* compressed */ + /*PIPE_FORMAT_FXT1_RGBA,*/ + PIPE_FORMAT_DXT1_RGB, + PIPE_FORMAT_DXT1_RGBA, + PIPE_FORMAT_DXT3_RGBA, + PIPE_FORMAT_DXT5_RGBA, + /* sRGB */ + PIPE_FORMAT_R8G8B8A8_SRGB, + PIPE_FORMAT_A8L8_SRGB, + PIPE_FORMAT_L8_SRGB, + PIPE_FORMAT_DXT1_SRGB, + /* depth */ + PIPE_FORMAT_Z32_FLOAT, + PIPE_FORMAT_X8Z24_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_Z16_UNORM, + /* signed */ + PIPE_FORMAT_R8G8_SNORM, + PIPE_FORMAT_R8G8B8A8_SNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + static const enum pipe_format render_supported[] = { + PIPE_FORMAT_X8R8G8B8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + static const enum pipe_format depth_supported[] = { + PIPE_FORMAT_Z32_FLOAT, + PIPE_FORMAT_X8Z24_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_Z16_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + const enum pipe_format *list; + uint i; + + if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) + list = depth_supported; + else if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + list = render_supported; + else + list = tex_supported; + + for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { + if (list[i] == format) + return TRUE; + } + + return FALSE; +} + + +/* + * Fence functions + */ + + +static void +brw_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ +} + +static int +brw_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + return 0; /* XXX shouldn't this be a boolean? 
*/ +} + +static int +brw_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + return 0; +} + + +/* + * Generic functions + */ + + +static void +brw_destroy_screen(struct pipe_screen *screen) +{ + struct brw_screen *bscreen = brw_screen(screen); + + if (bscreen->sws) + bscreen->sws->destroy(bscreen->sws); + + FREE(bscreen); +} + +/** + * Create a new brw_screen object + */ +struct pipe_screen * +brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) +{ + struct brw_screen *bscreen; + struct brw_chipset chipset; + +#ifdef DEBUG + BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0); + BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0); + BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB | DEBUG_WM; + + BRW_DUMP = debug_get_flags_option("BRW_DUMP", dump_names, 0); +#endif + + memset(&chipset, 0, sizeof chipset); + + chipset.pci_id = pci_id; + + switch (pci_id) { + case PCI_CHIP_I965_G: + case PCI_CHIP_I965_Q: + case PCI_CHIP_I965_G_1: + case PCI_CHIP_I946_GZ: + case PCI_CHIP_I965_GM: + case PCI_CHIP_I965_GME: + chipset.is_965 = TRUE; + break; + + case PCI_CHIP_GM45_GM: + case PCI_CHIP_IGD_E_G: + case PCI_CHIP_Q45_G: + case PCI_CHIP_G45_G: + case PCI_CHIP_G41_G: + case PCI_CHIP_B43_G: + chipset.is_g4x = TRUE; + break; + + case PCI_CHIP_ILD_G: + case PCI_CHIP_ILM_G: + chipset.is_igdng = TRUE; + break; + + default: + debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", + __FUNCTION__, pci_id); + return NULL; + } + + + bscreen = CALLOC_STRUCT(brw_screen); + if (!bscreen) + return NULL; + + bscreen->chipset = chipset; + bscreen->sws = sws; + bscreen->base.winsys = NULL; + bscreen->base.destroy = brw_destroy_screen; + bscreen->base.get_name = brw_get_name; + bscreen->base.get_vendor = brw_get_vendor; + bscreen->base.get_param = brw_get_param; + bscreen->base.get_paramf = brw_get_paramf; + bscreen->base.is_format_supported = brw_is_format_supported; + bscreen->base.fence_reference = 
brw_fence_reference; + bscreen->base.fence_signalled = brw_fence_signalled; + bscreen->base.fence_finish = brw_fence_finish; + + brw_screen_tex_init(bscreen); + brw_screen_tex_surface_init(bscreen); + brw_screen_buffer_init(bscreen); + + bscreen->no_tiling = debug_get_option("BRW_NO_TILING", FALSE) != NULL; + + + return &bscreen->base; +} diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h new file mode 100644 index 00000000000..7226d9228b7 --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen.h @@ -0,0 +1,199 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef BRW_SCREEN_H +#define BRW_SCREEN_H + +#include "pipe/p_state.h" +#include "pipe/p_screen.h" + +#include "brw_reg.h" +#include "brw_structs.h" + +struct brw_winsys_screen; + + +/** + * Subclass of pipe_screen + */ +struct brw_screen +{ + struct pipe_screen base; + struct brw_chipset chipset; + struct brw_winsys_screen *sws; + boolean no_tiling; +}; + +/** + * Subclass of pipe_transfer + */ +struct brw_transfer +{ + struct pipe_transfer base; + + unsigned offset; +}; + +struct brw_buffer +{ + struct pipe_buffer base; + + /* One of either bo or user_buffer will be non-null, depending on + * whether this is a hardware or user buffer. + */ + struct brw_winsys_buffer *bo; + void *user_buffer; + + /* Mapped pointer?? + */ + void *ptr; +}; + + +union brw_surface_id { + struct { + unsigned face:3; + unsigned zslice:13; + unsigned level:16; + } bits; + unsigned value; +}; + + +struct brw_surface +{ + struct pipe_surface base; + + union brw_surface_id id; + unsigned cpp; + unsigned pitch; + unsigned draw_offset; + unsigned tiling; + + struct brw_surface_state ss; + struct brw_winsys_buffer *bo; + struct brw_surface *next, *prev; +}; + + + +struct brw_texture +{ + struct pipe_texture base; + struct brw_winsys_buffer *bo; + struct brw_surface_state ss; + + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + boolean compressed; + unsigned brw_target; + unsigned pitch; + unsigned tiling; + unsigned cpp; + unsigned total_height; + + struct brw_surface views[2]; +}; + + + +/* + * Cast wrappers + */ +static INLINE struct brw_screen * +brw_screen(struct pipe_screen *pscreen) +{ + return (struct brw_screen *) pscreen; +} + +static INLINE struct brw_transfer * +brw_transfer(struct pipe_transfer *transfer) +{ + return (struct brw_transfer *)transfer; +} + +static INLINE struct brw_surface * 
+brw_surface(struct pipe_surface *surface) +{ + return (struct brw_surface *)surface; +} + +static INLINE struct brw_buffer * +brw_buffer(struct pipe_buffer *buffer) +{ + return (struct brw_buffer *)buffer; +} + +static INLINE struct brw_texture * +brw_texture(struct pipe_texture *texture) +{ + return (struct brw_texture *)texture; +} + + +/* Pipe buffer helpers + */ +static INLINE boolean +brw_buffer_is_user_buffer( const struct pipe_buffer *buf ) +{ + return ((const struct brw_buffer *)buf)->user_buffer != NULL; +} + +unsigned +brw_surface_pitch( const struct pipe_surface *surface ); + +/*********************************************************************** + * Internal functions + */ +GLboolean brw_texture_layout(struct brw_screen *brw_screen, + struct brw_texture *tex ); + +void brw_update_texture( struct brw_screen *brw_screen, + struct brw_texture *tex ); + + +void brw_screen_tex_init( struct brw_screen *brw_screen ); +void brw_screen_tex_surface_init( struct brw_screen *brw_screen ); + +void brw_screen_buffer_init(struct brw_screen *brw_screen); + + +boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen, + struct pipe_texture *texture, + unsigned face, + unsigned level, + struct brw_winsys_buffer *bo ); + +boolean brw_is_buffer_referenced_by_bo( struct brw_screen *brw_screen, + struct pipe_buffer *buffer, + struct brw_winsys_buffer *bo ); + + + +#endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c new file mode 100644 index 00000000000..d8141a3f5b9 --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_buffers.c @@ -0,0 +1,202 @@ + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "brw_screen.h" +#include "brw_winsys.h" + + + +static void * +brw_buffer_map_range( struct pipe_screen *screen, + struct pipe_buffer *buffer, + unsigned offset, + unsigned length, + 
unsigned usage ) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf = brw_buffer( buffer ); + + if (buf->user_buffer) + return buf->user_buffer; + + return sws->bo_map( buf->bo, + BRW_DATA_OTHER, + offset, + length, + (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE, + (usage & PIPE_BUFFER_USAGE_DISCARD) ? TRUE : FALSE, + (usage & PIPE_BUFFER_USAGE_FLUSH_EXPLICIT) ? TRUE : FALSE); +} + +static void * +brw_buffer_map( struct pipe_screen *screen, + struct pipe_buffer *buffer, + unsigned usage ) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf = brw_buffer( buffer ); + + if (buf->user_buffer) + return buf->user_buffer; + + return sws->bo_map( buf->bo, + BRW_DATA_OTHER, + 0, + buf->base.size, + (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE, + FALSE, + FALSE); +} + + +static void +brw_buffer_flush_mapped_range( struct pipe_screen *screen, + struct pipe_buffer *buffer, + unsigned offset, + unsigned length ) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf = brw_buffer( buffer ); + + if (buf->user_buffer) + return; + + sws->bo_flush_range( buf->bo, + offset, + length ); +} + + +static void +brw_buffer_unmap( struct pipe_screen *screen, + struct pipe_buffer *buffer ) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf = brw_buffer( buffer ); + + if (buf->bo) + sws->bo_unmap(buf->bo); +} + +static void +brw_buffer_destroy( struct pipe_buffer *buffer ) +{ + struct brw_buffer *buf = brw_buffer( buffer ); + + assert(!p_atomic_read(&buffer->reference.count)); + + bo_reference(&buf->bo, NULL); + FREE(buf); +} + + +static struct pipe_buffer * +brw_buffer_create(struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct brw_screen 
*bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf; + unsigned buffer_type; + enum pipe_error ret; + + buf = CALLOC_STRUCT(brw_buffer); + if (!buf) + return NULL; + + pipe_reference_init(&buf->base.reference, 1); + buf->base.screen = screen; + buf->base.alignment = alignment; + buf->base.usage = usage; + buf->base.size = size; + + switch (usage & (PIPE_BUFFER_USAGE_VERTEX | + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_PIXEL | + PIPE_BUFFER_USAGE_CONSTANT)) + { + case PIPE_BUFFER_USAGE_VERTEX: + case PIPE_BUFFER_USAGE_INDEX: + case (PIPE_BUFFER_USAGE_VERTEX|PIPE_BUFFER_USAGE_INDEX): + buffer_type = BRW_BUFFER_TYPE_VERTEX; + break; + + case PIPE_BUFFER_USAGE_PIXEL: + buffer_type = BRW_BUFFER_TYPE_PIXEL; + break; + + case PIPE_BUFFER_USAGE_CONSTANT: + buffer_type = BRW_BUFFER_TYPE_SHADER_CONSTANTS; + break; + + default: + buffer_type = BRW_BUFFER_TYPE_GENERIC; + break; + } + + ret = sws->bo_alloc( sws, buffer_type, + size, alignment, + &buf->bo ); + if (ret != PIPE_OK) + return NULL; + + return &buf->base; +} + + +static struct pipe_buffer * +brw_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes) +{ + struct brw_buffer *buf; + + buf = CALLOC_STRUCT(brw_buffer); + if (!buf) + return NULL; + + buf->user_buffer = ptr; + + pipe_reference_init(&buf->base.reference, 1); + buf->base.screen = screen; + buf->base.alignment = 1; + buf->base.usage = 0; + buf->base.size = bytes; + + return &buf->base; +} + + +boolean brw_is_buffer_referenced_by_bo( struct brw_screen *brw_screen, + struct pipe_buffer *buffer, + struct brw_winsys_buffer *bo ) +{ + struct brw_buffer *buf = brw_buffer(buffer); + if (buf->bo == NULL) + return FALSE; + + return brw_screen->sws->bo_references( bo, buf->bo ); +} + + +void brw_screen_buffer_init(struct brw_screen *brw_screen) +{ + brw_screen->base.buffer_create = brw_buffer_create; + brw_screen->base.user_buffer_create = brw_user_buffer_create; + brw_screen->base.buffer_map 
= brw_buffer_map; + brw_screen->base.buffer_map_range = brw_buffer_map_range; + brw_screen->base.buffer_flush_mapped_range = brw_buffer_flush_mapped_range; + brw_screen->base.buffer_unmap = brw_buffer_unmap; + brw_screen->base.buffer_destroy = brw_buffer_destroy; +} diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c new file mode 100644 index 00000000000..e2b9954e596 --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -0,0 +1,262 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_memory.h" +#include "util/u_simple_list.h" +#include "util/u_math.h" + +#include "pipe/p_screen.h" +#include "brw_screen.h" +#include "brw_defines.h" +#include "brw_winsys.h" + +enum { + BRW_VIEW_LINEAR, + BRW_VIEW_IN_PLACE +}; + + +static boolean need_linear_view( struct brw_screen *brw_screen, + struct brw_texture *brw_texture, + union brw_surface_id id, + unsigned usage ) +{ +#if 0 + /* XXX: what about IDGNG? + */ + if (!BRW_IS_G4X(brw->brw_screen->pci_id)) + { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + /* The original gen4 hardware couldn't set up WM surfaces pointing + * at an offset within a tile, which can happen when rendering to + * anything but the base level of a texture or the +X face/0 depth. + * This was fixed with the 4 Series hardware. + * + * For these original chips, you would have to make the depth and + * color destination surfaces include information on the texture + * type, LOD, face, and various limits to use them as a destination. + * + * This is easy in Gallium as surfaces are all backed by + * textures, but there's also a nasty requirement that the depth + * and the color surfaces all be of the same LOD, which is + * harder to get around as we can't look at a surface in + * isolation and decide if it's legal. + * + * Instead, end up being pessimistic and say that for i965, + * ... ?? 
+ */ + if (brw_tex->tiling != I915_TILING_NONE && + (brw_tex_image_offset(brw_tex, face, level, zslize) & 4095)) { + if (BRW_DEBUG & DEBUG_VIEW) + debug_printf("%s: need surface view for non-aligned tex image\n", + __FUNCTION__); + return GL_TRUE; + } + } +#endif + + /* Tiled 3d textures don't have subsets that look like 2d surfaces: + */ + + /* Everything else should be fine to render to in-place: + */ + return GL_FALSE; +} + +/* Look at all texture views and figure out if any of them need to be + * back-copied into the texture for sampling + */ +void brw_update_texture( struct brw_screen *brw_screen, + struct brw_texture *tex ) +{ + /* currently nothing to do */ +} + + +/* Create a new surface with linear layout to serve as a render-target + * where it would be illegal (perhaps due to tiling constraints) to do + * this in-place. + * + * Currently not implmented, not sure if it's needed. + */ +static struct brw_surface *create_linear_view( struct brw_screen *brw_screen, + struct brw_texture *tex, + union brw_surface_id id, + unsigned usage ) +{ + return NULL; +} + + +/* Create a pipe_surface that just points directly into the existing + * texture's storage. 
+ */ +static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen, + struct brw_texture *tex, + union brw_surface_id id, + unsigned usage ) +{ + struct brw_surface *surface; + + surface = CALLOC_STRUCT(brw_surface); + if (surface == NULL) + return NULL; + + pipe_reference_init(&surface->base.reference, 1); + + /* XXX: ignoring render-to-slice-of-3d-texture + */ + assert(id.bits.zslice == 0); + + surface->base.format = tex->base.format; + surface->base.width = u_minify(tex->base.width0, id.bits.level); + surface->base.height = u_minify(tex->base.height0, id.bits.level); + surface->base.offset = tex->image_offset[id.bits.level][id.bits.face]; + surface->base.usage = usage; + surface->base.zslice = id.bits.zslice; + surface->base.face = id.bits.face; + surface->base.level = id.bits.level; + surface->id = id; + surface->cpp = tex->cpp; + surface->pitch = tex->pitch; + surface->tiling = tex->tiling; + + bo_reference( &surface->bo, tex->bo ); + pipe_texture_reference( &surface->base.texture, &tex->base ); + + surface->ss.ss0.surface_format = tex->ss.ss0.surface_format; + surface->ss.ss0.surface_type = BRW_SURFACE_2D; + + if (tex->tiling == BRW_TILING_NONE) { + surface->ss.ss1.base_addr = surface->base.offset; + } else { + uint32_t tile_offset = surface->base.offset % 4096; + + surface->ss.ss1.base_addr = surface->base.offset - tile_offset; + + if (brw_screen->chipset.is_g4x) { + if (tex->tiling == BRW_TILING_X) { + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. 
+ */ + surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4; + surface->ss.ss5.y_offset = tile_offset / 512 / 2; + } else { + surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4; + surface->ss.ss5.y_offset = tile_offset / 128 / 2; + } + } + else { + assert(tile_offset == 0); + } + } + +#if 0 + if (region_bo != NULL) + surface->ss.ss1.base_addr += region_bo->offset; /* reloc */ +#endif + + surface->ss.ss2.width = surface->base.width - 1; + surface->ss.ss2.height = surface->base.height - 1; + surface->ss.ss3.tiled_surface = tex->ss.ss3.tiled_surface; + surface->ss.ss3.tile_walk = tex->ss.ss3.tile_walk; + surface->ss.ss3.pitch = tex->ss.ss3.pitch; + + return surface; +} + +/* Get a surface which is view into a texture + */ +static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, + unsigned zslice, + unsigned usage ) +{ + struct brw_texture *tex = brw_texture(pt); + struct brw_screen *bscreen = brw_screen(screen); + struct brw_surface *surface; + union brw_surface_id id; + int type; + + id.bits.face = face; + id.bits.level = level; + id.bits.zslice = zslice; + + if (need_linear_view(bscreen, tex, id, usage)) + type = BRW_VIEW_LINEAR; + else + type = BRW_VIEW_IN_PLACE; + + + foreach (surface, &tex->views[type]) { + if (id.value == surface->id.value) + return &surface->base; + } + + switch (type) { + case BRW_VIEW_LINEAR: + surface = create_linear_view( bscreen, tex, id, usage ); + break; + case BRW_VIEW_IN_PLACE: + surface = create_in_place_view( bscreen, tex, id, usage ); + break; + default: + return NULL; + } + + insert_at_head( &tex->views[type], surface ); + return &surface->base; +} + + +static void brw_tex_surface_destroy( struct pipe_surface *surf ) +{ + struct brw_surface *surface = brw_surface(surf); + + /* Unreference texture, shared buffer: + */ + remove_from_list(surface); + bo_reference(&surface->bo, NULL); + pipe_texture_reference( &surface->base.texture, 
NULL ); + + + FREE(surface); +} + + +void brw_screen_tex_surface_init( struct brw_screen *brw_screen ) +{ + brw_screen->base.get_tex_surface = brw_get_tex_surface; + brw_screen->base.tex_surface_destroy = brw_tex_surface_destroy; +} diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c new file mode 100644 index 00000000000..894f4bea401 --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_tex_layout.c @@ -0,0 +1,414 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + +#include "pipe/p_format.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "brw_screen.h" +#include "brw_debug.h" +#include "brw_winsys.h" + +/* Code to layout images in a mipmap tree for i965. + */ + +static int +brw_tex_pitch_align (struct brw_texture *tex, + int pitch) +{ + if (!tex->compressed) { + int pitch_align; + + switch (tex->tiling) { + case BRW_TILING_X: + pitch_align = 512; + break; + case BRW_TILING_Y: + pitch_align = 128; + break; + default: + /* XXX: Untiled pitch alignment of 64 bytes for now to allow + * render-to-texture to work in all cases. This should + * probably be replaced at some point by some scheme to only + * do this when really necessary, for example standalone + * render target views. + */ + pitch_align = 64; + break; + } + + pitch = align(pitch * tex->cpp, pitch_align); + pitch /= tex->cpp; + } + + return pitch; +} + + +static void +brw_tex_alignment_unit(enum pipe_format pf, + GLuint *w, GLuint *h) +{ + switch (pf) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: + case PIPE_FORMAT_DXT3_SRGBA: + case PIPE_FORMAT_DXT5_SRGBA: + *w = 4; + *h = 4; + break; + + default: + *w = 4; + *h = 2; + break; + } +} + + +static void +brw_tex_set_level_info(struct brw_texture *tex, + GLuint level, + GLuint nr_images, + GLuint x, GLuint y, + GLuint w, GLuint h, GLuint d) +{ + + if (BRW_DEBUG & DEBUG_TEXTURE) + debug_printf("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + level, w, h, d, x, y, tex->level_offset[level]); + + assert(tex->image_offset[level] == NULL); + assert(nr_images >= 1); + + tex->level_offset[level] = (x + y * tex->pitch) * tex->cpp; + tex->nr_images[level] = nr_images; + + tex->image_offset[level] = MALLOC(nr_images * sizeof(GLuint)); + tex->image_offset[level][0] = 0; +} + + +static 
void +brw_tex_set_image_offset(struct brw_texture *tex, + GLuint level, GLuint img, + GLuint x, GLuint y, + GLuint offset) +{ + assert((x == 0 && y == 0) || img != 0 || level != 0); + assert(img < tex->nr_images[level]); + + if (BRW_DEBUG & DEBUG_TEXTURE) + debug_printf("%s level %d img %d pos %d,%d image_offset %x\n", + __FUNCTION__, level, img, x, y, + tex->image_offset[level][img]); + + tex->image_offset[level][img] = (x + y * tex->pitch) * tex->cpp + offset; +} + + + +static void brw_layout_2d( struct brw_texture *tex ) +{ + GLuint align_h = 2, align_w = 4; + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = tex->base.width0; + GLuint height = tex->base.height0; + + tex->pitch = tex->base.width0; + brw_tex_alignment_unit(tex->base.format, &align_w, &align_h); + + if (tex->compressed) { + tex->pitch = align(tex->base.width0, align_w); + } + + /* May need to adjust pitch to accomodate the placement of + * the 2nd mipmap. This occurs when the alignment + * constraints of mipmap placement push the right edge of the + * 2nd mipmap out past the width of its parent. + */ + if (tex->base.last_level > 0) { + GLuint mip1_width; + + if (tex->compressed) { + mip1_width = (align(u_minify(tex->base.width0, 1), align_w) + + align(u_minify(tex->base.width0, 2), align_w)); + } else { + mip1_width = (align(u_minify(tex->base.width0, 1), align_w) + + u_minify(tex->base.width0, 2)); + } + + if (mip1_width > tex->pitch) { + tex->pitch = mip1_width; + } + } + + /* Pitch must be a whole number of dwords, even though we + * express it in texels. 
+ */ + tex->pitch = brw_tex_pitch_align (tex, tex->pitch); + tex->total_height = 0; + + for ( level = 0 ; level <= tex->base.last_level ; level++ ) { + GLuint img_height; + + brw_tex_set_level_info(tex, level, 1, x, y, width, height, 1); + + if (tex->compressed) + img_height = MAX2(1, height/4); + else + img_height = align(height, align_h); + + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + tex->total_height = MAX2(tex->total_height, y + img_height); + + /* Layout_below: step right after second mipmap. + */ + if (level == 1) { + x += align(width, align_w); + } + else { + y += img_height; + } + + width = u_minify(width, 1); + height = u_minify(height, 1); + } +} + + +static boolean +brw_layout_cubemap_idgng( struct brw_texture *tex ) +{ + GLuint align_h = 2, align_w = 4; + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = tex->base.width0; + GLuint height = tex->base.height0; + GLuint qpitch = 0; + GLuint y_pitch = 0; + + tex->pitch = tex->base.width0; + brw_tex_alignment_unit(tex->base.format, &align_w, &align_h); + y_pitch = align(height, align_h); + + if (tex->compressed) { + tex->pitch = align(tex->base.width0, align_w); + } + + if (tex->base.last_level != 0) { + GLuint mip1_width; + + if (tex->compressed) { + mip1_width = (align(u_minify(tex->base.width0, 1), align_w) + + align(u_minify(tex->base.width0, 2), align_w)); + } else { + mip1_width = (align(u_minify(tex->base.width0, 1), align_w) + + u_minify(tex->base.width0, 2)); + } + + if (mip1_width > tex->pitch) { + tex->pitch = mip1_width; + } + } + + tex->pitch = brw_tex_pitch_align(tex, tex->pitch); + + if (tex->compressed) { + qpitch = ((y_pitch + + align(u_minify(y_pitch, 1), align_h) + + 11 * align_h) / 4) * tex->pitch * tex->cpp; + + tex->total_height = ((y_pitch + + align(u_minify(y_pitch, 1), align_h) + + 11 * align_h) / 4) * 6; + } else { + qpitch = (y_pitch + + align(u_minify(y_pitch, 1), align_h) + + 11 * align_h) * tex->pitch * 
tex->cpp; + + tex->total_height = (y_pitch + + align(u_minify(y_pitch, 1), align_h) + + 11 * align_h) * 6; + } + + for (level = 0; level <= tex->base.last_level; level++) { + GLuint img_height; + GLuint nr_images = 6; + GLuint q = 0; + + brw_tex_set_level_info(tex, level, nr_images, x, y, width, height, 1); + + for (q = 0; q < nr_images; q++) + brw_tex_set_image_offset(tex, level, q, x, y, q * qpitch); + + if (tex->compressed) + img_height = MAX2(1, height/4); + else + img_height = align(height, align_h); + + if (level == 1) { + x += align(width, align_w); + } + else { + y += img_height; + } + + width = u_minify(width, 1); + height = u_minify(height, 1); + } + + return TRUE; +} + + +static boolean +brw_layout_3d_cube( struct brw_texture *tex ) +{ + GLuint width = tex->base.width0; + GLuint height = tex->base.height0; + GLuint depth = tex->base.depth0; + GLuint pack_x_pitch, pack_x_nr; + GLuint pack_y_pitch; + GLuint level; + GLuint align_h = 2; + GLuint align_w = 4; + + tex->total_height = 0; + brw_tex_alignment_unit(tex->base.format, &align_w, &align_h); + + if (tex->compressed) { + tex->pitch = align(width, align_w); + pack_y_pitch = (height + 3) / 4; + } else { + tex->pitch = brw_tex_pitch_align(tex, tex->base.width0); + pack_y_pitch = align(tex->base.height0, align_h); + } + + pack_x_pitch = width; + pack_x_nr = 1; + + for (level = 0 ; level <= tex->base.last_level ; level++) { + GLuint nr_images = tex->base.target == PIPE_TEXTURE_3D ? 
depth : 6; + GLint x = 0; + GLint y = 0; + GLint q, j; + + brw_tex_set_level_info(tex, level, nr_images, + 0, tex->total_height, + width, height, depth); + + for (q = 0; q < nr_images;) { + for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { + brw_tex_set_image_offset(tex, level, q, x, y, 0); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + + tex->total_height += y; + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + + if (tex->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > align(width, align_w)) { + pack_x_pitch = align(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= tex->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = align(pack_y_pitch, align_h); + } + } + } + + /* The 965's sampler lays cachelines out according to how accesses + * in the texture surfaces run, so they may be "vertical" through + * memory. As a result, the docs say in Surface Padding Requirements: + * Sampling Engine Surfaces that two extra rows of padding are required. 
+ */ + if (tex->base.target == PIPE_TEXTURE_CUBE) + tex->total_height += 2; + + return TRUE; +} + + + +GLboolean brw_texture_layout(struct brw_screen *brw_screen, + struct brw_texture *tex ) +{ + switch (tex->base.target) { + case PIPE_TEXTURE_CUBE: + if (brw_screen->chipset.is_igdng) + brw_layout_cubemap_idgng( tex ); + else + brw_layout_3d_cube( tex ); + break; + + case PIPE_TEXTURE_3D: + brw_layout_3d_cube( tex ); + break; + + default: + brw_layout_2d( tex ); + break; + } + + if (BRW_DEBUG & DEBUG_TEXTURE) + debug_printf("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + tex->pitch, + tex->total_height, + tex->cpp, + tex->pitch * tex->total_height * tex->cpp ); + + return GL_TRUE; +} diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c new file mode 100644 index 00000000000..ff999086c02 --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -0,0 +1,572 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_memory.h" +#include "util/u_simple_list.h" + +#include "brw_screen.h" +#include "brw_defines.h" +#include "brw_structs.h" +#include "brw_winsys.h" + + + +static GLuint translate_tex_target( unsigned target ) +{ + switch (target) { + case PIPE_TEXTURE_1D: + return BRW_SURFACE_1D; + + case PIPE_TEXTURE_2D: + return BRW_SURFACE_2D; + + case PIPE_TEXTURE_3D: + return BRW_SURFACE_3D; + + case PIPE_TEXTURE_CUBE: + return BRW_SURFACE_CUBE; + + default: + assert(0); + return BRW_SURFACE_1D; + } +} + + +static GLuint translate_tex_format( enum pipe_format pf ) +{ + switch( pf ) { + case PIPE_FORMAT_L8_UNORM: + return BRW_SURFACEFORMAT_L8_UNORM; + + case PIPE_FORMAT_I8_UNORM: + return BRW_SURFACEFORMAT_I8_UNORM; + + case PIPE_FORMAT_A8_UNORM: + return BRW_SURFACEFORMAT_A8_UNORM; + + case PIPE_FORMAT_L16_UNORM: + return BRW_SURFACEFORMAT_L16_UNORM; + + /* XXX: Add these to gallium + case PIPE_FORMAT_I16_UNORM: + return BRW_SURFACEFORMAT_I16_UNORM; + + case PIPE_FORMAT_A16_UNORM: + return BRW_SURFACEFORMAT_A16_UNORM; + */ + + case PIPE_FORMAT_A8L8_UNORM: + return BRW_SURFACEFORMAT_L8A8_UNORM; + + case PIPE_FORMAT_R5G6B5_UNORM: + return BRW_SURFACEFORMAT_B5G6R5_UNORM; + + case PIPE_FORMAT_A1R5G5B5_UNORM: + return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + + case PIPE_FORMAT_A4R4G4B4_UNORM: + return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + + case PIPE_FORMAT_X8R8G8B8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + + /* + * Video formats + */ + + case 
PIPE_FORMAT_YCBCR_REV: + return BRW_SURFACEFORMAT_YCRCB_NORMAL; + + case PIPE_FORMAT_YCBCR: + return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; + + /* + * Compressed formats. + */ + /* XXX: Add FXT to gallium? + case PIPE_FORMAT_FXT1_RGBA: + return BRW_SURFACEFORMAT_FXT1; + */ + + case PIPE_FORMAT_DXT1_RGB: + return BRW_SURFACEFORMAT_DXT1_RGB; + + case PIPE_FORMAT_DXT1_RGBA: + return BRW_SURFACEFORMAT_BC1_UNORM; + + case PIPE_FORMAT_DXT3_RGBA: + return BRW_SURFACEFORMAT_BC2_UNORM; + + case PIPE_FORMAT_DXT5_RGBA: + return BRW_SURFACEFORMAT_BC3_UNORM; + + /* + * sRGB formats + */ + + case PIPE_FORMAT_R8G8B8A8_SRGB: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; + + case PIPE_FORMAT_A8L8_SRGB: + return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB; + + case PIPE_FORMAT_L8_SRGB: + return BRW_SURFACEFORMAT_L8_UNORM_SRGB; + + case PIPE_FORMAT_DXT1_SRGB: + return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; + + /* + * Depth formats + */ + + case PIPE_FORMAT_Z16_UNORM: + return BRW_SURFACEFORMAT_I16_UNORM; + + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return BRW_SURFACEFORMAT_I24X8_UNORM; + + case PIPE_FORMAT_Z32_FLOAT: + return BRW_SURFACEFORMAT_I32_FLOAT; + + /* XXX: presumably for bump mapping. Add this to mesa state + * tracker? + * + * XXX: Add flipped versions of these formats to Gallium. 
+ */ + case PIPE_FORMAT_R8G8_SNORM: + return BRW_SURFACEFORMAT_R8G8_SNORM; + + case PIPE_FORMAT_R8G8B8A8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + + default: + return BRW_SURFACEFORMAT_INVALID; + } +} + + + + + +static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, + const struct pipe_texture *templ ) + +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_texture *tex; + enum brw_buffer_type buffer_type; + enum pipe_error ret; + + tex = CALLOC_STRUCT(brw_texture); + if (tex == NULL) + return NULL; + + memcpy(&tex->base, templ, sizeof *templ); + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + /* XXX: compressed textures need special treatment here + */ + tex->cpp = pf_get_size(tex->base.format); + tex->compressed = pf_is_compressed(tex->base.format); + + make_empty_list(&tex->views[0]); + make_empty_list(&tex->views[1]); + + /* XXX: No tiling with compressed textures?? + */ + if (tex->compressed == 0 && + !bscreen->no_tiling) + { + if (bscreen->chipset.is_965 && + pf_is_depth_or_stencil(templ->format)) + tex->tiling = BRW_TILING_Y; + else + tex->tiling = BRW_TILING_X; + } + else { + tex->tiling = BRW_TILING_NONE; + } + + + + + if (!brw_texture_layout( bscreen, tex )) + goto fail; + + + if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_PRIMARY)) { + buffer_type = BRW_BUFFER_TYPE_SCANOUT; + } + else { + buffer_type = BRW_BUFFER_TYPE_TEXTURE; + } + + ret = bscreen->sws->bo_alloc( bscreen->sws, + buffer_type, + tex->pitch * tex->total_height * tex->cpp, + 64, + &tex->bo ); + if (ret) + goto fail; + + tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + tex->ss.ss0.surface_type = translate_tex_target(tex->base.target); + tex->ss.ss0.surface_format = translate_tex_format(tex->base.format); + assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID); + + /* This is ok for all textures with channel width 8bit or less: + */ +/* 
tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + + + /* XXX: what happens when tex->bo->offset changes??? + */ + tex->ss.ss1.base_addr = 0; /* reloc */ + tex->ss.ss2.mip_count = tex->base.last_level; + tex->ss.ss2.width = tex->base.width0 - 1; + tex->ss.ss2.height = tex->base.height0 - 1; + + switch (tex->tiling) { + case BRW_TILING_NONE: + tex->ss.ss3.tiled_surface = 0; + tex->ss.ss3.tile_walk = 0; + break; + case BRW_TILING_X: + tex->ss.ss3.tiled_surface = 1; + tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + break; + case BRW_TILING_Y: + tex->ss.ss3.tiled_surface = 1; + tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR; + break; + } + + tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1; + tex->ss.ss3.depth = tex->base.depth0 - 1; + + tex->ss.ss4.min_lod = 0; + + if (tex->base.target == PIPE_TEXTURE_CUBE) { + tex->ss.ss0.cube_pos_x = 1; + tex->ss.ss0.cube_pos_y = 1; + tex->ss.ss0.cube_pos_z = 1; + tex->ss.ss0.cube_neg_x = 1; + tex->ss.ss0.cube_neg_y = 1; + tex->ss.ss0.cube_neg_z = 1; + } + + return &tex->base; + +fail: + bo_reference(&tex->bo, NULL); + FREE(tex); + return NULL; +} + +static struct pipe_texture *brw_texture_blanket(struct pipe_screen *screen, + const struct pipe_texture *templ, + const unsigned *stride, + struct pipe_buffer *buffer) +{ + return NULL; +} + +static void brw_texture_destroy(struct pipe_texture *pt) +{ + struct brw_texture *tex = brw_texture(pt); + bo_reference(&tex->bo, NULL); + FREE(pt); +} + + +static boolean brw_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) +{ + return translate_tex_format(format) != BRW_SURFACEFORMAT_INVALID; +} + + +boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen, + struct pipe_texture *texture, + unsigned face, + unsigned level, + struct brw_winsys_buffer *bo ) +{ + struct brw_texture *tex = brw_texture(texture); + struct brw_surface *surf; + int i; + + /* XXX: this is 
subject to false positives if the underlying + * texture BO is referenced, we can't tell whether the sub-region + * we care about participates in that. + */ + if (brw_screen->sws->bo_references( bo, tex->bo )) + return TRUE; + + /* Find any view on this texture for this face/level and see if it + * is referenced: + */ + for (i = 0; i < 2; i++) { + foreach (surf, &tex->views[i]) { + if (surf->bo == tex->bo) + continue; + + if (surf->id.bits.face != face || + surf->id.bits.level != level) + continue; + + if (brw_screen->sws->bo_references( bo, surf->bo)) + return TRUE; + } + } + + return FALSE; +} + + +/* + * Transfer functions + */ + +static struct pipe_transfer* +brw_get_tex_transfer(struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, unsigned x, unsigned y, + unsigned w, unsigned h) +{ + struct brw_texture *tex = brw_texture(texture); + struct brw_transfer *trans; + unsigned offset; /* in bytes */ + + if (texture->target == PIPE_TEXTURE_CUBE) { + offset = tex->image_offset[level][face]; + } else if (texture->target == PIPE_TEXTURE_3D) { + offset = tex->image_offset[level][zslice]; + } else { + offset = tex->image_offset[level][0]; + assert(face == 0); + assert(zslice == 0); + } + + trans = CALLOC_STRUCT(brw_transfer); + if (trans) { + pipe_texture_reference(&trans->base.texture, texture); + trans->base.format = trans->base.format; + trans->base.x = x; + trans->base.y = y; + trans->base.width = w; + trans->base.height = h; + trans->base.block = texture->block; + trans->base.nblocksx = texture->nblocksx[level]; + trans->base.nblocksy = texture->nblocksy[level]; + trans->base.stride = tex->pitch * tex->cpp; + trans->offset = offset; + trans->base.usage = usage; + } + return &trans->base; +} + +static void * +brw_transfer_map(struct pipe_screen *screen, + struct pipe_transfer *transfer) +{ + struct brw_texture *tex = brw_texture(transfer->texture); + struct brw_winsys_screen 
*sws = brw_screen(screen)->sws; + char *map; + unsigned usage = transfer->usage; + + map = sws->bo_map(tex->bo, + BRW_DATA_OTHER, + 0, + tex->bo->size, + (usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE, + (usage & 0) ? TRUE : FALSE, + (usage & 0) ? TRUE : FALSE); + + if (!map) + return NULL; + + return map + brw_transfer(transfer)->offset + + transfer->y / transfer->block.height * transfer->stride + + transfer->x / transfer->block.width * transfer->block.size; +} + +static void +brw_transfer_unmap(struct pipe_screen *screen, + struct pipe_transfer *transfer) +{ + struct brw_texture *tex = brw_texture(transfer->texture); + struct brw_winsys_screen *sws = brw_screen(screen)->sws; + + sws->bo_unmap(tex->bo); +} + +static void +brw_tex_transfer_destroy(struct pipe_transfer *trans) +{ + pipe_texture_reference(&trans->texture, NULL); + FREE(trans); +} + + +/* + * Functions exported to the winsys + */ + +boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture, + struct brw_winsys_buffer **buffer, + unsigned *stride) +{ + struct brw_texture *tex = brw_texture(texture); + + *buffer = tex->bo; + if (stride) + *stride = tex->pitch * tex->cpp; + + return TRUE; +} + +struct pipe_texture * +brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, + const struct pipe_texture *templ, + unsigned pitch, + unsigned tiling, + struct brw_winsys_buffer *buffer) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_texture *tex; + + if (templ->target != PIPE_TEXTURE_2D || + templ->last_level != 0 || + templ->depth0 != 1) + return NULL; + + if (pf_is_compressed(templ->format)) + return NULL; + + tex = CALLOC_STRUCT(brw_texture); + if (!tex) + return NULL; + + memcpy(&tex->base, templ, sizeof *templ); + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + tex->cpp = pf_get_size(tex->base.format); + tex->tiling = tiling; + + make_empty_list(&tex->views[0]); + make_empty_list(&tex->views[1]); + + if (!brw_texture_layout(bscreen, tex)) + 
goto fail; + + /* XXX Maybe some more checks? */ + if ((pitch / tex->cpp) < tex->pitch) + goto fail; + + tex->pitch = pitch / tex->cpp; + + tex->bo = buffer; + + /* fix this warning */ +#if 0 + if (tex->size > buffer->size) + goto fail; +#endif + + tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + tex->ss.ss0.surface_type = translate_tex_target(tex->base.target); + tex->ss.ss0.surface_format = translate_tex_format(tex->base.format); + assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID); + + /* This is ok for all textures with channel width 8bit or less: + */ +/* tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + + + /* XXX: what happens when tex->bo->offset changes??? + */ + tex->ss.ss1.base_addr = 0; /* reloc */ + tex->ss.ss2.mip_count = tex->base.last_level; + tex->ss.ss2.width = tex->base.width0 - 1; + tex->ss.ss2.height = tex->base.height0 - 1; + + switch (tex->tiling) { + case BRW_TILING_NONE: + tex->ss.ss3.tiled_surface = 0; + tex->ss.ss3.tile_walk = 0; + break; + case BRW_TILING_X: + tex->ss.ss3.tiled_surface = 1; + tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + break; + case BRW_TILING_Y: + tex->ss.ss3.tiled_surface = 1; + tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR; + break; + } + + tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1; + tex->ss.ss3.depth = tex->base.depth0 - 1; + + tex->ss.ss4.min_lod = 0; + + return &tex->base; + +fail: + FREE(tex); + return NULL; +} + +void brw_screen_tex_init( struct brw_screen *brw_screen ) +{ + brw_screen->base.is_format_supported = brw_is_format_supported; + brw_screen->base.texture_create = brw_texture_create; + brw_screen->base.texture_destroy = brw_texture_destroy; + brw_screen->base.texture_blanket = brw_texture_blanket; + brw_screen->base.get_tex_transfer = brw_get_tex_transfer; + brw_screen->base.transfer_map = brw_transfer_map; + brw_screen->base.transfer_unmap = brw_transfer_unmap; + brw_screen->base.tex_transfer_destroy = brw_tex_transfer_destroy; +} diff --git 
a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c new file mode 100644 index 00000000000..e1986a9dbbd --- /dev/null +++ b/src/gallium/drivers/i965/brw_sf.c @@ -0,0 +1,216 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_state.h" + +#include "brw_batchbuffer.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_pipe_rast.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_sf.h" +#include "brw_state.h" + +static enum pipe_error compile_sf_prog( struct brw_context *brw, + struct brw_sf_prog_key *key, + struct brw_winsys_buffer **bo_out ) +{ + enum pipe_error ret; + struct brw_sf_compile c; + const GLuint *program; + GLuint program_size; + + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_compile(brw, &c.func); + + c.key = *key; + c.nr_attrs = c.key.nr_attrs; + c.nr_attr_regs = (c.nr_attrs+1)/2; + c.nr_setup_attrs = c.key.nr_attrs; + c.nr_setup_regs = (c.nr_setup_attrs+1)/2; + + c.prog_data.urb_read_length = c.nr_attr_regs; + c.prog_data.urb_entry_size = c.nr_setup_regs * 2; + + /* Special case when there are no attributes to setup. + * + * XXX: should be able to set nr_setup_attrs to nr_attrs-1 -- but + * breaks vp-tris.c + */ + if (c.nr_attrs - 1 == 0) { + c.nr_verts = 0; + brw_emit_null_setup( &c ); + } + else { + /* Which primitive? Or all three? 
+ */ + switch (key->primitive) { + case SF_TRIANGLES: + c.nr_verts = 3; + brw_emit_tri_setup( &c, GL_TRUE ); + break; + case SF_LINES: + c.nr_verts = 2; + brw_emit_line_setup( &c, GL_TRUE ); + break; + case SF_POINTS: + c.nr_verts = 1; + if (key->do_point_sprite) + brw_emit_point_sprite_setup( &c, GL_TRUE ); + else + brw_emit_point_setup( &c, GL_TRUE ); + break; + case SF_UNFILLED_TRIS: + c.nr_verts = 3; + brw_emit_anyprim_setup( &c ); + break; + default: + assert(0); + return PIPE_ERROR_BAD_INPUT; + } + } + + /* get the program + */ + ret = brw_get_program(&c.func, &program, &program_size); + if (ret) + return ret; + + /* Upload + */ + ret = brw_upload_cache( &brw->cache, BRW_SF_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->sf.prog_data, + bo_out); + if (ret) + return ret; + + return PIPE_OK; +} + +/* Calculate interpolants for triangle and line rasterization. + */ +static enum pipe_error upload_sf_prog(struct brw_context *brw) +{ + const struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature; + struct brw_sf_prog_key key; + enum pipe_error ret; + unsigned i; + + memset(&key, 0, sizeof(key)); + + /* Populate the key, noting state dependencies: + */ + + /* XXX: Add one to account for the position input. + */ + /* PIPE_NEW_FRAGMENT_SIGNATURE */ + key.nr_attrs = sig->nr_inputs + 1; + + + /* XXX: why is position required to be linear? why do we care + * about it at all? + */ + key.linear_attrs = 1; /* position -- but why? 
*/ + + for (i = 0; i < sig->nr_inputs; i++) { + switch (sig->input[i].interp) { + case TGSI_INTERPOLATE_CONSTANT: + break; + case TGSI_INTERPOLATE_LINEAR: + key.linear_attrs |= 1 << (i+1); + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + key.persp_attrs |= 1 << (i+1); + break; + } + } + + /* BRW_NEW_REDUCED_PRIMITIVE */ + switch (brw->reduced_primitive) { + case PIPE_PRIM_TRIANGLES: + /* PIPE_NEW_RAST + */ + if (brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL || + brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL) + key.primitive = SF_UNFILLED_TRIS; + else + key.primitive = SF_TRIANGLES; + break; + case PIPE_PRIM_LINES: + key.primitive = SF_LINES; + break; + case PIPE_PRIM_POINTS: + key.primitive = SF_POINTS; + break; + } + + key.do_point_sprite = brw->curr.rast->templ.point_sprite; + key.sprite_origin_lower_left = 0; /* XXX: ctx->Point.SpriteOrigin - fix rast state */ + key.do_flat_shading = brw->curr.rast->templ.flatshade; + key.do_twoside_color = brw->curr.rast->templ.light_twoside; + + if (key.do_twoside_color) { + key.frontface_ccw = (brw->curr.rast->templ.front_winding == + PIPE_WINDING_CCW); + } + + if (brw_search_cache(&brw->cache, BRW_SF_PROG, + &key, sizeof(key), + NULL, 0, + &brw->sf.prog_data, + &brw->sf.prog_bo)) + return PIPE_OK; + + ret = compile_sf_prog( brw, &key, &brw->sf.prog_bo ); + if (ret) + return ret; + + return PIPE_OK; +} + + +const struct brw_tracked_state brw_sf_prog = { + .dirty = { + .mesa = (PIPE_NEW_RAST | PIPE_NEW_FRAGMENT_SIGNATURE), + .brw = (BRW_NEW_REDUCED_PRIMITIVE), + .cache = 0 + }, + .prepare = upload_sf_prog +}; + diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h new file mode 100644 index 00000000000..a895c7d2f6a --- /dev/null +++ b/src/gallium/drivers/i965/brw_sf.h @@ -0,0 +1,122 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_SF_H +#define BRW_SF_H + + +#include "brw_context.h" +#include "brw_eu.h" + + +#define SF_POINTS 0 +#define SF_LINES 1 +#define SF_TRIANGLES 2 +#define SF_UNFILLED_TRIS 3 + +struct brw_sf_prog_key { + + /* Bitmask of linear and perspective interpolated inputs, 0..nr + */ + GLuint persp_attrs:32; + GLuint linear_attrs:32; + GLuint point_coord_replace_attrs:32; + + GLuint nr_attrs:8; + GLuint primitive:2; + GLuint do_twoside_color:1; + GLuint do_flat_shading:1; + GLuint frontface_ccw:1; + GLuint do_point_sprite:1; + GLuint sprite_origin_lower_left:1; + GLuint pad:17; + + GLuint attr_col0:8; + GLuint attr_col1:8; + GLuint attr_bfc0:8; + GLuint attr_bfc1:8; +}; + +struct brw_sf_point_tex { + GLboolean CoordReplace; +}; + +struct brw_sf_compile { + struct brw_compile func; + struct brw_sf_prog_key key; + struct brw_sf_prog_data prog_data; + + struct brw_reg pv; + struct brw_reg det; + struct brw_reg dx0; + struct brw_reg dx2; + struct brw_reg dy0; + struct brw_reg dy2; + + /* z and 1/w passed in seperately: + */ + struct brw_reg z[3]; + struct brw_reg inv_w[3]; + + /* The vertices: + */ + struct brw_reg vert[3]; + + /* Temporaries, allocated after last vertex reg. 
+ */ + struct brw_reg inv_det; + struct brw_reg a1_sub_a0; + struct brw_reg a2_sub_a0; + struct brw_reg tmp; + + struct brw_reg m1Cx; + struct brw_reg m2Cy; + struct brw_reg m3C0; + + GLuint nr_verts; + GLuint nr_attrs; + GLuint nr_attr_regs; + GLuint nr_setup_attrs; + GLuint nr_setup_regs; + + GLuint point_coord_replace_mask; +}; + + +void brw_emit_null_setup( struct brw_sf_compile *c ); +void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate ); +void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate ); +void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate ); +void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate ); +void brw_emit_anyprim_setup( struct brw_sf_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c new file mode 100644 index 00000000000..3b85725e368 --- /dev/null +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -0,0 +1,765 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_sf.h" + + +static struct brw_reg get_vert_attr(struct brw_sf_compile *c, + struct brw_reg vert, + GLuint attr) +{ + GLuint off = attr / 2; + GLuint sub = attr % 2; + + return brw_vec4_grf(vert.nr + off, sub * 4); +} + + +/*********************************************************************** + * Twoside lighting + */ +static void copy_bfc( struct brw_sf_compile *c, + struct brw_reg vert ) +{ + struct brw_compile *p = &c->func; + + if (c->key.attr_col0 && c->key.attr_bfc0) + brw_MOV(p, + get_vert_attr(c, vert, c->key.attr_col0), + get_vert_attr(c, vert, c->key.attr_bfc0)); + + if (c->key.attr_col1 && c->key.attr_bfc1) + brw_MOV(p, + get_vert_attr(c, vert, c->key.attr_col1), + get_vert_attr(c, vert, c->key.attr_bfc1)); +} + + +static void do_twoside_color( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *if_insn; + GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L; + + /* Already done in clip program: + */ + if (c->key.primitive == SF_UNFILLED_TRIS) + return; + + /* XXX: What happens if BFC isn't present? This could only happen + * for user-supplied vertex programs, as t_vp_build.c always does + * the right thing. 
+ */ + if (!(c->key.attr_col0 && c->key.attr_bfc0) && + !(c->key.attr_col1 && c->key.attr_bfc1)) + return; + + /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order + * to get all channels active inside the IF. In the clipping code + * we run with NoMask, so it's not an option and we can use + * BRW_EXECUTE_1 for all comparisions. + */ + brw_push_insn_state(p); + brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0)); + if_insn = brw_IF(p, BRW_EXECUTE_4); + { + switch (c->nr_verts) { + case 3: copy_bfc(c, c->vert[2]); + case 2: copy_bfc(c, c->vert[1]); + case 1: copy_bfc(c, c->vert[0]); + } + } + brw_ENDIF(p, if_insn); + brw_pop_insn_state(p); +} + + + +/*********************************************************************** + * Flat shading + */ + +#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \ + (1<<VERT_RESULT_COL1)) + +static void copy_colors( struct brw_sf_compile *c, + struct brw_reg dst, + struct brw_reg src) +{ + struct brw_compile *p = &c->func; + + if (c->key.attr_col0) + brw_MOV(p, + get_vert_attr(c, dst, c->key.attr_col0), + get_vert_attr(c, src, c->key.attr_col0)); + + if (c->key.attr_col1) + brw_MOV(p, + get_vert_attr(c, dst, c->key.attr_col1), + get_vert_attr(c, src, c->key.attr_col1)); + +} + + + +/* Need to use a computed jump to copy flatshaded attributes as the + * vertices are ordered according to y-coordinate before reaching this + * point, so the PV could be anywhere. 
+ */ +static void do_flatshade_triangle( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg ip = brw_ip_reg(); + GLuint jmpi = 1; + GLuint nr = 0; + + if (c->key.attr_col0) + nr++; + + if (c->key.attr_col1) + nr++; + + if (nr == 0) + return; + + /* Already done in clip program: + */ + if (c->key.primitive == SF_UNFILLED_TRIS) + return; + + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; + + brw_push_insn_state(p); + + brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1))); + brw_JMPI(p, ip, ip, c->pv); + + copy_colors(c, c->vert[1], c->vert[0]); + copy_colors(c, c->vert[2], c->vert[0]); + brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1))); + + copy_colors(c, c->vert[0], c->vert[1]); + copy_colors(c, c->vert[2], c->vert[1]); + brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2)); + + copy_colors(c, c->vert[0], c->vert[2]); + copy_colors(c, c->vert[1], c->vert[2]); + + brw_pop_insn_state(p); +} + + +static void do_flatshade_line( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg ip = brw_ip_reg(); + GLuint jmpi = 1; + GLuint nr = 0; + + if (c->key.attr_col0) + nr++; + + if (c->key.attr_col1) + nr++; + + if (nr == 0) + return; + + /* Already done in clip program: + */ + if (c->key.primitive == SF_UNFILLED_TRIS) + return; + + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; + + brw_push_insn_state(p); + + brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); + brw_JMPI(p, ip, ip, c->pv); + copy_colors(c, c->vert[1], c->vert[0]); + + brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr)); + copy_colors(c, c->vert[0], c->vert[1]); + + brw_pop_insn_state(p); +} + + + +/*********************************************************************** + * Triangle setup. 
+ */ + + +static void alloc_regs( struct brw_sf_compile *c ) +{ + GLuint reg, i; + + /* Values computed by fixed function unit: + */ + c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D); + c->det = brw_vec1_grf(1, 2); + c->dx0 = brw_vec1_grf(1, 3); + c->dx2 = brw_vec1_grf(1, 4); + c->dy0 = brw_vec1_grf(1, 5); + c->dy2 = brw_vec1_grf(1, 6); + + /* z and 1/w passed in seperately: + */ + c->z[0] = brw_vec1_grf(2, 0); + c->inv_w[0] = brw_vec1_grf(2, 1); + c->z[1] = brw_vec1_grf(2, 2); + c->inv_w[1] = brw_vec1_grf(2, 3); + c->z[2] = brw_vec1_grf(2, 4); + c->inv_w[2] = brw_vec1_grf(2, 5); + + /* The vertices: + */ + reg = 3; + for (i = 0; i < c->nr_verts; i++) { + c->vert[i] = brw_vec8_grf(reg, 0); + reg += c->nr_attr_regs; + } + + /* Temporaries, allocated after last vertex reg. + */ + c->inv_det = brw_vec1_grf(reg, 0); reg++; + c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->tmp = brw_vec8_grf(reg, 0); reg++; + + /* Note grf allocation: + */ + c->prog_data.total_grf = reg; + + + /* Outputs of this program - interpolation coefficients for + * rasterization: + */ + c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); + c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); + c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); +} + + +static void copy_z_inv_w( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + GLuint i; + + brw_push_insn_state(p); + + /* Copy both scalars with a single MOV: + */ + for (i = 0; i < c->nr_verts; i++) + brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); + + brw_pop_insn_state(p); +} + + +static void invert_det( struct brw_sf_compile *c) +{ + /* Looks like we invert all 8 elements just to get 1/det in + * position 2 !?! + */ + brw_math(&c->func, + c->inv_det, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + c->det, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + +} + + +/* Two attributes packed into a wide register. 
Figure out if either + * or both of them need linear/perspective interpolation. Constant + * regs are left as-is. + */ +static GLboolean calculate_masks( struct brw_sf_compile *c, + GLuint reg, + GLushort *pc, + GLushort *pc_persp, + GLushort *pc_linear) +{ + GLboolean is_last_attr = (reg == c->nr_setup_regs - 1); + GLuint persp_mask = c->key.persp_attrs; + GLuint linear_mask = (c->key.persp_attrs | c->key.linear_attrs); + + *pc_persp = 0; + *pc_linear = 0; + *pc = 0xf; + + if (persp_mask & (1 << (reg*2))) + *pc_persp = 0xf; + + if (linear_mask & (1 << (reg*2))) + *pc_linear = 0xf; + + /* Maybe only processs one attribute on the final round: + */ + if (reg*2+1 < c->nr_setup_attrs) { + *pc |= 0xf0; + + if (persp_mask & (1 << (reg*2+1))) + *pc_persp |= 0xf0; + + if (linear_mask & (1 << (reg*2+1))) + *pc_linear |= 0xf0; + } + + return is_last_attr; +} + + +void brw_emit_null_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + + /* m0 is implicitly copied from r0 in the send instruction: + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ + 0, /* allocate */ + 1, /* used */ + 1, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, /* offset */ + BRW_URB_SWIZZLE_TRANSPOSE); +} + +void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate) +{ + struct brw_compile *p = &c->func; + GLuint i; + + c->nr_verts = 3; + + if (allocate) + alloc_regs(c); + + invert_det(c); + copy_z_inv_w(c); + + if (c->key.do_twoside_color) + do_twoside_color(c); + + if (c->key.do_flat_shading) + do_flatshade_triangle(c); + + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + struct brw_reg a2 = offset(c->vert[2], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + 
brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + brw_MUL(p, a2, a2, c->inv_w[2]); + } + + + /* Calculate coefficients for interpolated values: + */ + if (pc_linear) + { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); + + /* calculate dA/dx + */ + brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); + brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + /* calculate dA/dy + */ + brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); + brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in + * the send instruction: + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* offset */ + BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ + } + } +} + + + +void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate) +{ + struct brw_compile *p = &c->func; + GLuint i; + + + c->nr_verts = 2; + + if (allocate) + alloc_regs(c); + + invert_det(c); + copy_z_inv_w(c); + + if (c->key.do_flat_shading) + do_flatshade_line(c); + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + } + + 
/* Calculate coefficients for position, color: + */ + if (pc_linear) { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + +void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) +{ + struct brw_compile *p = &c->func; + GLuint i; + + c->nr_verts = 1; + + if (allocate) + alloc_regs(c); + + copy_z_inv_w(c); + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* XXX: only seems to check point_coord_replace_attrs for every + * second attribute?!? 
+ */ + boolean coord_replace = !!(c->key.point_coord_replace_attrs & (1<<(2*i))); + struct brw_reg a0 = offset(c->vert[0], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + if (coord_replace) { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + } + + if (coord_replace) { + /* Caculate 1.0/PointWidth */ + brw_math(&c->func, + c->tmp, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + c->dx0, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + + if (c->key.sprite_origin_lower_left) { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } + else { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } + } + else { + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); + } + + { + brw_set_predicate_control_flag_value(p, pc); + if (coord_replace) { + if (c->key.sprite_origin_lower_left) { + brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); + brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); + } + else { + brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + } + } + else { + brw_MOV(p, c->m3C0, a0); /* constant value */ + } + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + +/* Points setup - several simplifications as all attributes are + * constant across the face of the point (point sprites excluded!) 
+ */ +void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate) +{ + struct brw_compile *p = &c->func; + GLuint i; + + c->nr_verts = 1; + + if (allocate) + alloc_regs(c); + + copy_z_inv_w(c); + + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ + + for (i = 0; i < c->nr_setup_regs; i++) + { + struct brw_reg a0 = offset(c->vert[0], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + /* This seems odd as the values are all constant, but the + * fragment shader will be expecting it: + */ + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + + + /* The delta values are always zero, just send the starting + * coordinate. Again, this is to fit in with the interpolation + * code in the fragment shader. + */ + { + brw_set_predicate_control_flag_value(p, pc); + + brw_MOV(p, c->m3C0, a0); /* constant value */ + + /* Copy m0..m3 to URB. 
+ */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + +void brw_emit_anyprim_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg ip = brw_ip_reg(); + struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); + struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); + struct brw_reg primmask; + struct brw_instruction *jmp; + struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); + + GLuint saveflag; + + c->nr_verts = 3; + alloc_regs(c); + + primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); + + brw_MOV(p, primmask, brw_imm_ud(1)); + brw_SHL(p, primmask, primmask, payload_prim); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) | + (1<<_3DPRIM_TRISTRIP) | + (1<<_3DPRIM_TRIFAN) | + (1<<_3DPRIM_TRISTRIP_REVERSE) | + (1<<_3DPRIM_POLYGON) | + (1<<_3DPRIM_RECTLIST) | + (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + { + saveflag = p->flag_value; + brw_push_insn_state(p); + brw_emit_tri_setup( c, GL_FALSE ); + brw_pop_insn_state(p); + p->flag_value = saveflag; + /* note - thread killed in subroutine, so must + * restore the flag which is changed when building + * the subroutine. 
fix #13240 + */ + } + brw_land_fwd_jump(p, jmp); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) | + (1<<_3DPRIM_LINESTRIP) | + (1<<_3DPRIM_LINELOOP) | + (1<<_3DPRIM_LINESTRIP_CONT) | + (1<<_3DPRIM_LINESTRIP_BF) | + (1<<_3DPRIM_LINESTRIP_CONT_BF))); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + { + saveflag = p->flag_value; + brw_push_insn_state(p); + brw_emit_line_setup( c, GL_FALSE ); + brw_pop_insn_state(p); + p->flag_value = saveflag; + /* note - thread killed in subroutine */ + } + brw_land_fwd_jump(p, jmp); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + { + saveflag = p->flag_value; + brw_push_insn_state(p); + brw_emit_point_sprite_setup( c, GL_FALSE ); + brw_pop_insn_state(p); + p->flag_value = saveflag; + } + brw_land_fwd_jump(p, jmp); + + brw_emit_point_setup( c, GL_FALSE ); +} + + + + diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c new file mode 100644 index 00000000000..25dc2b52e07 --- /dev/null +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -0,0 +1,333 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_math.h" + +#include "pipe/p_state.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_debug.h" +#include "brw_pipe_rast.h" + +static enum pipe_error upload_sf_vp(struct brw_context *brw) +{ + const struct pipe_viewport_state *vp = &brw->curr.viewport; + const struct pipe_scissor_state *scissor = &brw->curr.scissor; + struct brw_sf_viewport sfv; + enum pipe_error ret; + + memset(&sfv, 0, sizeof(sfv)); + + /* PIPE_NEW_VIEWPORT, PIPE_NEW_SCISSOR */ + + sfv.viewport.m00 = vp->scale[0]; + sfv.viewport.m11 = vp->scale[1]; + sfv.viewport.m22 = vp->scale[2]; + sfv.viewport.m30 = vp->translate[0]; + sfv.viewport.m31 = vp->translate[1]; + sfv.viewport.m32 = vp->translate[2]; + + sfv.scissor.xmin = scissor->minx; + sfv.scissor.xmax = scissor->maxx - 1; /* ? */ + sfv.scissor.ymin = scissor->miny; + sfv.scissor.ymax = scissor->maxy - 1; /* ? 
*/ + + ret = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0, + &brw->sf.vp_bo ); + if (ret) + return ret; + + return PIPE_OK; +} + +const struct brw_tracked_state brw_sf_vp = { + .dirty = { + .mesa = (PIPE_NEW_VIEWPORT | + PIPE_NEW_SCISSOR), + .brw = 0, + .cache = 0 + }, + .prepare = upload_sf_vp +}; + +struct brw_sf_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + unsigned int nr_urb_entries, urb_size, sfsize; + + unsigned scissor:1; + unsigned line_smooth:1; + unsigned point_sprite:1; + unsigned point_attenuated:1; + unsigned front_face:2; + unsigned cull_mode:2; + unsigned flatshade_first:1; + unsigned gl_rasterization_rules:1; + unsigned line_last_pixel_enable:1; + float line_width; + float point_size; +}; + +static void +sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) +{ + const struct pipe_rasterizer_state *rast = &brw->curr.rast->templ; + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_SF_PROG */ + key->total_grf = brw->sf.prog_data->total_grf; + key->urb_entry_read_length = brw->sf.prog_data->urb_read_length; + + /* BRW_NEW_URB_FENCE */ + key->nr_urb_entries = brw->urb.nr_sf_entries; + key->urb_size = brw->urb.vsize; + key->sfsize = brw->urb.sfsize; + + /* PIPE_NEW_RAST */ + key->scissor = rast->scissor; + key->front_face = rast->front_winding; + key->cull_mode = rast->cull_mode; + key->line_smooth = rast->line_smooth; + key->line_width = rast->line_width; + key->flatshade_first = rast->flatshade_first; + key->line_last_pixel_enable = rast->line_last_pixel; + key->gl_rasterization_rules = rast->gl_rasterization_rules; + + key->point_sprite = rast->point_sprite; + key->point_attenuated = rast->point_size_per_vertex; + + key->point_size = CLAMP(rast->point_size, + rast->point_size_min, + rast->point_size_max); +} + +static enum pipe_error +sf_unit_create_from_key(struct brw_context *brw, + struct brw_sf_unit_key *key, + struct brw_winsys_reloc *reloc, + struct brw_winsys_buffer **bo_out) +{ + struct 
brw_sf_unit_state sf; + enum pipe_error ret; + int chipset_max_threads; + memset(&sf, 0, sizeof(sf)); + + sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; + /* reloc */ + sf.thread0.kernel_start_pointer = 0; + + sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + + sf.thread3.dispatch_grf_start_reg = 3; + + if (BRW_IS_IGDNG(brw)) + sf.thread3.urb_entry_read_offset = 3; + else + sf.thread3.urb_entry_read_offset = 1; + + sf.thread3.urb_entry_read_length = key->urb_entry_read_length; + + sf.thread4.nr_urb_entries = key->nr_urb_entries; + sf.thread4.urb_entry_allocation_size = key->sfsize - 1; + + /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or + * 48(IGDNG) threads + */ + if (BRW_IS_IGDNG(brw)) + chipset_max_threads = 48; + else + chipset_max_threads = 24; + + sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1; + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + sf.thread4.max_threads = 0; + + if (BRW_DEBUG & DEBUG_STATS) + sf.thread4.stats_enable = 1; + + /* CACHE_NEW_SF_VP */ + /* reloc */ + sf.sf5.sf_viewport_state_offset = 0; + + sf.sf5.viewport_transform = 1; + + if (key->scissor) + sf.sf6.scissor = 1; + + if (key->front_face == PIPE_WINDING_CCW) + sf.sf5.front_winding = BRW_FRONTWINDING_CCW; + else + sf.sf5.front_winding = BRW_FRONTWINDING_CW; + + switch (key->cull_mode) { + case PIPE_WINDING_CCW: + case PIPE_WINDING_CW: + sf.sf6.cull_mode = (key->front_face == key->cull_mode ? 
+ BRW_CULLMODE_FRONT : + BRW_CULLMODE_BACK); + break; + case PIPE_WINDING_BOTH: + sf.sf6.cull_mode = BRW_CULLMODE_BOTH; + break; + case PIPE_WINDING_NONE: + sf.sf6.cull_mode = BRW_CULLMODE_NONE; + break; + default: + assert(0); + sf.sf6.cull_mode = BRW_CULLMODE_NONE; + break; + } + + /* _NEW_LINE */ + /* XXX use ctx->Const.Min/MaxLineWidth here */ + sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1); + + sf.sf6.line_endcap_aa_region_width = 1; + if (key->line_smooth) + sf.sf6.aa_enable = 1; + else if (sf.sf6.line_width <= 0x2) + sf.sf6.line_width = 0; + + /* XXX: gl_rasterization_rules? something else? + */ + sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; + sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT; + sf.sf6.point_rast_rule = 1; + + /* XXX clamp max depends on AA vs. non-AA */ + + /* _NEW_POINT */ + sf.sf7.sprite_point = key->point_sprite; + sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3); + sf.sf7.use_point_size_state = !key->point_attenuated; + sf.sf7.aa_line_distance_mode = 0; + + /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: + */ + if (!key->flatshade_first) { + sf.sf7.trifan_pv = 2; + sf.sf7.linestrip_pv = 1; + sf.sf7.tristrip_pv = 2; + } else { + sf.sf7.trifan_pv = 1; + sf.sf7.linestrip_pv = 0; + sf.sf7.tristrip_pv = 0; + } + + sf.sf7.line_last_pixel_enable = key->line_last_pixel_enable; + + /* Set bias for OpenGL rasterization rules: + */ + if (key->gl_rasterization_rules) { + sf.sf6.dest_org_vbias = 0x8; + sf.sf6.dest_org_hbias = 0x8; + } + else { + sf.sf6.dest_org_vbias = 0x0; + sf.sf6.dest_org_hbias = 0x0; + } + + ret = brw_upload_cache(&brw->cache, BRW_SF_UNIT, + key, sizeof(*key), + reloc, 2, + &sf, sizeof(sf), + NULL, NULL, + bo_out); + if (ret) + return ret; + + + return PIPE_OK; +} + +static enum pipe_error upload_sf_unit( struct brw_context *brw ) +{ + struct brw_sf_unit_key key; + struct brw_winsys_reloc reloc[2]; + unsigned total_grf; + unsigned viewport_transform; + unsigned 
front_winding; + enum pipe_error ret; + + sf_unit_populate_key(brw, &key); + + /* XXX: cut this crap and pre calculate the key: + */ + total_grf = (align(key.total_grf, 16) / 16 - 1); + viewport_transform = 1; + front_winding = (key.front_face == PIPE_WINDING_CCW ? + BRW_FRONTWINDING_CCW : + BRW_FRONTWINDING_CW); + + /* Emit SF program relocation */ + make_reloc(&reloc[0], + BRW_USAGE_STATE, + total_grf << 1, + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_bo); + + /* Emit SF viewport relocation */ + make_reloc(&reloc[1], + BRW_USAGE_STATE, + front_winding | (viewport_transform << 1), + offsetof(struct brw_sf_unit_state, sf5), + brw->sf.vp_bo); + + + if (brw_search_cache(&brw->cache, BRW_SF_UNIT, + &key, sizeof(key), + reloc, 2, + NULL, + &brw->sf.state_bo)) + return PIPE_OK; + + + ret = sf_unit_create_from_key(brw, &key, + reloc, + &brw->sf.state_bo); + if (ret) + return ret; + + return PIPE_OK; +} + +const struct brw_tracked_state brw_sf_unit = { + .dirty = { + .mesa = (PIPE_NEW_RAST), + .brw = BRW_NEW_URB_FENCE, + .cache = (CACHE_NEW_SF_VP | + CACHE_NEW_SF_PROG) + }, + .prepare = upload_sf_unit, +}; diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h new file mode 100644 index 00000000000..d2bbd0123d1 --- /dev/null +++ b/src/gallium/drivers/i965/brw_state.h @@ -0,0 +1,174 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_STATE_H +#define BRW_STATE_H + +#include "pipe/p_defines.h" +#include "util/u_memory.h" + +#include "brw_context.h" + +static INLINE void +brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo) +{ + assert(brw->state.validated_bo_count < Elements(brw->state.validated_bos)); + + if (bo != NULL) { + bo_reference( &brw->state.validated_bos[brw->state.validated_bo_count++], + bo ); + } +} + +const struct brw_tracked_state brw_blend_constant_color; +const struct brw_tracked_state brw_cc_unit; +const struct brw_tracked_state brw_cc_vp; +const struct brw_tracked_state brw_clip_prog; +const struct brw_tracked_state brw_clip_unit; +const struct brw_tracked_state brw_curbe_buffer; +const struct brw_tracked_state brw_curbe_offsets; +const struct brw_tracked_state brw_invarient_state; +const struct brw_tracked_state brw_gs_prog; +const struct brw_tracked_state brw_gs_unit; +const struct brw_tracked_state brw_line_stipple; +const struct brw_tracked_state brw_aa_line_parameters; +const struct brw_tracked_state brw_pipelined_state_pointers; +const struct brw_tracked_state brw_binding_table_pointers; +const struct brw_tracked_state brw_depthbuffer; +const struct brw_tracked_state brw_polygon_stipple; +const struct brw_tracked_state brw_program_parameters; +const struct brw_tracked_state brw_recalculate_urb_fence; +const struct brw_tracked_state brw_sf_prog; +const struct brw_tracked_state brw_sf_unit; +const struct brw_tracked_state brw_sf_vp; +const struct brw_tracked_state brw_state_base_address; +const struct brw_tracked_state brw_urb_fence; +const struct brw_tracked_state brw_vertex_state; +const struct brw_tracked_state brw_vs_surfaces; +const struct brw_tracked_state brw_vs_prog; +const struct brw_tracked_state brw_vs_unit; +const struct brw_tracked_state brw_wm_input_sizes; +const struct 
brw_tracked_state brw_wm_prog; +const struct brw_tracked_state brw_wm_samplers; +const struct brw_tracked_state brw_wm_constant_surface; +const struct brw_tracked_state brw_wm_surfaces; +const struct brw_tracked_state brw_wm_unit; + +const struct brw_tracked_state brw_psp_urb_cbs; + +const struct brw_tracked_state brw_pipe_control; + +const struct brw_tracked_state brw_drawing_rect; +const struct brw_tracked_state brw_indices; +const struct brw_tracked_state brw_vertices; +const struct brw_tracked_state brw_index_buffer; + + +/*********************************************************************** + * brw_state.c + */ +int brw_validate_state(struct brw_context *brw); +int brw_upload_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct brw_context *brw); + +/*********************************************************************** + * brw_state_cache.c + */ +enum pipe_error brw_cache_data(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + struct brw_winsys_buffer **bo_out ); + +enum pipe_error brw_cache_data_sz(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + GLuint data_size, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + struct brw_winsys_buffer **bo_out); + +enum pipe_error brw_upload_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + const void *data, + GLuint data_sz, + const void *aux, + void *aux_return , + struct brw_winsys_buffer **bo_out); + +boolean brw_search_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + void *aux_return, + struct brw_winsys_buffer **bo_out); + +void brw_state_cache_check_size( struct brw_context *brw ); + +void brw_init_caches( struct brw_context *brw ); 
+void brw_destroy_caches( struct brw_context *brw ); +void brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo); + +/*********************************************************************** + * brw_state_batch.c + */ +#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS) +#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) + +GLboolean brw_cached_batch_struct( struct brw_context *brw, + const void *data, + GLuint sz ); +void brw_destroy_batch_cache( struct brw_context *brw ); +void brw_clear_batch_cache( struct brw_context *brw ); + +/*********************************************************************** + * brw_wm_surface_state.c + */ + +/*********************************************************************** + * brw_state_debug.c + */ +void brw_update_dirty_counts( unsigned mesa, + unsigned brw, + unsigned cache ); + + + +#endif diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c new file mode 100644 index 00000000000..7d212e5c247 --- /dev/null +++ b/src/gallium/drivers/i965/brw_state_batch.c @@ -0,0 +1,98 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + + +#include "brw_state.h" +#include "brw_batchbuffer.h" + + + +/* A facility similar to the data caching code above, which aims to + * prevent identical commands being issued repeatedly. + */ +GLboolean brw_cached_batch_struct( struct brw_context *brw, + const void *data, + GLuint sz ) +{ + struct brw_cached_batch_item *item = brw->cached_batch_items; + struct header *newheader = (struct header *)data; + + if (brw->flags.always_emit_state) { + brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS); + return GL_TRUE; + } + + while (item) { + if (item->header->opcode == newheader->opcode) { + if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) + return GL_FALSE; + if (item->sz != sz) { + FREE(item->header); + item->header = MALLOC(sz); + item->sz = sz; + } + goto emit; + } + item = item->next; + } + + assert(!item); + item = CALLOC_STRUCT(brw_cached_batch_item); + item->header = MALLOC(sz); + item->sz = sz; + item->next = brw->cached_batch_items; + brw->cached_batch_items = item; + + emit: + memcpy(item->header, newheader, sz); + brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS); + return GL_TRUE; +} + +void brw_clear_batch_cache( struct brw_context *brw ) +{ + struct brw_cached_batch_item *item = brw->cached_batch_items; + + while (item) { + struct brw_cached_batch_item *next = item->next; + free((void 
*)item->header); + free(item); + item = next; + } + + brw->cached_batch_items = NULL; +} + +void brw_destroy_batch_cache( struct brw_context *brw ) +{ + brw_clear_batch_cache(brw); +} diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c new file mode 100644 index 00000000000..16b643ceb28 --- /dev/null +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -0,0 +1,617 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +/** @file brw_state_cache.c + * + * This file implements a simple static state cache for 965. 
The consumers + * can query the hash table of state using a cache_id, opaque key data, + * and list of buffers that will be used in relocations, and receive the + * corresponding state buffer object (plus associated auxiliary + * data) in return. + * + * The inner workings are a simple hash table based on a rotate-and-xor hash + * of the key data and the relocation structs. The cache_id is compared on + * lookup as auxiliary key data, but is not part of the hash + * value (this should be fixed, but will likely be fixed instead by making + * consumers use structured keys). + * + * Replacement is not implemented. Instead, when the cache gets too big, at + * a safe point (unlock) we throw out all of the cache data and let it + * regenerate for the next rendering operation. + * + * The reloc structs need to be included as key data, otherwise the + * non-unique values stuffed in the offset in key data through + * brw_cache_data() may result in a successful probe for state buffers + * even when the buffer being referenced doesn't match. The result would be + * that the same state cache entry is used twice for different buffers, + * only one of the two buffers referenced gets put into the offset, and the + * incorrect program is run for the other instance. 
+ */ +#include "util/u_memory.h" + +#include "brw_debug.h" +#include "brw_state.h" +#include "brw_batchbuffer.h" + +/* XXX: Fixme - have to include these to get the sizes of the prog_key + * structs: + */ +#include "brw_wm.h" +#include "brw_vs.h" +#include "brw_clip.h" +#include "brw_sf.h" +#include "brw_gs.h" + + +static GLuint +hash_key(const void *key, GLuint key_size, + struct brw_winsys_reloc *relocs, GLuint nr_relocs) +{ + GLuint *ikey = (GLuint *)key; + GLuint hash = 0, i; + + assert(key_size % 4 == 0); + + /* I'm sure this can be improved on: + */ + for (i = 0; i < key_size/4; i++) { + hash ^= ikey[i]; + hash = (hash << 5) | (hash >> 27); + } + + /* Include the BO pointers as key data as well */ + ikey = (GLuint *)relocs; + key_size = nr_relocs * sizeof(struct brw_winsys_reloc); + for (i = 0; i < key_size/4; i++) { + hash ^= ikey[i]; + hash = (hash << 5) | (hash >> 27); + } + + return hash; +} + + +/** + * Marks a new buffer as being chosen for the given cache id. + */ +static void +update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, + struct brw_winsys_buffer *bo) +{ + if (bo == cache->last_bo[cache_id]) + return; /* no change */ + + bo_reference( &cache->last_bo[cache_id], bo ); + + cache->brw->state.dirty.cache |= 1 << cache_id; +} + + +static struct brw_cache_item * +search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, + GLuint hash, const void *key, GLuint key_size, + struct brw_winsys_reloc *relocs, GLuint nr_relocs) +{ + struct brw_cache_item *c; + +#if 0 + int bucketcount = 0; + + for (c = cache->items[hash % cache->size]; c; c = c->next) + bucketcount++; + + debug_printf("bucket %d/%d = %d/%d items\n", hash % cache->size, + cache->size, bucketcount, cache->n_items); +#endif + + for (c = cache->items[hash % cache->size]; c; c = c->next) { + if (c->cache_id == cache_id && + c->hash == hash && + c->key_size == key_size && + memcmp(c->key, key, key_size) == 0 && + c->nr_relocs == nr_relocs && + memcmp(c->relocs, relocs, 
nr_relocs * sizeof *relocs) == 0) + return c; + } + + return NULL; +} + + +static void +rehash(struct brw_cache *cache) +{ + struct brw_cache_item **items; + struct brw_cache_item *c, *next; + GLuint size, i; + + size = cache->size * 3; + items = (struct brw_cache_item**) CALLOC(size, sizeof(*items)); + + for (i = 0; i < cache->size; i++) + for (c = cache->items[i]; c; c = next) { + next = c->next; + c->next = items[c->hash % size]; + items[c->hash % size] = c; + } + + FREE(cache->items); + cache->items = items; + cache->size = size; +} + + +/** + * Returns the buffer object matching cache_id and key, or NULL. + */ +boolean +brw_search_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + void *aux_return, + struct brw_winsys_buffer **bo_out) +{ + struct brw_cache_item *item; + GLuint hash = hash_key(key, key_size, relocs, nr_relocs); + + item = search_cache(cache, cache_id, hash, key, key_size, + relocs, nr_relocs); + + if (item) { + if (aux_return) + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + + update_cache_last(cache, cache_id, item->bo); + bo_reference(bo_out, item->bo); + return TRUE; + } + + return FALSE; +} + + +enum pipe_error +brw_upload_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + const void *data, + GLuint data_size, + const void *aux, + void *aux_return, + struct brw_winsys_buffer **bo_out) +{ + struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); + GLuint hash = hash_key(key, key_size, relocs, nr_relocs); + GLuint relocs_size = nr_relocs * sizeof relocs[0]; + GLuint aux_size = cache->aux_size[cache_id]; + enum pipe_error ret; + void *tmp; + int i; + + /* Create the buffer object to contain the data. For now, use a + * single buffer type to describe all cached state atoms. 
Later, + * may want to take advantage of hardware distinctions between + * these various entities. + */ + ret = cache->sws->bo_alloc(cache->sws, + cache->buffer_type, + data_size, 1 << 6, + bo_out); + if (ret) + return ret; + + + /* Set up the memory containing the key, aux_data, and relocs */ + tmp = MALLOC(key_size + aux_size + relocs_size); + + memcpy(tmp, key, key_size); + memcpy((char *)tmp + key_size, aux, cache->aux_size[cache_id]); + memcpy((char *)tmp + key_size + aux_size, relocs, relocs_size); + for (i = 0; i < nr_relocs; i++) { + p_atomic_inc(&relocs[i].bo->reference.count); + } + + item->cache_id = cache_id; + item->key = tmp; + item->hash = hash; + item->key_size = key_size; + item->relocs = (struct brw_winsys_reloc *)((char *)tmp + key_size + aux_size); + item->nr_relocs = nr_relocs; + bo_reference( &item->bo, *bo_out ); + item->data_size = data_size; + + if (cache->n_items > cache->size * 1.5) + rehash(cache); + + hash %= cache->size; + item->next = cache->items[hash]; + cache->items[hash] = item; + cache->n_items++; + + if (aux_return) { + assert(cache->aux_size[cache_id]); + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + } + + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("upload %s: %d bytes to cache id %d\n", + cache->name[cache_id], + data_size, cache_id); + + /* Copy data to the buffer */ + ret = cache->sws->bo_subdata(item->bo, + cache_id, + 0, data_size, data, + relocs, nr_relocs); + if (ret) + return ret; + + update_cache_last(cache, cache_id, item->bo); + + return PIPE_OK; +} + + +/** + * This doesn't really work with aux data. 
Use search/upload instead
 *
 * The data bytes themselves serve as the cache key: on a hit the cached
 * buffer is handed back through bo_out, on a miss the data is uploaded
 * with brw_upload_cache() using data/data_size as both key and contents
 * and no aux block.
 */
enum pipe_error
brw_cache_data_sz(struct brw_cache *cache,
                  enum brw_cache_id cache_id,
                  const void *data,
                  GLuint data_size,
                  struct brw_winsys_reloc *relocs,
                  GLuint nr_relocs,
                  struct brw_winsys_buffer **bo_out)
{
   GLuint hash = hash_key(data, data_size, relocs, nr_relocs);
   struct brw_cache_item *hit = search_cache(cache, cache_id, hash,
                                             data, data_size,
                                             relocs, nr_relocs);

   if (!hit) {
      /* Miss: create a new entry keyed on the data itself. */
      return brw_upload_cache(cache, cache_id,
                              data, data_size,
                              relocs, nr_relocs,
                              data, data_size,
                              NULL, NULL,
                              bo_out);
   }

   /* Hit: record the choice and hand the cached buffer to the caller. */
   update_cache_last(cache, cache_id, hit->bo);
   bo_reference(bo_out, hit->bo);

   return PIPE_OK;
}


/**
 * Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
 *
 * If nr_relocs is nonzero, brw_search_cache()/brw_upload_cache() would be
 * better to use, as the potentially changing offsets in the data-used-as-key
 * will result in excessive cache misses.
 *
 * XXX: above is no longer true -- can we remove some code?
+ */ +enum pipe_error +brw_cache_data(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, + struct brw_winsys_buffer **bo_out) +{ + return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id], + relocs, nr_relocs, bo_out); +} + + +static void +brw_init_cache_id(struct brw_cache *cache, + const char *name, + enum brw_cache_id id, + GLuint key_size, + GLuint aux_size) +{ + cache->name[id] = strdup(name); + cache->key_size[id] = key_size; + cache->aux_size[id] = aux_size; +} + + +static void +brw_init_general_state_cache(struct brw_context *brw) +{ + struct brw_cache *cache = &brw->cache; + + cache->brw = brw; + cache->sws = brw->sws; + + cache->buffer_type = BRW_BUFFER_TYPE_GENERAL_STATE; + + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + CALLOC(cache->size, sizeof(struct brw_cache_item)); + + brw_init_cache_id(cache, + "CC_VP", + BRW_CC_VP, + sizeof(struct brw_cc_viewport), + 0); + + brw_init_cache_id(cache, + "CC_UNIT", + BRW_CC_UNIT, + sizeof(struct brw_cc_unit_state), + 0); + + brw_init_cache_id(cache, + "WM_PROG", + BRW_WM_PROG, + sizeof(struct brw_wm_prog_key), + sizeof(struct brw_wm_prog_data)); + + brw_init_cache_id(cache, + "SAMPLER_DEFAULT_COLOR", + BRW_SAMPLER_DEFAULT_COLOR, + sizeof(struct brw_sampler_default_color), + 0); + + brw_init_cache_id(cache, + "SAMPLER", + BRW_SAMPLER, + 0, /* variable key/data size */ + 0); + + brw_init_cache_id(cache, + "WM_UNIT", + BRW_WM_UNIT, + sizeof(struct brw_wm_unit_state), + 0); + + brw_init_cache_id(cache, + "SF_PROG", + BRW_SF_PROG, + sizeof(struct brw_sf_prog_key), + sizeof(struct brw_sf_prog_data)); + + brw_init_cache_id(cache, + "SF_VP", + BRW_SF_VP, + sizeof(struct brw_sf_viewport), + 0); + + brw_init_cache_id(cache, + "SF_UNIT", + BRW_SF_UNIT, + sizeof(struct brw_sf_unit_state), + 0); + + brw_init_cache_id(cache, + "VS_UNIT", + BRW_VS_UNIT, + sizeof(struct brw_vs_unit_state), + 0); + 
+ brw_init_cache_id(cache, + "VS_PROG", + BRW_VS_PROG, + sizeof(struct brw_vs_prog_key), + sizeof(struct brw_vs_prog_data)); + + brw_init_cache_id(cache, + "CLIP_UNIT", + BRW_CLIP_UNIT, + sizeof(struct brw_clip_unit_state), + 0); + + brw_init_cache_id(cache, + "CLIP_PROG", + BRW_CLIP_PROG, + sizeof(struct brw_clip_prog_key), + sizeof(struct brw_clip_prog_data)); + + brw_init_cache_id(cache, + "GS_UNIT", + BRW_GS_UNIT, + sizeof(struct brw_gs_unit_state), + 0); + + brw_init_cache_id(cache, + "GS_PROG", + BRW_GS_PROG, + sizeof(struct brw_gs_prog_key), + sizeof(struct brw_gs_prog_data)); +} + + +static void +brw_init_surface_state_cache(struct brw_context *brw) +{ + struct brw_cache *cache = &brw->surface_cache; + + cache->brw = brw; + cache->sws = brw->sws; + + cache->buffer_type = BRW_BUFFER_TYPE_SURFACE_STATE; + + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + CALLOC(cache->size, sizeof(struct brw_cache_item)); + + brw_init_cache_id(cache, + "SS_SURFACE", + BRW_SS_SURFACE, + sizeof(struct brw_surface_state), + 0); + + brw_init_cache_id(cache, + "SS_SURF_BIND", + BRW_SS_SURF_BIND, + 0, + 0); +} + + +void +brw_init_caches(struct brw_context *brw) +{ + brw_init_general_state_cache(brw); + brw_init_surface_state_cache(brw); +} + + +static void +brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) +{ + struct brw_cache_item *c, *next; + GLuint i; + + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); + + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { + int j; + + next = c->next; + + for (j = 0; j < c->nr_relocs; j++) + bo_reference(&c->relocs[j].bo, NULL); + + bo_reference(&c->bo, NULL); + FREE((void *)c->key); + FREE(c); + } + cache->items[i] = NULL; + } + + cache->n_items = 0; + + if (brw->curbe.last_buf) { + FREE(brw->curbe.last_buf); + brw->curbe.last_buf = NULL; + } + + brw->state.dirty.mesa |= ~0; + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; +} + +/* 
Clear all entries from the cache that point to the given bo. + * + * This lets us release memory for reuse earlier for known-dead buffers, + * at the cost of walking the entire hash table. + */ +void +brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo) +{ + struct brw_cache_item **prev; + GLuint i; + + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); + + for (i = 0; i < cache->size; i++) { + for (prev = &cache->items[i]; *prev;) { + struct brw_cache_item *c = *prev; + + if (cache->sws->bo_references(c->bo, bo)) { + int j; + + *prev = c->next; + + for (j = 0; j < c->nr_relocs; j++) + bo_reference(&c->relocs[j].bo, NULL); + + bo_reference(&c->bo, NULL); + + FREE((void *)c->key); + FREE(c); + cache->n_items--; + } else { + prev = &c->next; + } + } + } +} + +void +brw_state_cache_check_size(struct brw_context *brw) +{ + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); + + /* un-tuned guess. We've got around 20 state objects for a total of around + * 32k, so 1000 of them is around 1.5MB. 
+ */ + if (brw->cache.n_items > 1000) + brw_clear_cache(brw, &brw->cache); + + if (brw->surface_cache.n_items > 1000) + brw_clear_cache(brw, &brw->surface_cache); +} + + +static void +brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) +{ + GLuint i; + + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); + + brw_clear_cache(brw, cache); + for (i = 0; i < BRW_MAX_CACHE; i++) { + bo_reference(&cache->last_bo[i], NULL); + FREE(cache->name[i]); + } + FREE(cache->items); + cache->items = NULL; + cache->size = 0; +} + + +void +brw_destroy_caches(struct brw_context *brw) +{ + brw_destroy_cache(brw, &brw->cache); + brw_destroy_cache(brw, &brw->surface_cache); +} diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c new file mode 100644 index 00000000000..049c278c93e --- /dev/null +++ b/src/gallium/drivers/i965/brw_state_debug.c @@ -0,0 +1,153 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + + + +#include "brw_context.h" +#include "brw_state.h" + + +struct dirty_bit_map { + uint32_t bit; + char *name; + uint32_t count; +}; + +#define DEFINE_BIT(name) {name, #name, 0} + +static struct dirty_bit_map mesa_bits[] = { + DEFINE_BIT(PIPE_NEW_DEPTH_STENCIL_ALPHA), + DEFINE_BIT(PIPE_NEW_RAST), + DEFINE_BIT(PIPE_NEW_BLEND), + DEFINE_BIT(PIPE_NEW_VIEWPORT), + DEFINE_BIT(PIPE_NEW_SAMPLERS), + DEFINE_BIT(PIPE_NEW_VERTEX_BUFFER), + DEFINE_BIT(PIPE_NEW_VERTEX_ELEMENT), + DEFINE_BIT(PIPE_NEW_FRAGMENT_SHADER), + DEFINE_BIT(PIPE_NEW_VERTEX_SHADER), + DEFINE_BIT(PIPE_NEW_FRAGMENT_CONSTANTS), + DEFINE_BIT(PIPE_NEW_VERTEX_CONSTANTS), + DEFINE_BIT(PIPE_NEW_CLIP), + DEFINE_BIT(PIPE_NEW_INDEX_BUFFER), + DEFINE_BIT(PIPE_NEW_INDEX_RANGE), + DEFINE_BIT(PIPE_NEW_BLEND_COLOR), + DEFINE_BIT(PIPE_NEW_POLYGON_STIPPLE), + DEFINE_BIT(PIPE_NEW_FRAMEBUFFER_DIMENSIONS), + DEFINE_BIT(PIPE_NEW_DEPTH_BUFFER), + DEFINE_BIT(PIPE_NEW_COLOR_BUFFERS), + DEFINE_BIT(PIPE_NEW_QUERY), + DEFINE_BIT(PIPE_NEW_SCISSOR), + DEFINE_BIT(PIPE_NEW_BOUND_TEXTURES), + DEFINE_BIT(PIPE_NEW_NR_CBUFS), + {0, 0, 0} +}; + +static struct dirty_bit_map brw_bits[] = { + DEFINE_BIT(BRW_NEW_URB_FENCE), + DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM), + DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM), + DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS), + DEFINE_BIT(BRW_NEW_CURBE_OFFSETS), + DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE), + DEFINE_BIT(BRW_NEW_PRIMITIVE), + DEFINE_BIT(BRW_NEW_CONTEXT), + DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), + DEFINE_BIT(BRW_NEW_PSP), + DEFINE_BIT(BRW_NEW_WM_SURFACES), + DEFINE_BIT(BRW_NEW_xxx), + DEFINE_BIT(BRW_NEW_INDICES), + {0, 0, 0} +}; + +static struct dirty_bit_map 
cache_bits[] = { + DEFINE_BIT(CACHE_NEW_CC_VP), + DEFINE_BIT(CACHE_NEW_CC_UNIT), + DEFINE_BIT(CACHE_NEW_WM_PROG), + DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR), + DEFINE_BIT(CACHE_NEW_SAMPLER), + DEFINE_BIT(CACHE_NEW_WM_UNIT), + DEFINE_BIT(CACHE_NEW_SF_PROG), + DEFINE_BIT(CACHE_NEW_SF_VP), + DEFINE_BIT(CACHE_NEW_SF_UNIT), + DEFINE_BIT(CACHE_NEW_VS_UNIT), + DEFINE_BIT(CACHE_NEW_VS_PROG), + DEFINE_BIT(CACHE_NEW_GS_UNIT), + DEFINE_BIT(CACHE_NEW_GS_PROG), |