summaryrefslogtreecommitdiff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/cell/common.h119
-rw-r--r--src/gallium/drivers/cell/ppu/Makefile1
-rw-r--r--src/gallium/drivers/cell/ppu/cell_batch.c58
-rw-r--r--src/gallium/drivers/cell/ppu/cell_clear.c13
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.c38
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.h53
-rw-r--r--src/gallium/drivers/cell/ppu/cell_fence.c168
-rw-r--r--src/gallium/drivers/cell/ppu/cell_fence.h57
-rw-r--r--src/gallium/drivers/cell/ppu/cell_flush.c2
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.c1772
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.c1541
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.h2
-rw-r--r--src/gallium/drivers/cell/ppu/cell_pipe_state.c101
-rw-r--r--src/gallium/drivers/cell/ppu/cell_render.c1
-rw-r--r--src/gallium/drivers/cell/ppu/cell_screen.c18
-rw-r--r--src/gallium/drivers/cell/ppu/cell_spu.c46
-rw-r--r--src/gallium/drivers/cell/ppu/cell_spu.h13
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state.h5
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c240
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_shader.c7
-rw-r--r--src/gallium/drivers/cell/ppu/cell_surface.c1
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.c463
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.h18
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vbuf.c6
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vertex_fetch.c33
-rw-r--r--src/gallium/drivers/cell/spu/.gitignore1
-rw-r--r--src/gallium/drivers/cell/spu/Makefile4
-rw-r--r--src/gallium/drivers/cell/spu/spu_colorpack.h49
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.c815
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.h35
-rw-r--r--src/gallium/drivers/cell/spu/spu_dcache.c4
-rw-r--r--src/gallium/drivers/cell/spu/spu_funcs.c173
-rw-r--r--src/gallium/drivers/cell/spu/spu_funcs.h35
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c626
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h87
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.c286
-rw-r--r--src/gallium/drivers/cell/spu/spu_render.c38
-rw-r--r--src/gallium/drivers/cell/spu/spu_texture.c635
-rw-r--r--src/gallium/drivers/cell/spu/spu_texture.h28
-rw-r--r--src/gallium/drivers/cell/spu/spu_tile.c37
-rw-r--r--src/gallium/drivers/cell/spu/spu_tile.h6
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c573
-rw-r--r--src/gallium/drivers/i915simple/i915_debug.c5
-rw-r--r--src/gallium/drivers/i915simple/i915_fpc_translate.c2
-rw-r--r--src/gallium/drivers/i915simple/i915_texture.c7
-rw-r--r--src/gallium/drivers/nouveau/nouveau_bo.h53
-rw-r--r--src/gallium/drivers/nouveau/nouveau_channel.h40
-rw-r--r--src/gallium/drivers/nouveau/nouveau_class.h8006
-rw-r--r--src/gallium/drivers/nouveau/nouveau_device.h30
-rw-r--r--src/gallium/drivers/nouveau/nouveau_gldefs.h196
-rw-r--r--src/gallium/drivers/nouveau/nouveau_grobj.h35
-rw-r--r--src/gallium/drivers/nouveau/nouveau_notifier.h43
-rw-r--r--src/gallium/drivers/nouveau/nouveau_push.h82
-rw-r--r--src/gallium/drivers/nouveau/nouveau_pushbuf.h32
-rw-r--r--src/gallium/drivers/nouveau/nouveau_resource.h37
-rw-r--r--src/gallium/drivers/nouveau/nouveau_stateobj.h158
-rw-r--r--src/gallium/drivers/nouveau/nouveau_util.h91
-rw-r--r--src/gallium/drivers/nouveau/nouveau_winsys.h98
-rw-r--r--src/gallium/drivers/nv04/Makefile28
-rw-r--r--src/gallium/drivers/nv04/nv04_clear.c12
-rw-r--r--src/gallium/drivers/nv04/nv04_context.c106
-rw-r--r--src/gallium/drivers/nv04/nv04_context.h146
-rw-r--r--src/gallium/drivers/nv04/nv04_fragprog.c21
-rw-r--r--src/gallium/drivers/nv04/nv04_fragtex.c73
-rw-r--r--src/gallium/drivers/nv04/nv04_miptree.c138
-rw-r--r--src/gallium/drivers/nv04/nv04_prim_vbuf.c309
-rw-r--r--src/gallium/drivers/nv04/nv04_screen.c212
-rw-r--r--src/gallium/drivers/nv04/nv04_screen.h25
-rw-r--r--src/gallium/drivers/nv04/nv04_state.c495
-rw-r--r--src/gallium/drivers/nv04/nv04_state.h71
-rw-r--r--src/gallium/drivers/nv04/nv04_state_emit.c156
-rw-r--r--src/gallium/drivers/nv04/nv04_surface.c64
-rw-r--r--src/gallium/drivers/nv04/nv04_vbo.c71
-rw-r--r--src/gallium/drivers/nv10/Makefile28
-rw-r--r--src/gallium/drivers/nv10/nv10_clear.c12
-rw-r--r--src/gallium/drivers/nv10/nv10_context.c296
-rw-r--r--src/gallium/drivers/nv10/nv10_context.h153
-rw-r--r--src/gallium/drivers/nv10/nv10_fragprog.c21
-rw-r--r--src/gallium/drivers/nv10/nv10_fragtex.c124
-rw-r--r--src/gallium/drivers/nv10/nv10_miptree.c149
-rw-r--r--src/gallium/drivers/nv10/nv10_prim_vbuf.c245
-rw-r--r--src/gallium/drivers/nv10/nv10_screen.c208
-rw-r--r--src/gallium/drivers/nv10/nv10_screen.h22
-rw-r--r--src/gallium/drivers/nv10/nv10_state.c588
-rw-r--r--src/gallium/drivers/nv10/nv10_state.h139
-rw-r--r--src/gallium/drivers/nv10/nv10_state_emit.c303
-rw-r--r--src/gallium/drivers/nv10/nv10_surface.c64
-rw-r--r--src/gallium/drivers/nv10/nv10_vbo.c77
-rw-r--r--src/gallium/drivers/nv20/Makefile29
-rw-r--r--src/gallium/drivers/nv20/nv20_clear.c12
-rw-r--r--src/gallium/drivers/nv20/nv20_context.c419
-rw-r--r--src/gallium/drivers/nv20/nv20_context.h153
-rw-r--r--src/gallium/drivers/nv20/nv20_fragprog.c21
-rw-r--r--src/gallium/drivers/nv20/nv20_fragtex.c124
-rw-r--r--src/gallium/drivers/nv20/nv20_miptree.c156
-rw-r--r--src/gallium/drivers/nv20/nv20_prim_vbuf.c402
-rw-r--r--src/gallium/drivers/nv20/nv20_screen.c204
-rw-r--r--src/gallium/drivers/nv20/nv20_screen.h22
-rw-r--r--src/gallium/drivers/nv20/nv20_state.c581
-rw-r--r--src/gallium/drivers/nv20/nv20_state.h139
-rw-r--r--src/gallium/drivers/nv20/nv20_state_emit.c359
-rw-r--r--src/gallium/drivers/nv20/nv20_surface.c64
-rw-r--r--src/gallium/drivers/nv20/nv20_vbo.c77
-rw-r--r--src/gallium/drivers/nv20/nv20_vertprog.c838
-rw-r--r--src/gallium/drivers/nv30/Makefile37
-rw-r--r--src/gallium/drivers/nv30/nv30_clear.c13
-rw-r--r--src/gallium/drivers/nv30/nv30_context.c72
-rw-r--r--src/gallium/drivers/nv30/nv30_context.h212
-rw-r--r--src/gallium/drivers/nv30/nv30_draw.c61
-rw-r--r--src/gallium/drivers/nv30/nv30_fragprog.c911
-rw-r--r--src/gallium/drivers/nv30/nv30_fragtex.c163
-rw-r--r--src/gallium/drivers/nv30/nv30_miptree.c195
-rw-r--r--src/gallium/drivers/nv30/nv30_query.c122
-rw-r--r--src/gallium/drivers/nv30/nv30_screen.c383
-rw-r--r--src/gallium/drivers/nv30/nv30_screen.h35
-rw-r--r--src/gallium/drivers/nv30/nv30_shader.h490
-rw-r--r--src/gallium/drivers/nv30/nv30_state.c724
-rw-r--r--src/gallium/drivers/nv30/nv30_state.h88
-rw-r--r--src/gallium/drivers/nv30/nv30_state_blend.c40
-rw-r--r--src/gallium/drivers/nv30/nv30_state_emit.c118
-rw-r--r--src/gallium/drivers/nv30/nv30_state_fb.c140
-rw-r--r--src/gallium/drivers/nv30/nv30_state_rasterizer.c17
-rw-r--r--src/gallium/drivers/nv30/nv30_state_scissor.c35
-rw-r--r--src/gallium/drivers/nv30/nv30_state_stipple.c39
-rw-r--r--src/gallium/drivers/nv30/nv30_state_viewport.c70
-rw-r--r--src/gallium/drivers/nv30/nv30_state_zsa.c17
-rw-r--r--src/gallium/drivers/nv30/nv30_surface.c77
-rw-r--r--src/gallium/drivers/nv30/nv30_vbo.c556
-rw-r--r--src/gallium/drivers/nv30/nv30_vertprog.c838
-rw-r--r--src/gallium/drivers/nv40/Makefile37
-rw-r--r--src/gallium/drivers/nv40/nv40_clear.c13
-rw-r--r--src/gallium/drivers/nv40/nv40_context.c72
-rw-r--r--src/gallium/drivers/nv40/nv40_context.h233
-rw-r--r--src/gallium/drivers/nv40/nv40_draw.c349
-rw-r--r--src/gallium/drivers/nv40/nv40_fragprog.c991
-rw-r--r--src/gallium/drivers/nv40/nv40_fragtex.c168
-rw-r--r--src/gallium/drivers/nv40/nv40_miptree.c194
-rw-r--r--src/gallium/drivers/nv40/nv40_query.c122
-rw-r--r--src/gallium/drivers/nv40/nv40_screen.c365
-rw-r--r--src/gallium/drivers/nv40/nv40_screen.h35
-rw-r--r--src/gallium/drivers/nv40/nv40_shader.h556
-rw-r--r--src/gallium/drivers/nv40/nv40_state.c740
-rw-r--r--src/gallium/drivers/nv40/nv40_state.h91
-rw-r--r--src/gallium/drivers/nv40/nv40_state_blend.c40
-rw-r--r--src/gallium/drivers/nv40/nv40_state_emit.c184
-rw-r--r--src/gallium/drivers/nv40/nv40_state_fb.c155
-rw-r--r--src/gallium/drivers/nv40/nv40_state_rasterizer.c17
-rw-r--r--src/gallium/drivers/nv40/nv40_state_scissor.c35
-rw-r--r--src/gallium/drivers/nv40/nv40_state_stipple.c39
-rw-r--r--src/gallium/drivers/nv40/nv40_state_viewport.c67
-rw-r--r--src/gallium/drivers/nv40/nv40_state_zsa.c17
-rw-r--r--src/gallium/drivers/nv40/nv40_surface.c77
-rw-r--r--src/gallium/drivers/nv40/nv40_vbo.c555
-rw-r--r--src/gallium/drivers/nv40/nv40_vertprog.c1070
-rw-r--r--src/gallium/drivers/nv50/Makefile29
-rw-r--r--src/gallium/drivers/nv50/nv50_clear.c90
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c90
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h185
-rw-r--r--src/gallium/drivers/nv50/nv50_draw.c89
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c135
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c1708
-rw-r--r--src/gallium/drivers/nv50/nv50_program.h45
-rw-r--r--src/gallium/drivers/nv50/nv50_query.c69
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c323
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.h33
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c638
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c309
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c123
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c124
-rw-r--r--src/gallium/drivers/nv50/nv50_texture.h126
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c237
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c11
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h7
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_exec.c26
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_sse.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_vbuf.c29
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_fs.c23
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_state.h2
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c933
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.h50
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.h2
183 files changed, 39913 insertions, 2287 deletions
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
index cb0631baf52..98554d7f521 100644
--- a/src/gallium/drivers/cell/common.h
+++ b/src/gallium/drivers/cell/common.h
@@ -64,9 +64,13 @@
#define ROUNDUP16(k) (((k) + 0xf) & ~0xf)
-#define CELL_MAX_SPUS 6
+#define CELL_MAX_SPUS 8
#define CELL_MAX_SAMPLERS 4
+#define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */
+#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */
+#define CELL_MAX_WIDTH 1024 /**< max framebuffer width */
+#define CELL_MAX_HEIGHT 1024 /**< max framebuffer width */
#define TILE_SIZE 32
@@ -94,43 +98,94 @@
#define CELL_CMD_STATE_BIND_VS 18
#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19
#define CELL_CMD_STATE_ATTRIB_FETCH 20
-#define CELL_CMD_VS_EXECUTE 22
-#define CELL_CMD_FLUSH_BUFFER_RANGE 23
+#define CELL_CMD_STATE_FS_CONSTANTS 21
+#define CELL_CMD_STATE_RASTERIZER 22
+#define CELL_CMD_VS_EXECUTE 23
+#define CELL_CMD_FLUSH_BUFFER_RANGE 24
+#define CELL_CMD_FENCE 25
+/** Command/batch buffers */
#define CELL_NUM_BUFFERS 4
#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */
#define CELL_BUFFER_STATUS_FREE 10
#define CELL_BUFFER_STATUS_USED 20
+/** Debug flags */
+#define CELL_DEBUG_CHECKER (1 << 0)
+#define CELL_DEBUG_ASM (1 << 1)
+#define CELL_DEBUG_SYNC (1 << 2)
+#define CELL_DEBUG_FRAGMENT_OPS (1 << 3)
+#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4)
+#define CELL_DEBUG_CMD (1 << 5)
+#define CELL_DEBUG_CACHE (1 << 6)
-#define CELL_DEBUG_CHECKER (1 << 0)
-#define CELL_DEBUG_SYNC (1 << 1)
+#define CELL_FENCE_IDLE 0
+#define CELL_FENCE_EMITTED 1
+#define CELL_FENCE_SIGNALLED 2
+#define CELL_FACING_FRONT 0
+#define CELL_FACING_BACK 1
+
+struct cell_fence
+{
+ /** There's a 16-byte status qword per SPU */
+ volatile uint status[CELL_MAX_SPUS][4];
+};
-/** Max instructions for doing per-fragment operations */
-#define SPU_MAX_FRAGMENT_OPS_INSTS 64
+/**
+ * Fence command sent to SPUs. In response, the SPUs will write
+ * CELL_FENCE_STATUS_SIGNALLED back to the fence status word in main memory.
+ */
+struct cell_command_fence
+{
+ uint64_t opcode; /**< CELL_CMD_FENCE */
+ struct cell_fence *fence;
+};
/**
* Command to specify per-fragment operations state and generated code.
+ * Note that this is a variant-length structure, allocated with as
+ * much memory as needed to hold the generated code; the "code"
+ * field *must* be the last field in the structure. Also, the entire
+ * length of the structure (including the variant code field) must be
+ * a multiple of 8 bytes; we require that this structure itself be
+ * a multiple of 8 bytes, and that the generated code also be a multiple
+ * of 8 bytes.
+ *
+ * Also note that the dsa, blend, blend_color fields are really only needed
+ * for the fallback/C per-pixel code. They're not used when we generate
+ * dynamic SPU fragment code (which is the normal case), and will eventually
+ * be removed from this structure.
*/
struct cell_command_fragment_ops
{
uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */
+
+ /* Fields for the fallback case */
struct pipe_depth_stencil_alpha_state dsa;
struct pipe_blend_state blend;
- unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS];
+ struct pipe_blend_color blend_color;
+
+ /* Fields for the generated SPU code */
+ unsigned total_code_size;
+ unsigned front_code_index;
+ unsigned back_code_index;
+ /* this field has variant length, and must be the last field in
+ * the structure
+ */
+ unsigned code[0];
};
/** Max instructions for fragment programs */
-#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 128
+#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 512
/**
- * Command to send a fragment progra to SPUs.
+ * Command to send a fragment program to SPUs.
*/
struct cell_command_fragment_program
{
@@ -145,7 +200,7 @@ struct cell_command_fragment_program
*/
struct cell_command_framebuffer
{
- uint64_t opcode; /**< CELL_CMD_FRAMEBUFFER */
+ uint64_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */
int width, height;
void *color_start, *depth_start;
enum pipe_format color_format, depth_format;
@@ -153,6 +208,16 @@ struct cell_command_framebuffer
/**
+ * Tell SPUs about rasterizer state.
+ */
+struct cell_command_rasterizer
+{
+ uint64_t opcode; /**< CELL_CMD_STATE_RASTERIZER */
+ struct pipe_rasterizer_state rasterizer;
+};
+
+
+/**
* Clear framebuffer to the given value/color.
*/
struct cell_command_clear_surface
@@ -248,23 +313,27 @@ struct cell_command_sampler
struct cell_command_texture
{
uint64_t opcode; /**< CELL_CMD_STATE_TEXTURE */
+ uint target; /**< PIPE_TEXTURE_x */
uint unit;
- void *start; /**< Address in main memory */
- ushort width, height;
+ void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */
+ ushort width[CELL_MAX_TEXTURE_LEVELS];
+ ushort height[CELL_MAX_TEXTURE_LEVELS];
+ ushort depth[CELL_MAX_TEXTURE_LEVELS];
};
-/** XXX unions don't seem to work */
-/* XXX this should go away; all commands should be placed in batch buffers */
-struct cell_command
+#define MAX_SPU_FUNCTIONS 12
+/**
+ * Used to tell the PPU about the address of particular functions in the
+ * SPU's address space.
+ */
+struct cell_spu_function_info
{
-#if 0
- struct cell_command_framebuffer fb;
- struct cell_command_clear_surface clear;
- struct cell_command_render render;
-#endif
- struct cell_command_vs vs;
-} ALIGN16_ATTRIB;
+ uint num;
+ char names[MAX_SPU_FUNCTIONS][16];
+ uint addrs[MAX_SPU_FUNCTIONS];
+ char pad[12]; /**< Pad struct to multiple of 16 bytes (256 currently) */
+};
/** This is the object passed to spe_create_thread() */
@@ -273,11 +342,13 @@ struct cell_init_info
unsigned id;
unsigned num_spus;
unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */
- struct cell_command *cmd;
+ float inv_timebase; /**< 1.0/timebase, for perf measurement */
/** Buffers for command batches, vertex/index data */
ubyte *buffers[CELL_NUM_BUFFERS];
uint *buffer_status; /**< points at cell_context->buffer_status */
+
+ struct cell_spu_function_info *spu_functions;
} ALIGN16_ATTRIB;
diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile
index b28f4c5c31f..9358a47284c 100644
--- a/src/gallium/drivers/cell/ppu/Makefile
+++ b/src/gallium/drivers/cell/ppu/Makefile
@@ -24,6 +24,7 @@ SOURCES = \
cell_clear.c \
cell_context.c \
cell_draw_arrays.c \
+ cell_fence.c \
cell_flush.c \
cell_gen_fragment.c \
cell_gen_fp.c \
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c
index 16882c01295..962775cd335 100644
--- a/src/gallium/drivers/cell/ppu/cell_batch.c
+++ b/src/gallium/drivers/cell/ppu/cell_batch.c
@@ -28,6 +28,7 @@
#include "cell_context.h"
#include "cell_batch.h"
+#include "cell_fence.h"
#include "cell_spu.h"
@@ -42,7 +43,9 @@
uint
cell_get_empty_buffer(struct cell_context *cell)
{
- uint buf = 0, tries = 0;
+ static uint prev_buffer = 0;
+ uint buf = (prev_buffer + 1) % CELL_NUM_BUFFERS;
+ uint tries = 0;
/* Find a buffer that's marked as free by all SPUs */
while (1) {
@@ -58,8 +61,13 @@ cell_get_empty_buffer(struct cell_context *cell)
cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
}
/*
- printf("PPU: ALLOC BUFFER %u\n", buf);
+ printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries);
*/
+ prev_buffer = buf;
+
+ /* release tex buffer associated w/ prev use of this batch buf */
+ cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]);
+
return buf;
}
}
@@ -82,6 +90,37 @@ cell_get_empty_buffer(struct cell_context *cell)
/**
+ * Append a fence command to the current batch buffer.
+ * Note that we're sure there's always room for this because of the
+ * adjusted size check in cell_batch_free_space().
+ */
+static void
+emit_fence(struct cell_context *cell)
+{
+ const uint batch = cell->cur_batch;
+ const uint size = cell->buffer_size[batch];
+ struct cell_command_fence *fence_cmd;
+ struct cell_fence *fence = &cell->fenced_buffers[batch].fence;
+ uint i;
+
+ /* set fence status to emitted, not yet signalled */
+ for (i = 0; i < cell->num_spus; i++) {
+ fence->status[i][0] = CELL_FENCE_EMITTED;
+ }
+
+ ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE);
+
+ fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size);
+ fence_cmd->opcode = CELL_CMD_FENCE;
+ fence_cmd->fence = fence;
+
+ /* update batch buffer size */
+ cell->buffer_size[batch] = size + sizeof(struct cell_command_fence);
+ assert(sizeof(struct cell_command_fence) % 8 == 0);
+}
+
+
+/**
* Flush the current batch buffer to the SPUs.
* An empty buffer will be found and set as the new current batch buffer
* for subsequent commands/data.
@@ -91,7 +130,7 @@ cell_batch_flush(struct cell_context *cell)
{
static boolean flushing = FALSE;
uint batch = cell->cur_batch;
- const uint size = cell->buffer_size[batch];
+ uint size = cell->buffer_size[batch];
uint spu, cmd_word;
assert(!flushing);
@@ -99,6 +138,14 @@ cell_batch_flush(struct cell_context *cell)
if (size == 0)
return;
+ /* Before we use this batch buffer, make sure any fenced texture buffers
+ * are released.
+ */
+ if (cell->fenced_buffers[batch].head) {
+ emit_fence(cell);
+ size = cell->buffer_size[batch];
+ }
+
flushing = TRUE;
assert(batch < CELL_NUM_BUFFERS);
@@ -139,6 +186,7 @@ uint
cell_batch_free_space(const struct cell_context *cell)
{
uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch];
+ free -= sizeof(struct cell_command_fence);
return free;
}
@@ -169,7 +217,7 @@ cell_batch_append(struct cell_context *cell, const void *data, uint bytes)
size = cell->buffer_size[cell->cur_batch];
- if (size + bytes > CELL_BUFFER_SIZE) {
+ if (bytes > cell_batch_free_space(cell)) {
cell_batch_flush(cell);
size = 0;
}
@@ -223,7 +271,7 @@ cell_batch_alloc_aligned(struct cell_context *cell, uint bytes,
padbytes = (alignment - (size % alignment)) % alignment;
- if (padbytes + size + bytes > CELL_BUFFER_SIZE) {
+ if (padbytes + bytes > cell_batch_free_space(cell)) {
cell_batch_flush(cell);
size = 0;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c
index c9c0c721bbe..037635e4660 100644
--- a/src/gallium/drivers/cell/ppu/cell_clear.c
+++ b/src/gallium/drivers/cell/ppu/cell_clear.c
@@ -106,4 +106,17 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
clr->surface = surfIndex;
clr->value = clearValue;
}
+
+ /* Technically, the surface's contents are now known and cleared,
+ * so we could set the status to PIPE_SURFACE_STATUS_CLEAR. But
+ * it turns out it's quite painful to recognize when any particular
+ * surface goes from PIPE_SURFACE_STATUS_CLEAR to
+ * PIPE_SURFACE_STATUS_DEFINED (i.e. with known contents), because
+ * the drawing commands could be operating on numerous draw buffers,
+ * which we'd have to iterate through to set all their stati...
+ * For now, we cheat a bit and set the surface's status to DEFINED
+ * right here. Later we should revisit this and set the status to
+ * CLEAR here, and find a better place to set the status to DEFINED.
+ */
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c
index 71f1a3049d1..22d552d8e3d 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.c
+++ b/src/gallium/drivers/cell/ppu/cell_context.c
@@ -47,6 +47,7 @@
#include "cell_clear.h"
#include "cell_context.h"
#include "cell_draw_arrays.h"
+#include "cell_fence.h"
#include "cell_flush.h"
#include "cell_state.h"
#include "cell_surface.h"
@@ -62,6 +63,8 @@ cell_destroy_context( struct pipe_context *pipe )
{
struct cell_context *cell = cell_context(pipe);
+ util_delete_keymap(cell->fragment_ops_cache, NULL);
+
cell_spu_exit(cell);
align_free(cell);
@@ -85,13 +88,16 @@ cell_draw_create(struct cell_context *cell)
}
-#ifdef DEBUG
static const struct debug_named_value cell_debug_flags[] = {
{"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */
+ {"asm", CELL_DEBUG_ASM}, /**< dump SPU asm code */
{"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */
+ {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/
+ {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/
+ {"cmd", CELL_DEBUG_CMD}, /**< SPUs dump command buffer info */
+ {"cache", CELL_DEBUG_CACHE}, /**< report texture cache stats on exit */
{NULL, 0}
};
-#endif
struct pipe_context *
@@ -99,6 +105,7 @@ cell_create_context(struct pipe_screen *screen,
struct cell_winsys *cws)
{
struct cell_context *cell;
+ uint i;
/* some fields need to be 16-byte aligned, so align the whole object */
cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16);
@@ -125,11 +132,14 @@ cell_create_context(struct pipe_screen *screen,
cell_init_state_functions(cell);
cell_init_shader_functions(cell);
cell_init_surface_functions(cell);
- cell_init_texture_functions(cell);
cell_init_vertex_functions(cell);
cell->draw = cell_draw_create(cell);
+ /* Create cache of fragment ops generated code */
+ cell->fragment_ops_cache =
+ util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL);
+
cell_init_vbuf(cell);
draw_set_rasterize_stage(cell->draw, cell->vbuf);
@@ -143,17 +153,31 @@ cell_create_context(struct pipe_screen *screen,
cell_debug_flags,
0 );
+ for (i = 0; i < CELL_NUM_BUFFERS; i++)
+ cell_fence_init(&cell->fenced_buffers[i].fence);
+
+
/*
* SPU stuff
*/
- cell->num_spus = 6;
- /* XXX is this in SDK 3.0 only?
- cell->num_spus = spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1);
- */
+ /* This call only works with SDK 3.0. Anyone still using 2.1??? */
+ cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1);
+ cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, 0);
+ if (cell->debug_flags) {
+ printf("Cell: found %d Cell(s) with %u SPUs\n",
+ cell->num_cells, cell->num_spus);
+ }
+ if (getenv("CELL_NUM_SPUS")) {
+ cell->num_spus = atoi(getenv("CELL_NUM_SPUS"));
+ assert(cell->num_spus > 0);
+ }
cell_start_spus(cell);
cell_init_batch_buffers(cell);
+ /* make sure SPU initializations are done before proceeding */
+ cell_flush_int(cell, CELL_FLUSH_WAIT);
+
return &cell->pipe;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index 14914b9c6f8..eb1397bb3fa 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -38,6 +38,7 @@
#include "cell/common.h"
#include "rtasm/rtasm_ppc_spe.h"
#include "tgsi/tgsi_scan.h"
+#include "util/u_keymap.h"
struct cell_vbuf_render;
@@ -67,31 +68,29 @@ struct cell_fragment_shader_state
/**
- * Cell blend state atom, subclass of pipe_blend_state.
+ * Key for mapping per-fragment state to cached SPU machine code.
+ * keymap(cell_fragment_ops_key) => cell_command_fragment_ops
*/
-struct cell_blend_state
+struct cell_fragment_ops_key
{
- struct pipe_blend_state base;
-
- /**
- * Generated code to perform alpha blending
- */
- struct spe_function code;
+ struct pipe_blend_state blend;
+ struct pipe_blend_color blend_color;
+ struct pipe_depth_stencil_alpha_state dsa;
+ enum pipe_format color_format;
+ enum pipe_format zs_format;
};
+struct cell_buffer_node;
+
/**
- * Cell depth/stencil/alpha state atom, subclass of
- * pipe_depth_stencil_alpha_state.
+ * Fenced buffer list. List of buffers which can be unreferenced after
+ * the fence has been executed/signalled.
*/
-struct cell_depth_stencil_alpha_state
+struct cell_buffer_list
{
- struct pipe_depth_stencil_alpha_state base;
-
- /**
- * Generated code to perform alpha, stencil, and depth testing on the SPE
- */
- struct spe_function code;
+ struct cell_fence fence ALIGN16_ATTRIB;
+ struct cell_buffer_node *head;
};
@@ -104,10 +103,10 @@ struct cell_context
struct cell_winsys *winsys;
- const struct cell_blend_state *blend;
+ const struct pipe_blend_state *blend;
const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
uint num_samplers;
- const struct cell_depth_stencil_alpha_state *depth_stencil;
+ const struct pipe_depth_stencil_alpha_state *depth_stencil;
const struct pipe_rasterizer_state *rasterizer;
const struct cell_vertex_shader_state *vs;
const struct cell_fragment_shader_state *fs;
@@ -135,6 +134,11 @@ struct cell_context
uint *tex_map;
uint dirty;
+ uint dirty_textures; /* bitmask of texture units */
+ uint dirty_samplers; /* bitmask of sampler units */
+
+ /** Cache of code generated for per-fragment ops */
+ struct keymap *fragment_ops_cache;
/** The primitive drawing context */
struct draw_context *draw;
@@ -149,8 +153,9 @@ struct cell_context
/** Mapped constant buffers */
void *mapped_constants[PIPE_SHADER_TYPES];
+ struct cell_spu_function_info spu_functions ALIGN16_ATTRIB;
- uint num_spus;
+ uint num_cells, num_spus;
/** Buffers for command batches, vertex/index data */
uint buffer_size[CELL_NUM_BUFFERS];
@@ -162,6 +167,14 @@ struct cell_context
uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB;
+ /** Associated with each command/batch buffer is a list of pipe_buffers
+ * that are fenced. When the last command in a buffer is executed, the
+ * fence will be signalled, indicating that any pipe_buffers preceeding
+ * that fence can be unreferenced (and probably freed).
+ */
+ struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS];
+
+
struct spe_function attrib_fetch;
unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS];
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c
new file mode 100644
index 00000000000..867b5dcaa09
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_fence.c
@@ -0,0 +1,168 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <unistd.h>
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "cell_context.h"
+#include "cell_batch.h"
+#include "cell_fence.h"
+#include "cell_texture.h"
+
+
+void
+cell_fence_init(struct cell_fence *fence)
+{
+ uint i;
+ ASSERT_ALIGN16(fence->status);
+ for (i = 0; i < CELL_MAX_SPUS; i++) {
+ fence->status[i][0] = CELL_FENCE_IDLE;
+ }
+}
+
+
+boolean
+cell_fence_signalled(const struct cell_context *cell,
+ const struct cell_fence *fence)
+{
+ uint i;
+ for (i = 0; i < cell->num_spus; i++) {
+ if (fence->status[i][0] != CELL_FENCE_SIGNALLED)
+ return FALSE;
+ /*assert(fence->status[i][0] == CELL_FENCE_EMITTED);*/
+ }
+ return TRUE;
+}
+
+
+void
+cell_fence_finish(const struct cell_context *cell,
+ const struct cell_fence *fence)
+{
+ while (!cell_fence_signalled(cell, fence)) {
+ usleep(10);
+ }
+
+#ifdef DEBUG
+ {
+ uint i;
+ for (i = 0; i < cell->num_spus; i++) {
+ assert(fence->status[i][0] == CELL_FENCE_SIGNALLED);
+ }
+ }
+#endif
+}
+
+
+
+
+struct cell_buffer_node
+{
+ struct pipe_buffer *buffer;
+ struct cell_buffer_node *next;
+};
+
+
+static void
+cell_add_buffer_to_list(struct cell_context *cell,
+ struct cell_buffer_list *list,
+ struct pipe_buffer *buffer)
+{
+ struct pipe_screen *ps = cell->pipe.screen;
+ struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node);
+ /* create new list node which references the buffer, insert at head */
+ if (node) {
+ pipe_buffer_reference(ps, &node->buffer, buffer);
+ node->next = list->head;
+ list->head = node;
+ }
+}
+
+
+/**
+ * Wait for completion of the given fence, then unreference any buffers
+ * on the list.
+ * This typically unrefs/frees texture buffers after any rendering which uses
+ * them has completed.
+ */
+void
+cell_free_fenced_buffers(struct cell_context *cell,
+ struct cell_buffer_list *list)
+{
+ if (list->head) {
+ struct pipe_screen *ps = cell->pipe.screen;
+ struct cell_buffer_node *node;
+
+ cell_fence_finish(cell, &list->fence);
+
+ /* traverse the list, unreferencing buffers, freeing nodes */
+ node = list->head;
+ while (node) {
+ struct cell_buffer_node *next = node->next;
+ assert(node->buffer);
+ pipe_buffer_unmap(ps, node->buffer);
+#if 0
+ printf("Unref buffer %p\n", node->buffer);
+ if (node->buffer->refcount == 1)
+ printf(" Delete!\n");
+#endif
+ pipe_buffer_reference(ps, &node->buffer, NULL);
+ FREE(node);
+ node = next;
+ }
+ list->head = NULL;
+ }
+}
+
+
+/**
+ * This should be called for each render command.
+ * Any texture buffers that are current bound will be added to a fenced
+ * list to be freed later when the fence is executed/signalled.
+ */
+void
+cell_add_fenced_textures(struct cell_context *cell)
+{
+ struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch];
+ uint i;
+
+ for (i = 0; i < cell->num_textures; i++) {
+ struct cell_texture *ct = cell->texture[i];
+ if (ct) {
+ uint level;
+ for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
+ if (ct->tiled_buffer[level]) {
+#if 0
+ printf("Adding texture %p buffer %p to list\n",
+ ct, ct->tiled_buffer[level]);
+#endif
+ cell_add_buffer_to_list(cell, list, ct->tiled_buffer[level]);
+ }
+ }
+ }
+ }
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h
new file mode 100644
index 00000000000..536b4ba411a
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_fence.h
@@ -0,0 +1,57 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CELL_FENCE_H
+#define CELL_FENCE_H
+
+
+extern void
+cell_fence_init(struct cell_fence *fence);
+
+
+extern boolean
+cell_fence_signalled(const struct cell_context *cell,
+ const struct cell_fence *fence);
+
+
+extern void
+cell_fence_finish(const struct cell_context *cell,
+ const struct cell_fence *fence);
+
+
+
+extern void
+cell_free_fenced_buffers(struct cell_context *cell,
+ struct cell_buffer_list *list);
+
+
+extern void
+cell_add_fenced_textures(struct cell_context *cell);
+
+
+#endif /* CELL_FENCE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c
index 6596b720101..a64967b4b9e 100644
--- a/src/gallium/drivers/cell/ppu/cell_flush.c
+++ b/src/gallium/drivers/cell/ppu/cell_flush.c
@@ -49,7 +49,7 @@ cell_flush(struct pipe_context *pipe, unsigned flags,
flags |= CELL_FLUSH_WAIT;
}
- if (flags & PIPE_FLUSH_SWAPBUFFERS)
+ if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_RENDER_CACHE))
flags |= CELL_FLUSH_WAIT;
draw_flush( cell->draw );
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 6ffe94eb14a..96a1743fc10 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -37,7 +37,7 @@
* \author Brian Paul
*/
-
+#include <math.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
@@ -51,25 +51,43 @@
#include "cell_gen_fp.h"
-/** Set to 1 to enable debug/disassembly printfs */
-#define DISASSEM 01
+#define MAX_TEMPS 16
+#define MAX_IMMED 8
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
/**
* Context needed during code generation.
*/
struct codegen
{
+ struct cell_context *cell;
int inputs_reg; /**< 1st function parameter */
int outputs_reg; /**< 2nd function parameter */
int constants_reg; /**< 3rd function parameter */
- int temp_regs[8][4]; /**< maps TGSI temps to SPE registers */
+ int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
+ int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */
+
+ int num_imm; /**< number of immediates */
int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
/** Per-instruction temps / intermediate temps */
int num_itemps;
- int itemps[3];
+ int itemps[12];
+
+ /** Current IF/ELSE/ENDIF nesting level */
+ int if_nesting;
+ /** Index of execution mask register */
+ int exec_mask_reg;
+
+ /** KIL mask: indicates which fragments have been killed */
+ int kill_mask_reg;
+
+ int frame_size; /**< Stack frame size, in words */
struct spe_function *f;
boolean error;
@@ -112,19 +130,78 @@ get_const_one_reg(struct codegen *gen)
{
if (gen->one_reg <= 0) {
gen->one_reg = spe_allocate_available_register(gen->f);
- }
- /* one = {1.0, 1.0, 1.0, 1.0} */
- spe_load_float(gen->f, gen->one_reg, 1.0f);
-#if DISASSEM
- printf("il\tr%d, 1.0f\n", gen->one_reg);
-#endif
+ spe_indent(gen->f, 4);
+ spe_comment(gen->f, -4, "INIT CONSTANT 1.0:");
+
+ /* one = {1.0, 1.0, 1.0, 1.0} */
+ spe_load_float(gen->f, gen->one_reg, 1.0f);
+
+ spe_indent(gen->f, -4);
+ }
return gen->one_reg;
}
/**
+ * Return index of the pixel execution mask.
+ * The register is allocated an initialized upon the first call.
+ *
+ * The pixel execution mask controls which pixels in a quad are
+ * modified, according to surrounding conditionals, loops, etc.
+ */
+static int
+get_exec_mask_reg(struct codegen *gen)
+{
+ if (gen->exec_mask_reg <= 0) {
+ gen->exec_mask_reg = spe_allocate_available_register(gen->f);
+
+ spe_indent(gen->f, 4);
+ spe_comment(gen->f, -4, "INIT EXEC MASK = ~0:");
+
+ /* exec_mask = {~0, ~0, ~0, ~0} */
+ spe_load_int(gen->f, gen->exec_mask_reg, ~0);
+
+ spe_indent(gen->f, -4);
+ }
+
+ return gen->exec_mask_reg;
+}
+
+
+static boolean
+is_register_src(struct codegen *gen, int channel,
+ const struct tgsi_full_src_register *src)
+{
+ int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
+ int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
+
+ if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) {
+ return FALSE;
+ }
+ if (src->SrcRegister.File == TGSI_FILE_TEMPORARY ||
+ src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
+ return TRUE;
+ }
+ return FALSE;
+}
+
+
+static boolean
+is_memory_dst(struct codegen *gen, int channel,
+ const struct tgsi_full_dst_register *dst)
+{
+ if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
+ return TRUE;
+ }
+ else {
+ return FALSE;
+ }
+}
+
+
+/**
* Return the index of the SPU temporary containing the named TGSI
* source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
* just return the corresponding SPE register. If the TGIS register
@@ -136,35 +213,93 @@ get_src_reg(struct codegen *gen,
int channel,
const struct tgsi_full_src_register *src)
{
- int reg;
+ int reg = -1;
+ int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
+ boolean reg_is_itemp = FALSE;
+ uint sign_op;
+
+ assert(swizzle >= TGSI_SWIZZLE_X);
+ assert(swizzle <= TGSI_EXTSWIZZLE_ONE);
+
+ if (swizzle == TGSI_EXTSWIZZLE_ONE) {
+ /* Load const one float and early out */
+ reg = get_const_one_reg(gen);
+ }
+ else if (swizzle == TGSI_EXTSWIZZLE_ZERO) {
+ /* Load const zero float and early out */
+ reg = get_itemp(gen);
+ spe_xor(gen->f, reg, reg, reg);
+ }
+ else {
+ assert(swizzle < 4);
- /* XXX need to examine src swizzle info here.
- * That will involve changing the channel var...
+ switch (src->SrcRegister.File) {
+ case TGSI_FILE_TEMPORARY:
+ reg = gen->temp_regs[src->SrcRegister.Index][swizzle];
+ break;
+ case TGSI_FILE_INPUT:
+ {
+ /* offset is measured in quadwords, not bytes */
+ int offset = src->SrcRegister.Index * 4 + swizzle;
+ reg = get_itemp(gen);
+ reg_is_itemp = TRUE;
+ /* Load: reg = memory[(machine_reg) + offset] */
+ spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16);
+ }
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ reg = gen->imm_regs[src->SrcRegister.Index][swizzle];
+ break;
+ case TGSI_FILE_CONSTANT:
+ {
+ /* offset is measured in quadwords, not bytes */
+ int offset = src->SrcRegister.Index * 4 + swizzle;
+ reg = get_itemp(gen);
+ reg_is_itemp = TRUE;
+ /* Load: reg = memory[(machine_reg) + offset] */
+ spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
+ }
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ /*
+ * Handle absolute value, negate or set-negative of src register.
*/
+ sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
+ if (sign_op != TGSI_UTIL_SIGN_KEEP) {
+ /*
+ * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
+ */
+ const int bit31mask_reg = get_itemp(gen);
+ int result_reg;
+
+ if (reg_is_itemp) {
+ /* re-use 'reg' for the result */
+ result_reg = reg;
+ }
+ else {
+ /* alloc a new reg for the result */
+ result_reg = get_itemp(gen);
+ }
+ /* mask with bit 31 set, the rest cleared */
+ spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
- switch (src->SrcRegister.File) {
- case TGSI_FILE_TEMPORARY:
- reg = gen->temp_regs[src->SrcRegister.Index][channel];
- break;
- case TGSI_FILE_INPUT:
- {
- /* offset is measured in quadwords, not bytes */
- int offset = src->SrcRegister.Index * 4 + channel;
- reg = get_itemp(gen);
- /* Load: reg = memory[(machine_reg) + offset] */
- spe_lqd(gen->f, reg, gen->inputs_reg, offset);
-#if DISASSEM
- printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset);
-#endif
+ if (sign_op == TGSI_UTIL_SIGN_CLEAR) {
+ spe_andc(gen->f, result_reg, reg, bit31mask_reg);
}
- break;
- case TGSI_FILE_IMMEDIATE:
- /* xxx fall-through for now / fix */
- case TGSI_FILE_CONSTANT:
- /* xxx fall-through for now / fix */
- default:
- assert(0);
+ else if (sign_op == TGSI_UTIL_SIGN_SET) {
+ spe_and(gen->f, result_reg, reg, bit31mask_reg);
+ }
+ else {
+ assert(sign_op == TGSI_UTIL_SIGN_TOGGLE);
+ spe_xor(gen->f, result_reg, reg, bit31mask_reg);
+ }
+
+ reg = result_reg;
}
return reg;
@@ -183,11 +318,14 @@ get_dst_reg(struct codegen *gen,
int channel,
const struct tgsi_full_dst_register *dest)
{
- int reg;
+ int reg = -1;
switch (dest->DstRegister.File) {
case TGSI_FILE_TEMPORARY:
- reg = gen->temp_regs[dest->DstRegister.Index][channel];
+ if (gen->if_nesting > 0)
+ reg = get_itemp(gen);
+ else
+ reg = gen->temp_regs[dest->DstRegister.Index][channel];
break;
case TGSI_FILE_OUTPUT:
reg = get_itemp(gen);
@@ -211,19 +349,59 @@ store_dest_reg(struct codegen *gen,
int value_reg, int channel,
const struct tgsi_full_dst_register *dest)
{
+ /*
+ * XXX need to implement dst reg clamping/saturation
+ */
+#if 0
+ switch (inst->Instruction.Saturate) {
+ case TGSI_SAT_NONE:
+ break;
+ case TGSI_SAT_ZERO_ONE:
+ break;
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ break;
+ default:
+ assert( 0 );
+ }
+#endif
+
switch (dest->DstRegister.File) {
case TGSI_FILE_TEMPORARY:
- /* no-op */
+ if (gen->if_nesting > 0) {
+ int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
+ int exec_reg = get_exec_mask_reg(gen);
+ /* Mix d with new value according to exec mask:
+ * d[i] = mask_reg[i] ? value_reg : d_reg
+ */
+ spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg);
+ }
+ else {
+ /* we're not inside a condition or loop: do nothing special */
+
+ }
break;
case TGSI_FILE_OUTPUT:
{
/* offset is measured in quadwords, not bytes */
int offset = dest->DstRegister.Index * 4 + channel;
- /* Store: memory[(machine_reg) + offset] = reg */
- spe_stqd(gen->f, value_reg, gen->outputs_reg, offset);
-#if DISASSEM
- printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset);
-#endif
+ if (gen->if_nesting > 0) {
+ int exec_reg = get_exec_mask_reg(gen);
+ int curval_reg = get_itemp(gen);
+ /* First read the current value from memory:
+ * Load: curval = memory[(machine_reg) + offset]
+ */
+ spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
+ /* Mix curval with newvalue according to exec mask:
+ * d[i] = mask_reg[i] ? value_reg : d_reg
+ */
+ spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
+ /* Store: memory[(machine_reg) + offset] = curval */
+ spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
+ }
+ else {
+ /* Store: memory[(machine_reg) + offset] = reg */
+ spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16);
+ }
}
break;
default:
@@ -232,27 +410,114 @@ store_dest_reg(struct codegen *gen,
}
+
+static void
+emit_prologue(struct codegen *gen)
+{
+ gen->frame_size = 1024; /* XXX temporary, should be dynamic */
+
+ spe_comment(gen->f, -4, "Function prologue:");
+
+ /* save $lr on stack # stqd $lr,16($sp) */
+ spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
+
+ if (gen->frame_size >= 512) {
+ /* offset is too large for ai instruction */
+ int offset_reg = spe_allocate_available_register(gen->f);
+ int sp_reg = spe_allocate_available_register(gen->f);
+ /* offset = -framesize */
+ spe_load_int(gen->f, offset_reg, -gen->frame_size);
+ /* sp = $sp */
+ spe_move(gen->f, sp_reg, SPE_REG_SP);
+ /* $sp = $sp + offset_reg */
+ spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
+ /* save $sp in stack frame */
+ spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0);
+ /* clean up */
+ spe_release_register(gen->f, offset_reg);
+ spe_release_register(gen->f, sp_reg);
+ }
+ else {
+ /* save stack pointer # stqd $sp,-frameSize($sp) */
+ spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
+
+ /* adjust stack pointer # ai $sp,$sp,-frameSize */
+ spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
+ }
+}
+
+
+static void
+emit_epilogue(struct codegen *gen)
+{
+ const int return_reg = 3;
+
+ spe_comment(gen->f, -4, "Function epilogue:");
+
+ spe_comment(gen->f, 0, "return the killed mask");
+ if (gen->kill_mask_reg > 0) {
+ /* shader called KIL, return the "alive" mask */
+ spe_move(gen->f, return_reg, gen->kill_mask_reg);
+ }
+ else {
+ /* return {0,0,0,0} */
+ spe_load_uint(gen->f, return_reg, 0);
+ }
+
+ spe_comment(gen->f, 0, "restore stack and return");
+ if (gen->frame_size >= 512) {
+ /* offset is too large for ai instruction */
+ int offset_reg = spe_allocate_available_register(gen->f);
+ /* offset = framesize */
+ spe_load_int(gen->f, offset_reg, gen->frame_size);
+ /* $sp = $sp + offset */
+ spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
+ /* clean up */
+ spe_release_register(gen->f, offset_reg);
+ }
+ else {
+ /* restore stack pointer # ai $sp,$sp,frameSize */
+ spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size);
+ }
+
+ /* restore $lr # lqd $lr,16($sp) */
+ spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
+
+ /* return from function call */
+ spe_bi(gen->f, SPE_REG_RA, 0, 0);
+}
+
+
static boolean
emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
- int ch;
+ int ch, src_reg[4], dst_reg[4];
+
+ spe_comment(gen->f, -4, "MOV:");
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- /* XXX we don't always need to actually emit a mov instruction here */
- spe_move(gen->f, dst_reg, src_reg);
-#if DISASSEM
- printf("mov\tr%d, r%d\n", dst_reg, src_reg);
-#endif
- store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]);
+ src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+ }
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) &&
+ is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) {
+ /* special-case: register to memory store */
+ store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+ else {
+ spe_move(gen->f, dst_reg[ch], src_reg[ch]);
+ store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
free_itemps(gen);
}
}
return true;
}
-
/**
* Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
* becomes (up to) four SPU "fa" instructions because we're doing SOA
@@ -261,24 +526,25 @@ emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
static boolean
emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
- int ch;
- /* Loop over Red/Green/Blue/Alpha channels */
+ int ch, s1_reg[4], s2_reg[4], d_reg[4];
+
+ spe_comment(gen->f, -4, "ADD:");
+ /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
for (ch = 0; ch < 4; ch++) {
/* If the dest R, G, B or A writemask is enabled... */
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- /* get indexes of the two src, one dest SPE registers */
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
-
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+ }
+ /* Loop over Red/Green/Blue/Alpha channels, do the add, store results */
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
/* Emit actual SPE instruction: d = s1 + s2 */
- spe_fa(gen->f, d_reg, s1_reg, s2_reg);
-#if DISASSEM
- printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
-#endif
-
+ spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
/* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
/* Free any intermediate temps we allocated */
free_itemps(gen);
}
@@ -286,6 +552,99 @@ emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
return true;
}
+/**
+ * Emit subtract. See emit_ADD for comments.
+ */
+static boolean
+emit_SUB(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s1_reg[4], s2_reg[4], d_reg[4];
+ spe_comment(gen->f, -4, "SUB:");
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+ }
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ /* d = s1 - s2 */
+ spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+ return true;
+}
+
+/**
+ * Emit multiply add. See emit_ADD for comments.
+ */
+static boolean
+emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4];
+ spe_comment(gen->f, -4, "MAD:");
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+ }
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ /* d = s1 * s2 + s3 */
+ spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]);
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+ return true;
+}
+
+
+/**
+ * Emit linear interpolate. See emit_ADD for comments.
+ */
+static boolean
+emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4];
+ spe_comment(gen->f, -4, "LERP:");
+ /* setup/get src/dst/temp regs */
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ tmp_reg[ch] = get_itemp(gen);
+ }
+ }
+
+ /* d = s3 + s1(s2 - s3) */
+ /* do all subtracts, then all fma, then all stores to better pipeline */
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]);
+ }
+ }
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]);
+ }
+ }
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+ }
+ free_itemps(gen);
+ return true;
+}
/**
* Emit multiply. See emit_ADD for comments.
@@ -293,17 +652,63 @@ emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
static boolean
emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
+ int ch, s1_reg[4], s2_reg[4], d_reg[4];
+ spe_comment(gen->f, -4, "MUL:");
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+ }
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ /* d = s1 * s2 */
+ spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+ return true;
+}
+
+/**
+ * Emit reciprocal. See emit_ADD for comments.
+ */
+static boolean
+emit_RCP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
int ch;
+ spe_comment(gen->f, -4, "RCP:");
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- /* d = s1 * s2 */
- spe_fm(gen->f, d_reg, s1_reg, s2_reg);
-#if DISASSEM
- printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
-#endif
+ /* d = 1/s1 */
+ spe_frest(gen->f, d_reg, s1_reg);
+ spe_fi(gen->f, d_reg, s1_reg, d_reg);
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+ return true;
+}
+
+/**
+ * Emit reciprocal sqrt. See emit_ADD for comments.
+ */
+static boolean
+emit_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ spe_comment(gen->f, -4, "RSQ:");
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ /* d = 1/s1 */
+ spe_frsqest(gen->f, d_reg, s1_reg);
+ spe_fi(gen->f, d_reg, s1_reg, d_reg);
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
free_itemps(gen);
}
@@ -311,6 +716,270 @@ emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
return true;
}
+/**
+ * Emit absolute value. See emit_ADD for comments.
+ */
+static boolean
+emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ spe_comment(gen->f, -4, "ABS:");
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ const int bit31mask_reg = get_itemp(gen);
+
+ /* mask with bit 31 set, the rest cleared */
+ spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
+
+ /* d = sign bit cleared in s1 */
+ spe_andc(gen->f, d_reg, s1_reg, bit31mask_reg);
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+ return true;
+}
+
+/**
+ * Emit 3 component dot product. See emit_ADD for comments.
+ */
+static boolean
+emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ int s1x_reg, s1y_reg, s1z_reg;
+ int s2x_reg, s2y_reg, s2z_reg;
+ int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
+
+ spe_comment(gen->f, -4, "DP3:");
+
+ s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+ s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+
+ /* t0 = x0 * x1 */
+ spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg);
+
+ /* t1 = y0 * y1 */
+ spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg);
+
+ /* t0 = z0 * z1 + t0 */
+ spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg);
+
+ /* t0 = t0 + t1 */
+ spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, t0_reg);
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ }
+ }
+
+ free_itemps(gen);
+ return true;
+}
+
+/**
+ * Emit 4 component dot product. See emit_ADD for comments.
+ */
+static boolean
+emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ int s0x_reg, s0y_reg, s0z_reg, s0w_reg;
+ int s1x_reg, s1y_reg, s1z_reg, s1w_reg;
+ int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
+
+ spe_comment(gen->f, -4, "DP4:");
+
+ s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+ s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+ s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]);
+ s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
+
+ /* t0 = x0 * x1 */
+ spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg);
+
+ /* t1 = y0 * y1 */
+ spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg);
+
+ /* t0 = z0 * z1 + t0 */
+ spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg);
+
+ /* t1 = w0 * w1 + t1 */
+ spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg);
+
+ /* t0 = t0 + t1 */
+ spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, t0_reg);
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ }
+ }
+
+ free_itemps(gen);
+ return true;
+}
+
+/**
+ * Emit homogeneous dot product. See emit_ADD for comments.
+ */
+static boolean
+emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ /* XXX rewrite this function to look more like DP3/DP4 */
+ int ch;
+ spe_comment(gen->f, -4, "DPH:");
+
+ int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ int tmp_reg = get_itemp(gen);
+
+ /* t = x0 * x1 */
+ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
+
+ s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ /* t = y0 * y1 + t */
+ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+ s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+ /* t = z0 * z1 + t */
+ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+ s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
+ /* t = w1 + t */
+ spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, tmp_reg);
+ store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
+ }
+ }
+
+ free_itemps(gen);
+ return true;
+}
+
+/**
+ * Emit 3-component vector normalize.
+ */
+static boolean
+emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ int src_reg[3];
+ int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
+
+ spe_comment(gen->f, -4, "NRM3:");
+
+ src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+
+ /* t0 = x * x */
+ spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]);
+
+ /* t1 = y * y */
+ spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]);
+
+ /* t0 = z * z + t0 */
+ spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg);
+
+ /* t0 = t0 + t1 */
+ spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
+
+ /* t1 = 1.0 / sqrt(t0) */
+ spe_frsqest(gen->f, t1_reg, t0_reg);
+ spe_fi(gen->f, t1_reg, t0_reg, t1_reg);
+
+ for (ch = 0; ch < 3; ch++) { /* NOTE: omit W channel */
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ /* dst = src[ch] * t1 */
+ spe_fm(gen->f, d_reg, src_reg[ch], t1_reg);
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ }
+ }
+
+ free_itemps(gen);
+ return true;
+}
+
+
+/**
+ * Emit cross product. See emit_ADD for comments.
+ */
+static boolean
+emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ spe_comment(gen->f, -4, "XPD:");
+
+ int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ int tmp_reg = get_itemp(gen);
+
+ /* t = z0 * y1 */
+ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
+
+ s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+ /* t = y0 * z1 - t */
+ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) {
+ store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]);
+ }
+
+ s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+ /* t = x0 * z1 */
+ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
+
+ s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ /* t = z0 * x1 - t */
+ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) {
+ store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]);
+ }
+
+ s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ /* t = y0 * x1 */
+ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
+
+ s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ /* t = x0 * y1 - t */
+ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) {
+ store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return true;
+}
/**
* Emit set-if-greater-than.
@@ -323,6 +992,8 @@ emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
int ch;
+ spe_comment(gen->f, -4, "SGT:");
+
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
@@ -331,26 +1002,776 @@ emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
/* d = (s1 > s2) */
spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
-#if DISASSEM
- printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
-#endif
/* convert d from 0x0/0xffffffff to 0.0/1.0 */
/* d = d & one_reg */
spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
-#if DISASSEM
- printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen));
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Emit set-if_less-then. See emit_SGT for comments.
+ */
+static boolean
+emit_SLT(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+
+ spe_comment(gen->f, -4, "SLT:");
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ /* d = (s1 < s2) */
+ spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
+
+ /* convert d from 0x0/0xffffffff to 0.0/1.0 */
+ /* d = d & one_reg */
+ spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Emit set-if_greater-then-or-equal. See emit_SGT for comments.
+ */
+static boolean
+emit_SGE(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+
+ spe_comment(gen->f, -4, "SGE:");
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ /* d = (s1 >= s2) */
+ spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
+
+ /* convert d from 0x0/0xffffffff to 0.0/1.0 */
+ /* d = ~d & one_reg */
+ spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Emit set-if_less-then-or-equal. See emit_SGT for comments.
+ */
+static boolean
+emit_SLE(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+
+ spe_comment(gen->f, -4, "SLE:");
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ /* d = (s1 <= s2) */
+ spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
+
+ /* convert d from 0x0/0xffffffff to 0.0/1.0 */
+ /* d = ~d & one_reg */
+ spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Emit set-if_equal. See emit_SGT for comments.
+ */
+static boolean
+emit_SEQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+
+ spe_comment(gen->f, -4, "SEQ:");
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ /* d = (s1 == s2) */
+ spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
+
+ /* convert d from 0x0/0xffffffff to 0.0/1.0 */
+ /* d = d & one_reg */
+ spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Emit set-if_not_equal. See emit_SGT for comments.
+ */
+static boolean
+emit_SNE(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+
+ spe_comment(gen->f, -4, "SNE:");
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ /* d = (s1 != s2) */
+ spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
+ spe_nor(gen->f, d_reg, d_reg, d_reg);
+
+ /* convert d from 0x0/0xffffffff to 0.0/1.0 */
+ /* d = d & one_reg */
+ spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Emit compare. See emit_SGT for comments.
+ */
+static boolean
+emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+
+ spe_comment(gen->f, -4, "CMP:");
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int zero_reg = get_itemp(gen);
+
+ spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
+
+ /* d = (s1 < 0) ? s2 : s3 */
+ spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
+ spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Emit trunc.
+ * Convert float to signed int
+ * Convert signed int to float
+ */
+static boolean
+emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+
+ spe_comment(gen->f, -4, "TRUNC:");
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ /* Convert float to int */
+ spe_cflts(gen->f, d_reg, s1_reg, 0);
+
+ /* Convert int to float */
+ spe_csflt(gen->f, d_reg, d_reg, 0);
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Emit floor.
+ * If negative int subtract one
+ * Convert float to signed int
+ * Convert signed int to float
+ */
+static boolean
+emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+
+ spe_comment(gen->f, -4, "FLR:");
+
+ int zero_reg = get_itemp(gen);
+ spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int tmp_reg = get_itemp(gen);
+
+ /* If negative, subtract 1.0 */
+ spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg);
+ spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg);
+ spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg);
+
+ /* Convert float to int */
+ spe_cflts(gen->f, tmp_reg, tmp_reg, 0);
+
+ /* Convert int to float */
+ spe_csflt(gen->f, d_reg, tmp_reg, 0);
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Compute frac = Input - FLR(Input)
+ */
+static boolean
+emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+
+ spe_comment(gen->f, -4, "FRC:");
+
+ int zero_reg = get_itemp(gen);
+ spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int tmp_reg = get_itemp(gen);
+
+ /* If negative, subtract 1.0 */
+ spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg);
+ spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg);
+ spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg);
+
+ /* Convert float to int */
+ spe_cflts(gen->f, tmp_reg, tmp_reg, 0);
+
+ /* Convert int to float */
+ spe_csflt(gen->f, tmp_reg, tmp_reg, 0);
+
+ /* d = s1 - FLR(s1) */
+ spe_fs(gen->f, d_reg, s1_reg, tmp_reg);
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+
+ return true;
+}
+
+
+#if 0
+static void
+print_functions(struct cell_context *cell)
+{
+ struct cell_spu_function_info *funcs = &cell->spu_functions;
+ uint i;
+ for (i = 0; i < funcs->num; i++) {
+ printf("SPU func %u: %s at %u\n",
+ i, funcs->names[i], funcs->addrs[i]);
+ }
+}
#endif
+
+static uint
+lookup_function(struct cell_context *cell, const char *funcname)
+{
+ const struct cell_spu_function_info *funcs = &cell->spu_functions;
+ uint i, addr = 0;
+ for (i = 0; i < funcs->num; i++) {
+ if (strcmp(funcs->names[i], funcname) == 0) {
+ addr = funcs->addrs[i];
+ }
+ }
+ assert(addr && "spu function not found");
+ return addr / 4; /* discard 2 least significant bits */
+}
+
+
+/**
+ * Emit code to call a SPU function.
+ * Used to implement instructions like SIN/COS/POW/TEX/etc.
+ * If scalar, only the X components of the src regs are used, and the
+ * result is replicated across the dest register's XYZW components.
+ */
+static boolean
+emit_function_call(struct codegen *gen,
+ const struct tgsi_full_instruction *inst,
+ char *funcname, uint num_args, boolean scalar)
+{
+ const uint addr = lookup_function(gen->cell, funcname);
+ char comment[100];
+ int s_regs[3];
+ int func_called = FALSE;
+ uint a, ch;
+ int retval_reg = -1;
+
+ assert(num_args <= 3);
+
+ snprintf(comment, sizeof(comment), "CALL %s:", funcname);
+ spe_comment(gen->f, -4, comment);
+
+ if (scalar) {
+ for (a = 0; a < num_args; a++) {
+ s_regs[a] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[a]);
+ }
+ /* we'll call the function, put the return value in this register,
+ * then replicate it across all write-enabled components in d_reg.
+ */
+ retval_reg = spe_allocate_available_register(gen->f);
+ }
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int d_reg;
+ ubyte usedRegs[SPE_NUM_REGS];
+ uint i, numUsed;
+
+ if (!scalar) {
+ for (a = 0; a < num_args; a++) {
+ s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]);
+ }
+ }
+
+ d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ if (!scalar || !func_called) {
+ /* for a scalar function, we'll really only call the function once */
+
+ numUsed = spe_get_registers_used(gen->f, usedRegs);
+ assert(numUsed < gen->frame_size / 16 - 2);
+
+ /* save registers to stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ int offset = 2 + i;
+ spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+
+ /* setup function arguments */
+ for (a = 0; a < num_args; a++) {
+ spe_move(gen->f, 3 + a, s_regs[a]);
+ }
+
+ /* branch to function, save return addr */
+ spe_brasl(gen->f, SPE_REG_RA, addr);
+
+ /* save function's return value */
+ if (scalar)
+ spe_move(gen->f, retval_reg, 3);
+ else
+ spe_move(gen->f, d_reg, 3);
+
+ /* restore registers from stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ if (reg != d_reg && reg != retval_reg) {
+ int offset = 2 + i;
+ spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+ }
+
+ func_called = TRUE;
+ }
+
+ if (scalar) {
+ spe_move(gen->f, d_reg, retval_reg);
+ }
+
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
free_itemps(gen);
}
}
+ if (scalar) {
+ spe_release_register(gen->f, retval_reg);
+ }
+
return true;
}
+static boolean
+emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ const uint target = inst->InstructionExtTexture.Texture;
+ const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ uint addr;
+ int ch;
+ int coord_regs[4], d_regs[4];
+
+ switch (target) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_2D:
+ addr = lookup_function(gen->cell, "spu_tex_2d");
+ break;
+ case TGSI_TEXTURE_3D:
+ addr = lookup_function(gen->cell, "spu_tex_3d");
+ break;
+ case TGSI_TEXTURE_CUBE:
+ addr = lookup_function(gen->cell, "spu_tex_cube");
+ break;
+ default:
+ ASSERT(0 && "unsupported texture target");
+ return FALSE;
+ }
+
+ assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER);
+
+ spe_comment(gen->f, -4, "CALL tex:");
+
+ /* get src/dst reg info */
+ for (ch = 0; ch < 4; ch++) {
+ coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+
+ {
+ ubyte usedRegs[SPE_NUM_REGS];
+ uint i, numUsed;
+
+ numUsed = spe_get_registers_used(gen->f, usedRegs);
+ assert(numUsed < gen->frame_size / 16 - 2);
+
+ /* save registers to stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ int offset = 2 + i;
+ spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+
+ /* setup function arguments (XXX depends on target) */
+ for (i = 0; i < 4; i++) {
+ spe_move(gen->f, 3 + i, coord_regs[i]);
+ }
+ spe_load_uint(gen->f, 7, unit); /* sampler unit */
+
+ /* branch to function, save return addr */
+ spe_brasl(gen->f, SPE_REG_RA, addr);
+
+ /* save function's return values (four pixel's colors) */
+ for (i = 0; i < 4; i++) {
+ spe_move(gen->f, d_regs[i], 3 + i);
+ }
+
+ /* restore registers from stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ if (reg != d_regs[0] &&
+ reg != d_regs[1] &&
+ reg != d_regs[2] &&
+ reg != d_regs[3]) {
+ int offset = 2 + i;
+ spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+ }
+ }
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+
+ return TRUE;
+}
+
+
+/**
+ * KILL if any of src reg values are less than zero.
+ */
+static boolean
+emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ int s_regs[4], kil_reg = -1, cmp_reg, zero_reg;
+
+ spe_comment(gen->f, -4, "CALL kil:");
+
+ /* zero = {0,0,0,0} */
+ zero_reg = get_itemp(gen);
+ spe_load_uint(gen->f, zero_reg, 0);
+
+ cmp_reg = get_itemp(gen);
+
+ /* get src regs */
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ }
+ }
+
+ /* test if any src regs are < 0 */
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ if (kil_reg >= 0) {
+ /* cmp = 0 > src ? : ~0 : 0 */
+ spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
+ /* kil = kil | cmp */
+ spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
+ }
+ else {
+ kil_reg = get_itemp(gen);
+ /* kil = 0 > src ? : ~0 : 0 */
+ spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
+ }
+ }
+ }
+
+ if (gen->if_nesting) {
+ /* may have been a conditional kil */
+ spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
+ }
+
+ /* allocate the kill mask reg if needed */
+ if (gen->kill_mask_reg <= 0) {
+ gen->kill_mask_reg = spe_allocate_available_register(gen->f);
+ spe_move(gen->f, gen->kill_mask_reg, kil_reg);
+ }
+ else {
+ spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg);
+ }
+
+ free_itemps(gen);
+
+ return TRUE;
+}
+
+
+
+/**
+ * Emit max. See emit_SGT for comments.
+ */
+static boolean
+emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
+
+ spe_comment(gen->f, -4, "MAX:");
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ tmp_reg[ch] = get_itemp(gen);
+ }
+ }
+
+ /* d = (s0 > s1) ? s0 : s1 */
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]);
+ }
+ }
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
+ }
+ }
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+ }
+
+ free_itemps(gen);
+ return true;
+}
+
+/**
+ * Emit max. See emit_SGT for comments.
+ */
+static boolean
+emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
+
+ spe_comment(gen->f, -4, "MIN:");
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ tmp_reg[ch] = get_itemp(gen);
+ }
+ }
+
+ /* d = (s1 > s0) ? s0 : s1 */
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]);
+ }
+ }
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
+ }
+ }
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+ }
+
+ free_itemps(gen);
+ return true;
+}
+
+static boolean
+emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ const int channel = 0;
+ const int exec_reg = get_exec_mask_reg(gen);
+
+ spe_comment(gen->f, -4, "IF:");
+
+ /* update execution mask with the predicate register */
+ int tmp_reg = get_itemp(gen);
+ int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]);
+
+ /* tmp = (s1_reg == 0) */
+ spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
+ /* tmp = !tmp */
+ spe_complement(gen->f, tmp_reg, tmp_reg);
+ /* exec_mask = exec_mask & tmp */
+ spe_and(gen->f, exec_reg, exec_reg, tmp_reg);
+
+ gen->if_nesting++;
+
+ free_itemps(gen);
+
+ return true;
+}
+
+
+static boolean
+emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ const int exec_reg = get_exec_mask_reg(gen);
+
+ spe_comment(gen->f, -4, "ELSE:");
+
+ /* exec_mask = !exec_mask */
+ spe_complement(gen->f, exec_reg, exec_reg);
+
+ return true;
+}
+
+
+static boolean
+emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ const int exec_reg = get_exec_mask_reg(gen);
+
+ spe_comment(gen->f, -4, "ENDIF:");
+
+ /* XXX todo: pop execution mask */
+
+ spe_load_int(gen->f, exec_reg, ~0x0);
+
+ gen->if_nesting--;
+ return true;
+}
+
+
+static boolean
+emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
+ boolean ddx)
+{
+ int ch;
+
+ spe_comment(gen->f, -4, ddx ? "DDX:" : "DDY:");
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ int t1_reg = get_itemp(gen);
+ int t2_reg = get_itemp(gen);
+
+ spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
+ if (ddx) {
+ spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
+ }
+ else {
+ spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
+ }
+ spe_fs(gen->f, d_reg, t2_reg, t1_reg);
+
+ free_itemps(gen);
+ }
+ }
+
+ return true;
+}
+
+
+
+
/**
* Emit END instruction.
* We just return from the shader function at this point.
@@ -361,11 +1782,8 @@ emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
static boolean
emit_END(struct codegen *gen)
{
- /* return from function call */
- spe_bi(gen->f, SPE_REG_RA, 0, 0);
-#if DISASSEM
- printf("bi\trRA\n");
-#endif
+ spe_comment(gen->f, -4, "END:");
+ emit_epilogue(gen);
return true;
}
@@ -379,19 +1797,101 @@ emit_instruction(struct codegen *gen,
{
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_SWZ:
return emit_MOV(gen, inst);
case TGSI_OPCODE_MUL:
return emit_MUL(gen, inst);
case TGSI_OPCODE_ADD:
return emit_ADD(gen, inst);
+ case TGSI_OPCODE_SUB:
+ return emit_SUB(gen, inst);
+ case TGSI_OPCODE_MAD:
+ return emit_MAD(gen, inst);
+ case TGSI_OPCODE_LERP:
+ return emit_LERP(gen, inst);
+ case TGSI_OPCODE_DP3:
+ return emit_DP3(gen, inst);
+ case TGSI_OPCODE_DP4:
+ return emit_DP4(gen, inst);
+ case TGSI_OPCODE_DPH:
+ return emit_DPH(gen, inst);
+ case TGSI_OPCODE_NRM:
+ return emit_NRM3(gen, inst);
+ case TGSI_OPCODE_XPD:
+ return emit_XPD(gen, inst);
+ case TGSI_OPCODE_RCP:
+ return emit_RCP(gen, inst);
+ case TGSI_OPCODE_RSQ:
+ return emit_RSQ(gen, inst);
+ case TGSI_OPCODE_ABS:
+ return emit_ABS(gen, inst);
case TGSI_OPCODE_SGT:
return emit_SGT(gen, inst);
+ case TGSI_OPCODE_SLT:
+ return emit_SLT(gen, inst);
+ case TGSI_OPCODE_SGE:
+ return emit_SGE(gen, inst);
+ case TGSI_OPCODE_SLE:
+ return emit_SLE(gen, inst);
+ case TGSI_OPCODE_SEQ:
+ return emit_SEQ(gen, inst);
+ case TGSI_OPCODE_SNE:
+ return emit_SNE(gen, inst);
+ case TGSI_OPCODE_CMP:
+ return emit_CMP(gen, inst);
+ case TGSI_OPCODE_MAX:
+ return emit_MAX(gen, inst);
+ case TGSI_OPCODE_MIN:
+ return emit_MIN(gen, inst);
+ case TGSI_OPCODE_TRUNC:
+ return emit_TRUNC(gen, inst);
+ case TGSI_OPCODE_FLR:
+ return emit_FLR(gen, inst);
+ case TGSI_OPCODE_FRC:
+ return emit_FRC(gen, inst);
case TGSI_OPCODE_END:
return emit_END(gen);
+ case TGSI_OPCODE_COS:
+ return emit_function_call(gen, inst, "spu_cos", 1, TRUE);
+ case TGSI_OPCODE_SIN:
+ return emit_function_call(gen, inst, "spu_sin", 1, TRUE);
+ case TGSI_OPCODE_POW:
+ return emit_function_call(gen, inst, "spu_pow", 2, TRUE);
+ case TGSI_OPCODE_EXPBASE2:
+ return emit_function_call(gen, inst, "spu_exp2", 1, TRUE);
+ case TGSI_OPCODE_LOGBASE2:
+ return emit_function_call(gen, inst, "spu_log2", 1, TRUE);
+ case TGSI_OPCODE_TEX:
+ /* fall-through for now */
+ case TGSI_OPCODE_TXD:
+ /* fall-through for now */
+ case TGSI_OPCODE_TXB:
+ /* fall-through for now */
+ case TGSI_OPCODE_TXL:
+ /* fall-through for now */
+ case TGSI_OPCODE_TXP:
+ return emit_TEX(gen, inst);
+ case TGSI_OPCODE_KIL:
+ return emit_KIL(gen, inst);
+
+ case TGSI_OPCODE_IF:
+ return emit_IF(gen, inst);
+ case TGSI_OPCODE_ELSE:
+ return emit_ELSE(gen, inst);
+ case TGSI_OPCODE_ENDIF:
+ return emit_ENDIF(gen, inst);
+
+ case TGSI_OPCODE_DDX:
+ return emit_DDX_DDY(gen, inst, true);
+ case TGSI_OPCODE_DDY:
+ return emit_DDX_DDY(gen, inst, false);
+
/* XXX lots more cases to do... */
default:
+ fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
+ inst->Instruction.Opcode);
return false;
}
@@ -401,48 +1901,93 @@ emit_instruction(struct codegen *gen,
/**
+ * Emit code for a TGSI immediate value (vector of four floats).
+ * This involves register allocation and initialization.
+ * XXX the initialization should be done by a "prepare" stage, not
+ * per quad execution!
+ */
+static boolean
+emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
+{
+ int ch;
+
+ assert(gen->num_imm < MAX_TEMPS);
+
+ spe_comment(gen->f, -4, "IMMEDIATE:");
+
+ for (ch = 0; ch < 4; ch++) {
+ float val = immed->u.ImmediateFloat32[ch].Float;
+
+ if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) {
+ /* re-use previous register */
+ gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
+ }
+ else {
+ int reg = spe_allocate_available_register(gen->f);
+
+ if (reg < 0)
+ return false;
+
+ /* update immediate map */
+ gen->imm_regs[gen->num_imm][ch] = reg;
+
+ /* emit initializer instruction */
+ spe_load_float(gen->f, reg, val);
+ }
+ }
+
+ gen->num_imm++;
+
+ return true;
+}
+
+
+
+/**
* Emit "code" for a TGSI declaration.
* We only care about TGSI TEMPORARY register declarations at this time.
* For each TGSI TEMPORARY we allocate four SPE registers.
*/
-static void
-emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl)
+static boolean
+emit_declaration(struct cell_context *cell,
+ struct codegen *gen, const struct tgsi_full_declaration *decl)
{
int i, ch;
switch (decl->Declaration.File) {
case TGSI_FILE_TEMPORARY:
-#if DISASSEM
- printf("Declare temp reg %d .. %d\n",
- decl->DeclarationRange.First,
- decl->DeclarationRange.Last);
-#endif
for (i = decl->DeclarationRange.First;
i <= decl->DeclarationRange.Last;
i++) {
+ assert(i < MAX_TEMPS);
for (ch = 0; ch < 4; ch++) {
gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
+ if (gen->temp_regs[i][ch] < 0)
+ return false; /* out of regs */
}
/* XXX if we run out of SPE registers, we need to spill
* to SPU memory. someday...
*/
-#if DISASSEM
- printf(" SPE regs: %d %d %d %d\n",
- gen->temp_regs[i][0],
- gen->temp_regs[i][1],
- gen->temp_regs[i][2],
- gen->temp_regs[i][3]);
-#endif
+ {
+ char buf[100];
+ sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i,
+ gen->temp_regs[i][0], gen->temp_regs[i][1],
+ gen->temp_regs[i][2], gen->temp_regs[i][3]);
+ spe_comment(gen->f, -4, buf);
+ }
}
break;
default:
; /* ignore */
}
+
+ return true;
}
+
/**
* Translate TGSI shader code to SPE instructions. This is done when
* the state tracker gives us a new shader (via pipe->create_fs_state()).
@@ -460,6 +2005,7 @@ cell_gen_fragment_program(struct cell_context *cell,
struct codegen gen;
memset(&gen, 0, sizeof(gen));
+ gen.cell = cell;
gen.f = f;
/* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
@@ -472,50 +2018,50 @@ cell_gen_fragment_program(struct cell_context *cell,
spe_allocate_register(f, gen.outputs_reg);
spe_allocate_register(f, gen.constants_reg);
-#if DISASSEM
- printf("Begin %s\n", __FUNCTION__);
- tgsi_dump(tokens, 0);
-#endif
+ if (cell->debug_flags & CELL_DEBUG_ASM) {
+ spe_print_code(f, true);
+ spe_indent(f, 8);
+ printf("Begin %s\n", __FUNCTION__);
+ tgsi_dump(tokens, 0);
+ }
tgsi_parse_init(&parse, tokens);
+ emit_prologue(&gen);
+
while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
tgsi_parse_token(&parse);
switch (parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
-#if 0
- if (!note_immediate(&gen, &parse.FullToken.FullImmediate ))
- goto fail;
-#endif
+ if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
+ gen.error = true;
break;
case TGSI_TOKEN_TYPE_DECLARATION:
- emit_declaration(&gen, &parse.FullToken.FullDeclaration);
+ if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
+ gen.error = true;
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) {
+ if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
gen.error = true;
- }
break;
default:
assert(0);
-
}
}
-
if (gen.error) {
/* terminate the SPE code */
return emit_END(&gen);
}
-#if DISASSEM
- printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
- printf("End %s\n", __FUNCTION__);
-#endif
+ if (cell->debug_flags & CELL_DEBUG_ASM) {
+ printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
+ printf("End %s\n", __FUNCTION__);
+ }
tgsi_parse_free( &parse );
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
index 06219d4e980..2c64eb1bccd 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
@@ -54,12 +54,17 @@
* \param ifragZ_reg register containing integer fragment Z values (in)
* \param ifbZ_reg register containing integer frame buffer Z values (in/out)
* \param zmask_reg register containing result of Z test/comparison (out)
+ *
+ * Returns true if the Z-buffer needs to be updated.
*/
-static void
-gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa,
- struct spe_function *f,
+static boolean
+gen_depth_test(struct spe_function *f,
+ const struct pipe_depth_stencil_alpha_state *dsa,
int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg)
{
+ /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_
+ * quantities. This only makes a difference for 32-bit Z values though.
+ */
ASSERT(dsa->depth.enabled);
switch (dsa->depth.func) {
@@ -79,28 +84,28 @@ gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa,
case PIPE_FUNC_GREATER:
/* zmask = (ifragZ > ref) */
- spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
/* mask = (mask & zmask) */
spe_and(f, mask_reg, mask_reg, zmask_reg);
break;
case PIPE_FUNC_LESS:
/* zmask = (ref > ifragZ) */
- spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
+ spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
/* mask = (mask & zmask) */
spe_and(f, mask_reg, mask_reg, zmask_reg);
break;
case PIPE_FUNC_LEQUAL:
/* zmask = (ifragZ > ref) */
- spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
/* mask = (mask & ~zmask) */
spe_andc(f, mask_reg, mask_reg, zmask_reg);
break;
case PIPE_FUNC_GEQUAL:
/* zmask = (ref > ifragZ) */
- spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
+ spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
/* mask = (mask & ~zmask) */
spe_andc(f, mask_reg, mask_reg, zmask_reg);
break;
@@ -129,7 +134,10 @@ gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa,
* framebufferZ = (ztest_passed ? fragmentZ : framebufferZ;
*/
spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg);
+ return true;
}
+
+ return false;
}
@@ -229,7 +237,40 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
spe_release_register(f, amask_reg);
}
+/* This pair of functions is used inline to allocate and deallocate
+ * optional constant registers. Once a constant is discovered to be
+ * needed, we will likely need it again, so we don't want to deallocate
+ * it and have to allocate and load it again unnecessarily.
+ */
+static inline void
+setup_optional_register(struct spe_function *f, boolean *is_already_set, unsigned int *r)
+{
+ if (*is_already_set) return;
+ *r = spe_allocate_available_register(f);
+ *is_already_set = true;
+}
+
+static inline void
+release_optional_register(struct spe_function *f, boolean *is_already_set, unsigned int r)
+{
+ if (!*is_already_set) return;
+ spe_release_register(f, r);
+ *is_already_set = false;
+}
+
+static inline void
+setup_const_register(struct spe_function *f, boolean *is_already_set, unsigned int *r, float value)
+{
+ if (*is_already_set) return;
+ setup_optional_register(f, is_already_set, r);
+ spe_load_float(f, *r, value);
+}
+static inline void
+release_const_register(struct spe_function *f, boolean *is_already_set, unsigned int r)
+{
+ release_optional_register(f, is_already_set, r);
+}
/**
* Generate SPE code to implement the given blend mode for a quad of pixels.
@@ -242,6 +283,7 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
*/
static void
gen_blend(const struct pipe_blend_state *blend,
+ const struct pipe_blend_color *blend_color,
struct spe_function *f,
enum pipe_format color_format,
int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg,
@@ -262,10 +304,17 @@ gen_blend(const struct pipe_blend_state *blend,
int fbB_reg = spe_allocate_available_register(f);
int fbA_reg = spe_allocate_available_register(f);
- int one_reg = spe_allocate_available_register(f);
int tmp_reg = spe_allocate_available_register(f);
- boolean one_reg_set = false; /* avoid setting one_reg more than once */
+ /* Optional constant registers we might or might not end up using;
+ * if we do use them, make sure we only allocate them once by
+ * keeping a flag on each one.
+ */
+ boolean one_reg_set = false;
+ unsigned int one_reg;
+ boolean constR_reg_set = false, constG_reg_set = false,
+ constB_reg_set = false, constA_reg_set = false;
+ unsigned int constR_reg, constG_reg, constB_reg, constA_reg;
ASSERT(blend->blend_enable);
@@ -346,127 +395,449 @@ gen_blend(const struct pipe_blend_state *blend,
spe_release_register(f, mask_reg);
}
-
/*
- * Compute Src RGB terms
+ * Compute Src RGB terms. We're actually looking for the value
+ * of (the appropriate RGB factors) * (the incoming source RGB color),
+ * because in some cases (like PIPE_BLENDFACTOR_ONE and
+ * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math.
*/
switch (blend->rgb_src_factor) {
case PIPE_BLENDFACTOR_ONE:
+ /* factors = (1,1,1), so term = (R,G,B) */
spe_move(f, term1R_reg, fragR_reg);
spe_move(f, term1G_reg, fragG_reg);
spe_move(f, term1B_reg, fragB_reg);
break;
case PIPE_BLENDFACTOR_ZERO:
- spe_zero(f, term1R_reg);
- spe_zero(f, term1G_reg);
- spe_zero(f, term1B_reg);
+ /* factors = (0,0,0), so term = (0,0,0) */
+ spe_load_float(f, term1R_reg, 0.0f);
+ spe_load_float(f, term1G_reg, 0.0f);
+ spe_load_float(f, term1B_reg, 0.0f);
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* factors = (R,G,B), so term = (R*R, G*G, B*B) */
spe_fm(f, term1R_reg, fragR_reg, fragR_reg);
spe_fm(f, term1G_reg, fragG_reg, fragG_reg);
spe_fm(f, term1B_reg, fragB_reg, fragB_reg);
break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
+ /* factors = (A,A,A), so term = (R*A, G*A, B*A) */
spe_fm(f, term1R_reg, fragR_reg, fragA_reg);
spe_fm(f, term1G_reg, fragG_reg, fragA_reg);
spe_fm(f, term1B_reg, fragB_reg, fragA_reg);
break;
- /* XXX more cases */
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B))
+ * or in other words term = (R-R*R, G-G*G, B-B*B)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */
+ spe_fm(f, term1R_reg, fragR_reg, fbR_reg);
+ spe_fm(f, term1G_reg, fragG_reg, fbG_reg);
+ spe_fm(f, term1B_reg, fragB_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb))
+ * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A))
+ * or term = (R-R*A,G-G*A,B-B*A)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */
+ spe_fm(f, term1R_reg, fragR_reg, fbA_reg);
+ spe_fm(f, term1G_reg, fragG_reg, fbA_reg);
+ spe_fm(f, term1B_reg, fragB_reg, fbA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb))
+ * or term = (R-R*Afb,G-G*Afb,b-B*Afb)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]);
+ /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */
+ spe_fm(f, term1R_reg, fragR_reg, constR_reg);
+ spe_fm(f, term1G_reg, fragG_reg, constG_reg);
+ spe_fm(f, term1B_reg, fragB_reg, constB_reg);
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ /* we'll need the optional constant alpha register */
+ setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]);
+ /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */
+ spe_fm(f, term1R_reg, fragR_reg, constA_reg);
+ spe_fm(f, term1G_reg, fragG_reg, constA_reg);
+ spe_fm(f, term1B_reg, fragB_reg, constA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]);
+ /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc))
+ * or term = (R-R*Rc, G-G*Gc, B-B*Bc)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]);
+ /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac))
+ * or term = (R-R*Ac,G-G*Ac,B-B*Ac)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg);
+ spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg);
+ spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ /* We'll need the optional {1,1,1,1} register */
+ setup_const_register(f, &one_reg_set, &one_reg, 1.0f);
+ /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so
+ * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb))
+ * We could expand the term (as a*min(b,c) == min(a*b,a*c)
+ * as long as a is positive), but then we'd have to do three
+ * spe_float_min() functions instead of one, so this is simpler.
+ */
+ /* tmp = 1 - Afb */
+ spe_fs(f, tmp_reg, one_reg, fbA_reg);
+ /* tmp = min(A,tmp) */
+ spe_float_min(f, tmp_reg, fragA_reg, tmp_reg);
+ /* term = R*tmp */
+ spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
+ spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
+ spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
+ break;
+
+ /* These are special D3D cases involving a second color output
+ * from the fragment shader. I'm not sure we can support them
+ * yet... XXX
+ */
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+
default:
ASSERT(0);
}
/*
- * Compute Src Alpha term
+ * Compute Src Alpha term. Like the above, we're looking for
+ * the full term A*factor, not just the factor itself, because
+ * in many cases we can avoid doing unnecessary multiplies.
*/
switch (blend->alpha_src_factor) {
+ case PIPE_BLENDFACTOR_ZERO:
+ /* factor = 0, so term = 0 */
+ spe_load_float(f, term1A_reg, 0.0f);
+ break;
+
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */
case PIPE_BLENDFACTOR_ONE:
+ /* factor = 1, so term = A */
spe_move(f, term1A_reg, fragA_reg);
break;
+
case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* factor = A, so term = A*A */
spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
break;
- /* XXX more cases */
+
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* factor = 1-A, so term = A*(1-A) = A-A*A */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* factor = Afb, so term = A*Afb */
+ spe_fm(f, term1A_reg, fragA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* factor = 1-Afb, so term = A*(1-Afb) = A - A*Afb */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* We need the optional constA_reg register */
+ setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]);
+ /* factor = Ac, so term = A*Ac */
+ spe_fm(f, term1A_reg, fragA_reg, constA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* We need the optional constA_reg register */
+ setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]);
+ /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg);
+ break;
+
+ /* These are special D3D cases involving a second color output
+ * from the fragment shader. I'm not sure we can support them
+ * yet... XXX
+ */
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
default:
ASSERT(0);
}
/*
- * Compute Dest RGB terms
+ * Compute Dest RGB term. Like the above, we're looking for
+ * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because
+ * in many cases we can avoid doing unnecessary multiplies.
*/
switch (blend->rgb_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
+ /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */
spe_move(f, term2R_reg, fbR_reg);
spe_move(f, term2G_reg, fbG_reg);
spe_move(f, term2B_reg, fbB_reg);
break;
case PIPE_BLENDFACTOR_ZERO:
- spe_zero(f, term2R_reg);
- spe_zero(f, term2G_reg);
- spe_zero(f, term2B_reg);
+ /* factor s= (0,0,0), so term = (0,0,0) */
+ spe_load_float(f, term2R_reg, 0.0f);
+ spe_load_float(f, term2G_reg, 0.0f);
+ spe_load_float(f, term2B_reg, 0.0f);
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */
spe_fm(f, term2R_reg, fbR_reg, fragR_reg);
spe_fm(f, term2G_reg, fbG_reg, fragG_reg);
spe_fm(f, term2B_reg, fbB_reg, fragB_reg);
break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B))
+ * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg);
+ break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
+ /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */
spe_fm(f, term2R_reg, fbR_reg, fragA_reg);
spe_fm(f, term2G_reg, fbG_reg, fragA_reg);
spe_fm(f, term2B_reg, fbB_reg, fragA_reg);
break;
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- /* one = {1.0, 1.0, 1.0, 1.0} */
- if (!one_reg_set) {
- spe_load_float(f, one_reg, 1.0f);
- one_reg_set = true;
- }
- /* tmp = one - fragA */
- spe_fs(f, tmp_reg, one_reg, fragA_reg);
- /* term = fb * tmp */
- spe_fm(f, term2R_reg, fbR_reg, tmp_reg);
- spe_fm(f, term2G_reg, fbG_reg, tmp_reg);
- spe_fm(f, term2B_reg, fbB_reg, tmp_reg);
- break;
- /* XXX more cases */
+ /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */
+ spe_fm(f, term2R_reg, fbR_reg, fbR_reg);
+ spe_fm(f, term2G_reg, fbG_reg, fbG_reg);
+ spe_fm(f, term2B_reg, fbB_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb))
+ * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */
+ spe_fm(f, term2R_reg, fbR_reg, fbA_reg);
+ spe_fm(f, term2G_reg, fbG_reg, fbA_reg);
+ spe_fm(f, term2B_reg, fbB_reg, fbA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb))
+ * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]);
+ /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */
+ spe_fm(f, term2R_reg, fbR_reg, constR_reg);
+ spe_fm(f, term2G_reg, fbG_reg, constG_reg);
+ spe_fm(f, term2B_reg, fbB_reg, constB_reg);
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ /* we'll need the optional constant alpha register */
+ setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]);
+ /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */
+ spe_fm(f, term2R_reg, fbR_reg, constA_reg);
+ spe_fm(f, term2G_reg, fbG_reg, constA_reg);
+ spe_fm(f, term2B_reg, fbB_reg, constA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]);
+ /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc))
+ * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ /* We need the optional constant color registers */
+ setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]);
+ setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]);
+ setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]);
+ /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac))
+ * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac)
+ * fnms(a,b,c,d) computes a = d - b*c
+ */
+ spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg);
+ spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg);
+ spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */
+ ASSERT(0);
+ break;
+
+ /* These are special D3D cases involving a second color output
+ * from the fragment shader. I'm not sure we can support them
+ * yet... XXX
+ */
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+
default:
ASSERT(0);
}
/*
- * Compute Dest Alpha term
+ * Compute Dest Alpha term. Like the above, we're looking for
+ * the full term Afb*factor, not just the factor itself, because
+ * in many cases we can avoid doing unnecessary multiplies.
*/
switch (blend->alpha_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
+ /* factor = 1, so term = Afb */
spe_move(f, term2A_reg, fbA_reg);
break;
case PIPE_BLENDFACTOR_ZERO:
- spe_zero(f, term2A_reg);
+ /* factor = 0, so term = 0 */
+ spe_load_float(f, term2A_reg, 0.0f);
break;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
+
+ case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* factor = A, so term = Afb*A */
spe_fm(f, term2A_reg, fbA_reg, fragA_reg);
break;
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- /* one = {1.0, 1.0, 1.0, 1.0} */
- if (!one_reg_set) {
- spe_load_float(f, one_reg, 1.0f);
- one_reg_set = true;
- }
- /* tmp = one - fragA */
- spe_fs(f, tmp_reg, one_reg, fragA_reg);
- /* termA = fbA * tmp */
- spe_fm(f, term2A_reg, fbA_reg, tmp_reg);
+
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* factor = Afb, so term = Afb*Afb */
+ spe_fm(f, term2A_reg, fbA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* We need the optional constA_reg register */
+ setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]);
+ /* factor = Ac, so term = Afb*Ac */
+ spe_fm(f, term2A_reg, fbA_reg, constA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* We need the optional constA_reg register */
+ setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]);
+ /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */
+ /* fnms(a,b,c,d) computes a = d - b*c */
+ spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg);
+ break;
+
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */
+ ASSERT(0);
break;
- /* XXX more cases */
+
+ /* These are special D3D cases involving a second color output
+ * from the fragment shader. I'm not sure we can support them
+ * yet... XXX
+ */
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
default:
ASSERT(0);
}
/*
- * Combine Src/Dest RGB terms
+ * Combine Src/Dest RGB terms as per the blend equation.
*/
switch (blend->rgb_func) {
case PIPE_BLEND_ADD:
@@ -479,7 +850,21 @@ gen_blend(const struct pipe_blend_state *blend,
spe_fs(f, fragG_reg, term1G_reg, term2G_reg);
spe_fs(f, fragB_reg, term1B_reg, term2B_reg);
break;
- /* XXX more cases */
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ spe_fs(f, fragR_reg, term2R_reg, term1R_reg);
+ spe_fs(f, fragG_reg, term2G_reg, term1G_reg);
+ spe_fs(f, fragB_reg, term2B_reg, term1B_reg);
+ break;
+ case PIPE_BLEND_MIN:
+ spe_float_min(f, fragR_reg, term1R_reg, term2R_reg);
+ spe_float_min(f, fragG_reg, term1G_reg, term2G_reg);
+ spe_float_min(f, fragB_reg, term1B_reg, term2B_reg);
+ break;
+ case PIPE_BLEND_MAX:
+ spe_float_max(f, fragR_reg, term1R_reg, term2R_reg);
+ spe_float_max(f, fragG_reg, term1G_reg, term2G_reg);
+ spe_float_max(f, fragB_reg, term1B_reg, term2B_reg);
+ break;
default:
ASSERT(0);
}
@@ -494,7 +879,15 @@ gen_blend(const struct pipe_blend_state *blend,
case PIPE_BLEND_SUBTRACT:
spe_fs(f, fragA_reg, term1A_reg, term2A_reg);
break;
- /* XXX more cases */
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ spe_fs(f, fragA_reg, term2A_reg, term1A_reg);
+ break;
+ case PIPE_BLEND_MIN:
+ spe_float_min(f, fragA_reg, term1A_reg, term2A_reg);
+ break;
+ case PIPE_BLEND_MAX:
+ spe_float_max(f, fragA_reg, term1A_reg, term2A_reg);
+ break;
default:
ASSERT(0);
}
@@ -514,8 +907,14 @@ gen_blend(const struct pipe_blend_state *blend,
spe_release_register(f, fbB_reg);
spe_release_register(f, fbA_reg);
- spe_release_register(f, one_reg);
spe_release_register(f, tmp_reg);
+
+ /* Free any optional registers that actually got used */
+ release_const_register(f, &one_reg_set, one_reg);
+ release_const_register(f, &constR_reg_set, constR_reg);
+ release_const_register(f, &constG_reg_set, constG_reg);
+ release_const_register(f, &constB_reg_set, constB_reg);
+ release_const_register(f, &constA_reg_set, constA_reg);
}
@@ -524,24 +923,74 @@ gen_logicop(const struct pipe_blend_state *blend,
struct spe_function *f,
int fragRGBA_reg, int fbRGBA_reg)
{
- /* XXX to-do */
- /* operate on 32-bit packed pixels, not float colors */
-}
-
-
-static void
-gen_colormask(uint colormask,
- struct spe_function *f,
- int fragRGBA_reg, int fbRGBA_reg)
-{
- /* XXX to-do */
- /* operate on 32-bit packed pixels, not float colors */
+ /* We've got four 32-bit RGBA packed pixels in each of
+ * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
+ * reds, greens, blues, and alphas.
+ * */
+ ASSERT(blend->logicop_enable);
+
+ switch(blend->logicop_func) {
+ case PIPE_LOGICOP_CLEAR: /* 0 */
+ spe_zero(f, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_NOR: /* ~(s | d) */
+ spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */
+ /* andc R, A, B computes R = A & ~B */
+ spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_COPY_INVERTED: /* ~s */
+ spe_complement(f, fragRGBA_reg, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */
+ /* andc R, A, B computes R = A & ~B */
+ spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_INVERT: /* ~d */
+ /* Note that (A nor A) == ~(A|A) == ~A */
+ spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_XOR: /* s ^ d */
+ spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_NAND: /* ~(s & d) */
+ spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_AND: /* s & d */
+ spe_and(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */
+ spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ spe_complement(f, fragRGBA_reg, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_NOOP: /* d */
+ spe_move(f, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */
+ /* orc R, A, B computes R = A | ~B */
+ spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg);
+ break;
+ case PIPE_LOGICOP_COPY: /* s */
+ break;
+ case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */
+ /* orc R, A, B computes R = A | ~B */
+ spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_OR: /* s | d */
+ spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+ break;
+ case PIPE_LOGICOP_SET: /* 1 */
+ spe_load_int(f, fragRGBA_reg, 0xffffffff);
+ break;
+ default:
+ ASSERT(0);
+ }
}
-
/**
- * Generate code to pack a quad of float colors into a four 32-bit integers.
+ * Generate code to pack a quad of float colors into four 32-bit integers.
*
* \param f SPE function to append instruction onto.
* \param color_format the dest color packing format
@@ -557,13 +1006,16 @@ gen_pack_colors(struct spe_function *f,
int r_reg, int g_reg, int b_reg, int a_reg,
int rgba_reg)
{
+ int rg_reg = spe_allocate_available_register(f);
+ int ba_reg = spe_allocate_available_register(f);
+
/* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */
spe_cfltu(f, r_reg, r_reg, 32);
spe_cfltu(f, g_reg, g_reg, 32);
spe_cfltu(f, b_reg, b_reg, 32);
spe_cfltu(f, a_reg, a_reg, 32);
- /* Shift the most significant bytes to least the significant positions.
+ /* Shift the most significant bytes to the least significant positions.
* I.e.: reg = reg >> 24
*/
spe_rotmi(f, r_reg, r_reg, -24);
@@ -595,12 +1047,713 @@ gen_pack_colors(struct spe_function *f,
* OR-ing all those together gives us four packed colors:
* RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
*/
- spe_or(f, rgba_reg, r_reg, g_reg);
- spe_or(f, rgba_reg, rgba_reg, b_reg);
- spe_or(f, rgba_reg, rgba_reg, a_reg);
+ spe_or(f, rg_reg, r_reg, g_reg);
+ spe_or(f, ba_reg, a_reg, b_reg);
+ spe_or(f, rgba_reg, rg_reg, ba_reg);
+
+ spe_release_register(f, rg_reg);
+ spe_release_register(f, ba_reg);
+}
+
+static void
+gen_colormask(struct spe_function *f,
+ uint colormask,
+ enum pipe_format color_format,
+ int fragRGBA_reg, int fbRGBA_reg)
+{
+ /* We've got four 32-bit RGBA packed pixels in each of
+ * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
+ * reds, greens, blues, and alphas. Further, the pixels
+ * are packed according to the given color format, not
+ * necessarily RGBA...
+ */
+ unsigned int r_mask;
+ unsigned int g_mask;
+ unsigned int b_mask;
+ unsigned int a_mask;
+
+ /* Calculate exactly where the bits for any particular color
+ * end up, so we can mask them correctly.
+ */
+ switch(color_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ /* ARGB */
+ a_mask = 0xff000000;
+ r_mask = 0x00ff0000;
+ g_mask = 0x0000ff00;
+ b_mask = 0x000000ff;
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ /* BGRA */
+ b_mask = 0xff000000;
+ g_mask = 0x00ff0000;
+ r_mask = 0x0000ff00;
+ a_mask = 0x000000ff;
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ /* For each R, G, B, and A component we're supposed to mask out,
+ * clear its bits. Then our mask operation later will work
+ * as expected.
+ */
+ if (!(colormask & PIPE_MASK_R)) {
+ r_mask = 0;
+ }
+ if (!(colormask & PIPE_MASK_G)) {
+ g_mask = 0;
+ }
+ if (!(colormask & PIPE_MASK_B)) {
+ b_mask = 0;
+ }
+ if (!(colormask & PIPE_MASK_A)) {
+ a_mask = 0;
+ }
+
+ /* Get a temporary register to hold the mask that will be applied to the fragment */
+ int colormask_reg = spe_allocate_available_register(f);
+
+ /* The actual mask we're going to use is an OR of the remaining R, G, B, and A
+ * masks. Load the result value into our temporary register.
+ */
+ spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask);
+
+ /* Use the mask register to select between the fragment color
+ * values and the frame buffer color values. Wherever the
+ * mask has a 0 bit, the current frame buffer color should override
+ * the fragment color. Wherever the mask has a 1 bit, the
+ * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM)
+ * instruction will select bits from its first operand rA wherever the
+ * the mask bits rM are 0, and from its second operand rB wherever the
+ * mask bits rM are 1. That means that the frame buffer color is the
+ * first operand, and the fragment color the second.
+ */
+ spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg);
+
+ /* Release the temporary register and we're done */
+ spe_release_register(f, colormask_reg);
}
+/* This function is annoyingly similar to gen_depth_test(), above, except
+ * that instead of comparing two varying values (i.e. fragment and buffer),
+ * we're comparing a varying value with a static value. As such, we have
+ * access to the Compare Immediate instructions where we don't in
+ * gen_depth_test(), which is what makes us very different.
+ *
+ * There's some added complexity if there's a non-trivial state->mask
+ * value; then stencil and reference both must be masked
+ *
+ * The return value in the stencil_pass_reg is a bitmask of valid
+ * fragments that also passed the stencil test. The bitmask of valid
+ * fragments that failed would be found in (fragment_mask_reg & ~stencil_pass_reg).
+ */
+static void
+gen_stencil_test(struct spe_function *f, const struct pipe_stencil_state *state,
+ unsigned int stencil_max_value,
+ unsigned int fragment_mask_reg, unsigned int fbS_reg,
+ unsigned int stencil_pass_reg)
+{
+ /* Generate code that puts the set of passing fragments into the stencil_pass_reg
+ * register, taking into account whether each fragment was active to begin with.
+ */
+ switch (state->func) {
+ case PIPE_FUNC_EQUAL:
+ if (state->value_mask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (s == reference) */
+ spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ((s&mask) == (reference&mask)) */
+ unsigned int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask);
+ spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, state->value_mask & state->ref_value);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ if (state->value_mask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & ~(s == reference) */
+ spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ~((s&mask) == (reference&mask)) */
+ unsigned int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask);
+ spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, state->value_mask & state->ref_value);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_LESS:
+ if (state->value_mask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (reference < s) */
+ spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ((reference&mask) < (s & mask)) */
+ unsigned int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask);
+ spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, state->value_mask & state->ref_value);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_GREATER:
+ if (state->value_mask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (reference > s) */
+ /* There's no convenient Compare Less Than Immediate instruction, so
+ * we'll have to do this one the harder way, by loading a register and
+ * comparing directly. Compare Logical Greater Than Word (clgt)
+ * treats its operands as unsigned - no sign extension.
+ */
+ unsigned int tmp_reg = spe_allocate_available_register(f);
+ spe_load_uint(f, tmp_reg, state->ref_value);
+ spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ((reference&mask) > (s&mask)) */
+ unsigned int tmp_reg = spe_allocate_available_register(f);
+ unsigned int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_load_uint(f, tmp_reg, state->value_mask & state->ref_value);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask);
+ spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil);
+ spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_GEQUAL:
+ if (state->value_mask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (reference >= s)
+ * = fragment_mask & ~(s > reference) */
+ spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ~((s&mask) > (reference&mask)) */
+ unsigned int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask);
+ spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, state->value_mask & state->ref_value);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_LEQUAL:
+ if (state->value_mask == stencil_max_value) {
+ /* stencil_pass = fragment_mask & (reference <= s) ]
+ * = fragment_mask & ~(reference > s) */
+ /* As above, we have to do this by loading a register */
+ unsigned int tmp_reg = spe_allocate_available_register(f);
+ spe_load_uint(f, tmp_reg, state->ref_value);
+ spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_reg);
+ }
+ else {
+ /* stencil_pass = fragment_mask & ~((reference&mask) > (s&mask)) */
+ unsigned int tmp_reg = spe_allocate_available_register(f);
+ unsigned int tmp_masked_stencil = spe_allocate_available_register(f);
+ spe_load_uint(f, tmp_reg, state->ref_value & state->value_mask);
+ spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask);
+ spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil);
+ spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
+ spe_release_register(f, tmp_reg);
+ spe_release_register(f, tmp_masked_stencil);
+ }
+ break;
+
+ case PIPE_FUNC_NEVER:
+ /* stencil_pass = fragment_mask & 0 = 0 */
+ spe_load_uint(f, stencil_pass_reg, 0);
+ break;
+
+ case PIPE_FUNC_ALWAYS:
+ /* stencil_pass = fragment_mask & 1 = fragment_mask */
+ spe_move(f, stencil_pass_reg, fragment_mask_reg);
+ break;
+ }
+
+ /* The fragments that passed the stencil test are now in stencil_pass_reg.
+ * The fragments that failed would be (fragment_mask_reg & ~stencil_pass_reg).
+ */
+}
+
+/* This function generates code that calculates a set of new stencil values
+ * given the earlier values and the operation to apply. It does not
+ * apply any tests. It is intended to be called up to 3 times
+ * (for the stencil fail operation, for the stencil pass-z fail operation,
+ * and for the stencil pass-z pass operation) to collect up to three
+ * possible sets of values, and for the caller to combine them based
+ * on the result of the tests.
+ *
+ * stencil_max_value should be (2^n - 1) where n is the number of bits
+ * in the stencil buffer - in other words, it should be usable as a mask.
+ */
+static void
+gen_stencil_values(struct spe_function *f, unsigned int stencil_op,
+ unsigned int stencil_ref_value, unsigned int stencil_max_value,
+ unsigned int fbS_reg, unsigned int newS_reg)
+{
+ /* The code below assumes that newS_reg and fbS_reg are not the same
+ * register; if they can be, the calculations below will have to use
+ * an additional temporary register. For now, mark the assumption
+ * with an assertion that will fail if they are the same.
+ */
+ ASSERT(fbS_reg != newS_reg);
+
+ /* The code also assumes the the stencil_max_value is of the form
+ * 2^n-1 and can therefore be used as a mask for the valid bits in
+ * addition to a maximum. Make sure this is the case as well.
+ * The clever math below exploits the fact that incrementing a
+ * binary number serves to flip all the bits of a number starting at
+ * the LSB and continuing to (and including) the first zero bit
+ * found. That means that a number and its increment will always
+ * have at least one bit in common (the high order bit, if nothing
+ * else) *unless* the number is zero, *or* the number is of a form
+ * consisting of some number of 1s in the low-order bits followed
+ * by nothing but 0s in the high-order bits. The latter case
+ * implies it's of the form 2^n-1.
+ */
+ ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0);
+
+ switch(stencil_op) {
+ case PIPE_STENCIL_OP_KEEP:
+ /* newS = S */
+ spe_move(f, newS_reg, fbS_reg);
+ break;
+
+ case PIPE_STENCIL_OP_ZERO:
+ /* newS = 0 */
+ spe_zero(f, newS_reg);
+ break;
+
+ case PIPE_STENCIL_OP_REPLACE:
+ /* newS = stencil reference value */
+ spe_load_uint(f, newS_reg, stencil_ref_value);
+ break;
+
+ case PIPE_STENCIL_OP_INCR: {
+ /* newS = (s == max ? max : s + 1) */
+ unsigned int equals_reg = spe_allocate_available_register(f);
+
+ spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value);
+ /* Add Word Immediate computes rT = rA + 10-bit signed immediate */
+ spe_ai(f, newS_reg, fbS_reg, 1);
+ /* Select from the current value or the new value based on the equality test */
+ spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg);
+
+ spe_release_register(f, equals_reg);
+ break;
+ }
+ case PIPE_STENCIL_OP_DECR: {
+ /* newS = (s == 0 ? 0 : s - 1) */
+ unsigned int equals_reg = spe_allocate_available_register(f);
+
+ spe_compare_equal_uint(f, equals_reg, fbS_reg, 0);
+ /* Add Word Immediate with a (-1) value works */
+ spe_ai(f, newS_reg, fbS_reg, -1);
+ /* Select from the current value or the new value based on the equality test */
+ spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg);
+
+ spe_release_register(f, equals_reg);
+ break;
+ }
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can
+ * do a normal add and mask off the correct bits
+ */
+ spe_ai(f, newS_reg, fbS_reg, 1);
+ spe_and_uint(f, newS_reg, newS_reg, stencil_max_value);
+ break;
+
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */
+ spe_ai(f, newS_reg, fbS_reg, -1);
+ spe_and_uint(f, newS_reg, newS_reg, stencil_max_value);
+ break;
+
+ case PIPE_STENCIL_OP_INVERT:
+ /* newS = ~s. We take advantage of the mask/max value to invert only
+ * the valid bits for the field so we don't have to do an extra "and".
+ */
+ spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value);
+ break;
+
+ default:
+ ASSERT(0);
+ }
+}
+
+
+/* This function generates code to get all the necessary possible
+ * stencil values. For each of the output registers (fail_reg,
+ * zfail_reg, and zpass_reg), it either allocates a new register
+ * and calculates a new set of values based on the stencil operation,
+ * or it reuses a register allocation and calculation done for an
+ * earlier (matching) operation, or it reuses the fbS_reg register
+ * (if the stencil operation is KEEP, which doesn't change the
+ * stencil buffer).
+ *
+ * Since this function allocates a variable number of registers,
+ * to avoid incurring complex logic to free them, they should
+ * be allocated after a spe_allocate_register_set() call
+ * and released by the corresponding spe_release_register_set() call.
+ */
+static void
+gen_get_stencil_values(struct spe_function *f, const struct pipe_stencil_state *stencil,
+ const unsigned int depth_enabled,
+ unsigned int fbS_reg,
+ unsigned int *fail_reg, unsigned int *zfail_reg,
+ unsigned int *zpass_reg)
+{
+ unsigned zfail_op;
+
+ /* Stenciling had better be enabled here */
+ ASSERT(stencil->enabled);
+
+ /* If the depth test is not enabled, it is treated as though it always
+ * passes, which means that the zfail_op is not considered - a
+ * failing stencil test triggers the fail_op, and a passing one
+ * triggers the zpass_op
+ *
+ * As an optimization, override calculation of the zfail_op values
+ * if they aren't going to be used. By setting the value of
+ * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed
+ * to match the incoming stencil values, and no calculation will
+ * be done.
+ */
+ if (depth_enabled) {
+ zfail_op = stencil->zfail_op;
+ }
+ else {
+ zfail_op = PIPE_STENCIL_OP_KEEP;
+ }
+
+ /* One-sided or front-facing stencil */
+ if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) {
+ *fail_reg = fbS_reg;
+ }
+ else {
+ *fail_reg = spe_allocate_available_register(f);
+ gen_stencil_values(f, stencil->fail_op, stencil->ref_value,
+ 0xff, fbS_reg, *fail_reg);
+ }
+
+ /* Check the possibly overridden value, not the structure value */
+ if (zfail_op == PIPE_STENCIL_OP_KEEP) {
+ *zfail_reg = fbS_reg;
+ }
+ else if (zfail_op == stencil->fail_op) {
+ *zfail_reg = *fail_reg;
+ }
+ else {
+ *zfail_reg = spe_allocate_available_register(f);
+ gen_stencil_values(f, stencil->zfail_op, stencil->ref_value,
+ 0xff, fbS_reg, *zfail_reg);
+ }
+
+ if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
+ *zpass_reg = fbS_reg;
+ }
+ else if (stencil->zpass_op == stencil->fail_op) {
+ *zpass_reg = *fail_reg;
+ }
+ else if (stencil->zpass_op == zfail_op) {
+ *zpass_reg = *zfail_reg;
+ }
+ else {
+ *zpass_reg = spe_allocate_available_register(f);
+ gen_stencil_values(f, stencil->zpass_op, stencil->ref_value,
+ 0xff, fbS_reg, *zpass_reg);
+ }
+}
+
+/* Note that fbZ_reg may *not* be set on entry, if in fact
+ * the depth test is not enabled. This function must not use
+ * the register if depth is not enabled.
+ */
+static boolean
+gen_stencil_depth_test(struct spe_function *f,
+ const struct pipe_depth_stencil_alpha_state *dsa,
+ const uint facing,
+ const int mask_reg, const int fragZ_reg,
+ const int fbZ_reg, const int fbS_reg)
+{
+ /* True if we've generated code that could require writeback to the
+ * depth and/or stencil buffers
+ */
+ boolean modified_buffers = false;
+
+ boolean need_to_calculate_stencil_values;
+ boolean need_to_writemask_stencil_values;
+
+ struct pipe_stencil_state *stencil;
+
+ /* Registers. We may or may not actually allocate these, depending
+ * on whether the state values indicate that we need them.
+ */
+ unsigned int stencil_pass_reg, stencil_fail_reg;
+ unsigned int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values;
+ unsigned int stencil_writemask_reg;
+ unsigned int zmask_reg;
+ unsigned int newS_reg;
+
+ /* Stenciling is quite complex: up to six different configurable stencil
+ * operations/calculations can be required (three each for front-facing
+ * and back-facing fragments). Many of those operations will likely
+ * be identical, so there's good reason to try to avoid calculating
+ * the same values more than once (which unfortunately makes the code less
+ * straightforward).
+ *
+ * To make register management easier, we start a new
+ * register set; we can release all the registers in the set at
+ * once, and avoid having to keep track of exactly which registers
+ * we allocate. We can still allocate and free registers as
+ * desired (if we know we no longer need a register), but we don't
+ * have to spend the complexity to track the more difficult variant
+ * register usage scenarios.
+ */
+ spe_comment(f, 0, "Allocating stencil register set");
+ spe_allocate_register_set(f);
+
+ /* The facing we're given is the fragment facing; it doesn't
+ * exactly match the stencil facing. If stencil is enabled,
+ * but two-sided stencil is *not* enabled, we use the same
+ * stencil settings for both front- and back-facing fragments.
+ * We only use the "back-facing" stencil for backfacing fragments
+ * if two-sided stenciling is enabled.
+ */
+ if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) {
+ stencil = &dsa->stencil[1];
+ }
+ else {
+ stencil = &dsa->stencil[0];
+ }
+
+ /* Calculate the writemask. If the writemask is trivial (either
+ * all 0s, meaning that we don't need to calculate any stencil values
+ * because they're not going to change the stencil anyway, or all 1s,
+ * meaning that we have to calculate the stencil values but do not
+ * need to mask them), we can avoid generating code. Don't forget
+ * that we need to consider backfacing stencil, if enabled.
+ *
+ * Note that if the backface stencil is *not* enabled, the backface
+ * stencil will have the same values as the frontface stencil.
+ */
+ if (stencil->fail_op == PIPE_STENCIL_OP_KEEP &&
+ stencil->zfail_op == PIPE_STENCIL_OP_KEEP &&
+ stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
+ need_to_calculate_stencil_values = false;
+ need_to_writemask_stencil_values = false;
+ }
+ else if (stencil->write_mask == 0x0) {
+ /* All changes are writemasked out, so no need to calculate
+ * what those changes might be, and no need to write anything back.
+ */
+ need_to_calculate_stencil_values = false;
+ need_to_writemask_stencil_values = false;
+ }
+ else if (stencil->write_mask == 0xff) {
+ /* Still trivial, but a little less so. We need to write the stencil
+ * values, but we don't need to mask them.
+ */
+ need_to_calculate_stencil_values = true;
+ need_to_writemask_stencil_values = false;
+ }
+ else {
+ /* The general case: calculate, mask, and write */
+ need_to_calculate_stencil_values = true;
+ need_to_writemask_stencil_values = true;
+
+ /* While we're here, generate code that calculates what the
+ * writemask should be. If backface stenciling is enabled,
+ * and the backface writemask is not the same as the frontface
+ * writemask, we'll have to generate code that merges the
+ * two masks into a single effective mask based on fragment facing.
+ */
+ spe_comment(f, 0, "Computing stencil writemask");
+ stencil_writemask_reg = spe_allocate_available_register(f);
+ spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].write_mask);
+ }
+
+ /* At least one-sided stenciling must be on. Generate code that
+ * runs the stencil test on the basic/front-facing stencil, leaving
+ * the mask of passing stencil bits in stencil_pass_reg. This mask will
+ * be used both to mask the set of active pixels, and also to
+ * determine how the stencil buffer changes.
+ *
+ * This test will *not* change the value in mask_reg (because we don't
+ * yet know whether to apply the two-sided stencil or one-sided stencil).
+ */
+ spe_comment(f, 0, "Running basic stencil test");
+ stencil_pass_reg = spe_allocate_available_register(f);
+ gen_stencil_test(f, stencil, 0xff, mask_reg, fbS_reg, stencil_pass_reg);
+
+ /* Generate code that, given the mask of valid fragments and the
+ * mask of valid fragments that passed the stencil test, computes
+ * the mask of valid fragments that failed the stencil test. We
+ * have to do this before we run a depth test (because the
+ * depth test should not be performed on fragments that failed the
+ * stencil test, and because the depth test will update the
+ * mask of valid fragments based on the results of the depth test).
+ */
+ spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask");
+ stencil_fail_reg = spe_allocate_available_register(f);
+ spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg);
+ /* Now remove the stenciled-out pixels from the valid fragment mask,
+ * so we can later use the valid fragment mask in the depth test.
+ */
+ spe_and(f, mask_reg, mask_reg, stencil_pass_reg);
+
+ /* We may not need to calculate stencil values, if the writemask is off */
+ if (need_to_calculate_stencil_values) {
+ /* Generate code that calculates exactly which stencil values we need,
+ * without calculating the same value twice (say, if two different
+ * stencil ops have the same value). This code will work for one-sided
+ * and two-sided stenciling (so that we take into account that operations
+ * may match between front and back stencils), and will also take into
+ * account whether the depth test is enabled (if the depth test is off,
+ * we don't need any of the zfail results, because the depth test always
+ * is considered to pass if it is disabled). Any register value that
+ * does not need to be calculated will come back with the same value
+ * that's in fbS_reg.
+ *
+ * This function will allocate a variant number of registers that
+ * will be released as part of the register set.
+ */
+ spe_comment(f, 0, facing == CELL_FACING_FRONT ? "Computing front-facing stencil values" : "Computing back-facing stencil values");
+ gen_get_stencil_values(f, stencil, dsa->depth.enabled, fbS_reg,
+ &stencil_fail_values, &stencil_pass_depth_fail_values,
+ &stencil_pass_depth_pass_values);
+ }
+
+ /* We now have all the stencil values we need. We also need
+ * the results of the depth test to figure out which
+ * stencil values will become the new stencil values. (Even if
+ * we aren't actually calculating stencil values, we need to apply
+ * the depth test if it's enabled.)
+ *
+ * The code generated by gen_depth_test() returns the results of the
+ * test in the given register, but also alters the mask_reg based
+ * on the results of the test.
+ */
+ if (dsa->depth.enabled) {
+ spe_comment(f, 0, "Running stencil depth test");
+ zmask_reg = spe_allocate_available_register(f);
+ modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg, fbZ_reg, zmask_reg);
+ }
+
+ if (need_to_calculate_stencil_values) {
+
+ /* If we need to writemask the stencil values before going into
+ * the stencil buffer, we'll have to use a new register to
+ * hold the new values. If not, we can just keep using the
+ * current register.
+ */
+ if (need_to_writemask_stencil_values) {
+ newS_reg = spe_allocate_available_register(f);
+ spe_comment(f, 0, "Saving current stencil values for writemasking");
+ spe_move(f, newS_reg, fbS_reg);
+ }
+ else {
+ newS_reg = fbS_reg;
+ }
+
+ /* Merge in the selected stencil fail values */
+ if (stencil_fail_values != fbS_reg) {
+ spe_comment(f, 0, "Loading stencil fail values");
+ spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg);
+ modified_buffers = true;
+ }
+
+ /* Same for the stencil pass/depth fail values. If this calculation
+ * is not needed (say, if depth test is off), then the
+ * stencil_pass_depth_fail_values register will be equal to fbS_reg
+ * and we'll skip the calculation.
+ */
+ if (stencil_pass_depth_fail_values != fbS_reg) {
+ /* We don't actually have a stencil pass/depth fail mask yet.
+ * Calculate it here from the stencil passing mask and the
+ * depth passing mask. Note that zmask_reg *must* have been
+ * set above if we're here.
+ */
+ unsigned int stencil_pass_depth_fail_mask = spe_allocate_available_register(f);
+ spe_comment(f, 0, "Loading stencil pass/depth fail values");
+ spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg);
+
+ spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values, stencil_pass_depth_fail_mask);
+
+ spe_release_register(f, stencil_pass_depth_fail_mask);
+ modified_buffers = true;
+ }
+
+ /* Same for the stencil pass/depth pass mask. Note that we
+ * *can* get here with zmask_reg being unset (if the depth
+ * test is off but the stencil test is on). In this case,
+ * we assume the depth test passes, and don't need to mask
+ * the stencil pass mask with the Z mask.
+ */
+ if (stencil_pass_depth_pass_values != fbS_reg) {
+ if (dsa->depth.enabled) {
+ unsigned int stencil_pass_depth_pass_mask = spe_allocate_available_register(f);
+ /* We'll need a separate register */
+ spe_comment(f, 0, "Loading stencil pass/depth pass values");
+ spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg);
+ spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask);
+ spe_release_register(f, stencil_pass_depth_pass_mask);
+ }
+ else {
+ /* We can use the same stencil-pass register */
+ spe_comment(f, 0, "Loading stencil pass values");
+ spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg);
+ }
+ modified_buffers = true;
+ }
+
+ /* Almost done. If we need to writemask, do it now, leaving the
+ * results in the fbS_reg register passed in. If we don't need
+ * to writemask, then the results are *already* in the fbS_reg,
+ * so there's nothing more to do.
+ */
+
+ if (need_to_writemask_stencil_values && modified_buffers) {
+ /* The Select Bytes command makes a fine writemask. Where
+ * the mask is 0, the first (original) values are retained,
+ * effectively masking out changes. Where the mask is 1, the
+ * second (new) values are retained, incorporating changes.
+ */
+ spe_comment(f, 0, "Writemasking new stencil values");
+ spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg);
+ }
+
+ } /* done calculating stencil values */
+
+ /* The stencil and/or depth values have been applied, and the
+ * mask_reg, fbS_reg, and fbZ_reg values have been updated.
+ * We're all done, except that we've allocated a fair number
+ * of registers that we didn't bother tracking. Release all
+ * those registers as part of the register set, and go home.
+ */
+ spe_comment(f, 0, "Releasing stencil register set");
+ spe_release_register_set(f);
+
+ /* Return true if we could have modified the stencil and/or
+ * depth buffers.
+ */
+ return modified_buffers;
+}
/**
@@ -621,14 +1774,19 @@ gen_pack_colors(struct spe_function *f,
* should be much faster.
*
* \param cell the rendering context (in)
- * \param f the generated function (out)
+ * \param facing whether the generated code is for front-facing or
+ * back-facing fragments
+ * \param f the generated function (in/out); on input, the function
+ * must already have been initialized. On exit, whatever
+ * instructions within the generated function have had
+ * the fragment ops appended.
*/
void
-cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
+cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f)
{
- const struct pipe_depth_stencil_alpha_state *dsa =
- &cell->depth_stencil->base;
- const struct pipe_blend_state *blend = &cell->blend->base;
+ const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil;
+ const struct pipe_blend_state *blend = cell->blend;
+ const struct pipe_blend_color *blend_color = &cell->blend_color;
const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
/* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
@@ -643,6 +1801,8 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
const int fragA_reg = 11; /* vector float */
const int mask_reg = 12; /* vector uint */
+ ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK);
+
/* offset of quad from start of tile
* XXX assuming 4-byte pixels for color AND Z/stencil!!!!
*/
@@ -651,7 +1811,12 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */
int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */
- spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
+ if (cell->debug_flags & CELL_DEBUG_ASM) {
+ spe_print_code(f, true);
+ spe_indent(f, 8);
+ spe_comment(f, -4, facing == CELL_FACING_FRONT ? "Begin front-facing per-fragment ops": "Begin back-facing per-fragment ops");
+ }
+
spe_allocate_register(f, x_reg);
spe_allocate_register(f, y_reg);
spe_allocate_register(f, color_tile_reg);
@@ -674,8 +1839,9 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
ASSERT(TILE_SIZE == 32);
- spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */
+ spe_comment(f, 0, "Compute quad offset within tile");
spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */
+ spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */
spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */
spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */
spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */
@@ -684,139 +1850,196 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
spe_release_register(f, y2_reg);
}
-
+ /* Generate the alpha test, if needed. */
if (dsa->alpha.enabled) {
gen_alpha_test(dsa, f, mask_reg, fragA_reg);
}
+ /* If we need the stencil buffers (because one- or two-sided stencil is
+ * enabled) or the depth buffer (because the depth test is enabled),
+ * go grab them. Note that if either one- or two-sided stencil is
+ * enabled, dsa->stencil[0].enabled will be true.
+ */
if (dsa->depth.enabled || dsa->stencil[0].enabled) {
const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
boolean write_depth_stencil;
- int fbZ_reg = spe_allocate_available_register(f); /* Z values */
- int fbS_reg = spe_allocate_available_register(f); /* Stencil values */
+ /* We may or may not need to allocate a register for Z or stencil values */
+ boolean fbS_reg_set = false, fbZ_reg_set = false;
+ unsigned int fbS_reg, fbZ_reg = 0;
+
+ spe_comment(f, 0, "Fetching Z/stencil quad from tile");
/* fetch quad of depth/stencil values from tile at (x,y) */
/* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
+ /* XXX Not sure this is allowed if we've only got a 16-bit Z buffer... */
spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
- if (dsa->depth.enabled) {
- /* Extract Z bits from fbZS_reg into fbZ_reg */
- if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
- zs_format == PIPE_FORMAT_X8Z24_UNORM) {
- int mask_reg = spe_allocate_available_register(f);
- spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */
- spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */
- spe_release_register(f, mask_reg);
- /* OK, fbZ_reg has four 24-bit Z values now */
- }
- else {
- /* XXX handle other z/stencil formats */
- ASSERT(0);
- }
+ /* From the Z/stencil buffer format, pull out the bits we need for
+ * Z and/or stencil. We'll also convert the incoming fragment Z
+ * value in fragZ_reg from a floating point value in [0.0..1.0] to
+ * an unsigned integer value with the appropriate resolution.
+ * Note that even if depth or stencil is *not* enabled, if it's
+ * present in the buffer, we pull it out and put it back later;
+ * otherwise, we can inadvertently destroy the contents of
+ * buffers we're not supposed to touch (e.g., if the user is
+ * clearing the depth buffer but not the stencil buffer, a
+ * quad of constant depth is drawn over the surface; the stencil
+ * buffer must be maintained).
+ */
+ switch(zs_format) {
- /* Convert fragZ values from float[4] to uint[4] */
- if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
- zs_format == PIPE_FORMAT_X8Z24_UNORM ||
- zs_format == PIPE_FORMAT_Z24S8_UNORM ||
- zs_format == PIPE_FORMAT_Z24X8_UNORM) {
- /* 24-bit Z values */
- int scale_reg = spe_allocate_available_register(f);
+ case PIPE_FORMAT_S8Z24_UNORM: /* fall through */
+ case PIPE_FORMAT_X8Z24_UNORM:
+ /* Pull out both Z and stencil */
+ setup_optional_register(f, &fbZ_reg_set, &fbZ_reg);
+ setup_optional_register(f, &fbS_reg_set, &fbS_reg);
- /* scale_reg[0,1,2,3] = float(2^24-1) */
- spe_load_float(f, scale_reg, (float) 0xffffff);
+ /* four 24-bit Z values in the low-order bits */
+ spe_and_uint(f, fbZ_reg, fbZS_reg, 0x00ffffff);
- /* XXX these two instructions might be combined */
- spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */
- spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */
+ /* Incoming fragZ_reg value is a float in 0.0...1.0; convert
+ * to a 24-bit unsigned integer
+ */
+ spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
+ spe_rotmi(f, fragZ_reg, fragZ_reg, -8);
- spe_release_register(f, scale_reg);
- }
- else {
- /* XXX handle 16-bit Z format */
- ASSERT(0);
- }
- }
+ /* four 8-bit stencil values in the high-order bits */
+ spe_rotmi(f, fbS_reg, fbZS_reg, -24);
+ break;
- if (dsa->stencil[0].enabled) {
- /* Extract Stencil bit sfrom fbZS_reg into fbS_reg */
- if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
- zs_format == PIPE_FORMAT_X8Z24_UNORM) {
- /* XXX extract with a shift */
- ASSERT(0);
- }
- else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
- zs_format == PIPE_FORMAT_Z24X8_UNORM) {
- /* XXX extract with a mask */
- ASSERT(0);
- }
- }
+ case PIPE_FORMAT_Z24S8_UNORM: /* fall through */
+ case PIPE_FORMAT_Z24X8_UNORM:
+ setup_optional_register(f, &fbZ_reg_set, &fbZ_reg);
+ setup_optional_register(f, &fbS_reg_set, &fbS_reg);
+
+ /* shift by 8 to get the upper 24-bit values */
+ spe_rotmi(f, fbS_reg, fbZS_reg, -8);
+
+ /* Incoming fragZ_reg value is a float in 0.0...1.0; convert
+ * to a 24-bit unsigned integer
+ */
+ spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
+ spe_rotmi(f, fragZ_reg, fragZ_reg, -8);
+
+ /* 8-bit stencil in the low-order bits - mask them out */
+ spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff);
+ break;
+
+ case PIPE_FORMAT_Z32_UNORM:
+ setup_optional_register(f, &fbZ_reg_set, &fbZ_reg);
+ /* Copy over 4 32-bit values */
+ spe_move(f, fbZ_reg, fbZS_reg);
+
+ /* Incoming fragZ_reg value is a float in 0.0...1.0; convert
+ * to a 32-bit unsigned integer
+ */
+ spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
+ /* No stencil, so can't do anything there */
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
+ /* XXX Not sure this is correct, but it was here before, so we're
+ * going with it for now
+ */
+ setup_optional_register(f, &fbZ_reg_set, &fbZ_reg);
+ /* Copy over 4 32-bit values */
+ spe_move(f, fbZ_reg, fbZS_reg);
+
+ /* Incoming fragZ_reg value is a float in 0.0...1.0; convert
+ * to a 16-bit unsigned integer
+ */
+ spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
+ spe_rotmi(f, fragZ_reg, fragZ_reg, -16);
+ /* No stencil */
+
+ default:
+ ASSERT(0); /* invalid format */
+ }
+ /* If stencil is enabled, use the stencil-specific code
+ * generator to generate both the stencil and depth (if needed)
+ * tests. Otherwise, if only depth is enabled, generate
+ * a quick depth test. The test generators themselves will
+ * report back whether the depth/stencil buffer has to be
+ * written back.
+ */
if (dsa->stencil[0].enabled) {
- /* XXX this may involve depth testing too */
- // gen_stencil_test(dsa, f, ... );
- ASSERT(0);
+ /* This will perform the stencil and depth tests, and update
+ * the mask_reg, fbZ_reg, and fbS_reg as required by the
+ * tests.
+ */
+ ASSERT(fbS_reg_set);
+ spe_comment(f, 0, "Perform stencil test");
+
+ /* Note that fbZ_reg may not be set on entry, if stenciling
+ * is enabled but there's no Z-buffer. The
+ * gen_stencil_depth_test() function must ignore the
+ * fbZ_reg register if depth is not enabled.
+ */
+ write_depth_stencil = gen_stencil_depth_test(f, dsa, facing, mask_reg, fragZ_reg, fbZ_reg, fbS_reg);
}
else if (dsa->depth.enabled) {
int zmask_reg = spe_allocate_available_register(f);
- gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg);
+ ASSERT(fbZ_reg_set);
+ spe_comment(f, 0, "Perform depth test");
+ write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg, fbZ_reg, zmask_reg);
spe_release_register(f, zmask_reg);
}
-
- /* do we need to write Z and/or Stencil back into framebuffer? */
- write_depth_stencil = (dsa->depth.writemask |
- dsa->stencil[0].write_mask |
- dsa->stencil[1].write_mask);
+ else {
+ write_depth_stencil = false;
+ }
if (write_depth_stencil) {
/* Merge latest Z and Stencil values into fbZS_reg.
* fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
* fbS_reg has four 8-bit Z values in bits [7..0].
*/
+ spe_comment(f, 0, "Store quad's depth/stencil values in tile");
if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
zs_format == PIPE_FORMAT_X8Z24_UNORM) {
spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
}
- else if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
- zs_format == PIPE_FORMAT_X8Z24_UNORM) {
- /* XXX to do */
- ASSERT(0);
+ else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
+ zs_format == PIPE_FORMAT_Z24X8_UNORM) {
+ spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */
+ spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
+ }
+ else if (zs_format == PIPE_FORMAT_Z32_UNORM) {
+ spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */
}
else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
- /* XXX to do */
- ASSERT(0);
+ spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */
}
else if (zs_format == PIPE_FORMAT_S8_UNORM) {
- /* XXX to do */
- ASSERT(0);
+ ASSERT(0); /* XXX to do */
}
else {
- /* bad zs_format */
- ASSERT(0);
+ ASSERT(0); /* bad zs_format */
}
/* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
}
- spe_release_register(f, fbZ_reg);
- spe_release_register(f, fbS_reg);
+ /* Don't need these any more */
+ release_optional_register(f, &fbZ_reg_set, fbZ_reg);
+ release_optional_register(f, &fbS_reg_set, fbS_reg);
}
-
/* Get framebuffer quad/colors. We'll need these for blending,
* color masking, and to obey the quad/pixel mask.
* Load: fbRGBA_reg = memory[color_tile + quad_offset]
* Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
* we could skip this load.
*/
+ spe_comment(f, 0, "Fetch quad colors from tile");
spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);
-
if (blend->blend_enable) {
- gen_blend(blend, f, color_format,
+ spe_comment(f, 0, "Perform blending");
+ gen_blend(blend, blend_color, f, color_format,
fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
}
@@ -829,19 +2052,21 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
int rgba_reg = spe_allocate_available_register(f);
/* Pack four float colors as four 32-bit int colors */
+ spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors");
gen_pack_colors(f, color_format,
fragR_reg, fragG_reg, fragB_reg, fragA_reg,
rgba_reg);
if (blend->logicop_enable) {
+ spe_comment(f, 0, "Compute logic op");
gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
}
- if (blend->colormask != 0xf) {
- gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg);
+ if (blend->colormask != PIPE_MASK_RGBA) {
+ spe_comment(f, 0, "Compute color mask");
+ gen_colormask(f, blend->colormask, color_format, rgba_reg, fbRGBA_reg);
}
-
/* Mix fragment colors with framebuffer colors using the quad/pixel mask:
* if (mask[i])
* rgba[i] = rgba[i];
@@ -853,6 +2078,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
/* Store updated quad in tile:
* memory[color_tile + quad_offset] = rgba_reg;
*/
+ spe_comment(f, 0, "Store quad colors into color tile");
spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);
spe_release_register(f, rgba_reg);
@@ -862,9 +2088,14 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */
-
spe_release_register(f, fbRGBA_reg);
spe_release_register(f, fbZS_reg);
spe_release_register(f, quad_offset_reg);
-}
+ if (cell->debug_flags & CELL_DEBUG_ASM) {
+ char buffer[1024];
+ sprintf(buffer, "End %s-facing per-fragment ops: %d instructions",
+ facing == CELL_FACING_FRONT ? "front" : "back", f->num_inst);
+ spe_comment(f, -4, buffer);
+ }
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
index b59de198dce..21b35d1fafe 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
@@ -31,7 +31,7 @@
extern void
-cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f);
+cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f);
#endif /* CELL_GEN_FRAGMENT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
index 475c6ef0ce6..81efd137c73 100644
--- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c
+++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
@@ -35,9 +35,9 @@
#include "draw/draw_context.h"
#include "cell_context.h"
#include "cell_flush.h"
+#include "cell_pipe_state.h"
#include "cell_state.h"
#include "cell_texture.h"
-#include "cell_state_per_fragment.h"
@@ -45,24 +45,18 @@ static void *
cell_create_blend_state(struct pipe_context *pipe,
const struct pipe_blend_state *blend)
{
- struct cell_blend_state *cb = MALLOC(sizeof(struct cell_blend_state));
-
- (void) memcpy(cb, blend, sizeof(*blend));
-#if 0
- cell_generate_alpha_blend(cb);
-#endif
- return cb;
+ return mem_dup(blend, sizeof(*blend));
}
static void
-cell_bind_blend_state(struct pipe_context *pipe, void *state)
+cell_bind_blend_state(struct pipe_context *pipe, void *blend)
{
struct cell_context *cell = cell_context(pipe);
draw_flush(cell->draw);
- cell->blend = (struct cell_blend_state *) state;
+ cell->blend = (struct pipe_blend_state *) blend;
cell->dirty |= CELL_NEW_BLEND;
}
@@ -70,10 +64,7 @@ cell_bind_blend_state(struct pipe_context *pipe, void *state)
static void
cell_delete_blend_state(struct pipe_context *pipe, void *blend)
{
- struct cell_blend_state *cb = (struct cell_blend_state *) blend;
-
- spe_release_func(& cb->code);
- FREE(cb);
+ FREE(blend);
}
@@ -95,41 +86,29 @@ cell_set_blend_color(struct pipe_context *pipe,
static void *
cell_create_depth_stencil_alpha_state(struct pipe_context *pipe,
- const struct pipe_depth_stencil_alpha_state *depth_stencil)
+ const struct pipe_depth_stencil_alpha_state *dsa)
{
- struct cell_depth_stencil_alpha_state *cdsa =
- MALLOC(sizeof(struct cell_depth_stencil_alpha_state));
-
- (void) memcpy(cdsa, depth_stencil, sizeof(*depth_stencil));
-#if 0
- cell_generate_depth_stencil_test(cdsa);
-#endif
- return cdsa;
+ return mem_dup(dsa, sizeof(*dsa));
}
static void
cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe,
- void *depth_stencil)
+ void *dsa)
{
struct cell_context *cell = cell_context(pipe);
draw_flush(cell->draw);
- cell->depth_stencil =
- (struct cell_depth_stencil_alpha_state *) depth_stencil;
+ cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa;
cell->dirty |= CELL_NEW_DEPTH_STENCIL;
}
static void
-cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth)
+cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa)
{
- struct cell_depth_stencil_alpha_state *cdsa =
- (struct cell_depth_stencil_alpha_state *) depth;
-
- spe_release_func(& cdsa->code);
- FREE(cdsa);
+ FREE(dsa);
}
@@ -191,24 +170,23 @@ cell_set_polygon_stipple( struct pipe_context *pipe,
static void *
cell_create_rasterizer_state(struct pipe_context *pipe,
- const struct pipe_rasterizer_state *setup)
+ const struct pipe_rasterizer_state *rasterizer)
{
- struct pipe_rasterizer_state *state
- = MALLOC(sizeof(struct pipe_rasterizer_state));
- memcpy(state, setup, sizeof(struct pipe_rasterizer_state));
- return state;
+ return mem_dup(rasterizer, sizeof(*rasterizer));
}
static void
-cell_bind_rasterizer_state(struct pipe_context *pipe, void *setup)
+cell_bind_rasterizer_state(struct pipe_context *pipe, void *rast)
{
+ struct pipe_rasterizer_state *rasterizer =
+ (struct pipe_rasterizer_state *) rast;
struct cell_context *cell = cell_context(pipe);
/* pass-through to draw module */
- draw_set_rasterizer_state(cell->draw, setup);
+ draw_set_rasterizer_state(cell->draw, rasterizer);
- cell->rasterizer = (struct pipe_rasterizer_state *)setup;
+ cell->rasterizer = rasterizer;
cell->dirty |= CELL_NEW_RASTERIZER;
}
@@ -235,17 +213,24 @@ cell_bind_sampler_states(struct pipe_context *pipe,
unsigned num, void **samplers)
{
struct cell_context *cell = cell_context(pipe);
+ uint i, changed = 0x0;
assert(num <= CELL_MAX_SAMPLERS);
draw_flush(cell->draw);
- memcpy(cell->sampler, samplers, num * sizeof(void *));
- memset(&cell->sampler[num], 0, (CELL_MAX_SAMPLERS - num) *
- sizeof(void *));
- cell->num_samplers = num;
+ for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
+ struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL;
+ if (cell->sampler[i] != new_samp) {
+ cell->sampler[i] = new_samp;
+ changed |= (1 << i);
+ }
+ }
- cell->dirty |= CELL_NEW_SAMPLER;
+ if (changed) {
+ cell->dirty |= CELL_NEW_SAMPLER;
+ cell->dirty_samplers |= changed;
+ }
}
@@ -263,27 +248,25 @@ cell_set_sampler_textures(struct pipe_context *pipe,
unsigned num, struct pipe_texture **texture)
{
struct cell_context *cell = cell_context(pipe);
- uint i;
+ uint i, changed = 0x0;
assert(num <= CELL_MAX_SAMPLERS);
- /* Check for no-op */
- if (num == cell->num_textures &&
- !memcmp(cell->texture, texture, num * sizeof(struct pipe_texture *)))
- return;
-
- draw_flush(cell->draw);
-
for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
- struct pipe_texture *tex = i < num ? texture[i] : NULL;
-
- pipe_texture_reference((struct pipe_texture **) &cell->texture[i], tex);
+ struct pipe_texture *new_tex = i < num ? texture[i] : NULL;
+ if ((struct pipe_texture *) cell->texture[i] != new_tex) {
+ pipe_texture_reference((struct pipe_texture **) &cell->texture[i],
+ new_tex);
+ changed |= (1 << i);
+ }
}
- cell->num_textures = num;
- cell_update_texture_mapping(cell);
+ cell->num_textures = num;
- cell->dirty |= CELL_NEW_TEXTURE;
+ if (changed) {
+ cell->dirty |= CELL_NEW_TEXTURE;
+ cell->dirty_textures |= changed;
+ }
}
diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c
index dd25ae880e5..79cb8df82fa 100644
--- a/src/gallium/drivers/cell/ppu/cell_render.c
+++ b/src/gallium/drivers/cell/ppu/cell_render.c
@@ -152,6 +152,7 @@ cell_flush_prim_buffer(struct cell_context *cell)
struct cell_command_render *render = &cell_global.command[i].render;
render->prim_type = PIPE_PRIM_TRIANGLES;
render->num_verts = cell->prim_buffer.num_verts;
+ render->front_winding = cell->rasterizer->front_winding;
render->vertex_size = cell->vertex_info->size * 4;
render->xmin = cell->prim_buffer.xmin;
render->ymin = cell->prim_buffer.ymin;
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c
index 139b3719b62..d2235579507 100644
--- a/src/gallium/drivers/cell/ppu/cell_screen.c
+++ b/src/gallium/drivers/cell/ppu/cell_screen.c
@@ -58,9 +58,9 @@ cell_get_param(struct pipe_screen *screen, int param)
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return CELL_MAX_SAMPLERS;
case PIPE_CAP_NPOT_TEXTURES:
- return 0;
+ return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
- return 0;
+ return 1;
case PIPE_CAP_GLSL:
return 1;
case PIPE_CAP_S3TC:
@@ -68,21 +68,21 @@ cell_get_param(struct pipe_screen *screen, int param)
case PIPE_CAP_ANISOTROPIC_FILTER:
return 0;
case PIPE_CAP_POINT_SPRITE:
- return 0;
+ return 1;
case PIPE_CAP_MAX_RENDER_TARGETS:
return 1;
case PIPE_CAP_OCCLUSION_QUERY:
- return 0;
+ return 1;
case PIPE_CAP_TEXTURE_SHADOW_MAP:
- return 0;
+ return 10;
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
- return 12; /* max 2Kx2K */
+ return CELL_MAX_TEXTURE_LEVELS;
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
return 8; /* max 128x128x128 */
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
- return 12; /* max 2Kx2K */
+ return CELL_MAX_TEXTURE_LEVELS;
default:
- return 0;
+ return 10;
}
}
@@ -108,7 +108,7 @@ cell_get_paramf(struct pipe_screen *screen, int param)
return 16.0; /* arbitrary */
default:
- return 0;
+ return 10;
}
}
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c
index 9508227e298..28e5e6d706d 100644
--- a/src/gallium/drivers/cell/ppu/cell_spu.c
+++ b/src/gallium/drivers/cell/ppu/cell_spu.c
@@ -36,6 +36,7 @@
#include "cell_spu.h"
#include "pipe/p_format.h"
#include "pipe/p_state.h"
+#include "util/u_memory.h"
#include "cell/common.h"
@@ -52,6 +53,35 @@ struct cell_global_info cell_global;
/**
+ * Scan /proc/cpuinfo to determine the timebase for the system.
+ * This is used by the SPUs to convert 'decrementer' ticks to seconds.
+ * There may be a better way to get this value...
+ */
+static unsigned
+get_timebase(void)
+{
+ FILE *f = fopen("/proc/cpuinfo", "r");
+ unsigned timebase;
+
+ assert(f);
+ while (!feof(f)) {
+ char line[80];
+ fgets(line, sizeof(line), f);
+ if (strncmp(line, "timebase", 8) == 0) {
+ char *colon = strchr(line, ':');
+ if (colon) {
+ timebase = atoi(colon + 2);
+ break;
+ }
+ }
+ }
+ fclose(f);
+
+ return timebase;
+}
+
+
+/**
* Write a 1-word message to the given SPE mailbox.
*/
void
@@ -114,6 +144,7 @@ cell_start_spus(struct cell_context *cell)
{
static boolean one_time_init = FALSE;
uint i, j;
+ uint timebase = get_timebase();
if (one_time_init) {
fprintf(stderr, "PPU: Multiple rendering contexts not yet supported "
@@ -123,24 +154,29 @@ cell_start_spus(struct cell_context *cell)
one_time_init = TRUE;
- assert(cell->num_spus <= MAX_SPUS);
-
- ASSERT_ALIGN16(&cell_global.command[0]);
- ASSERT_ALIGN16(&cell_global.command[1]);
+ assert(cell->num_spus <= CELL_MAX_SPUS);
ASSERT_ALIGN16(&cell_global.inits[0]);
ASSERT_ALIGN16(&cell_global.inits[1]);
+ /*
+ * Initialize the global 'inits' structure for each SPU.
+ * A pointer to the init struct will be passed to each SPU.
+ * The SPUs will then each grab their init info with mfc_get().
+ */
for (i = 0; i < cell->num_spus; i++) {
cell_global.inits[i].id = i;
cell_global.inits[i].num_spus = cell->num_spus;
cell_global.inits[i].debug_flags = cell->debug_flags;
- cell_global.inits[i].cmd = &cell_global.command[i];
+ cell_global.inits[i].inv_timebase = 1000.0f / timebase;
+
for (j = 0; j < CELL_NUM_BUFFERS; j++) {
cell_global.inits[i].buffers[j] = cell->buffer[j];
}
cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0];
+ cell_global.inits[i].spu_functions = &cell->spu_functions;
+
cell_global.spe_contexts[i] = spe_context_create(0, NULL);
if (!cell_global.spe_contexts[i]) {
fprintf(stderr, "spe_context_create() failed\n");
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h
index 137f26612e4..c93958a9ed5 100644
--- a/src/gallium/drivers/cell/ppu/cell_spu.h
+++ b/src/gallium/drivers/cell/ppu/cell_spu.h
@@ -30,14 +30,12 @@
#include <libspe2.h>
-#include <libmisc.h>
+#include <pthread.h>
#include "cell/common.h"
#include "cell_context.h"
-#define MAX_SPUS 8
-
/**
* Global vars, for now anyway.
*/
@@ -46,14 +44,13 @@ struct cell_global_info
/**
* SPU/SPE handles, etc
*/
- spe_context_ptr_t spe_contexts[MAX_SPUS];
- pthread_t spe_threads[MAX_SPUS];
+ spe_context_ptr_t spe_contexts[CELL_MAX_SPUS];
+ pthread_t spe_threads[CELL_MAX_SPUS];
/**
- * Data sent to SPUs
+ * Data sent to SPUs at start-up
*/
- struct cell_init_info inits[MAX_SPUS];
- struct cell_command command[MAX_SPUS];
+ struct cell_init_info inits[CELL_MAX_SPUS];
};
diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h
index a7771a55a31..b193170f9ce 100644
--- a/src/gallium/drivers/cell/ppu/cell_state.h
+++ b/src/gallium/drivers/cell/ppu/cell_state.h
@@ -44,8 +44,9 @@
#define CELL_NEW_TEXTURE 0x800
#define CELL_NEW_VERTEX 0x1000
#define CELL_NEW_VS 0x2000
-#define CELL_NEW_CONSTANTS 0x4000
-#define CELL_NEW_VERTEX_INFO 0x8000
+#define CELL_NEW_VS_CONSTANTS 0x4000
+#define CELL_NEW_FS_CONSTANTS 0x8000
+#define CELL_NEW_VERTEX_INFO 0x10000
extern void
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index 2da3097983c..0a0af81f53f 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -25,18 +25,155 @@
*
**************************************************************************/
+#include "pipe/p_inlines.h"
#include "util/u_memory.h"
#include "cell_context.h"
#include "cell_gen_fragment.h"
#include "cell_state.h"
#include "cell_state_emit.h"
-#include "cell_state_per_fragment.h"
#include "cell_batch.h"
#include "cell_texture.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
+/**
+ * Find/create a cell_command_fragment_ops object corresponding to the
+ * current blend/stencil/z/colormask/etc. state.
+ */
+static struct cell_command_fragment_ops *
+lookup_fragment_ops(struct cell_context *cell)
+{
+ struct cell_fragment_ops_key key;
+ struct cell_command_fragment_ops *ops;
+
+ /*
+ * Build key
+ */
+ memset(&key, 0, sizeof(key));
+ key.blend = *cell->blend;
+ key.blend_color = cell->blend_color;
+ key.dsa = *cell->depth_stencil;
+
+ if (cell->framebuffer.cbufs[0])
+ key.color_format = cell->framebuffer.cbufs[0]->format;
+ else
+ key.color_format = PIPE_FORMAT_NONE;
+
+ if (cell->framebuffer.zsbuf)
+ key.zs_format = cell->framebuffer.zsbuf->format;
+ else
+ key.zs_format = PIPE_FORMAT_NONE;
+
+ /*
+ * Look up key in cache.
+ */
+ ops = (struct cell_command_fragment_ops *)
+ util_keymap_lookup(cell->fragment_ops_cache, &key);
+
+ /*
+ * If not found, create/save new fragment ops command.
+ */
+ if (!ops) {
+ struct spe_function spe_code_front, spe_code_back;
+ unsigned int facing_dependent, total_code_size;
+
+ if (0)
+ debug_printf("**** Create New Fragment Ops\n");
+
+ /* Prepare the buffer that will hold the generated code. The
+ * "0" passed in for the size means that the SPE code will
+ * use a default size.
+ */
+ spe_init_func(&spe_code_front, 0);
+ spe_init_func(&spe_code_back, 0);
+
+ /* Generate new code. Always generate new code for both front-facing
+ * and back-facing fragments, even if it's the same code in both
+ * cases.
+ */
+ cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front);
+ cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back);
+
+ /* Make sure the code is a multiple of 8 bytes long; this is
+ * required to ensure that the dual pipe instruction alignment
+ * is correct. It's also important for the SPU unpacking,
+ * which assumes 8-byte boundaries.
+ */
+ unsigned int front_code_size = spe_code_size(&spe_code_front);
+ while (front_code_size % 8 != 0) {
+ spe_lnop(&spe_code_front);
+ front_code_size = spe_code_size(&spe_code_front);
+ }
+ unsigned int back_code_size = spe_code_size(&spe_code_back);
+ while (back_code_size % 8 != 0) {
+ spe_lnop(&spe_code_back);
+ back_code_size = spe_code_size(&spe_code_back);
+ }
+
+ /* Determine whether the code we generated is facing-dependent, by
+ * determining whether the generated code is different for the front-
+ * and back-facing fragments.
+ */
+ if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) {
+ /* Code is identical; only need one copy. */
+ facing_dependent = 0;
+ total_code_size = front_code_size;
+ }
+ else {
+ /* Code is different for front-facing and back-facing fragments.
+ * Need to send both copies.
+ */
+ facing_dependent = 1;
+ total_code_size = front_code_size + back_code_size;
+ }
+
+ /* alloc new fragment ops command. Note that this structure
+ * has variant length based on the total code size required.
+ */
+ ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size);
+ /* populate the new cell_command_fragment_ops object */
+ ops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
+ ops->total_code_size = total_code_size;
+ ops->front_code_index = 0;
+ memcpy(ops->code, spe_code_front.store, front_code_size);
+ if (facing_dependent) {
+ /* We have separate front- and back-facing code. Append the
+ * back-facing code to the buffer. Be careful because the code
+ * size is in bytes, but the buffer is of unsigned elements.
+ */
+ ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]);
+ memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size);
+ }
+ else {
+ /* Use the same code for front- and back-facing fragments */
+ ops->back_code_index = ops->front_code_index;
+ }
+
+ /* Set the fields for the fallback case. Note that these fields
+ * (and the whole fallback case) will eventually go away.
+ */
+ ops->dsa = *cell->depth_stencil;
+ ops->blend = *cell->blend;
+ ops->blend_color = cell->blend_color;
+
+ /* insert cell_command_fragment_ops object into keymap/cache */
+ util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL);
+
+ /* release rtasm buffer */
+ spe_release_func(&spe_code_front);
+ spe_release_func(&spe_code_back);
+ }
+ else {
+ if (0)
+ debug_printf("**** Re-use Fragment Ops\n");
+ }
+
+ return ops;
+}
+
+
+
static void
emit_state_cmd(struct cell_context *cell, uint cmd,
const void *state, uint state_size)
@@ -73,6 +210,13 @@ cell_emit_state(struct cell_context *cell)
#endif
}
+ if (cell->dirty & (CELL_NEW_RASTERIZER)) {
+ struct cell_command_rasterizer *rast =
+ cell_batch_alloc(cell, sizeof(*rast));
+ rast->opcode = CELL_CMD_STATE_RASTERIZER;
+ rast->rasterizer = *cell->rasterizer;
+ }
+
if (cell->dirty & (CELL_NEW_FS)) {
/* Send new fragment program to SPUs */
struct cell_command_fragment_program *fp
@@ -90,59 +234,81 @@ cell_emit_state(struct cell_context *cell)
}
}
+ if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) {
+ const uint shader = PIPE_SHADER_FRAGMENT;
+ const uint num_const = cell->constants[shader].size / sizeof(float);
+ uint i, j;
+ float *buf = cell_batch_alloc(cell, 16 + num_const * sizeof(float));
+ uint64_t *ibuf = (uint64_t *) buf;
+ const float *constants = pipe_buffer_map(cell->pipe.screen,
+ cell->constants[shader].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS;
+ ibuf[1] = num_const;
+ j = 4;
+ for (i = 0; i < num_const; i++) {
+ buf[j++] = constants[i];
+ }
+ pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader].buffer);
+ }
+
if (cell->dirty & (CELL_NEW_FRAMEBUFFER |
CELL_NEW_DEPTH_STENCIL |
CELL_NEW_BLEND)) {
- /* XXX we don't want to always do codegen here. We should have
- * a hash/lookup table to cache previous results...
- */
- struct cell_command_fragment_ops *fops
- = cell_batch_alloc(cell, sizeof(*fops));
- struct spe_function spe_code;
-
- /* generate new code */
- cell_gen_fragment_function(cell, &spe_code);
- /* put the new code into the batch buffer */
- fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
- memcpy(&fops->code, spe_code.store,
- SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
- fops->dsa = cell->depth_stencil->base;
- fops->blend = cell->blend->base;
- /* free codegen buffer */
- spe_release_func(&spe_code);
+ struct cell_command_fragment_ops *fops, *fops_cmd;
+ /* Note that cell_command_fragment_ops is a variant-sized record */
+ fops = lookup_fragment_ops(cell);
+ fops_cmd = cell_batch_alloc(cell, sizeof(*fops_cmd) + fops->total_code_size);
+ memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size);
}
if (cell->dirty & CELL_NEW_SAMPLER) {
uint i;
for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
- if (cell->sampler[i]) {
- struct cell_command_sampler *sampler
- = cell_batch_alloc(cell, sizeof(*sampler));
- sampler->opcode = CELL_CMD_STATE_SAMPLER;
- sampler->unit = i;
- sampler->state = *cell->sampler[i];
+ if (cell->dirty_samplers & (1 << i)) {
+ if (cell->sampler[i]) {
+ struct cell_command_sampler *sampler
+ = cell_batch_alloc(cell, sizeof(*sampler));
+ sampler->opcode = CELL_CMD_STATE_SAMPLER;
+ sampler->unit = i;
+ sampler->state = *cell->sampler[i];
+ }
}
}
+ cell->dirty_samplers = 0x0;
}
if (cell->dirty & CELL_NEW_TEXTURE) {
uint i;
for (i = 0;i < CELL_MAX_SAMPLERS; i++) {
- struct cell_command_texture *texture
- = cell_batch_alloc(cell, sizeof(*texture));
- texture->opcode = CELL_CMD_STATE_TEXTURE;
- texture->unit = i;
- if (cell->texture[i]) {
- texture->start = cell->texture[i]->tiled_data;
- texture->width = cell->texture[i]->base.width[0];
- texture->height = cell->texture[i]->base.height[0];
- }
- else {
- texture->start = NULL;
- texture->width = 1;
- texture->height = 1;
+ if (cell->dirty_textures & (1 << i)) {
+ struct cell_command_texture *texture
+ = cell_batch_alloc(cell, sizeof(*texture));
+ texture->opcode = CELL_CMD_STATE_TEXTURE;
+ texture->unit = i;
+ if (cell->texture[i]) {
+ uint level;
+ for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
+ texture->start[level] = cell->texture[i]->tiled_mapped[level];
+ texture->width[level] = cell->texture[i]->base.width[level];
+ texture->height[level] = cell->texture[i]->base.height[level];
+ texture->depth[level] = cell->texture[i]->base.depth[level];
+ }
+ texture->target = cell->texture[i]->base.target;
+ }
+ else {
+ uint level;
+ for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
+ texture->start[level] = NULL;
+ texture->width[level] = 0;
+ texture->height[level] = 0;
+ texture->depth[level] = 0;
+ }
+ texture->target = 0;
+ }
}
}
+ cell->dirty_textures = 0x0;
}
if (cell->dirty & CELL_NEW_VERTEX_INFO) {
diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c
index 3a0d066da2a..cda39f8d592 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c
@@ -191,13 +191,18 @@ cell_set_constant_buffer(struct pipe_context *pipe,
assert(shader < PIPE_SHADER_TYPES);
assert(index == 0);
+ draw_flush(cell->draw);
+
/* note: reference counting */
winsys_buffer_reference(ws,
&cell->constants[shader].buffer,
buf->buffer);
cell->constants[shader].size = buf->size;
- cell->dirty |= CELL_NEW_CONSTANTS;
+ if (shader == PIPE_SHADER_VERTEX)
+ cell->dirty |= CELL_NEW_VS_CONSTANTS;
+ else if (shader == PIPE_SHADER_FRAGMENT)
+ cell->dirty |= CELL_NEW_FS_CONSTANTS;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c
index 732c64082ef..c9203fee087 100644
--- a/src/gallium/drivers/cell/ppu/cell_surface.c
+++ b/src/gallium/drivers/cell/ppu/cell_surface.c
@@ -27,6 +27,7 @@
#include "util/u_rect.h"
#include "cell_context.h"
+#include "cell_surface.h"
void
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index b6590dfb86e..9f83ab8fa4c 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -28,6 +28,7 @@
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
* Michel Dänzer <michel@tungstengraphics.com>
+ * Brian Paul
*/
#include "pipe/p_context.h"
@@ -42,30 +43,31 @@
#include "cell_texture.h"
-/* Simple, maximally packed layout.
- */
-static unsigned minify( unsigned d )
+static unsigned
+minify(unsigned d)
{
return MAX2(1, d>>1);
}
static void
-cell_texture_layout(struct cell_texture * spt)
+cell_texture_layout(struct cell_texture *ct)
{
- struct pipe_texture *pt = &spt->base;
+ struct pipe_texture *pt = &ct->base;
unsigned level;
unsigned width = pt->width[0];
unsigned height = pt->height[0];
unsigned depth = pt->depth[0];
- spt->buffer_size = 0;
+ ct->buffer_size = 0;
for ( level = 0 ; level <= pt->last_level ; level++ ) {
unsigned size;
unsigned w_tile, h_tile;
+ assert(level < CELL_MAX_TEXTURE_LEVELS);
+
/* width, height, rounded up to tile size */
w_tile = align(width, TILE_SIZE);
h_tile = align(height, TILE_SIZE);
@@ -76,9 +78,9 @@ cell_texture_layout(struct cell_texture * spt)
pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile);
pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile);
- spt->stride[level] = pt->nblocksx[level] * pt->block.size;
+ ct->stride[level] = pt->nblocksx[level] * pt->block.size;
- spt->level_offset[level] = spt->buffer_size;
+ ct->level_offset[level] = ct->buffer_size;
size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size;
if (pt->target == PIPE_TEXTURE_CUBE)
@@ -86,7 +88,7 @@ cell_texture_layout(struct cell_texture * spt)
else
size *= depth;
- spt->buffer_size += size;
+ ct->buffer_size += size;
width = minify(width);
height = minify(height);
@@ -100,26 +102,25 @@ cell_texture_create(struct pipe_screen *screen,
const struct pipe_texture *templat)
{
struct pipe_winsys *ws = screen->winsys;
- struct cell_texture *spt = CALLOC_STRUCT(cell_texture);
- if (!spt)
+ struct cell_texture *ct = CALLOC_STRUCT(cell_texture);
+ if (!ct)
return NULL;
- spt->base = *templat;
- spt->base.refcount = 1;
- spt->base.screen = screen;
+ ct->base = *templat;
+ ct->base.refcount = 1;
+ ct->base.screen = screen;
- cell_texture_layout(spt);
+ cell_texture_layout(ct);
- spt->buffer = ws->buffer_create(ws, 32,
- PIPE_BUFFER_USAGE_PIXEL,
- spt->buffer_size);
+ ct->buffer = ws->buffer_create(ws, 32, PIPE_BUFFER_USAGE_PIXEL,
+ ct->buffer_size);
- if (!spt->buffer) {
- FREE(spt);
+ if (!ct->buffer) {
+ FREE(ct);
return NULL;
}
- return &spt->base;
+ return &ct->base;
}
@@ -135,177 +136,310 @@ cell_texture_release(struct pipe_screen *screen,
__FUNCTION__, (void *) *pt, (*pt)->refcount - 1);
*/
if (--(*pt)->refcount <= 0) {
- struct cell_texture *spt = cell_texture(*pt);
+ /* Delete this texture now.
+ * But note that the underlying pipe_buffer may linger...
+ */
+ struct cell_texture *ct = cell_texture(*pt);
+ uint i;
/*
- DBG("%s deleting %p\n", __FUNCTION__, (void *) spt);
+ DBG("%s deleting %p\n", __FUNCTION__, (void *) ct);
*/
- pipe_buffer_reference(screen, &spt->buffer, NULL);
+ pipe_buffer_reference(screen, &ct->buffer, NULL);
- FREE(spt);
+ for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
+ /* Unreference the tiled image buffer.
+ * It may not actually be deleted until a fence is hit.
+ */
+ if (ct->tiled_buffer[i]) {
+ ct->tiled_mapped[i] = NULL;
+ winsys_buffer_reference(screen->winsys, &ct->tiled_buffer[i], NULL);
+ }
+ }
+
+ FREE(ct);
}
*pt = NULL;
}
-#if 0
+
+/**
+ * Convert image from linear layout to tiled layout. 4-byte pixels.
+ */
static void
-cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture,
- uint face, uint levelsMask)
+twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst,
+ uint src_stride, const uint *src)
{
- /* XXX TO DO: re-tile the texture data ... */
+ const uint tile_size2 = tile_size * tile_size;
+ const uint h_t = (h + tile_size - 1) / tile_size;
+ const uint w_t = (w + tile_size - 1) / tile_size;
-}
-#endif
+ uint it, jt; /* tile counters */
+ uint i, j; /* intra-tile counters */
+ src_stride /= 4; /* convert from bytes to pixels */
-static struct pipe_surface *
-cell_get_tex_surface(struct pipe_screen *screen,
- struct pipe_texture *pt,
- unsigned face, unsigned level, unsigned zslice,
- unsigned usage)
-{
- struct pipe_winsys *ws = screen->winsys;
- struct cell_texture *spt = cell_texture(pt);
- struct pipe_surface *ps;
+ /* loop over dest tiles */
+ for (it = 0; it < h_t; it++) {
+ for (jt = 0; jt < w_t; jt++) {
+ /* start of dest tile: */
+ uint *tdst = dst + (it * w_t + jt) * tile_size2;
- ps = ws->surface_alloc(ws);
- if (ps) {
- assert(ps->refcount);
- assert(ps->winsys);
- winsys_buffer_reference(ws, &ps->buffer, spt->buffer);
- ps->format = pt->format;
- ps->block = pt->block;
- ps->width = pt->width[level];
- ps->height = pt->height[level];
- ps->nblocksx = pt->nblocksx[level];
- ps->nblocksy = pt->nblocksy[level];
- ps->stride = spt->stride[level];
- ps->offset = spt->level_offset[level];
- ps->usage = usage;
+ /* compute size of this tile (may be smaller than tile_size) */
+ /* XXX note: a compiler bug was found here. That's why the code
+ * looks as it does.
+ */
+ uint tile_width = w - jt * tile_size;
+ tile_width = MIN2(tile_width, tile_size);
+ uint tile_height = h - it * tile_size;
+ tile_height = MIN2(tile_height, tile_size);
- /* XXX may need to override usage flags (see sp_texture.c) */
+ /* loop over texels in the tile */
+ for (i = 0; i < tile_height; i++) {
+ for (j = 0; j < tile_width; j++) {
+ const uint srci = it * tile_size + i;
+ const uint srcj = jt * tile_size + j;
+ ASSERT(srci < h);
+ ASSERT(srcj < w);
+ tdst[i * tile_size + j] = src[srci * src_stride + srcj];
+ }
+ }
+ }
+ }
+}
- pipe_texture_reference(&ps->texture, pt);
- ps->face = face;
- ps->level = level;
- ps->zslice = zslice;
- if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) {
- ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) *
- ps->nblocksy *
- ps->stride;
- }
- else {
- assert(face == 0);
- assert(zslice == 0);
+/**
+ * For Cell. Basically, rearrange the pixels/quads from this layout:
+ * +--+--+--+--+
+ * |p0|p1|p2|p3|....
+ * +--+--+--+--+
+ *
+ * to this layout:
+ * +--+--+
+ * |p0|p1|....
+ * +--+--+
+ * |p2|p3|
+ * +--+--+
+ */
+static void
+twiddle_tile(const uint *tileIn, uint *tileOut)
+{
+ int y, x;
+
+ for (y = 0; y < TILE_SIZE; y+=2) {
+ for (x = 0; x < TILE_SIZE; x+=2) {
+ int k = 4 * (y/2 * TILE_SIZE/2 + x/2);
+ tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k];
+ tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1];
+ tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2];
+ tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3];
}
}
- return ps;
}
-
/**
- * Copy tile data from linear layout to tiled layout.
- * XXX this should be rolled into the future surface-creation code.
- * XXX also need "untile" code...
+ * Convert image from tiled layout to linear layout. 4-byte pixels.
*/
static void
-tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src)
+untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst,
+ uint dst_stride, const uint *src)
{
const uint tile_size2 = tile_size * tile_size;
- const uint h_t = h / tile_size, w_t = w / tile_size;
-
+ const uint h_t = (h + tile_size - 1) / tile_size;
+ const uint w_t = (w + tile_size - 1) / tile_size;
+ uint *tile_buf;
uint it, jt; /* tile counters */
uint i, j; /* intra-tile counters */
- /* loop over dest tiles */
+ dst_stride /= 4; /* convert from bytes to pixels */
+
+ tile_buf = align_malloc(tile_size * tile_size * 4, 16);
+
+ /* loop over src tiles */
for (it = 0; it < h_t; it++) {
for (jt = 0; jt < w_t; jt++) {
- /* start of dest tile: */
- uint *tdst = dst + (it * w_t + jt) * tile_size2;
+ /* start of src tile: */
+ const uint *tsrc = src + (it * w_t + jt) * tile_size2;
+
+ twiddle_tile(tsrc, tile_buf);
+ tsrc = tile_buf;
+
+ /* compute size of this tile (may be smaller than tile_size) */
+ /* XXX note: a compiler bug was found here. That's why the code
+ * looks as it does.
+ */
+ uint tile_width = w - jt * tile_size;
+ tile_width = MIN2(tile_width, tile_size);
+ uint tile_height = h - it * tile_size;
+ tile_height = MIN2(tile_height, tile_size);
+
/* loop over texels in the tile */
- for (i = 0; i < tile_size; i++) {
- for (j = 0; j < tile_size; j++) {
- const uint srci = it * tile_size + i;
- const uint srcj = jt * tile_size + j;
- *tdst++ = src[srci * w + srcj];
+ for (i = 0; i < tile_height; i++) {
+ for (j = 0; j < tile_width; j++) {
+ uint dsti = it * tile_size + i;
+ uint dstj = jt * tile_size + j;
+ ASSERT(dsti < h);
+ ASSERT(dstj < w);
+ dst[dsti * dst_stride + dstj] = tsrc[i * tile_size + j];
}
}
}
}
-}
+ align_free(tile_buf);
+}
/**
* Convert linear texture image data to tiled format for SPU usage.
- * XXX recast this in terms of pipe_surfaces (aka texture views).
*/
static void
-cell_tile_texture(struct cell_context *cell,
- struct cell_texture *texture)
+cell_twiddle_texture(struct pipe_screen *screen,
+ struct pipe_surface *surface)
{
- struct pipe_screen *screen = cell->pipe.screen;
- uint face = 0, level = 0, zslice = 0;
- struct pipe_surface *surf;
- const uint w = texture->base.width[0], h = texture->base.height[0];
- const uint *src;
-
- /* temporary restrictions: */
- assert(w >= TILE_SIZE);
- assert(h >= TILE_SIZE);
- assert(w % TILE_SIZE == 0);
- assert(h % TILE_SIZE == 0);
-
- surf = screen->get_tex_surface(screen, &texture->base, face, level, zslice,
- PIPE_BUFFER_USAGE_CPU_WRITE);
- ASSERT(surf);
-
- src = (const uint *) pipe_surface_map(surf, PIPE_BUFFER_USAGE_CPU_WRITE);
-
- if (texture->tiled_data) {
- align_free(texture->tiled_data);
+ struct cell_texture *ct = cell_texture(surface->texture);
+ const uint level = surface->level;
+ const uint texWidth = ct->base.width[level];
+ const uint texHeight = ct->base.height[level];
+ const uint bufWidth = align(texWidth, TILE_SIZE);
+ const uint bufHeight = align(texHeight, TILE_SIZE);
+ const void *map = pipe_buffer_map(screen, surface->buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ const uint *src = (const uint *) ((const ubyte *) map + surface->offset);
+
+ switch (ct->base.format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ {
+ int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 6 : 1;
+ int offset = bufWidth * bufHeight * 4 * surface->face;
+ uint *dst;
+
+ if (!ct->tiled_buffer[level]) {
+ /* allocate buffer for tiled data now */
+ struct pipe_winsys *ws = screen->winsys;
+ uint bytes = bufWidth * bufHeight * 4 * numFaces;
+ ct->tiled_buffer[level] = ws->buffer_create(ws, 16,
+ PIPE_BUFFER_USAGE_PIXEL,
+ bytes);
+ /* and map it */
+ ct->tiled_mapped[level] = ws->buffer_map(ws, ct->tiled_buffer[level],
+ PIPE_BUFFER_USAGE_GPU_READ);
+ }
+ dst = (uint *) ((ubyte *) ct->tiled_mapped[level] + offset);
+
+ twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst,
+ surface->stride, src);
+ }
+ break;
+ default:
+ printf("Cell: twiddle unsupported texture format %s\n", pf_name(ct->base.format));
+ ;
}
- texture->tiled_data = align_malloc(w * h * 4, 16);
- tile_copy_data(w, h, TILE_SIZE, texture->tiled_data, src);
+ pipe_buffer_unmap(screen, surface->buffer);
+}
+
+
+/**
+ * Convert SPU tiled texture image data to linear format for app usage.
+ */
+static void
+cell_untwiddle_texture(struct pipe_screen *screen,
+ struct pipe_surface *surface)
+{
+ struct cell_texture *ct = cell_texture(surface->texture);
+ const uint level = surface->level;
+ const uint texWidth = ct->base.width[level];
+ const uint texHeight = ct->base.height[level];
+ const void *map = pipe_buffer_map(screen, surface->buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ const uint *src = (const uint *) ((const ubyte *) map + surface->offset);
+
+ switch (ct->base.format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ {
+ int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 6 : 1;
+ int offset = surface->stride * texHeight * 4 * surface->face;
+ uint *dst;
+
+ if (!ct->untiled_data[level]) {
+ ct->untiled_data[level] =
+ align_malloc(surface->stride * texHeight * 4 * numFaces, 16);
+ }
+
+ dst = (uint *) ((ubyte *) ct->untiled_data[level] + offset);
- pipe_surface_unmap(surf);
+ untwiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst,
+ surface->stride, src);
+ }
+ break;
+ default:
+ {
+ ct->untiled_data[level] = NULL;
+ printf("Cell: untwiddle unsupported texture format %s\n", pf_name(ct->base.format));
+ }
+ }
- pipe_surface_reference(&surf, NULL);
+ pipe_buffer_unmap(screen, surface->buffer);
}
-void
-cell_update_texture_mapping(struct cell_context *cell)
+static struct pipe_surface *
+cell_get_tex_surface(struct pipe_screen *screen,
+ struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned usage)
{
-#if 0
- uint face = 0, level = 0, zslice = 0;
-#endif
- uint i;
-
- for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
- if (cell->texture[i])
- cell_tile_texture(cell, cell->texture[i]);
- }
+ struct pipe_winsys *ws = screen->winsys;
+ struct cell_texture *ct = cell_texture(pt);
+ struct pipe_surface *ps;
-#if 0
- if (cell->tex_surf && cell->tex_map) {
- pipe_surface_unmap(cell->tex_surf);
- cell->tex_map = NULL;
- }
+ ps = ws->surface_alloc(ws);
+ if (ps) {
+ assert(ps->refcount);
+ assert(ps->winsys);
+ winsys_buffer_reference(ws, &ps->buffer, ct->buffer);
+ ps->format = pt->format;
+ ps->block = pt->block;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = ct->stride[level];
+ ps->offset = ct->level_offset[level];
+ ps->usage = usage;
- /* XXX free old surface */
+ /* XXX may need to override usage flags (see sp_texture.c) */
- cell->tex_surf = cell_get_tex_surface(&cell->pipe,
- &cell->texture[0]->base,
- face, level, zslice);
+ pipe_texture_reference(&ps->texture, pt);
+ ps->face = face;
+ ps->level = level;
+ ps->zslice = zslice;
- cell->tex_map = pipe_surface_map(cell->tex_surf);
-#endif
+ if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) {
+ ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) *
+ ps->nblocksy *
+ ps->stride;
+ }
+ else {
+ assert(face == 0);
+ assert(zslice == 0);
+ }
+
+ if (ps->usage & PIPE_BUFFER_USAGE_CPU_READ) {
+ /* convert from tiled to linear layout */
+ cell_untwiddle_texture(screen, ps);
+ }
+ }
+ return ps;
}
@@ -313,11 +447,17 @@ static void
cell_tex_surface_release(struct pipe_screen *screen,
struct pipe_surface **s)
{
- /* Effectively do the texture_update work here - if texture images
- * needed post-processing to put them into hardware layout, this is
- * where it would happen. For softpipe, nothing to do.
- */
- assert ((*s)->texture);
+ struct cell_texture *ct = cell_texture((*s)->texture);
+ const uint level = (*s)->level;
+
+ if (((*s)->usage & PIPE_BUFFER_USAGE_CPU_READ) && (ct->untiled_data[level]))
+ {
+ align_free(ct->untiled_data[level]);
+ ct->untiled_data[level] = NULL;
+ }
+
+ /* XXX if done rendering to teximage, re-tile */
+
pipe_texture_reference(&(*s)->texture, NULL);
screen->winsys->surface_release(screen->winsys, s);
@@ -325,11 +465,15 @@ cell_tex_surface_release(struct pipe_screen *screen,
static void *
-cell_surface_map( struct pipe_screen *screen,
- struct pipe_surface *surface,
- unsigned flags )
+cell_surface_map(struct pipe_screen *screen,
+ struct pipe_surface *surface,
+ unsigned flags)
{
ubyte *map;
+ struct cell_texture *ct = cell_texture(surface->texture);
+ const uint level = surface->level;
+
+ assert(ct);
if (flags & ~surface->usage) {
assert(0);
@@ -339,22 +483,15 @@ cell_surface_map( struct pipe_screen *screen,
map = pipe_buffer_map( screen, surface->buffer, flags );
if (map == NULL)
return NULL;
-
- /* May want to different things here depending on read/write nature
- * of the map:
- */
- if (surface->texture &&
- (flags & PIPE_BUFFER_USAGE_CPU_WRITE))
+ else
{
- /* Do something to notify sharing contexts of a texture change.
- * In softpipe, that would mean flushing the texture cache.
- */
-#if 0
- cell_screen(screen)->timestamp++;
-#endif
+ if ((surface->usage & PIPE_BUFFER_USAGE_CPU_READ) && (ct->untiled_data[level])) {
+ return (void *) ((ubyte *) ct->untiled_data[level] + surface->offset);
+ }
+ else {
+ return (void *) (map + surface->offset);
+ }
}
-
- return map + surface->offset;
}
@@ -362,17 +499,21 @@ static void
cell_surface_unmap(struct pipe_screen *screen,
struct pipe_surface *surface)
{
- pipe_buffer_unmap( screen, surface->buffer );
-}
+ struct cell_texture *ct = cell_texture(surface->texture);
+ assert(ct);
-void
-cell_init_texture_functions(struct cell_context *cell)
-{
- /*cell->pipe.texture_update = cell_texture_update;*/
+ if ((ct->base.tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) &&
+ (surface->usage & PIPE_BUFFER_USAGE_CPU_WRITE)) {
+ /* convert from linear to tiled layout */
+ cell_twiddle_texture(screen, surface);
+ }
+
+ pipe_buffer_unmap( screen, surface->buffer );
}
+
void
cell_init_screen_texture_funcs(struct pipe_screen *screen)
{
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h
index 6d37e95ebce..7018b0c9bf7 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.h
+++ b/src/gallium/drivers/cell/ppu/cell_texture.h
@@ -40,15 +40,19 @@ struct cell_texture
{
struct pipe_texture base;
- unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS];
- unsigned long stride[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS];
+ unsigned long stride[CELL_MAX_TEXTURE_LEVELS];
/* The data is held here:
*/
struct pipe_buffer *buffer;
unsigned long buffer_size;
- void *tiled_data; /* XXX this may be temporary */ /*ALIGN16*/
+ /** Texture data in tiled layout is held here */
+ struct pipe_buffer *tiled_buffer[CELL_MAX_TEXTURE_LEVELS];
+ /** Mapped, tiled texture data */
+ void *tiled_mapped[CELL_MAX_TEXTURE_LEVELS];
+ void *untiled_data[CELL_MAX_TEXTURE_LEVELS];
};
@@ -62,14 +66,6 @@ cell_texture(struct pipe_texture *pt)
extern void
-cell_update_texture_mapping(struct cell_context *cell);
-
-
-extern void
-cell_init_texture_functions(struct cell_context *cell);
-
-
-extern void
cell_init_screen_texture_funcs(struct pipe_screen *screen);
diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c
index aa63435b934..65ba51b6bb2 100644
--- a/src/gallium/drivers/cell/ppu/cell_vbuf.c
+++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c
@@ -38,6 +38,7 @@
#include "cell_batch.h"
#include "cell_context.h"
+#include "cell_fence.h"
#include "cell_flush.h"
#include "cell_spu.h"
#include "cell_vbuf.h"
@@ -108,6 +109,11 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
__FUNCTION__, cvbr->vertex_buf, vertices_used);
*/
+ /* Make sure texture buffers aren't released until we're done rendering
+ * with them.
+ */
+ cell_add_fenced_textures(cell);
+
/* Tell SPUs they can release the vert buf */
if (cvbr->vertex_buf != ~0U) {
struct cell_command_release_verts *release
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
index 566df7f59e3..9cba537d9eb 100644
--- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
@@ -73,8 +73,8 @@ emit_matrix_transpose(struct spe_function *p,
int col3;
- spe_lqd(p, shuf_hi, shuf_ptr, 3);
- spe_lqd(p, shuf_lo, shuf_ptr, 4);
+ spe_lqd(p, shuf_hi, shuf_ptr, 3*16);
+ spe_lqd(p, shuf_lo, shuf_ptr, 4*16);
spe_shufb(p, t1, row0, row2, shuf_hi);
spe_shufb(p, t2, row0, row2, shuf_lo);
@@ -122,13 +122,13 @@ emit_matrix_transpose(struct spe_function *p,
*/
switch (count) {
case 4:
- spe_stqd(p, col3, dest_ptr, 3);
+ spe_stqd(p, col3, dest_ptr, 3 * 16);
case 3:
- spe_stqd(p, col2, dest_ptr, 2);
+ spe_stqd(p, col2, dest_ptr, 2 * 16);
case 2:
- spe_stqd(p, col1, dest_ptr, 1);
+ spe_stqd(p, col1, dest_ptr, 1 * 16);
case 1:
- spe_stqd(p, col0, dest_ptr, 0);
+ spe_stqd(p, col0, dest_ptr, 0 * 16);
}
@@ -145,6 +145,8 @@ emit_matrix_transpose(struct spe_function *p,
}
+#if 0
+/* This appears to not be used currently */
static void
emit_fetch(struct spe_function *p,
unsigned in_ptr, unsigned *offset,
@@ -166,17 +168,17 @@ emit_fetch(struct spe_function *p,
float scale_signed = 0.0;
float scale_unsigned = 0.0;
- spe_lqd(p, v0, in_ptr, 0 + offset[0]);
- spe_lqd(p, v1, in_ptr, 1 + offset[0]);
- spe_lqd(p, v2, in_ptr, 2 + offset[0]);
- spe_lqd(p, v3, in_ptr, 3 + offset[0]);
+ spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16);
+ spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16);
+ spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16);
+ spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16);
offset[0] += 4;
switch (bytes) {
case 1:
scale_signed = 1.0f / 127.0f;
scale_unsigned = 1.0f / 255.0f;
- spe_lqd(p, tmp, shuf_ptr, 1);
+ spe_lqd(p, tmp, shuf_ptr, 1 * 16);
spe_shufb(p, v0, v0, v0, tmp);
spe_shufb(p, v1, v1, v1, tmp);
spe_shufb(p, v2, v2, v2, tmp);
@@ -185,7 +187,7 @@ emit_fetch(struct spe_function *p,
case 2:
scale_signed = 1.0f / 32767.0f;
scale_unsigned = 1.0f / 65535.0f;
- spe_lqd(p, tmp, shuf_ptr, 2);
+ spe_lqd(p, tmp, shuf_ptr, 2 * 16);
spe_shufb(p, v0, v0, v0, tmp);
spe_shufb(p, v1, v1, v1, tmp);
spe_shufb(p, v2, v2, v2, tmp);
@@ -241,11 +243,11 @@ emit_fetch(struct spe_function *p,
switch (count) {
case 1:
- spe_stqd(p, float_zero, out_ptr, 1);
+ spe_stqd(p, float_zero, out_ptr, 1 * 16);
case 2:
- spe_stqd(p, float_zero, out_ptr, 2);
+ spe_stqd(p, float_zero, out_ptr, 2 * 16);
case 3:
- spe_stqd(p, float_one, out_ptr, 3);
+ spe_stqd(p, float_one, out_ptr, 3 * 16);
}
if (float_zero != -1) {
@@ -256,6 +258,7 @@ emit_fetch(struct spe_function *p,
spe_release_register(p, float_one);
}
}
+#endif
void cell_update_vertex_fetch(struct draw_context *draw)
diff --git a/src/gallium/drivers/cell/spu/.gitignore b/src/gallium/drivers/cell/spu/.gitignore
new file mode 100644
index 00000000000..2be9a2d3242
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/.gitignore
@@ -0,0 +1 @@
+g3d_spu
diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile
index 1ae0dfb8c10..116453b79c5 100644
--- a/src/gallium/drivers/cell/spu/Makefile
+++ b/src/gallium/drivers/cell/spu/Makefile
@@ -16,8 +16,10 @@ PROG_SPU_EMBED_O = $(PROG)_spu-embed.o
SOURCES = \
- spu_main.c \
+ spu_command.c \
spu_dcache.c \
+ spu_funcs.c \
+ spu_main.c \
spu_per_fragment_op.c \
spu_render.c \
spu_texture.c \
diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h
index fd8dc6ded3e..d7ce0055248 100644
--- a/src/gallium/drivers/cell/spu/spu_colorpack.h
+++ b/src/gallium/drivers/cell/spu/spu_colorpack.h
@@ -31,6 +31,7 @@
#define SPU_COLORPACK_H
+#include <transpose_matrix4x4.h>
#include <spu_intrinsics.h>
@@ -84,10 +85,10 @@ spu_unpack_B8G8R8A8(uint color)
vector unsigned int color_u4 = spu_splats(color);
color_u4 = spu_shuffle(color_u4, color_u4,
((vector unsigned char) {
- 10, 10, 10, 10,
- 5, 5, 5, 5,
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
0, 0, 0, 0,
- 15, 15, 15, 15}) );
+ 3, 3, 3, 3}) );
return spu_convtf(color_u4, 32);
}
@@ -98,13 +99,47 @@ spu_unpack_A8R8G8B8(uint color)
vector unsigned int color_u4 = spu_splats(color);
color_u4 = spu_shuffle(color_u4, color_u4,
((vector unsigned char) {
- 5, 5, 5, 5,
- 10, 10, 10, 10,
- 15, 15, 15, 15,
+ 1, 1, 1, 1,
+ 2, 2, 2, 2,
+ 3, 3, 3, 3,
0, 0, 0, 0}) );
-
return spu_convtf(color_u4, 32);
}
+/**
+ * \param color_in - array of 32-bit packed ARGB colors
+ * \param color_out - returns float colors in RRRR, GGGG, BBBB, AAAA order
+ */
+static INLINE void
+spu_unpack_A8R8G8B8_transpose4(const vector unsigned int color_in[4],
+ vector float color_out[4])
+{
+ vector unsigned int c0;
+
+ c0 = spu_shuffle(color_in[0], color_in[0],
+ ((vector unsigned char) {
+ 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
+ color_out[0] = spu_convtf(c0, 32);
+
+ c0 = spu_shuffle(color_in[1], color_in[1],
+ ((vector unsigned char) {
+ 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
+ color_out[1] = spu_convtf(c0, 32);
+
+ c0 = spu_shuffle(color_in[2], color_in[2],
+ ((vector unsigned char) {
+ 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
+ color_out[2] = spu_convtf(c0, 32);
+
+ c0 = spu_shuffle(color_in[3], color_in[3],
+ ((vector unsigned char) {
+ 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
+ color_out[3] = spu_convtf(c0, 32);
+
+ _transpose_matrix4x4(color_out, color_out);
+}
+
+
+
#endif /* SPU_COLORPACK_H */
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
new file mode 100644
index 00000000000..8500d19754e
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_command.c
@@ -0,0 +1,815 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * SPU command processing code
+ */
+
+
+#include <stdio.h>
+#include <libmisc.h>
+
+#include "pipe/p_defines.h"
+
+#include "spu_command.h"
+#include "spu_main.h"
+#include "spu_render.h"
+#include "spu_per_fragment_op.h"
+#include "spu_texture.h"
+#include "spu_tile.h"
+#include "spu_vertex_shader.h"
+#include "spu_dcache.h"
+#include "cell/common.h"
+
+
+struct spu_vs_context draw;
+
+
+/**
+ * Buffers containing dynamically generated SPU code:
+ */
+static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
+ ALIGN16_ATTRIB;
+
+
+
+static INLINE int
+align(int value, int alignment)
+{
+ return (value + alignment - 1) & ~(alignment - 1);
+}
+
+
+
+/**
+ * Tell the PPU that this SPU has finished copying a buffer to
+ * local store and that it may be reused by the PPU.
+ * This is done by writting a 16-byte batch-buffer-status block back into
+ * main memory (in cell_context->buffer_status[]).
+ */
+static void
+release_buffer(uint buffer)
+{
+ /* Evidently, using less than a 16-byte status doesn't work reliably */
+ static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE,
+ CELL_BUFFER_STATUS_FREE,
+ CELL_BUFFER_STATUS_FREE,
+ CELL_BUFFER_STATUS_FREE};
+ const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
+ uint *dst = spu.init.buffer_status + index;
+
+ ASSERT(buffer < CELL_NUM_BUFFERS);
+
+ mfc_put((void *) &status, /* src in local memory */
+ (unsigned int) dst, /* dst in main memory */
+ sizeof(status), /* size */
+ TAG_MISC, /* tag is unimportant */
+ 0, /* tid */
+ 0 /* rid */);
+}
+
+
+/**
+ * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory.
+ * There's a qword of status per SPU.
+ */
+static void
+cmd_fence(struct cell_command_fence *fence_cmd)
+{
+ static const vector unsigned int status = {CELL_FENCE_SIGNALLED,
+ CELL_FENCE_SIGNALLED,
+ CELL_FENCE_SIGNALLED,
+ CELL_FENCE_SIGNALLED};
+ uint *dst = (uint *) fence_cmd->fence;
+ dst += 4 * spu.init.id; /* main store/memory address, not local store */
+ ASSERT_ALIGN16(dst);
+ mfc_put((void *) &status, /* src in local memory */
+ (unsigned int) dst, /* dst in main memory */
+ sizeof(status), /* size */
+ TAG_FENCE, /* tag */
+ 0, /* tid */
+ 0 /* rid */);
+}
+
+
+static void
+cmd_clear_surface(const struct cell_command_clear_surface *clear)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);
+
+ if (clear->surface == 0) {
+ spu.fb.color_clear_value = clear->value;
+ if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
+ uint x = (spu.init.id << 4) | (spu.init.id << 12) |
+ (spu.init.id << 20) | (spu.init.id << 28);
+ spu.fb.color_clear_value ^= x;
+ }
+ }
+ else {
+ spu.fb.depth_clear_value = clear->value;
+ }
+
+#define CLEAR_OPT 1
+#if CLEAR_OPT
+
+ /* Simply set all tiles' status to CLEAR.
+ * When we actually begin rendering into a tile, we'll initialize it to
+ * the clear value. If any tiles go untouched during the frame,
+ * really_clear_tiles() will set them to the clear value.
+ */
+ if (clear->surface == 0) {
+ memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
+ }
+ else {
+ memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
+ }
+
+#else
+
+ /*
+ * This path clears the whole framebuffer to the clear color right now.
+ */
+
+ /*
+ printf("SPU: %s num=%d w=%d h=%d\n",
+ __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
+ */
+
+ /* init a single tile to the clear value */
+ if (clear->surface == 0) {
+ clear_c_tile(&spu.ctile);
+ }
+ else {
+ clear_z_tile(&spu.ztile);
+ }
+
+ /* walk over my tiles, writing the 'clear' tile's data */
+ {
+ const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
+ uint i;
+ for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+ uint tx = i % spu.fb.width_tiles;
+ uint ty = i / spu.fb.width_tiles;
+ if (clear->surface == 0)
+ put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
+ else
+ put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
+ }
+ }
+
+ if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
+ wait_on_mask(1 << TAG_SURFACE_CLEAR);
+ }
+
+#endif /* CLEAR_OPT */
+
+ D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n");
+}
+
+
+static void
+cmd_release_verts(const struct cell_command_release_verts *release)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf);
+ ASSERT(release->vertex_buf != ~0U);
+ release_buffer(release->vertex_buf);
+}
+
+
+/**
+ * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
+ * This involves installing new fragment ops SPU code.
+ * If this function is never called, we'll use a regular C fallback function
+ * for fragment processing.
+ */
+static void
+cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n");
+
+ /* Copy state info (for fallback case only - this will eventually
+ * go away when the fallback case goes away)
+ */
+ memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
+ memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
+ memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color));
+
+ /* Make sure the SPU knows which buffers it's expected to read when
+ * it's told to pull tiles.
+ */
+ spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled);
+
+ /* If we're forcing the fallback code to be used (for debug purposes),
+ * install that. Otherwise install the incoming SPU code.
+ */
+ if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) {
+ static unsigned int warned = 0;
+ if (!warned) {
+ fprintf(stderr, "Cell Warning: using fallback per-fragment code\n");
+ warned = 1;
+ }
+ /* The following two lines aren't really necessary if you
+ * know the debug flags won't change during a run, and if you
+ * know that the function pointers are initialized correctly.
+ * We set them here to allow a person to change the debug
+ * flags during a run (from inside a debugger).
+ */
+ spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
+ return;
+ }
+
+ /* Make sure the SPU code buffer is large enough to hold the incoming code.
+ * Note that we *don't* use align_malloc() and align_free(), because
+ * those utility functions are *not* available in SPU code.
+ * */
+ if (spu.fragment_ops_code_size < fops->total_code_size) {
+ if (spu.fragment_ops_code != NULL) {
+ free(spu.fragment_ops_code);
+ }
+ spu.fragment_ops_code_size = fops->total_code_size;
+ spu.fragment_ops_code = malloc(fops->total_code_size);
+ if (spu.fragment_ops_code == NULL) {
+ /* Whoops. */
+ fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size);
+ spu.fragment_ops_code = NULL;
+ spu.fragment_ops_code_size = 0;
+ spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
+ return;
+ }
+ }
+
+ /* Copy the SPU code from the command buffer to the spu buffer */
+ memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size);
+
+ /* Set the pointers for the front-facing and back-facing fragments
+ * to the specified offsets within the code. Note that if the
+ * front-facing and back-facing code are the same, they'll have
+ * the same offset.
+ */
+ spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index];
+ spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index];
+}
+
+static void
+cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n");
+ /* Copy SPU code from batch buffer to spu buffer */
+ memcpy(spu.fragment_program_code, fp->code,
+ SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
+#if 01
+ /* Point function pointer at new code */
+ spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
+#endif
+}
+
+
+static uint
+cmd_state_fs_constants(const uint64_t *buffer, uint pos)
+{
+ const uint num_const = buffer[pos + 1];
+ const float *constants = (const float *) &buffer[pos + 2];
+ uint i;
+
+ D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const);
+
+ /* Expand each float to float[4] for SOA execution */
+ for (i = 0; i < num_const; i++) {
+ D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]);
+ spu.constants[i] = spu_splats(constants[i]);
+ }
+
+ /* return new buffer pos (in 8-byte words) */
+ return pos + 2 + num_const / 2;
+}
+
+
+static void
+cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
+ cmd->width,
+ cmd->height,
+ cmd->color_start,
+ cmd->color_format,
+ cmd->depth_format);
+
+ ASSERT_ALIGN16(cmd->color_start);
+ ASSERT_ALIGN16(cmd->depth_start);
+
+ spu.fb.color_start = cmd->color_start;
+ spu.fb.depth_start = cmd->depth_start;
+ spu.fb.color_format = cmd->color_format;
+ spu.fb.depth_format = cmd->depth_format;
+ spu.fb.width = cmd->width;
+ spu.fb.height = cmd->height;
+ spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
+ spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;
+
+ switch (spu.fb.depth_format) {
+ case PIPE_FORMAT_Z32_UNORM:
+ spu.fb.zsize = 4;
+ spu.fb.zscale = (float) 0xffffffffu;
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ spu.fb.zsize = 4;
+ spu.fb.zscale = (float) 0x00ffffffu;
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
+ spu.fb.zsize = 2;
+ spu.fb.zscale = (float) 0xffffu;
+ break;
+ default:
+ spu.fb.zsize = 0;
+ break;
+ }
+}
+
+
+/**
+ * Tex texture mask_s/t and scale_s/t fields depend on the texture size and
+ * sampler wrap modes.
+ */
+static void
+update_tex_masks(struct spu_texture *texture,
+ const struct pipe_sampler_state *sampler)
+{
+ uint i;
+
+ for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
+ int width = texture->level[i].width;
+ int height = texture->level[i].height;
+
+ if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT)
+ texture->level[i].mask_s = spu_splats(width - 1);
+ else
+ texture->level[i].mask_s = spu_splats(~0);
+
+ if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT)
+ texture->level[i].mask_t = spu_splats(height - 1);
+ else
+ texture->level[i].mask_t = spu_splats(~0);
+
+ if (sampler->normalized_coords) {
+ texture->level[i].scale_s = spu_splats((float) width);
+ texture->level[i].scale_t = spu_splats((float) height);
+ }
+ else {
+ texture->level[i].scale_s = spu_splats(1.0f);
+ texture->level[i].scale_t = spu_splats(1.0f);
+ }
+ }
+}
+
+
+static void
+cmd_state_sampler(const struct cell_command_sampler *sampler)
+{
+ uint unit = sampler->unit;
+
+ D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit);
+
+ spu.sampler[unit] = sampler->state;
+
+ switch (spu.sampler[unit].min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear;
+ break;
+ case PIPE_TEX_FILTER_ANISO:
+ /* fall-through, for now */
+ case PIPE_TEX_FILTER_NEAREST:
+ spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest;
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ switch (spu.sampler[sampler->unit].mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear;
+ break;
+ case PIPE_TEX_FILTER_ANISO:
+ /* fall-through, for now */
+ case PIPE_TEX_FILTER_NEAREST:
+ spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest;
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ switch (spu.sampler[sampler->unit].min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ spu.sample_texture_2d[unit] = sample_texture_2d_lod;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit];
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
+}
+
+
+static void
+cmd_state_texture(const struct cell_command_texture *texture)
+{
+ const uint unit = texture->unit;
+ uint i;
+
+ D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit);
+
+ spu.texture[unit].max_level = 0;
+ spu.texture[unit].target = texture->target;
+
+ for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
+ uint width = texture->width[i];
+ uint height = texture->height[i];
+ uint depth = texture->depth[i];
+
+ D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i,
+ texture->start[i], texture->width[i], texture->height[i]);
+
+ spu.texture[unit].level[i].start = texture->start[i];
+ spu.texture[unit].level[i].width = width;
+ spu.texture[unit].level[i].height = height;
+ spu.texture[unit].level[i].depth = depth;
+
+ spu.texture[unit].level[i].tiles_per_row =
+ (width + TILE_SIZE - 1) / TILE_SIZE;
+
+ spu.texture[unit].level[i].bytes_per_image =
+ 4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth;
+
+ spu.texture[unit].level[i].max_s = spu_splats((int) width - 1);
+ spu.texture[unit].level[i].max_t = spu_splats((int) height - 1);
+
+ if (texture->start[i])
+ spu.texture[unit].max_level = i;
+ }
+
+ update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
+}
+
+
+static void
+cmd_state_vertex_info(const struct vertex_info *vinfo)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
+ ASSERT(vinfo->num_attribs >= 1);
+ ASSERT(vinfo->num_attribs <= 8);
+ memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
+}
+
+
+static void
+cmd_state_vs_array_info(const struct cell_array_info *vs_info)
+{
+ const unsigned attr = vs_info->attr;
+
+ ASSERT(attr < PIPE_MAX_ATTRIBS);
+ draw.vertex_fetch.src_ptr[attr] = vs_info->base;
+ draw.vertex_fetch.pitch[attr] = vs_info->pitch;
+ draw.vertex_fetch.size[attr] = vs_info->size;
+ draw.vertex_fetch.code_offset[attr] = vs_info->function_offset;
+ draw.vertex_fetch.dirty = 1;
+}
+
+
+static void
+cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
+{
+ mfc_get(attribute_fetch_code_buffer,
+ (unsigned int) code->base, /* src */
+ code->size,
+ TAG_BATCH_BUFFER,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask(1 << TAG_BATCH_BUFFER);
+
+ draw.vertex_fetch.code = attribute_fetch_code_buffer;
+}
+
+
+static void
+cmd_finish(void)
+{
+ D_PRINTF(CELL_DEBUG_CMD, "FINISH\n");
+ really_clear_tiles(0);
+ /* wait for all outstanding DMAs to finish */
+ mfc_write_tag_mask(~0);
+ mfc_read_tag_status_all();
+ /* send mbox message to PPU */
+ spu_write_out_mbox(CELL_CMD_FINISH);
+}
+
+
+/**
+ * Execute a batch of commands which was sent to us by the PPU.
+ * See the cell_emit_state.c code to see where the commands come from.
+ *
+ * The opcode param encodes the location of the buffer and its size.
+ */
+static void
+cmd_batch(uint opcode)
+{
+ const uint buf = (opcode >> 8) & 0xff;
+ uint size = (opcode >> 16);
+ uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB;
+ const unsigned usize = size / sizeof(buffer[0]);
+ uint pos;
+
+ D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n",
+ buf, size, spu.init.buffers[buf]);
+
+ ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
+
+ ASSERT_ALIGN16(spu.init.buffers[buf]);
+
+ size = ROUNDUP16(size);
+
+ ASSERT_ALIGN16(spu.init.buffers[buf]);
+
+ mfc_get(buffer, /* dest */
+ (unsigned int) spu.init.buffers[buf], /* src */
+ size,
+ TAG_BATCH_BUFFER,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask(1 << TAG_BATCH_BUFFER);
+
+ /* Tell PPU we're done copying the buffer to local store */
+ D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf);
+ release_buffer(buf);
+
+ /*
+ * Loop over commands in the batch buffer
+ */
+ for (pos = 0; pos < usize; /* no incr */) {
+ switch (buffer[pos]) {
+ /*
+ * rendering commands
+ */
+ case CELL_CMD_CLEAR_SURFACE:
+ {
+ struct cell_command_clear_surface *clr
+ = (struct cell_command_clear_surface *) &buffer[pos];
+ cmd_clear_surface(clr);
+ pos += sizeof(*clr) / 8;
+ }
+ break;
+ case CELL_CMD_RENDER:
+ {
+ struct cell_command_render *render
+ = (struct cell_command_render *) &buffer[pos];
+ uint pos_incr;
+ cmd_render(render, &pos_incr);
+ pos += pos_incr;
+ }
+ break;
+ /*
+ * state-update commands
+ */
+ case CELL_CMD_STATE_FRAMEBUFFER:
+ {
+ struct cell_command_framebuffer *fb
+ = (struct cell_command_framebuffer *) &buffer[pos];
+ cmd_state_framebuffer(fb);
+ pos += sizeof(*fb) / 8;
+ }
+ break;
+ case CELL_CMD_STATE_FRAGMENT_OPS:
+ {
+ struct cell_command_fragment_ops *fops
+ = (struct cell_command_fragment_ops *) &buffer[pos];
+ cmd_state_fragment_ops(fops);
+ /* This is a variant-sized command */
+ pos += (sizeof(*fops) + fops->total_code_size)/ 8;
+ }
+ break;
+ case CELL_CMD_STATE_FRAGMENT_PROGRAM:
+ {
+ struct cell_command_fragment_program *fp
+ = (struct cell_command_fragment_program *) &buffer[pos];
+ cmd_state_fragment_program(fp);
+ pos += sizeof(*fp) / 8;
+ }
+ break;
+ case CELL_CMD_STATE_FS_CONSTANTS:
+ pos = cmd_state_fs_constants(buffer, pos);
+ break;
+ case CELL_CMD_STATE_RASTERIZER:
+ {
+ struct cell_command_rasterizer *rast =
+ (struct cell_command_rasterizer *) &buffer[pos];
+ spu.rasterizer = rast->rasterizer;
+ pos += sizeof(*rast) / 8;
+ }
+ break;
+ case CELL_CMD_STATE_SAMPLER:
+ {
+ struct cell_command_sampler *sampler
+ = (struct cell_command_sampler *) &buffer[pos];
+ cmd_state_sampler(sampler);
+ pos += sizeof(*sampler) / 8;
+ }
+ break;
+ case CELL_CMD_STATE_TEXTURE:
+ {
+ struct cell_command_texture *texture
+ = (struct cell_command_texture *) &buffer[pos];
+ cmd_state_texture(texture);
+ pos += sizeof(*texture) / 8;
+ }
+ break;
+ case CELL_CMD_STATE_VERTEX_INFO:
+ cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
+ pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8);
+ break;
+ case CELL_CMD_STATE_VIEWPORT:
+ (void) memcpy(& draw.viewport, &buffer[pos+1],
+ sizeof(struct pipe_viewport_state));
+ pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8);
+ break;
+ case CELL_CMD_STATE_UNIFORMS:
+ draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1];
+ pos += 2;
+ break;
+ case CELL_CMD_STATE_VS_ARRAY_INFO:
+ cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
+ pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8);
+ break;
+ case CELL_CMD_STATE_BIND_VS:
+#if 0
+ spu_bind_vertex_shader(&draw,
+ (struct cell_shader_info *) &buffer[pos+1]);
+#endif
+ pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8);
+ break;
+ case CELL_CMD_STATE_ATTRIB_FETCH:
+ cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
+ &buffer[pos+1]);
+ pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
+ break;
+ /*
+ * misc commands
+ */
+ case CELL_CMD_FINISH:
+ cmd_finish();
+ pos += 1;
+ break;
+ case CELL_CMD_FENCE:
+ {
+ struct cell_command_fence *fence_cmd =
+ (struct cell_command_fence *) &buffer[pos];
+ cmd_fence(fence_cmd);
+ pos += sizeof(*fence_cmd) / 8;
+ }
+ break;
+ case CELL_CMD_RELEASE_VERTS:
+ {
+ struct cell_command_release_verts *release
+ = (struct cell_command_release_verts *) &buffer[pos];
+ cmd_release_verts(release);
+ pos += sizeof(*release) / 8;
+ }
+ break;
+ case CELL_CMD_FLUSH_BUFFER_RANGE: {
+ struct cell_buffer_range *br = (struct cell_buffer_range *)
+ &buffer[pos+1];
+
+ spu_dcache_mark_dirty((unsigned) br->base, br->size);
+ pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8);
+ break;
+ }
+ default:
+ printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]);
+ ASSERT(0);
+ break;
+ }
+ }
+
+ D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n");
+}
+
+
+#define PERF 0
+
+
+/**
+ * Main loop for SPEs: Get a command, execute it, repeat.
+ */
+void
+command_loop(void)
+{
+ int exitFlag = 0;
+ uint t0, t1;
+
+ D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n");
+
+ while (!exitFlag) {
+ unsigned opcode;
+
+ D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n");
+
+ if (PERF)
+ spu_write_decrementer(~0);
+
+ /* read/wait from mailbox */
+ opcode = (unsigned int) spu_read_in_mbox();
+ D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode);
+
+ if (PERF)
+ t0 = spu_read_decrementer();
+
+ switch (opcode & CELL_CMD_OPCODE_MASK) {
+ case CELL_CMD_EXIT:
+ D_PRINTF(CELL_DEBUG_CMD, "EXIT\n");
+ exitFlag = 1;
+ break;
+ case CELL_CMD_VS_EXECUTE:
+#if 0
+ spu_execute_vertex_shader(&draw, &cmd.vs);
+#endif
+ break;
+ case CELL_CMD_BATCH:
+ cmd_batch(opcode);
+ break;
+ default:
+ printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
+ }
+
+ if (PERF) {
+ t1 = spu_read_decrementer();
+ printf("wait mbox time: %gms batch time: %gms\n",
+ (~0u - t0) * spu.init.inv_timebase,
+ (t0 - t1) * spu.init.inv_timebase);
+ }
+ }
+
+ D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n");
+
+ if (spu.init.debug_flags & CELL_DEBUG_CACHE)
+ spu_dcache_report();
+}
+
+/* Initialize this module; we manage the fragment ops buffer here. */
+void
+spu_command_init(void)
+{
+ /* Install default/fallback fragment processing function.
+ * This will normally be overriden by a code-gen'd function
+ * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set.
+ */
+ spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
+
+ /* Set up the basic empty buffer for code-gen'ed fragment ops */
+ spu.fragment_ops_code = NULL;
+ spu.fragment_ops_code_size = 0;
+}
+
+void
+spu_command_close(void)
+{
+ /* Deallocate the code-gen buffer for fragment ops, and reset the
+ * fragment ops functions to their initial setting (just to leave
+ * things in a good state).
+ */
+ if (spu.fragment_ops_code != NULL) {
+ free(spu.fragment_ops_code);
+ }
+ spu_command_init();
+}
diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h
new file mode 100644
index 00000000000..83dcdade288
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_command.h
@@ -0,0 +1,35 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+extern void
+command_loop(void);
+
+extern void
+spu_command_init(void);
+
+extern void
+spu_command_close(void);
diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c
index 167404cdc54..a6d67634fd8 100644
--- a/src/gallium/drivers/cell/spu/spu_dcache.c
+++ b/src/gallium/drivers/cell/spu/spu_dcache.c
@@ -36,7 +36,9 @@
#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0)
#define CACHE_LOG2NNWAY 2
#define CACHE_LOG2NSETS 6
-/*#define CACHE_STATS 1*/
+#ifdef DEBUG
+#define CACHE_STATS 1
+#endif
#include <cache-api.h>
/* Yes folks, this is ugly.
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c
new file mode 100644
index 00000000000..ff3d609d258
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_funcs.c
@@ -0,0 +1,173 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * SPU functions accessed by shaders.
+ *
+ * Authors: Brian Paul
+ */
+
+
+#include <string.h>
+#include <libmisc.h>
+#include <math.h>
+#include <cos14_v.h>
+#include <sin14_v.h>
+#include <simdmath/exp2f4.h>
+#include <simdmath/log2f4.h>
+#include <simdmath/powf4.h>
+
+#include "cell/common.h"
+#include "spu_main.h"
+#include "spu_funcs.h"
+#include "spu_texture.h"
+
+
+/** For "return"-ing four vectors */
+struct vec_4x4
+{
+ vector float v[4];
+};
+
+
+static vector float
+spu_cos(vector float x)
+{
+ return _cos14_v(x);
+}
+
+static vector float
+spu_sin(vector float x)
+{
+ return _sin14_v(x);
+}
+
+static vector float
+spu_pow(vector float x, vector float y)
+{
+ return _powf4(x, y);
+}
+
+static vector float
+spu_exp2(vector float x)
+{
+ return _exp2f4(x);
+}
+
+static vector float
+spu_log2(vector float x)
+{
+ return _log2f4(x);
+}
+
+
+static struct vec_4x4
+spu_tex_2d(vector float s, vector float t, vector float r, vector float q,
+ unsigned unit)
+{
+ struct vec_4x4 colors;
+ (void) r;
+ (void) q;
+ spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v);
+ return colors;
+}
+
+static struct vec_4x4
+spu_tex_3d(vector float s, vector float t, vector float r, vector float q,
+ unsigned unit)
+{
+ struct vec_4x4 colors;
+ (void) r;
+ (void) q;
+ spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v);
+ return colors;
+}
+
+static struct vec_4x4
+spu_tex_cube(vector float s, vector float t, vector float r, vector float q,
+ unsigned unit)
+{
+ struct vec_4x4 colors;
+ (void) q;
+ sample_texture_cube(s, t, r, unit, colors.v);
+ return colors;
+}
+
+
+/**
+ * Add named function to list of "exported" functions that will be
+ * made available to the PPU-hosted code generator.
+ */
+static void
+export_func(struct cell_spu_function_info *spu_functions,
+ const char *name, void *addr)
+{
+ uint n = spu_functions->num;
+ ASSERT(strlen(name) < 16);
+ strcpy(spu_functions->names[n], name);
+ spu_functions->addrs[n] = (uint) addr;
+ spu_functions->num++;
+ ASSERT(spu_functions->num <= 16);
+}
+
+
+/**
+ * Return info about the SPU's function to the PPU / main memory.
+ * The PPU needs to know the address of some SPU-side functions so
+ * that we can generate shader code with function calls.
+ */
+void
+return_function_info(void)
+{
+ struct cell_spu_function_info funcs ALIGN16_ATTRIB;
+ int tag = TAG_MISC;
+
+ ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */
+
+ funcs.num = 0;
+ export_func(&funcs, "spu_cos", &spu_cos);
+ export_func(&funcs, "spu_sin", &spu_sin);
+ export_func(&funcs, "spu_pow", &spu_pow);
+ export_func(&funcs, "spu_exp2", &spu_exp2);
+ export_func(&funcs, "spu_log2", &spu_log2);
+ export_func(&funcs, "spu_tex_2d", &spu_tex_2d);
+ export_func(&funcs, "spu_tex_3d", &spu_tex_3d);
+ export_func(&funcs, "spu_tex_cube", &spu_tex_cube);
+
+ /* Send the function info back to the PPU / main memory */
+ mfc_put((void *) &funcs, /* src in local store */
+ (unsigned int) spu.init.spu_functions, /* dst in main memory */
+ sizeof(funcs), /* bytes */
+ tag,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask(1 << tag);
+}
+
+
+
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.h b/src/gallium/drivers/cell/spu/spu_funcs.h
new file mode 100644
index 00000000000..3adb6ae99f9
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_funcs.h
@@ -0,0 +1,35 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SPU_FUNCS_H
+#define SPU_FUNCS_H
+
+extern void
+return_function_info(void);
+
+#endif
+
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index 78260c4259c..97c86d194da 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -32,16 +32,15 @@
#include <stdio.h>
#include <libmisc.h>
+#include "pipe/p_defines.h"
+
+#include "spu_funcs.h"
+#include "spu_command.h"
#include "spu_main.h"
-#include "spu_render.h"
#include "spu_per_fragment_op.h"
#include "spu_texture.h"
-#include "spu_tile.h"
//#include "spu_test.h"
-#include "spu_vertex_shader.h"
-#include "spu_dcache.h"
#include "cell/common.h"
-#include "pipe/p_defines.h"
/*
@@ -50,600 +49,8 @@ helpful headers:
/opt/cell/sdk/usr/include/libmisc.h
*/
-boolean Debug = FALSE;
-
struct spu_global spu;
-struct spu_vs_context draw;
-
-
-/**
- * Buffers containing dynamically generated SPU code:
- */
-static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
- ALIGN16_ATTRIB;
-
-
-
-/**
- * Tell the PPU that this SPU has finished copying a buffer to
- * local store and that it may be reused by the PPU.
- * This is done by writting a 16-byte batch-buffer-status block back into
- * main memory (in cell_context->buffer_status[]).
- */
-static void
-release_buffer(uint buffer)
-{
- /* Evidently, using less than a 16-byte status doesn't work reliably */
- static const uint status[4] ALIGN16_ATTRIB
- = {CELL_BUFFER_STATUS_FREE, 0, 0, 0};
-
- const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
- uint *dst = spu.init.buffer_status + index;
-
- ASSERT(buffer < CELL_NUM_BUFFERS);
-
- mfc_put((void *) &status, /* src in local memory */
- (unsigned int) dst, /* dst in main memory */
- sizeof(status), /* size */
- TAG_MISC, /* tag is unimportant */
- 0, /* tid */
- 0 /* rid */);
-}
-
-
-/**
- * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled
- * tiles back to the main framebuffer.
- */
-static void
-really_clear_tiles(uint surfaceIndex)
-{
- const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
- uint i;
-
- if (surfaceIndex == 0) {
- clear_c_tile(&spu.ctile);
-
- for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
- uint tx = i % spu.fb.width_tiles;
- uint ty = i / spu.fb.width_tiles;
- if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) {
- put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
- }
- }
- }
- else {
- clear_z_tile(&spu.ztile);
-
- for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
- uint tx = i % spu.fb.width_tiles;
- uint ty = i / spu.fb.width_tiles;
- if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR)
- put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1);
- }
- }
-
-#if 0
- wait_on_mask(1 << TAG_SURFACE_CLEAR);
-#endif
-}
-
-
-static void
-cmd_clear_surface(const struct cell_command_clear_surface *clear)
-{
- if (Debug)
- printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id,
- clear->surface, clear->value);
-
- if (clear->surface == 0) {
- spu.fb.color_clear_value = clear->value;
- if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
- uint x = (spu.init.id << 4) | (spu.init.id << 12) |
- (spu.init.id << 20) | (spu.init.id << 28);
- spu.fb.color_clear_value ^= x;
- }
- }
- else {
- spu.fb.depth_clear_value = clear->value;
- }
-
-#define CLEAR_OPT 1
-#if CLEAR_OPT
-
- /* Simply set all tiles' status to CLEAR.
- * When we actually begin rendering into a tile, we'll initialize it to
- * the clear value. If any tiles go untouched during the frame,
- * really_clear_tiles() will set them to the clear value.
- */
- if (clear->surface == 0) {
- memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
- }
- else {
- memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
- }
-
-#else
-
- /*
- * This path clears the whole framebuffer to the clear color right now.
- */
-
- /*
- printf("SPU: %s num=%d w=%d h=%d\n",
- __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
- */
-
- /* init a single tile to the clear value */
- if (clear->surface == 0) {
- clear_c_tile(&spu.ctile);
- }
- else {
- clear_z_tile(&spu.ztile);
- }
-
- /* walk over my tiles, writing the 'clear' tile's data */
- {
- const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
- uint i;
- for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
- uint tx = i % spu.fb.width_tiles;
- uint ty = i / spu.fb.width_tiles;
- if (clear->surface == 0)
- put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
- else
- put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
- }
- }
-
- if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
- wait_on_mask(1 << TAG_SURFACE_CLEAR);
- }
-
-#endif /* CLEAR_OPT */
-
- if (Debug)
- printf("SPU %u: CLEAR SURF done\n", spu.init.id);
-}
-
-
-static void
-cmd_release_verts(const struct cell_command_release_verts *release)
-{
- if (Debug)
- printf("SPU %u: RELEASE VERTS %u\n",
- spu.init.id, release->vertex_buf);
- ASSERT(release->vertex_buf != ~0U);
- release_buffer(release->vertex_buf);
-}
-
-
-/**
- * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
- * This involves installing new fragment ops SPU code.
- * If this function is never called, we'll use a regular C fallback function
- * for fragment processing.
- */
-static void
-cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
-{
- if (Debug)
- printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id);
- /* Copy SPU code from batch buffer to spu buffer */
- memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
- /* Copy state info (for fallback case only) */
- memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
- memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
-
- /* Point function pointer at new code */
- spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code;
-
- spu.read_depth = spu.depth_stencil_alpha.depth.enabled;
- spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled;
-}
-
-
-static void
-cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
-{
- if (Debug)
- printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id);
- /* Copy SPU code from batch buffer to spu buffer */
- memcpy(spu.fragment_program_code, fp->code,
- SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
-#if 01
- /* Point function pointer at new code */
- spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
-#endif
-}
-
-
-static void
-cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
-{
- if (Debug)
- printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
- spu.init.id,
- cmd->width,
- cmd->height,
- cmd->color_start,
- cmd->color_format,
- cmd->depth_format);
-
- ASSERT_ALIGN16(cmd->color_start);
- ASSERT_ALIGN16(cmd->depth_start);
-
- spu.fb.color_start = cmd->color_start;
- spu.fb.depth_start = cmd->depth_start;
- spu.fb.color_format = cmd->color_format;
- spu.fb.depth_format = cmd->depth_format;
- spu.fb.width = cmd->width;
- spu.fb.height = cmd->height;
- spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
- spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;
-
- switch (spu.fb.depth_format) {
- case PIPE_FORMAT_Z32_UNORM:
- spu.fb.zsize = 4;
- spu.fb.zscale = (float) 0xffffffffu;
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_S8Z24_UNORM:
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_X8Z24_UNORM:
- spu.fb.zsize = 4;
- spu.fb.zscale = (float) 0x00ffffffu;
- break;
- case PIPE_FORMAT_Z16_UNORM:
- spu.fb.zsize = 2;
- spu.fb.zscale = (float) 0xffffu;
- break;
- default:
- spu.fb.zsize = 0;
- break;
- }
-}
-
-
-static void
-cmd_state_sampler(const struct cell_command_sampler *sampler)
-{
- if (Debug)
- printf("SPU %u: SAMPLER [%u]\n",
- spu.init.id, sampler->unit);
-
- spu.sampler[sampler->unit] = sampler->state;
- if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR)
- spu.sample_texture[sampler->unit] = sample_texture_bilinear;
- else
- spu.sample_texture[sampler->unit] = sample_texture_nearest;
-}
-
-
-static void
-cmd_state_texture(const struct cell_command_texture *texture)
-{
- const uint unit = texture->unit;
- const uint width = texture->width;
- const uint height = texture->height;
-
- if (Debug) {
- printf("SPU %u: TEXTURE [%u] at %p size %u x %u\n", spu.init.id,
- texture->unit, texture->start,
- texture->width, texture->height);
- }
-
- spu.texture[unit].start = texture->start;
- spu.texture[unit].width = width;
- spu.texture[unit].height = height;
-
- spu.texture[unit].tiles_per_row = width / TILE_SIZE;
-
- spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0};
- spu.texture[unit].tex_size_mask = (vector unsigned int)
- { width - 1, height - 1, 0, 0 };
- spu.texture[unit].tex_size_x_mask = spu_splats(width - 1);
- spu.texture[unit].tex_size_y_mask = spu_splats(height - 1);
-}
-
-
-static void
-cmd_state_vertex_info(const struct vertex_info *vinfo)
-{
- if (Debug) {
- printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id,
- vinfo->num_attribs);
- }
- ASSERT(vinfo->num_attribs >= 1);
- ASSERT(vinfo->num_attribs <= 8);
- memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
-}
-
-
-static void
-cmd_state_vs_array_info(const struct cell_array_info *vs_info)
-{
- const unsigned attr = vs_info->attr;
-
- ASSERT(attr < PIPE_MAX_ATTRIBS);
- draw.vertex_fetch.src_ptr[attr] = vs_info->base;
- draw.vertex_fetch.pitch[attr] = vs_info->pitch;
- draw.vertex_fetch.size[attr] = vs_info->size;
- draw.vertex_fetch.code_offset[attr] = vs_info->function_offset;
- draw.vertex_fetch.dirty = 1;
-}
-
-
-static void
-cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
-{
- mfc_get(attribute_fetch_code_buffer,
- (unsigned int) code->base, /* src */
- code->size,
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
-
- draw.vertex_fetch.code = attribute_fetch_code_buffer;
-}
-
-
-static void
-cmd_finish(void)
-{
- if (Debug)
- printf("SPU %u: FINISH\n", spu.init.id);
- really_clear_tiles(0);
- /* wait for all outstanding DMAs to finish */
- mfc_write_tag_mask(~0);
- mfc_read_tag_status_all();
- /* send mbox message to PPU */
- spu_write_out_mbox(CELL_CMD_FINISH);
-}
-
-
-/**
- * Execute a batch of commands which was sent to us by the PPU.
- * See the cell_emit_state.c code to see where the commands come from.
- *
- * The opcode param encodes the location of the buffer and its size.
- */
-static void
-cmd_batch(uint opcode)
-{
- const uint buf = (opcode >> 8) & 0xff;
- uint size = (opcode >> 16);
- uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB;
- const unsigned usize = size / sizeof(buffer[0]);
- uint pos;
-
- if (Debug)
- printf("SPU %u: BATCH buffer %u, len %u, from %p\n",
- spu.init.id, buf, size, spu.init.buffers[buf]);
-
- ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
-
- ASSERT_ALIGN16(spu.init.buffers[buf]);
-
- size = ROUNDUP16(size);
-
- ASSERT_ALIGN16(spu.init.buffers[buf]);
-
- mfc_get(buffer, /* dest */
- (unsigned int) spu.init.buffers[buf], /* src */
- size,
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
-
- /* Tell PPU we're done copying the buffer to local store */
- if (Debug)
- printf("SPU %u: release batch buf %u\n", spu.init.id, buf);
- release_buffer(buf);
-
- /*
- * Loop over commands in the batch buffer
- */
- for (pos = 0; pos < usize; /* no incr */) {
- switch (buffer[pos]) {
- /*
- * rendering commands
- */
- case CELL_CMD_CLEAR_SURFACE:
- {
- struct cell_command_clear_surface *clr
- = (struct cell_command_clear_surface *) &buffer[pos];
- cmd_clear_surface(clr);
- pos += sizeof(*clr) / 8;
- }
- break;
- case CELL_CMD_RENDER:
- {
- struct cell_command_render *render
- = (struct cell_command_render *) &buffer[pos];
- uint pos_incr;
- cmd_render(render, &pos_incr);
- pos += pos_incr;
- }
- break;
- /*
- * state-update commands
- */
- case CELL_CMD_STATE_FRAMEBUFFER:
- {
- struct cell_command_framebuffer *fb
- = (struct cell_command_framebuffer *) &buffer[pos];
- cmd_state_framebuffer(fb);
- pos += sizeof(*fb) / 8;
- }
- break;
- case CELL_CMD_STATE_FRAGMENT_OPS:
- {
- struct cell_command_fragment_ops *fops
- = (struct cell_command_fragment_ops *) &buffer[pos];
- cmd_state_fragment_ops(fops);
- pos += sizeof(*fops) / 8;
- }
- break;
- case CELL_CMD_STATE_FRAGMENT_PROGRAM:
- {
- struct cell_command_fragment_program *fp
- = (struct cell_command_fragment_program *) &buffer[pos];
- cmd_state_fragment_program(fp);
- pos += sizeof(*fp) / 8;
- }
- break;
- case CELL_CMD_STATE_SAMPLER:
- {
- struct cell_command_sampler *sampler
- = (struct cell_command_sampler *) &buffer[pos];
- cmd_state_sampler(sampler);
- pos += sizeof(*sampler) / 8;
- }
- break;
- case CELL_CMD_STATE_TEXTURE:
- {
- struct cell_command_texture *texture
- = (struct cell_command_texture *) &buffer[pos];
- cmd_state_texture(texture);
- pos += sizeof(*texture) / 8;
- }
- break;
- case CELL_CMD_STATE_VERTEX_INFO:
- cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
- pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8);
- break;
- case CELL_CMD_STATE_VIEWPORT:
- (void) memcpy(& draw.viewport, &buffer[pos+1],
- sizeof(struct pipe_viewport_state));
- pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8);
- break;
- case CELL_CMD_STATE_UNIFORMS:
- draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1];
- pos += 2;
- break;
- case CELL_CMD_STATE_VS_ARRAY_INFO:
- cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
- pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8);
- break;
- case CELL_CMD_STATE_BIND_VS:
-#if 0
- spu_bind_vertex_shader(&draw,
- (struct cell_shader_info *) &buffer[pos+1]);
-#endif
- pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8);
- break;
- case CELL_CMD_STATE_ATTRIB_FETCH:
- cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
- &buffer[pos+1]);
- pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
- break;
- /*
- * misc commands
- */
- case CELL_CMD_FINISH:
- cmd_finish();
- pos += 1;
- break;
- case CELL_CMD_RELEASE_VERTS:
- {
- struct cell_command_release_verts *release
- = (struct cell_command_release_verts *) &buffer[pos];
- cmd_release_verts(release);
- pos += sizeof(*release) / 8;
- }
- break;
- case CELL_CMD_FLUSH_BUFFER_RANGE: {
- struct cell_buffer_range *br = (struct cell_buffer_range *)
- &buffer[pos+1];
-
- spu_dcache_mark_dirty((unsigned) br->base, br->size);
- pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8);
- break;
- }
- default:
- printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]);
- ASSERT(0);
- break;
- }
- }
-
- if (Debug)
- printf("SPU %u: BATCH complete\n", spu.init.id);
-}
-
-
-/**
- * Temporary/simple main loop for SPEs: Get a command, execute it, repeat.
- */
-static void
-main_loop(void)
-{
- struct cell_command cmd;
- int exitFlag = 0;
-
- if (Debug)
- printf("SPU %u: Enter main loop\n", spu.init.id);
-
- ASSERT((sizeof(struct cell_command) & 0xf) == 0);
- ASSERT_ALIGN16(&cmd);
-
- while (!exitFlag) {
- unsigned opcode;
- int tag = 0;
-
- if (Debug)
- printf("SPU %u: Wait for cmd...\n", spu.init.id);
-
- /* read/wait from mailbox */
- opcode = (unsigned int) spu_read_in_mbox();
-
- if (Debug)
- printf("SPU %u: got cmd 0x%x\n", spu.init.id, opcode);
-
- /* command payload */
- mfc_get(&cmd, /* dest */
- (unsigned int) spu.init.cmd, /* src */
- sizeof(struct cell_command), /* bytes */
- tag,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask( 1 << tag );
-
- /*
- * NOTE: most commands should be contained in a batch buffer
- */
-
- switch (opcode & CELL_CMD_OPCODE_MASK) {
- case CELL_CMD_EXIT:
- if (Debug)
- printf("SPU %u: EXIT\n", spu.init.id);
- exitFlag = 1;
- break;
- case CELL_CMD_VS_EXECUTE:
-#if 0
- spu_execute_vertex_shader(&draw, &cmd.vs);
-#endif
- break;
- case CELL_CMD_BATCH:
- cmd_batch(opcode);
- break;
- default:
- printf("Bad opcode!\n");
- }
-
- }
-
- if (Debug)
- printf("SPU %u: Exit main loop\n", spu.init.id);
-
- spu_dcache_report();
-}
-
-
static void
one_time_init(void)
@@ -651,15 +58,8 @@ one_time_init(void)
memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
invalidate_tex_cache();
-
- /* Install default/fallback fragment processing function.
- * This will normally be overriden by a code-gen'd function.
- */
- spu.fragment_ops = spu_fallback_fragment_ops;
}
-
-
/* In some versions of the SDK the SPE main takes 'unsigned long' as a
* parameter. In others it takes 'unsigned long long'. Use a define to
* select between the two.
@@ -682,12 +82,16 @@ main(main_param_t speid, main_param_t argp)
ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4);
ASSERT(sizeof(struct cell_command_render) % 8 == 0);
+ ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0);
+ ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0);
one_time_init();
+ spu_command_init();
- if (Debug)
- printf("SPU: main() speid=%lu\n", (unsigned long) speid);
+ D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid);
+ D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n");
+ /* get initialization data */
mfc_get(&spu.init, /* dest */
(unsigned int) argp, /* src */
sizeof(struct cell_init_info), /* bytes */
@@ -696,12 +100,18 @@ main(main_param_t speid, main_param_t argp)
0 /* rid */);
wait_on_mask( 1 << tag );
+ if (spu.init.id == 0) {
+ return_function_info();
+ }
+
#if 0
if (spu.init.id==0)
- spu_test_misc();
+ spu_test_misc(spu.init.id);
#endif
- main_loop();
+ command_loop();
+
+ spu_command_close();
return 0;
}
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 2c7b6258402..33767e7c51d 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -36,9 +36,18 @@
#include "pipe/p_state.h"
-
-#define MAX_WIDTH 1024
-#define MAX_HEIGHT 1024
+#if DEBUG
+/* These debug macros use the unusual construction ", ##__VA_ARGS__"
+ * which expands to the expected comma + args if variadic arguments
+ * are supplied, but swallows the comma if there are no variadic
+ * arguments (which avoids syntax errors that would otherwise occur).
+ */
+#define D_PRINTF(flag, format,...) \
+ if (spu.init.debug_flags & (flag)) \
+ printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__)
+#else
+#define D_PRINTF(...)
+#endif
/**
@@ -61,8 +70,11 @@ typedef union {
/** Function for sampling textures */
-typedef vector float (*spu_sample_texture_func)(uint unit,
- vector float texcoord);
+typedef void (*spu_sample_texture_2d_func)(vector float s,
+ vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
+
/** Function for performing per-fragment ops */
typedef void (*spu_fragment_ops_func)(uint x, uint y,
@@ -76,9 +88,9 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y,
vector unsigned int mask);
/** Function for running fragment program */
-typedef void (*spu_fragment_program_func)(vector float *inputs,
- vector float *outputs,
- vector float *constants);
+typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs,
+ vector float *outputs,
+ vector float *constants);
struct spu_framebuffer
@@ -98,15 +110,27 @@ struct spu_framebuffer
} ALIGN16_ATTRIB;
-struct spu_texture
+/** per-texture level info */
+struct spu_texture_level
{
void *start;
- ushort width, height;
+ ushort width, height, depth;
ushort tiles_per_row;
- vector float tex_size;
- vector unsigned int tex_size_mask; /**< == int(size - 1) */
- vector unsigned int tex_size_x_mask; /**< == int(size - 1) */
- vector unsigned int tex_size_y_mask; /**< == int(size - 1) */
+ uint bytes_per_image;
+ /** texcoord scale factors */
+ vector float scale_s, scale_t, scale_r;
+ /** texcoord masks (if REPEAT then size-1, else ~0) */
+ vector signed int mask_s, mask_t, mask_r;
+ /** texcoord clamp limits */
+ vector signed int max_s, max_t, max_r;
+} ALIGN16_ATTRIB;
+
+
+struct spu_texture
+{
+ struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS];
+ uint max_level;
+ uint target; /**< PIPE_TEXTURE_x */
} ALIGN16_ATTRIB;
@@ -124,7 +148,9 @@ struct spu_global
struct spu_framebuffer fb;
struct pipe_depth_stencil_alpha_state depth_stencil_alpha;
struct pipe_blend_state blend;
+ struct pipe_blend_color blend_color;
struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
+ struct pipe_rasterizer_state rasterizer;
struct spu_texture texture[PIPE_MAX_SAMPLERS];
struct vertex_info vertex_info;
@@ -133,39 +159,38 @@ struct spu_global
tile_t ztile ALIGN16_ATTRIB;
/** Read depth/stencil tiles? */
- boolean read_depth;
- boolean read_stencil;
+ boolean read_depth_stencil;
/** Current tiles' status */
ubyte cur_ctile_status, cur_ztile_status;
/** Status of all tiles in framebuffer */
- ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
- ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
+ ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
+ ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
- /** Current fragment ops machine code */
- uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS];
- /** Current fragment ops function */
- spu_fragment_ops_func fragment_ops;
+ /** Current fragment ops machine code, at 8-byte boundary */
+ uint *fragment_ops_code;
+ uint fragment_ops_code_size;
+ /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */
+ spu_fragment_ops_func fragment_ops[2];
- /** Current fragment program machine code */
- uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
+ /** Current fragment program machine code, at 8-byte boundary */
+ uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB;
/** Current fragment ops function */
spu_fragment_program_func fragment_program;
/** Current texture sampler function */
- spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS];
+ spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS];
+ spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS];
+ spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS];
- /** Fragment program constants (XXX preliminary/used) */
-#define MAX_CONSTANTS 32
- vector float constants[MAX_CONSTANTS];
+ /** Fragment program constants */
+ vector float constants[4 * CELL_MAX_CONSTANTS];
} ALIGN16_ATTRIB;
extern struct spu_global spu;
-extern boolean Debug;
-
@@ -184,7 +209,7 @@ extern boolean Debug;
#define TAG_DCACHE1 21
#define TAG_DCACHE2 22
#define TAG_DCACHE3 23
-
+#define TAG_FENCE 24
static INLINE void
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
index 03dd547845b..683664e8a4e 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
@@ -40,6 +40,24 @@
#define LINEAR_QUAD_LAYOUT 1
+static INLINE vector float
+spu_min(vector float a, vector float b)
+{
+ vector unsigned int m;
+ m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */
+ return spu_sel(a, b, m);
+}
+
+
+static INLINE vector float
+spu_max(vector float a, vector float b)
+{
+ vector unsigned int m;
+ m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */
+ return spu_sel(b, a, m);
+}
+
+
/**
* Called by rasterizer for each quad after the shader has run. Do
* all the per-fragment operations including alpha test, z test,
@@ -60,9 +78,12 @@ spu_fallback_fragment_ops(uint x, uint y,
vector unsigned int mask)
{
vector float frag_aos[4];
- unsigned int c0, c1, c2, c3;
+ unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */
+ unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */
- /* do alpha test */
+ /*
+ * Do alpha test
+ */
if (spu.depth_stencil_alpha.alpha.enabled) {
vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref);
vector unsigned int amask;
@@ -102,7 +123,10 @@ spu_fallback_fragment_ops(uint x, uint y,
mask = spu_and(mask, amask);
}
- /* Z and/or stencil testing... */
+
+ /*
+ * Z and/or stencil testing...
+ */
if (spu.depth_stencil_alpha.depth.enabled ||
spu.depth_stencil_alpha.stencil[0].enabled) {
@@ -178,6 +202,32 @@ spu_fallback_fragment_ops(uint x, uint y,
}
}
+
+ /*
+ * If we'll need the current framebuffer/tile colors for blending
+ * or logicop or colormask, fetch them now.
+ */
+ if (spu.blend.blend_enable ||
+ spu.blend.logicop_enable ||
+ spu.blend.colormask != 0xf) {
+
+#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
+ fbc0 = colorTile->ui[y][x*2+0];
+ fbc1 = colorTile->ui[y][x*2+1];
+ fbc2 = colorTile->ui[y][x*2+2];
+ fbc3 = colorTile->ui[y][x*2+3];
+#else
+ fbc0 = colorTile->ui[y+0][x+0];
+ fbc1 = colorTile->ui[y+0][x+1];
+ fbc2 = colorTile->ui[y+1][x+0];
+ fbc3 = colorTile->ui[y+1][x+1];
+#endif
+ }
+
+
+ /*
+ * Do blending
+ */
if (spu.blend.blend_enable) {
/* blending terms, misc regs */
vector float term1r, term1g, term1b, term1a;
@@ -186,43 +236,30 @@ spu_fallback_fragment_ops(uint x, uint y,
vector float fbRGBA[4]; /* current framebuffer colors */
- /* get colors from framebuffer/tile */
+ /* convert framebuffer colors from packed int to vector float */
{
- vector float fc[4];
- uint c0, c1, c2, c3;
-
-#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
- c0 = colorTile->ui[y][x*2+0];
- c1 = colorTile->ui[y][x*2+1];
- c2 = colorTile->ui[y][x*2+2];
- c3 = colorTile->ui[y][x*2+3];
-#else
- c0 = colorTile->ui[y+0][x+0];
- c1 = colorTile->ui[y+0][x+1];
- c2 = colorTile->ui[y+1][x+0];
- c3 = colorTile->ui[y+1][x+1];
-#endif
+ vector float temp[4]; /* float colors in AOS form */
switch (spu.fb.color_format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
- fc[0] = spu_unpack_B8G8R8A8(c0);
- fc[1] = spu_unpack_B8G8R8A8(c1);
- fc[2] = spu_unpack_B8G8R8A8(c2);
- fc[3] = spu_unpack_B8G8R8A8(c3);
+ temp[0] = spu_unpack_B8G8R8A8(fbc0);
+ temp[1] = spu_unpack_B8G8R8A8(fbc1);
+ temp[2] = spu_unpack_B8G8R8A8(fbc2);
+ temp[3] = spu_unpack_B8G8R8A8(fbc3);
break;
case PIPE_FORMAT_A8R8G8B8_UNORM:
- fc[0] = spu_unpack_A8R8G8B8(c0);
- fc[1] = spu_unpack_A8R8G8B8(c1);
- fc[2] = spu_unpack_A8R8G8B8(c2);
- fc[3] = spu_unpack_A8R8G8B8(c3);
+ temp[0] = spu_unpack_A8R8G8B8(fbc0);
+ temp[1] = spu_unpack_A8R8G8B8(fbc1);
+ temp[2] = spu_unpack_A8R8G8B8(fbc2);
+ temp[3] = spu_unpack_A8R8G8B8(fbc3);
break;
default:
ASSERT(0);
}
- _transpose_matrix4x4(fbRGBA, fc);
+ _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */
}
/*
- * Compute Src RGB terms
+ * Compute Src RGB terms (fragment color * factor)
*/
switch (spu.blend.rgb_src_factor) {
case PIPE_BLENDFACTOR_ONE:
@@ -245,13 +282,33 @@ spu_fallback_fragment_ops(uint x, uint y,
term1g = spu_mul(fragG, fragA);
term1b = spu_mul(fragB, fragA);
break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ term1r = spu_mul(fragR, fbRGBA[0]);
+ term1g = spu_mul(fragG, fbRGBA[1]);
+ term1b = spu_mul(fragB, fbRGBA[1]);
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term1r = spu_mul(fragR, fbRGBA[3]);
+ term1g = spu_mul(fragG, fbRGBA[3]);
+ term1b = spu_mul(fragB, fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0]));
+ term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1]));
+ term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2]));
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
+ term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3]));
+ term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3]));
+ break;
/* XXX more cases */
default:
ASSERT(0);
}
/*
- * Compute Src Alpha term
+ * Compute Src Alpha term (fragment alpha * factor)
*/
switch (spu.blend.alpha_src_factor) {
case PIPE_BLENDFACTOR_ONE:
@@ -263,19 +320,29 @@ spu_fallback_fragment_ops(uint x, uint y,
case PIPE_BLENDFACTOR_SRC_ALPHA:
term1a = spu_mul(fragA, fragA);
break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term1a = spu_mul(fragA, fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term1a = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
+ break;
/* XXX more cases */
default:
ASSERT(0);
}
/*
- * Compute Dest RGB terms
+ * Compute Dest RGB terms (framebuffer color * factor)
*/
switch (spu.blend.rgb_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
- term2r = fragR;
- term2g = fragG;
- term2b = fragB;
+ term2r = fbRGBA[0];
+ term2g = fbRGBA[1];
+ term2b = fbRGBA[2];
break;
case PIPE_BLENDFACTOR_ZERO:
term2r =
@@ -299,17 +366,37 @@ spu_fallback_fragment_ops(uint x, uint y,
term2g = spu_mul(fbRGBA[1], tmp);
term2b = spu_mul(fbRGBA[2], tmp);
break;
- /* XXX more cases */
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ term2r = spu_mul(fbRGBA[0], fbRGBA[0]);
+ term2g = spu_mul(fbRGBA[1], fbRGBA[1]);
+ term2b = spu_mul(fbRGBA[2], fbRGBA[2]);
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term2r = spu_mul(fbRGBA[0], fbRGBA[3]);
+ term2g = spu_mul(fbRGBA[1], fbRGBA[3]);
+ term2b = spu_mul(fbRGBA[2], fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0]));
+ term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1]));
+ term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2]));
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3]));
+ term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3]));
+ term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3]));
+ break;
+ /* XXX more cases */
default:
ASSERT(0);
}
/*
- * Compute Dest Alpha term
+ * Compute Dest Alpha term (framebuffer alpha * factor)
*/
switch (spu.blend.alpha_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
- term2a = fragA;
+ term2a = fbRGBA[3];
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
term2a = spu_splats(0.0f);
@@ -322,6 +409,16 @@ spu_fallback_fragment_ops(uint x, uint y,
tmp = spu_sub(one, fragA);
term2a = spu_mul(fbRGBA[3], tmp);
break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term2a = spu_mul(fbRGBA[3], fbRGBA[3]);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* fall-through */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3]));
+ break;
/* XXX more cases */
default:
ASSERT(0);
@@ -341,7 +438,21 @@ spu_fallback_fragment_ops(uint x, uint y,
fragG = spu_sub(term1g, term2g);
fragB = spu_sub(term1b, term2b);
break;
- /* XXX more cases */
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ fragR = spu_sub(term2r, term1r);
+ fragG = spu_sub(term2g, term1g);
+ fragB = spu_sub(term2b, term1b);
+ break;
+ case PIPE_BLEND_MIN:
+ fragR = spu_min(term1r, term2r);
+ fragG = spu_min(term1g, term2g);
+ fragB = spu_min(term1b, term2b);
+ break;
+ case PIPE_BLEND_MAX:
+ fragR = spu_max(term1r, term2r);
+ fragG = spu_max(term1g, term2g);
+ fragB = spu_max(term1b, term2b);
+ break;
default:
ASSERT(0);
}
@@ -356,7 +467,15 @@ spu_fallback_fragment_ops(uint x, uint y,
case PIPE_BLEND_SUBTRACT:
fragA = spu_sub(term1a, term2a);
break;
- /* XXX more cases */
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ fragA = spu_sub(term2a, term1a);
+ break;
+ case PIPE_BLEND_MIN:
+ fragA = spu_min(term1a, term2a);
+ break;
+ case PIPE_BLEND_MAX:
+ fragA = spu_max(term1a, term2a);
+ break;
default:
ASSERT(0);
}
@@ -384,21 +503,20 @@ spu_fallback_fragment_ops(uint x, uint y,
#endif
/*
- * Pack float colors into 32-bit RGBA words.
+ * Pack fragment float colors into 32-bit RGBA words.
*/
switch (spu.fb.color_format) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
- c0 = spu_pack_A8R8G8B8(frag_aos[0]);
- c1 = spu_pack_A8R8G8B8(frag_aos[1]);
- c2 = spu_pack_A8R8G8B8(frag_aos[2]);
- c3 = spu_pack_A8R8G8B8(frag_aos[3]);
+ fragc0 = spu_pack_A8R8G8B8(frag_aos[0]);
+ fragc1 = spu_pack_A8R8G8B8(frag_aos[1]);
+ fragc2 = spu_pack_A8R8G8B8(frag_aos[2]);
+ fragc3 = spu_pack_A8R8G8B8(frag_aos[3]);
break;
-
case PIPE_FORMAT_B8G8R8A8_UNORM:
- c0 = spu_pack_B8G8R8A8(frag_aos[0]);
- c1 = spu_pack_B8G8R8A8(frag_aos[1]);
- c2 = spu_pack_B8G8R8A8(frag_aos[2]);
- c3 = spu_pack_B8G8R8A8(frag_aos[3]);
+ fragc0 = spu_pack_B8G8R8A8(frag_aos[0]);
+ fragc1 = spu_pack_B8G8R8A8(frag_aos[1]);
+ fragc2 = spu_pack_B8G8R8A8(frag_aos[2]);
+ fragc3 = spu_pack_B8G8R8A8(frag_aos[3]);
break;
default:
fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
@@ -407,20 +525,57 @@ spu_fallback_fragment_ops(uint x, uint y,
/*
- * Color masking
+ * Do color masking
*/
if (spu.blend.colormask != 0xf) {
- /* XXX to do */
- /* apply color mask to 32-bit packed colors */
+ uint cmask = 0x0; /* each byte corresponds to a color channel */
+
+ /* Form bitmask depending on color buffer format and colormask bits */
+ switch (spu.fb.color_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ if (spu.blend.colormask & PIPE_MASK_R)
+ cmask |= 0x00ff0000; /* red */
+ if (spu.blend.colormask & PIPE_MASK_G)
+ cmask |= 0x0000ff00; /* green */
+ if (spu.blend.colormask & PIPE_MASK_B)
+ cmask |= 0x000000ff; /* blue */
+ if (spu.blend.colormask & PIPE_MASK_A)
+ cmask |= 0xff000000; /* alpha */
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ if (spu.blend.colormask & PIPE_MASK_R)
+ cmask |= 0x0000ff00; /* red */
+ if (spu.blend.colormask & PIPE_MASK_G)
+ cmask |= 0x00ff0000; /* green */
+ if (spu.blend.colormask & PIPE_MASK_B)
+ cmask |= 0xff000000; /* blue */
+ if (spu.blend.colormask & PIPE_MASK_A)
+ cmask |= 0x000000ff; /* alpha */
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Apply color mask to the 32-bit packed colors.
+ * if (cmask[i])
+ * frag color[i] = frag color[i];
+ * else
+ * frag color[i] = framebuffer color[i];
+ */
+ fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask);
+ fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask);
+ fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask);
+ fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask);
}
/*
- * Logic Ops
+ * Do logic ops
*/
if (spu.blend.logicop_enable) {
/* XXX to do */
- /* apply logicop to 32-bit packed colors */
+ /* apply logicop to 32-bit packed colors (fragcx and fbcx) */
}
@@ -431,45 +586,46 @@ spu_fallback_fragment_ops(uint x, uint y,
spu.cur_ctile_status = TILE_STATUS_DIRTY;
}
else {
+ /* write no fragments */
return;
}
/*
- * Write new quad colors to the framebuffer/tile.
+ * Write new fragment/quad colors to the framebuffer/tile.
* Only write pixels where the corresponding mask word is set.
*/
#if LINEAR_QUAD_LAYOUT
/*
* Quad layout:
* +--+--+--+--+
- * |p0|p1|p2|p3|
+ * |p0|p1|p2|p3|...
* +--+--+--+--+
*/
if (spu_extract(mask, 0))
- colorTile->ui[y][x*2] = c0;
+ colorTile->ui[y][x*2] = fragc0;
if (spu_extract(mask, 1))
- colorTile->ui[y][x*2+1] = c1;
+ colorTile->ui[y][x*2+1] = fragc1;
if (spu_extract(mask, 2))
- colorTile->ui[y][x*2+2] = c2;
+ colorTile->ui[y][x*2+2] = fragc2;
if (spu_extract(mask, 3))
- colorTile->ui[y][x*2+3] = c3;
+ colorTile->ui[y][x*2+3] = fragc3;
#else
/*
* Quad layout:
* +--+--+
- * |p0|p1|
+ * |p0|p1|...
* +--+--+
- * |p2|p3|
+ * |p2|p3|...
* +--+--+
*/
if (spu_extract(mask, 0))
- colorTile->ui[y+0][x+0] = c0;
+ colorTile->ui[y+0][x+0] = fragc0;
if (spu_extract(mask, 1))
- colorTile->ui[y+0][x+1] = c1;
+ colorTile->ui[y+0][x+1] = fragc1;
if (spu_extract(mask, 2))
- colorTile->ui[y+1][x+0] = c2;
+ colorTile->ui[y+1][x+0] = fragc2;
if (spu_extract(mask, 3))
- colorTile->ui[y+1][x+1] = c3;
+ colorTile->ui[y+1][x+1] = fragc3;
#endif
}
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c
index 305dc988810..7c225e2f27c 100644
--- a/src/gallium/drivers/cell/spu/spu_render.c
+++ b/src/gallium/drivers/cell/spu/spu_render.c
@@ -98,7 +98,7 @@ my_tile(uint tx, uint ty)
static INLINE void
get_cz_tiles(uint tx, uint ty)
{
- if (spu.read_depth) {
+ if (spu.read_depth_stencil) {
if (spu.cur_ztile_status != TILE_STATUS_CLEAR) {
//printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty);
get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1);
@@ -153,7 +153,7 @@ static INLINE void
wait_put_cz_tiles(void)
{
wait_on_mask(1 << TAG_WRITE_TILE_COLOR);
- if (spu.read_depth) {
+ if (spu.read_depth_stencil) {
wait_on_mask(1 << TAG_WRITE_TILE_Z);
}
}
@@ -175,22 +175,14 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
const ubyte *vertices;
const ushort *indexes;
uint i, j;
+ uint num_tiles;
-
- if (Debug) {
- printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u "
- "inline_vert=%u\n",
- spu.init.id,
- render->prim_type,
- render->num_verts,
- render->num_indexes,
- render->inline_verts);
-
- /*
- printf(" bound: %g, %g .. %g, %g\n",
- render->xmin, render->ymin, render->xmax, render->ymax);
- */
- }
+ D_PRINTF(CELL_DEBUG_CMD,
+ "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n",
+ render->prim_type,
+ render->num_verts,
+ render->num_indexes,
+ render->inline_verts);
ASSERT(sizeof(*render) % 4 == 0);
ASSERT(total_vertex_bytes % 16 == 0);
@@ -251,6 +243,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */
+ num_tiles = 0;
+
/**
** loop over tiles, rendering tris
**/
@@ -264,6 +258,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
if (!my_tile(tx, ty))
continue;
+ num_tiles++;
+
spu.cur_ctile_status = spu.ctile_status[ty][tx];
spu.cur_ztile_status = spu.ztile_status[ty][tx];
@@ -293,9 +289,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
spu.ztile_status[ty][tx] = spu.cur_ztile_status;
}
- if (Debug)
- printf("SPU %u: RENDER done\n",
- spu.init.id);
+ D_PRINTF(CELL_DEBUG_CMD,
+ "RENDER done (%u tiles hit)\n",
+ num_tiles);
}
-
-
diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c
index 117b8a36f80..69784c89788 100644
--- a/src/gallium/drivers/cell/spu/spu_texture.c
+++ b/src/gallium/drivers/cell/spu/spu_texture.c
@@ -26,6 +26,8 @@
**************************************************************************/
+#include <math.h>
+
#include "pipe/p_compiler.h"
#include "spu_main.h"
#include "spu_texture.h"
@@ -40,37 +42,19 @@
void
invalidate_tex_cache(void)
{
- uint unit = 0;
- uint bytes = 4 * spu.texture[unit].width
- * spu.texture[unit].height;
-
- spu_dcache_mark_dirty((unsigned) spu.texture[unit].start, bytes);
-}
+ uint lvl;
+ for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) {
+ uint unit = 0;
+ uint bytes = 4 * spu.texture[unit].level[lvl].width
+ * spu.texture[unit].level[lvl].height;
+ if (spu.texture[unit].target == PIPE_TEXTURE_CUBE)
+ bytes *= 6;
+ else if (spu.texture[unit].target == PIPE_TEXTURE_3D)
+ bytes *= spu.texture[unit].level[lvl].depth;
-/**
- * XXX look into getting texels for all four pixels in a quad at once.
- */
-static uint
-get_texel(uint unit, vec_uint4 coordinate)
-{
- /*
- * XXX we could do the "/ TILE_SIZE" and "% TILE_SIZE" operations as
- * SIMD since X and Y are already in a SIMD register.
- */
- const unsigned texture_ea = (uintptr_t) spu.texture[unit].start;
- ushort x = spu_extract(coordinate, 0);
- ushort y = spu_extract(coordinate, 1);
- unsigned tile_offset = sizeof(tile_t)
- * ((y / TILE_SIZE * spu.texture[unit].tiles_per_row) + (x / TILE_SIZE));
- ushort texel_offset = (ushort) 4
- * (ushort) (((ushort) (y % TILE_SIZE) * (ushort) TILE_SIZE) + (x % TILE_SIZE));
- vec_uint4 tmp;
-
- spu_dcache_fetch_unaligned((qword *) & tmp,
- texture_ea + tile_offset + texel_offset,
- 4);
- return spu_extract(tmp, 0);
+ spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes);
+ }
}
@@ -88,15 +72,17 @@ get_texel(uint unit, vec_uint4 coordinate)
* a time.
*/
static void
-get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
+get_four_texels(const struct spu_texture_level *tlevel, uint face,
+ vec_int4 x, vec_int4 y,
+ vec_uint4 *texels)
{
- const unsigned texture_ea = (uintptr_t) spu.texture[unit].start;
- vec_uint4 tile_x = spu_rlmask(x, -5);
- vec_uint4 tile_y = spu_rlmask(y, -5);
- const qword offset_x = si_andi((qword) x, 0x1f);
- const qword offset_y = si_andi((qword) y, 0x1f);
+ unsigned texture_ea = (uintptr_t) tlevel->start;
+ const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */
+ const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */
+ const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */
+ const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */
- const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row);
+ const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row);
const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
@@ -107,6 +93,8 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset);
+ texture_ea = texture_ea + face * tlevel->bytes_per_image;
+
spu_dcache_fetch_unaligned((qword *) & texels[0],
texture_ea + spu_extract(offset, 0), 4);
spu_dcache_fetch_unaligned((qword *) & texels[1],
@@ -118,83 +106,536 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
}
+/** clamp vec to [0, max] */
+static INLINE vector signed int
+spu_clamp(vector signed int vec, vector signed int max)
+{
+ static const vector signed int zero = {0,0,0,0};
+ vector unsigned int c;
+ c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */
+ vec = spu_sel(zero, vec, c);
+ c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */
+ vec = spu_sel(vec, max, c);
+ return vec;
+}
+
+
+
/**
- * Get texture sample at texcoord.
+ * Do nearest texture sampling for four pixels.
+ * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
*/
-vector float
-sample_texture_nearest(uint unit, vector float texcoord)
+void
+sample_texture_2d_nearest(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
{
- vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size);
- vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */
- itc = spu_and(itc, spu.texture[unit].tex_size_mask); /* mask (GL_REPEAT) */
- uint texel = get_texel(unit, itc);
- return spu_unpack_A8R8G8B8(texel);
+ const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+ vector float ss = spu_mul(s, tlevel->scale_s);
+ vector float tt = spu_mul(t, tlevel->scale_t);
+ vector signed int is = spu_convts(ss, 0);
+ vector signed int it = spu_convts(tt, 0);
+ vec_uint4 texels[4];
+
+ /* PIPE_TEX_WRAP_REPEAT */
+ is = spu_and(is, tlevel->mask_s);
+ it = spu_and(it, tlevel->mask_t);
+
+ /* PIPE_TEX_WRAP_CLAMP */
+ is = spu_clamp(is, tlevel->max_s);
+ it = spu_clamp(it, tlevel->max_t);
+
+ get_four_texels(tlevel, face, is, it, texels);
+
+ /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */
+ spu_unpack_A8R8G8B8_transpose4(texels, colors);
}
-vector float
-sample_texture_bilinear(uint unit, vector float texcoord)
+/**
+ * Do bilinear texture sampling for four pixels.
+ * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
+ */
+void
+sample_texture_2d_bilinear(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
{
- static const vec_uint4 offset_x = {0, 0, 1, 1};
- static const vec_uint4 offset_y = {0, 1, 0, 1};
+ const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+ static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
- vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size);
- tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */
+ vector float ss = spu_madd(s, tlevel->scale_s, half);
+ vector float tt = spu_madd(t, tlevel->scale_t, half);
- /* integer texcoords S,T: */
- vec_uint4 itc = spu_convtu(tc, 0); /* convert to int */
+ vector signed int is0 = spu_convts(ss, 0);
+ vector signed int it0 = spu_convts(tt, 0);
- vec_uint4 texels[4];
-
- /* setup texcoords for quad:
- * +-----+-----+
- * |x0,y0|x1,y1|
- * +-----+-----+
- * |x2,y2|x3,y3|
- * +-----+-----+
- */
- vec_uint4 x = spu_splats(spu_extract(itc, 0));
- vec_uint4 y = spu_splats(spu_extract(itc, 1));
- x = spu_add(x, offset_x);
- y = spu_add(y, offset_y);
+ /* is + 1, it + 1 */
+ vector signed int is1 = spu_add(is0, 1);
+ vector signed int it1 = spu_add(it0, 1);
- /* GL_REPEAT wrap mode: */
- x = spu_and(x, spu.texture[unit].tex_size_x_mask);
- y = spu_and(y, spu.texture[unit].tex_size_y_mask);
+ /* PIPE_TEX_WRAP_REPEAT */
+ is0 = spu_and(is0, tlevel->mask_s);
+ it0 = spu_and(it0, tlevel->mask_t);
+ is1 = spu_and(is1, tlevel->mask_s);
+ it1 = spu_and(it1, tlevel->mask_t);
- get_four_texels(unit, x, y, texels);
+ /* PIPE_TEX_WRAP_CLAMP */
+ is0 = spu_clamp(is0, tlevel->max_s);
+ it0 = spu_clamp(it0, tlevel->max_t);
+ is1 = spu_clamp(is1, tlevel->max_s);
+ it1 = spu_clamp(it1, tlevel->max_t);
- /* integer A8R8G8B8 to float texel conversion */
- vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0));
- vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0));
- vector float texel10 = spu_unpack_A8R8G8B8(spu_extract(texels[2], 0));
- vector float texel11 = spu_unpack_A8R8G8B8(spu_extract(texels[3], 0));
+ /* get packed int texels */
+ vector unsigned int texels[16];
+ get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */
+ get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */
+ get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */
+ get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
+ /* convert packed int texels to float colors */
+ vector float ftexels[16];
+ spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0);
+ spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4);
+ spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8);
+ spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12);
/* Compute weighting factors in [0,1]
* Multiply texcoord by 1024, AND with 1023, convert back to float.
*/
- vector float tc1024 = spu_mul(tc, spu_splats(1024.0f));
- vector signed int itc1024 = spu_convts(tc1024, 0);
- itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1));
- vector float weight = spu_convtf(itc1024, 10);
-
- /* smeared frac and 1-frac */
- vector float sfrac = spu_splats(spu_extract(weight, 0));
- vector float tfrac = spu_splats(spu_extract(weight, 1));
- vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac);
- vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac);
-
- /* multiply the samples (colors) by the S/T weights */
- texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1);
- texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1);
- texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac );
- texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac );
-
- /* compute sum of weighted samples */
- vector float texel_sum = spu_add(texel00, texel01);
- texel_sum = spu_add(texel_sum, texel10);
- texel_sum = spu_add(texel_sum, texel11);
-
- return texel_sum;
+ vector float ss1024 = spu_mul(ss, spu_splats(1024.0f));
+ vector signed int iss1024 = spu_convts(ss1024, 0);
+ iss1024 = spu_and(iss1024, 1023);
+ vector float sWeights0 = spu_convtf(iss1024, 10);
+
+ vector float tt1024 = spu_mul(tt, spu_splats(1024.0f));
+ vector signed int itt1024 = spu_convts(tt1024, 0);
+ itt1024 = spu_and(itt1024, 1023);
+ vector float tWeights0 = spu_convtf(itt1024, 10);
+
+ /* 1 - sWeight and 1 - tWeight */
+ vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0);
+ vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0);
+
+ /* reds, for four pixels */
+ ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/
+ ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/
+ ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/
+ ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/
+ colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]),
+ spu_add(ftexels[8], ftexels[12]));
+
+ /* greens, for four pixels */
+ ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/
+ ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/
+ ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/
+ ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/
+ colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]),
+ spu_add(ftexels[9], ftexels[13]));
+
+ /* blues, for four pixels */
+ ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/
+ ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/
+ ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/
+ ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/
+ colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]),
+ spu_add(ftexels[10], ftexels[14]));
+
+ /* alphas, for four pixels */
+ ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/
+ ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/
+ ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/
+ ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/
+ colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]),
+ spu_add(ftexels[11], ftexels[15]));
+}
+
+
+
+/**
+ * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h
+ */
+static INLINE void
+transpose(vector unsigned int *mOut0,
+ vector unsigned int *mOut1,
+ vector unsigned int *mOut2,
+ vector unsigned int *mOut3,
+ vector unsigned int *mIn)
+{
+ vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */
+ vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */
+ vector unsigned int aibj, ckdl, emfn, gohp; /* intermediate vectors */
+
+ vector unsigned char shufflehi = ((vector unsigned char) {
+ 0x00, 0x01, 0x02, 0x03,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x14, 0x15, 0x16, 0x17});
+ vector unsigned char shufflelo = ((vector unsigned char) {
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x1C, 0x1D, 0x1E, 0x1F});
+ abcd = *(mIn+0);
+ efgh = *(mIn+1);
+ ijkl = *(mIn+2);
+ mnop = *(mIn+3);
+
+ aibj = spu_shuffle(abcd, ijkl, shufflehi);
+ ckdl = spu_shuffle(abcd, ijkl, shufflelo);
+ emfn = spu_shuffle(efgh, mnop, shufflehi);
+ gohp = spu_shuffle(efgh, mnop, shufflelo);
+
+ aeim = spu_shuffle(aibj, emfn, shufflehi);
+ bfjn = spu_shuffle(aibj, emfn, shufflelo);
+ cgko = spu_shuffle(ckdl, gohp, shufflehi);
+ dhlp = spu_shuffle(ckdl, gohp, shufflelo);
+
+ *mOut0 = aeim;
+ *mOut1 = bfjn;
+ *mOut2 = cgko;
+ *mOut3 = dhlp;
+}
+
+
+/**
+ * Bilinear filtering, using int instead of float arithmetic for computing
+ * sample weights.
+ */
+void
+sample_texture_2d_bilinear_int(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
+{
+ const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+ static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
+
+ /* Scale texcoords by size of texture, and add half pixel bias */
+ vector float ss = spu_madd(s, tlevel->scale_s, half);
+ vector float tt = spu_madd(t, tlevel->scale_t, half);
+
+ /* convert float coords to fixed-pt coords with 7 fraction bits */
+ vector signed int is = spu_convts(ss, 7); /* XXX really need floor() here */
+ vector signed int it = spu_convts(tt, 7); /* XXX really need floor() here */
+
+ /* compute integer texel weights in [0, 127] */
+ vector signed int sWeights0 = spu_and(is, 127);
+ vector signed int tWeights0 = spu_and(it, 127);
+ vector signed int sWeights1 = spu_sub(127, sWeights0);
+ vector signed int tWeights1 = spu_sub(127, tWeights0);
+
+ /* texel coords: is0 = is / 128, it0 = is / 128 */
+ vector signed int is0 = spu_rlmask(is, -7);
+ vector signed int it0 = spu_rlmask(it, -7);
+
+ /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */
+ vector signed int is1 = spu_add(is0, 1);
+ vector signed int it1 = spu_add(it0, 1);
+
+ /* PIPE_TEX_WRAP_REPEAT */
+ is0 = spu_and(is0, tlevel->mask_s);
+ it0 = spu_and(it0, tlevel->mask_t);
+ is1 = spu_and(is1, tlevel->mask_s);
+ it1 = spu_and(it1, tlevel->mask_t);
+
+ /* PIPE_TEX_WRAP_CLAMP */
+ is0 = spu_clamp(is0, tlevel->max_s);
+ it0 = spu_clamp(it0, tlevel->max_t);
+ is1 = spu_clamp(is1, tlevel->max_s);
+ it1 = spu_clamp(it1, tlevel->max_t);
+
+ /* get packed int texels */
+ vector unsigned int texels[16];
+ get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */
+ get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */
+ get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */
+ get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
+
+ /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */
+ {
+ static const unsigned char ZERO = 0x80;
+ int i;
+ for (i = 0; i < 16; i++) {
+ texels[i] = spu_shuffle(texels[i], texels[i],
+ ((vector unsigned char) {
+ ZERO, ZERO, ZERO, 1,
+ ZERO, ZERO, ZERO, 2,
+ ZERO, ZERO, ZERO, 3,
+ ZERO, ZERO, ZERO, 0}));
+ }
+ }
+
+ /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */
+ vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7,
+ texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15;
+ transpose(&texel0, &texel1, &texel2, &texel3, texels + 0);
+ transpose(&texel4, &texel5, &texel6, &texel7, texels + 4);
+ transpose(&texel8, &texel9, &texel10, &texel11, texels + 8);
+ transpose(&texel12, &texel13, &texel14, &texel15, texels + 12);
+
+ /* computed weighted colors */
+ vector unsigned int c0, c1, c2, c3, cSum;
+
+ /* red */
+ c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+ colors[0] = spu_convtf(cSum, 22);
+
+ /* green */
+ c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+ colors[1] = spu_convtf(cSum, 22);
+
+ /* blue */
+ c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+ colors[2] = spu_convtf(cSum, 22);
+
+ /* alpha */
+ c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+ c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+ c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+ c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+ colors[3] = spu_convtf(cSum, 22);
+}
+
+
+
+/**
+ * Compute level of detail factor from texcoords.
+ */
+static INLINE float
+compute_lambda_2d(uint unit, vector float s, vector float t)
+{
+ uint baseLevel = 0;
+ float width = spu.texture[unit].level[baseLevel].width;
+ float height = spu.texture[unit].level[baseLevel].width;
+ float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0));
+ float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0));
+ float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0));
+ float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0));
+#if 0
+ /* ideal value */
+ float x = dsdx * dsdx + dtdx * dtdx;
+ float y = dsdy * dsdy + dtdy * dtdy;
+ float rho = x > y ? x : y;
+ rho = sqrtf(rho);
+#else
+ /* approximation */
+ dsdx = fabsf(dsdx);
+ dsdy = fabsf(dsdy);
+ dtdx = fabsf(dtdx);
+ dtdy = fabsf(dtdy);
+ float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5;
+#endif
+ float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */
+ return lambda;
+}
+
+
+/**
+ * Blend two sets of colors according to weight.
+ */
+static void
+blend_colors(vector float c0[4], const vector float c1[4], float weight)
+{
+ vector float t = spu_splats(weight);
+ vector float dc0 = spu_sub(c1[0], c0[0]);
+ vector float dc1 = spu_sub(c1[1], c0[1]);
+ vector float dc2 = spu_sub(c1[2], c0[2]);
+ vector float dc3 = spu_sub(c1[3], c0[3]);
+ c0[0] = spu_madd(dc0, t, c0[0]);
+ c0[1] = spu_madd(dc1, t, c0[1]);
+ c0[2] = spu_madd(dc2, t, c0[2]);
+ c0[3] = spu_madd(dc3, t, c0[3]);
+}
+
+
+/**
+ * Texture sampling with level of detail selection and possibly mipmap
+ * interpolation.
+ */
+void
+sample_texture_2d_lod(vector float s, vector float t,
+ uint unit, uint level_ignored, uint face,
+ vector float colors[4])
+{
+ /*
+ * Note that we're computing a lambda/lod here that's used for all
+ * four pixels in the quad.
+ */
+ float lambda = compute_lambda_2d(unit, s, t);
+
+ (void) face;
+ (void) level_ignored;
+
+ /* apply lod bias */
+ lambda += spu.sampler[unit].lod_bias;
+
+ /* clamp */
+ if (lambda < spu.sampler[unit].min_lod)
+ lambda = spu.sampler[unit].min_lod;
+ else if (lambda > spu.sampler[unit].max_lod)
+ lambda = spu.sampler[unit].max_lod;
+
+ if (lambda <= 0.0f) {
+ /* magnify */
+ spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors);
+ }
+ else {
+ /* minify */
+ if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
+ /* sample two mipmap levels and interpolate */
+ int level = (int) lambda;
+ if (level > (int) spu.texture[unit].max_level)
+ level = spu.texture[unit].max_level;
+ spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
+ if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
+ /* sample second mipmap level */
+ float weight = lambda - (float) level;
+ level++;
+ if (level <= (int) spu.texture[unit].max_level) {
+ vector float colors2[4];
+ spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2);
+ blend_colors(colors, colors2, weight);
+ }
+ }
+ }
+ else {
+ /* sample one mipmap level */
+ int level = (int) (lambda + 0.5f);
+ if (level > (int) spu.texture[unit].max_level)
+ level = spu.texture[unit].max_level;
+ spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
+ }
+ }
+}
+
+
+/** XXX need a SIMD version of this */
+static unsigned
+choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
+{
+ /*
+ major axis
+ direction target sc tc ma
+ ---------- ------------------------------- --- --- ---
+ +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
+ -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
+ +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
+ -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
+ +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
+ -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
+ */
+ const float arx = fabsf(rx);
+ const float ary = fabsf(ry);
+ const float arz = fabsf(rz);
+ unsigned face;
+ float sc, tc, ma;
+
+ if (arx > ary && arx > arz) {
+ if (rx >= 0.0F) {
+ face = PIPE_TEX_FACE_POS_X;
+ sc = -rz;
+ tc = -ry;
+ ma = arx;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_X;
+ sc = rz;
+ tc = -ry;
+ ma = arx;
+ }
+ }
+ else if (ary > arx && ary > arz) {
+ if (ry >= 0.0F) {
+ face = PIPE_TEX_FACE_POS_Y;
+ sc = rx;
+ tc = rz;
+ ma = ary;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_Y;
+ sc = rx;
+ tc = -rz;
+ ma = ary;
+ }
+ }
+ else {
+ if (rz > 0.0F) {
+ face = PIPE_TEX_FACE_POS_Z;
+ sc = rx;
+ tc = -ry;
+ ma = arz;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_Z;
+ sc = -rx;
+ tc = -ry;
+ ma = arz;
+ }
+ }
+
+ *newS = (sc / ma + 1.0F) * 0.5F;
+ *newT = (tc / ma + 1.0F) * 0.5F;
+
+ return face;
+}
+
+
+
+void
+sample_texture_cube(vector float s, vector float t, vector float r,
+ uint unit, vector float colors[4])
+{
+ uint p, faces[4], level = 0;
+ float newS[4], newT[4];
+
+ /* Compute cube faces referenced by the four sets of texcoords.
+ * XXX we should SIMD-ize this.
+ */
+ for (p = 0; p < 4; p++) {
+ float rx = spu_extract(s, p);
+ float ry = spu_extract(t, p);
+ float rz = spu_extract(r, p);
+ faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]);
+ }
+
+ if (faces[0] == faces[1] &&
+ faces[0] == faces[2] &&
+ faces[0] == faces[3]) {
+ /* GOOD! All four texcoords refer to the same cube face */
+ s = (vector float) {newS[0], newS[1], newS[2], newS[3]};
+ t = (vector float) {newT[0], newT[1], newT[2], newT[3]};
+ spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors);
+ }
+ else {
+ /* BAD! The four texcoords refer to different faces */
+ for (p = 0; p < 4; p++) {
+ vector float c[4];
+
+ spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]),
+ unit, level, faces[p], c);
+
+ float red = spu_extract(c[0], p);
+ float green = spu_extract(c[1], p);
+ float blue = spu_extract(c[2], p);
+ float alpha = spu_extract(c[3], p);
+
+ colors[0] = spu_insert(red, colors[0], p);
+ colors[1] = spu_insert(green, colors[1], p);
+ colors[2] = spu_insert(blue, colors[2], p);
+ colors[3] = spu_insert(alpha, colors[3], p);
+ }
+ }
}
diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h
index f7c9738be88..7b75b007b5a 100644
--- a/src/gallium/drivers/cell/spu/spu_texture.h
+++ b/src/gallium/drivers/cell/spu/spu_texture.h
@@ -36,12 +36,32 @@ extern void
invalidate_tex_cache(void);
-extern vector float
-sample_texture_nearest(uint unit, vector float texcoord);
+extern void
+sample_texture_2d_nearest(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
+
+
+extern void
+sample_texture_2d_bilinear(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
+
+extern void
+sample_texture_2d_bilinear_int(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
+
+extern void
+sample_texture_2d_lod(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4]);
-extern vector float
-sample_texture_bilinear(uint unit, vector float texcoord);
+
+extern void
+sample_texture_cube(vector float s, vector float t, vector float r,
+ uint unit, vector float colors[4]);
#endif /* SPU_TEXTURE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c
index 216a33126b7..6905015a483 100644
--- a/src/gallium/drivers/cell/spu/spu_tile.c
+++ b/src/gallium/drivers/cell/spu/spu_tile.c
@@ -87,3 +87,40 @@ put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf)
0 /* rid */);
}
+
+/**
+ * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled
+ * tiles back to the main framebuffer.
+ */
+void
+really_clear_tiles(uint surfaceIndex)
+{
+ const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
+ uint i;
+
+ if (surfaceIndex == 0) {
+ clear_c_tile(&spu.ctile);
+
+ for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+ uint tx = i % spu.fb.width_tiles;
+ uint ty = i / spu.fb.width_tiles;
+ if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) {
+ put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
+ }
+ }
+ }
+ else {
+ clear_z_tile(&spu.ztile);
+
+ for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+ uint tx = i % spu.fb.width_tiles;
+ uint ty = i / spu.fb.width_tiles;
+ if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR)
+ put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1);
+ }
+ }
+
+#if 0
+ wait_on_mask(1 << TAG_SURFACE_CLEAR);
+#endif
+}
diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h
index 1b5491112db..7bfb52be8f3 100644
--- a/src/gallium/drivers/cell/spu/spu_tile.h
+++ b/src/gallium/drivers/cell/spu/spu_tile.h
@@ -36,12 +36,14 @@
-void
+extern void
get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf);
-void
+extern void
put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf);
+extern void
+really_clear_tiles(uint surfaceIndex);
static INLINE void
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index 8b938781920..22e51a86ae5 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -43,11 +43,6 @@
/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
typedef vector unsigned int mask_t;
-typedef union
-{
- vector float v;
- float f[4];
-} float4;
/**
@@ -91,9 +86,9 @@ struct edge {
struct interp_coef
{
- float4 a0;
- float4 dadx;
- float4 dady;
+ vector float a0;
+ vector float dadx;
+ vector float dady;
};
@@ -116,21 +111,15 @@ struct setup_stage {
struct edge etop;
struct edge emaj;
- float oneoverarea;
+ float oneOverArea; /* XXX maybe make into vector? */
+
+ uint facing;
- uint tx, ty;
+ uint tx, ty; /**< position of current tile (x, y) */
int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;
-#if 0
- struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
-#else
struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
-#endif
-
-#if 0
- struct quad_header quad;
-#endif
struct {
int left[2]; /**< [0] = row0, [1] = row1 */
@@ -142,118 +131,105 @@ struct setup_stage {
};
-
static struct setup_stage setup;
-
-
-#if 0
-/**
- * Basically a cast wrapper.
- */
-static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
-{
- return (struct setup_stage *)stage;
-}
-#endif
-
-#if 0
-/**
- * Clip setup.quad against the scissor/surface bounds.
- */
-static INLINE void
-quad_clip(struct setup_stage *setup)
-{
- const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect;
- const int minx = (int) cliprect->minx;
- const int maxx = (int) cliprect->maxx;
- const int miny = (int) cliprect->miny;
- const int maxy = (int) cliprect->maxy;
-
- if (setup.quad.x0 >= maxx ||
- setup.quad.y0 >= maxy ||
- setup.quad.x0 + 1 < minx ||
- setup.quad.y0 + 1 < miny) {
- /* totally clipped */
- setup.quad.mask = 0x0;
- return;
- }
- if (setup.quad.x0 < minx)
- setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
- if (setup.quad.y0 < miny)
- setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
- if (setup.quad.x0 == maxx - 1)
- setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
- if (setup.quad.y0 == maxy - 1)
- setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
-}
-#endif
-
-#if 0
-/**
- * Emit a quad (pass to next stage) with clipping.
- */
-static INLINE void
-clip_emit_quad(struct setup_stage *setup)
-{
- quad_clip(setup);
- if (setup.quad.mask) {
- struct softpipe_context *sp = setup.softpipe;
- sp->quad.first->run(sp->quad.first, &setup.quad);
- }
-}
-#endif
-
/**
* Evaluate attribute coefficients (plane equations) to compute
* attribute values for the four fragments in a quad.
* Eg: four colors will be computed (in AoS format).
*/
static INLINE void
-eval_coeff(uint slot, float x, float y, vector float result[4])
+eval_coeff(uint slot, float x, float y, vector float w, vector float result[4])
{
- switch (spu.vertex_info.interp_mode[slot]) {
+ switch (spu.vertex_info.attrib[slot].interp_mode) {
case INTERP_CONSTANT:
result[QUAD_TOP_LEFT] =
result[QUAD_TOP_RIGHT] =
result[QUAD_BOTTOM_LEFT] =
- result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v;
+ result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0;
break;
-
case INTERP_LINEAR:
- /* fall-through, for now */
- default:
{
- register vector float dadx = setup.coef[slot].dadx.v;
- register vector float dady = setup.coef[slot].dady.v;
- register vector float topLeft
- = spu_add(setup.coef[slot].a0.v,
- spu_add(spu_mul(spu_splats(x), dadx),
- spu_mul(spu_splats(y), dady)));
+ vector float dadx = setup.coef[slot].dadx;
+ vector float dady = setup.coef[slot].dady;
+ vector float topLeft =
+ spu_add(setup.coef[slot].a0,
+ spu_add(spu_mul(spu_splats(x), dadx),
+ spu_mul(spu_splats(y), dady)));
result[QUAD_TOP_LEFT] = topLeft;
result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx);
result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady);
result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady);
}
+ break;
+ case INTERP_PERSPECTIVE:
+ {
+ vector float dadx = setup.coef[slot].dadx;
+ vector float dady = setup.coef[slot].dady;
+ vector float topLeft =
+ spu_add(setup.coef[slot].a0,
+ spu_add(spu_mul(spu_splats(x), dadx),
+ spu_mul(spu_splats(y), dady)));
+
+ vector float wInv = spu_re(w); /* 1.0 / w */
+
+ result[QUAD_TOP_LEFT] = spu_mul(topLeft, wInv);
+ result[QUAD_TOP_RIGHT] = spu_mul(spu_add(topLeft, dadx), wInv);
+ result[QUAD_BOTTOM_LEFT] = spu_mul(spu_add(topLeft, dady), wInv);
+ result[QUAD_BOTTOM_RIGHT] = spu_mul(spu_add(spu_add(topLeft, dadx), dady), wInv);
+ }
+ break;
+ case INTERP_POS:
+ case INTERP_NONE:
+ break;
+ default:
+ ASSERT(0);
}
}
+/**
+ * As above, but return 4 vectors in SOA format.
+ * XXX this will all be re-written someday.
+ */
+static INLINE void
+eval_coeff_soa(uint slot, float x, float y, vector float w, vector float result[4])
+{
+ eval_coeff(slot, x, y, w, result);
+ _transpose_matrix4x4(result, result);
+}
+
+
+/** Evalute coefficients to get Z for four pixels in a quad */
static INLINE vector float
eval_z(float x, float y)
{
const uint slot = 0;
- const float dzdx = setup.coef[slot].dadx.f[2];
- const float dzdy = setup.coef[slot].dady.f[2];
- const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy;
+ const float dzdx = spu_extract(setup.coef[slot].dadx, 2);
+ const float dzdy = spu_extract(setup.coef[slot].dady, 2);
+ const float topLeft = spu_extract(setup.coef[slot].a0, 2) + x * dzdx + y * dzdy;
const vector float topLeftv = spu_splats(topLeft);
const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy };
return spu_add(topLeftv, derivs);
}
+/** Evalute coefficients to get W for four pixels in a quad */
+static INLINE vector float
+eval_w(float x, float y)
+{
+ const uint slot = 0;
+ const float dwdx = spu_extract(setup.coef[slot].dadx, 3);
+ const float dwdy = spu_extract(setup.coef[slot].dady, 3);
+ const float topLeft = spu_extract(setup.coef[slot].a0, 3) + x * dwdx + y * dwdy;
+ const vector float topLeftv = spu_splats(topLeft);
+ const vector float derivs = (vector float) { 0.0, dwdx, dwdy, dwdx + dwdy };
+ return spu_add(topLeftv, derivs);
+}
+
+
/**
* Emit a quad (pass to next stage). No clipping is done.
* Note: about 1/5 to 1/7 of the time, mask is zero and this function
@@ -261,120 +237,59 @@ eval_z(float x, float y)
* overall.
*/
static INLINE void
-emit_quad( int x, int y, mask_t mask )
+emit_quad( int x, int y, mask_t mask)
{
/* If any bits in mask are set... */
if (spu_extract(spu_orx(mask), 0)) {
const int ix = x - setup.cliprect_minx;
const int iy = y - setup.cliprect_miny;
- vector float colors[4];
spu.cur_ctile_status = TILE_STATUS_DIRTY;
spu.cur_ztile_status = TILE_STATUS_DIRTY;
- if (spu.texture[0].start) {
- /* texture mapping */
- const uint unit = 0;
- vector float texcoords[4];
- eval_coeff(2, (float) x, (float) y, texcoords);
-
- if (spu_extract(mask, 0))
- colors[0] = spu.sample_texture[unit](unit, texcoords[0]);
- if (spu_extract(mask, 1))
- colors[1] = spu.sample_texture[unit](unit, texcoords[1]);
- if (spu_extract(mask, 2))
- colors[2] = spu.sample_texture[unit](unit, texcoords[2]);
- if (spu_extract(mask, 3))
- colors[3] = spu.sample_texture[unit](unit, texcoords[3]);
-
-
- if (spu.texture[1].start) {
- /* multi-texture mapping */
- const uint unit = 1;
- vector float colors1[4];
-
- eval_coeff(2, (float) x, (float) y, texcoords);
-
- if (spu_extract(mask, 0))
- colors1[0] = spu.sample_texture[unit](unit, texcoords[0]);
- if (spu_extract(mask, 1))
- colors1[1] = spu.sample_texture[unit](unit, texcoords[1]);
- if (spu_extract(mask, 2))
- colors1[2] = spu.sample_texture[unit](unit, texcoords[2]);
- if (spu_extract(mask, 3))
- colors1[3] = spu.sample_texture[unit](unit, texcoords[3]);
-
- /* hack: modulate first texture by second */
- colors[0] = spu_mul(colors[0], colors1[0]);
- colors[1] = spu_mul(colors[1], colors1[1]);
- colors[2] = spu_mul(colors[2], colors1[2]);
- colors[3] = spu_mul(colors[3], colors1[3]);
- }
+ {
+ /*
+ * Run fragment shader, execute per-fragment ops, update fb/tile.
+ */
+ vector float inputs[4*4], outputs[2*4];
+ vector float fragZ = eval_z((float) x, (float) y);
+ vector float fragW = eval_w((float) x, (float) y);
+ vector unsigned int kill_mask;
- }
- else {
- /* simple shading */
+ /* setup inputs */
#if 0
- eval_coeff(1, (float) x, (float) y, colors);
-
+ eval_coeff_soa(1, (float) x, (float) y, fragW, inputs);
#else
- /* XXX new fragment program code */
-
- if (spu.fragment_program) {
- vector float inputs[4*4], outputs[2*4];
-
- /* setup inputs */
- eval_coeff(1, (float) x, (float) y, inputs);
-
- /* Execute the current fragment program */
- spu.fragment_program(inputs, outputs, spu.constants);
-
- /* Copy outputs */
- colors[0] = outputs[0*4+0];
- colors[1] = outputs[0*4+1];
- colors[2] = outputs[0*4+2];
- colors[3] = outputs[0*4+3];
-
- if (0 && spu.init.id==0 && y == 48) {
- printf("colors[0] = %f %f %f %f\n",
- spu_extract(colors[0], 0),
- spu_extract(colors[0], 1),
- spu_extract(colors[0], 2),
- spu_extract(colors[0], 3));
- printf("colors[1] = %f %f %f %f\n",
- spu_extract(colors[1], 0),
- spu_extract(colors[1], 1),
- spu_extract(colors[1], 2),
- spu_extract(colors[1], 3));
- }
-
+ uint i;
+ for (i = 0; i < spu.vertex_info.num_attribs; i++) {
+ eval_coeff_soa(i+1, (float) x, (float) y, fragW, inputs + i * 4);
}
#endif
- }
-
-
- {
- /* Convert fragment data from AoS to SoA format.
- * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA)
- * This is temporary!
+ ASSERT(spu.fragment_program);
+ ASSERT(spu.fragment_ops);
+
+ /* Execute the current fragment program */
+ kill_mask = spu.fragment_program(inputs, outputs, spu.constants);
+
+ mask = spu_andc(mask, kill_mask);
+
+ /* Execute per-fragment/quad operations, including:
+ * alpha test, z test, stencil test, blend and framebuffer writing.
+ * Note that there are two different fragment operations functions
+ * that can be called, one for front-facing fragments, and one
+ * for back-facing fragments. (Often the two are the same;
+ * but in some cases, like two-sided stenciling, they can be
+ * very different.) So choose the correct function depending
+ * on the calculated facing.
*/
- vector float soa_frag[4];
- _transpose_matrix4x4(soa_frag, colors);
-
- float4 fragZ;
-
- fragZ.v = eval_z((float) x, (float) y);
-
- /* Do all per-fragment/quad operations here, including:
- * alpha test, z test, stencil test, blend and framebuffer writing.
- */
- spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile,
- fragZ.v,
- soa_frag[0], soa_frag[1],
- soa_frag[2], soa_frag[3],
+ spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile,
+ fragZ,
+ outputs[0*4+0],
+ outputs[0*4+1],
+ outputs[0*4+2],
+ outputs[0*4+3],
mask);
}
-
}
}
@@ -383,7 +298,8 @@ emit_quad( int x, int y, mask_t mask )
* Given an X or Y coordinate, return the block/quad coordinate that it
* belongs to.
*/
-static INLINE int block( int x )
+static INLINE int
+block(int x)
{
return x & ~1;
}
@@ -394,7 +310,8 @@ static INLINE int block( int x )
* the triangle's bounds.
* The mask is a uint4 vector and each element will be 0 or 0xffffffff.
*/
-static INLINE mask_t calculate_mask( int x )
+static INLINE mask_t
+calculate_mask(int x)
{
/* This is a little tricky.
* Use & instead of && to avoid branches.
@@ -412,7 +329,8 @@ static INLINE mask_t calculate_mask( int x )
/**
* Render a horizontal span of quads
*/
-static void flush_spans( void )
+static void
+flush_spans(void)
{
int minleft, maxright;
int x;
@@ -440,7 +358,6 @@ static void flush_spans( void )
return;
}
-
/* OK, we're very likely to need the tile data now.
* clear or finish waiting if needed.
*/
@@ -457,7 +374,7 @@ static void flush_spans( void )
}
ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
- if (spu.read_depth) {
+ if (spu.read_depth_stencil) {
if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
/* wait for mfc_get() to complete */
//printf("SPU: %u: waiting for ztile\n", spu.init.id);
@@ -476,9 +393,7 @@ static void flush_spans( void )
* calculate_mask() could be simplified a bit...
*/
for (x = block(minleft); x <= block(maxright); x += 2) {
-#if 1
- emit_quad( x, setup.span.y, calculate_mask( x ) );
-#endif
+ emit_quad( x, setup.span.y, calculate_mask( x ));
}
setup.span.y = 0;
@@ -487,33 +402,46 @@ static void flush_spans( void )
setup.span.right[1] = 0;
}
+
#if DEBUG_VERTS
-static void print_vertex(const struct vertex_header *v)
+static void
+print_vertex(const struct vertex_header *v)
{
- int i;
- fprintf(stderr, "Vertex: (%p)\n", v);
- for (i = 0; i < setup.quad.nr_attrs; i++) {
- fprintf(stderr, " %d: %f %f %f %f\n", i,
- v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]);
+ uint i;
+ fprintf(stderr, " Vertex: (%p)\n", v);
+ for (i = 0; i < spu.vertex_info.num_attribs; i++) {
+ fprintf(stderr, " %d: %f %f %f %f\n", i,
+ spu_extract(v->data[i], 0),
+ spu_extract(v->data[i], 1),
+ spu_extract(v->data[i], 2),
+ spu_extract(v->data[i], 3));
}
}
#endif
-static boolean setup_sort_vertices(const struct vertex_header *v0,
- const struct vertex_header *v1,
- const struct vertex_header *v2)
+/**
+ * Sort vertices from top to bottom.
+ * Compute area and determine front vs. back facing.
+ * Do coarse clip test against tile bounds
+ * \return FALSE if tri is totally outside tile, TRUE otherwise
+ */
+static boolean
+setup_sort_vertices(const struct vertex_header *v0,
+ const struct vertex_header *v1,
+ const struct vertex_header *v2)
{
+ float area, sign;
#if DEBUG_VERTS
- fprintf(stderr, "Triangle:\n");
- print_vertex(v0);
- print_vertex(v1);
- print_vertex(v2);
+ if (spu.init.id==0) {
+ fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id);
+ print_vertex(v0);
+ print_vertex(v1);
+ print_vertex(v2);
+ }
#endif
- setup.vprovoke = v2;
-
/* determine bottom to top order of vertices */
{
float y0 = spu_extract(v0->data[0], 1);
@@ -525,18 +453,21 @@ static boolean setup_sort_vertices(const struct vertex_header *v0,
setup.vmin = v0;
setup.vmid = v1;
setup.vmax = v2;
+ sign = -1.0f;
}
else if (y2 <= y0) {
/* y2<=y0<=y1 */
setup.vmin = v2;
setup.vmid = v0;
setup.vmax = v1;
+ sign = -1.0f;
}
else {
/* y0<=y2<=y1 */
setup.vmin = v0;
setup.vmid = v2;
setup.vmax = v1;
+ sign = 1.0f;
}
}
else {
@@ -545,18 +476,21 @@ static boolean setup_sort_vertices(const struct vertex_header *v0,
setup.vmin = v1;
setup.vmid = v0;
setup.vmax = v2;
+ sign = 1.0f;
}
else if (y2 <= y1) {
/* y2<=y1<=y0 */
setup.vmin = v2;
setup.vmid = v1;
setup.vmax = v0;
+ sign = 1.0f;
}
else {
/* y1<=y2<=y0 */
setup.vmin = v1;
setup.vmid = v2;
setup.vmax = v0;
+ sign = -1.0f;
}
}
}
@@ -585,31 +519,23 @@ static boolean setup_sort_vertices(const struct vertex_header *v0,
/*
* Compute triangle's area. Use 1/area to compute partial
* derivatives of attributes later.
- *
- * The area will be the same as prim->det, but the sign may be
- * different depending on how the vertices get sorted above.
- *
- * To determine whether the primitive is front or back facing we
- * use the prim->det value because its sign is correct.
*/
- {
- const float area = (setup.emaj.dx * setup.ebot.dy -
- setup.ebot.dx * setup.emaj.dy);
-
- setup.oneoverarea = 1.0f / area;
- /*
- _mesa_printf("%s one-over-area %f area %f det %f\n",
- __FUNCTION__, setup.oneoverarea, area, prim->det );
- */
- }
+ area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy;
-#if 0
- /* We need to know if this is a front or back-facing triangle for:
- * - the GLSL gl_FrontFacing fragment attribute (bool)
- * - two-sided stencil test
+ setup.oneOverArea = 1.0f / area;
+
+ /* The product of area * sign indicates front/back orientation (0/1).
+ * Just in case someone gets the bright idea of switching the front
+ * and back constants without noticing that we're assuming their
+ * values in this operation, also assert that the values are
+ * what we think they are.
*/
- setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
-#endif
+ ASSERT(CELL_FACING_FRONT == 0);
+ ASSERT(CELL_FACING_BACK == 1);
+ setup.facing = (area * sign > 0.0f)
+ ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW);
+
+ setup.vprovoke = v2;
return TRUE;
}
@@ -622,63 +548,11 @@ static boolean setup_sort_vertices(const struct vertex_header *v0,
* \param slot which attribute slot
*/
static INLINE void
-const_coeff(uint slot)
+const_coeff4(uint slot)
{
- setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0};
- setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0};
- setup.coef[slot].a0.v = setup.vprovoke->data[slot];
-}
-
-
-/**
- * Compute a0, dadx and dady for a linearly interpolated coefficient,
- * for a triangle.
- */
-static INLINE void
-tri_linear_coeff(uint slot, uint firstComp, uint lastComp)
-{
- uint i;
- const float *vmin_d = (float *) &setup.vmin->data[slot];
- const float *vmid_d = (float *) &setup.vmid->data[slot];
- const float *vmax_d = (float *) &setup.vmax->data[slot];
- const float x = spu_extract(setup.vmin->data[0], 0) - 0.5f;
- const float y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
-
- for (i = firstComp; i < lastComp; i++) {
- float botda = vmid_d[i] - vmin_d[i];
- float majda = vmax_d[i] - vmin_d[i];
- float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
- float b = setup.emaj.dx * botda - majda * setup.ebot.dx;
-
- ASSERT(slot < PIPE_MAX_SHADER_INPUTS);
-
- setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
- setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
-
- /* calculate a0 as the value which would be sampled for the
- * fragment at (0,0), taking into account that we want to sample at
- * pixel centers, in other words (0.5, 0.5).
- *
- * this is neat but unfortunately not a good way to do things for
- * triangles with very large values of dadx or dady as it will
- * result in the subtraction and re-addition from a0 of a very
- * large number, which means we'll end up loosing a lot of the
- * fractional bits and precision from a0. the way to fix this is
- * to define a0 as the sample at a pixel center somewhere near vmin
- * instead - i'll switch to this later.
- */
- setup.coef[slot].a0.f[i] = (vmin_d[i] -
- (setup.coef[slot].dadx.f[i] * x +
- setup.coef[slot].dady.f[i] * y));
- }
-
- /*
- _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
- slot, "xyzw"[i],
- setup.coef[slot].a0[i],
- setup.coef[slot].dadx.f[i],
- setup.coef[slot].dady.f[i]);
- */
+ setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0};
+ setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0};
+ setup.coef[slot].a0 = setup.vprovoke->data[slot];
}
@@ -702,18 +576,16 @@ tri_linear_coeff4(uint slot)
vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
spu_mul(majda, spu_splats(setup.ebot.dx)));
- setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea));
- setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea));
+ setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
+ setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
- vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx);
- vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy);
+ vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
+ vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
- setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy));
+ setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
}
-
-#if 0
/**
* Compute a0, dadx and dady for a perspective-corrected interpolant,
* for a triangle.
@@ -722,82 +594,76 @@ tri_linear_coeff4(uint slot)
* Later, when we compute the value at a particular fragment position we'll
* divide the interpolated value by the interpolated W at that fragment.
*/
-static void tri_persp_coeff( unsigned slot,
- unsigned i )
+static void
+tri_persp_coeff4(uint slot)
{
- /* premultiply by 1/w:
- */
- float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3];
- float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3];
- float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3];
-
- float botda = mida - mina;
- float majda = maxa - mina;
- float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
- float b = setup.emaj.dx * botda - majda * setup.ebot.dx;
-
- /*
- printf("tri persp %d,%d: %f %f %f\n", slot, i,
- setup.vmin->data[slot][i],
- setup.vmid->data[slot][i],
- setup.vmax->data[slot][i]
- );
- */
+ const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
+ const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
+
+ const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3));
+ const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3));
+ const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3));
+
+ vector float vmin_d = setup.vmin->data[slot];
+ vector float vmid_d = setup.vmid->data[slot];
+ vector float vmax_d = setup.vmax->data[slot];
+
+ vmin_d = spu_mul(vmin_d, vmin_w);
+ vmid_d = spu_mul(vmid_d, vmid_w);
+ vmax_d = spu_mul(vmax_d, vmax_w);
+
+ vector float botda = vmid_d - vmin_d;
+ vector float majda = vmax_d - vmin_d;
- assert(slot < PIPE_MAX_SHADER_INPUTS);
- assert(i <= 3);
+ vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
+ spu_mul(botda, spu_splats(setup.emaj.dy)));
+ vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
+ spu_mul(majda, spu_splats(setup.ebot.dx)));
- setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
- setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
- setup.coef[slot].a0.f[i] = (mina -
- (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) +
- setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f)));
+ setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
+ setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
+
+ vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
+ vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
+
+ setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
}
-#endif
+
/**
* Compute the setup.coef[] array dadx, dady, a0 values.
* Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
*/
-static void setup_tri_coefficients(void)
+static void
+setup_tri_coefficients(void)
{
-#if 1
uint i;
for (i = 0; i < spu.vertex_info.num_attribs; i++) {
- switch (spu.vertex_info.interp_mode[i]) {
+ switch (spu.vertex_info.attrib[i].interp_mode) {
case INTERP_NONE:
break;
- case INTERP_POS:
- /*tri_linear_coeff(i, 2, 3);*/
- /* XXX interp W if PERSPECTIVE... */
- tri_linear_coeff4(i);
- break;
case INTERP_CONSTANT:
- const_coeff(i);
+ const_coeff4(i);
break;
+ case INTERP_POS:
+ /* fall-through */
case INTERP_LINEAR:
tri_linear_coeff4(i);
break;
case INTERP_PERSPECTIVE:
- tri_linear_coeff4(i); /* temporary */
+ tri_persp_coeff4(i);
break;
default:
ASSERT(0);
}
}
-#else
- ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS);
- ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR ||
- spu.vertex_info.interp_mode[1] == INTERP_CONSTANT);
- tri_linear_coeff(0, 2, 3); /* slot 0, z */
- tri_linear_coeff(1, 0, 4); /* slot 1, color */
-#endif
}
-static void setup_tri_edges(void)
+static void
+setup_tri_edges(void)
{
float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f;
float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f;
@@ -827,9 +693,8 @@ static void setup_tri_edges(void)
* Render the upper or lower half of a triangle.
* Scissoring/cliprect is applied here too.
*/
-static void subtriangle( struct edge *eleft,
- struct edge *eright,
- unsigned lines )
+static void
+subtriangle(struct edge *eleft, struct edge *eright, unsigned lines)
{
const int minx = setup.cliprect_minx;
const int maxx = setup.cliprect_maxx;
@@ -902,7 +767,8 @@ static void subtriangle( struct edge *eleft,
* The tile data should have already been fetched.
*/
boolean
-tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty)
+tri_draw(const float *v0, const float *v1, const float *v2,
+ uint tx, uint ty)
{
setup.tx = tx;
setup.ty = ty;
@@ -926,19 +792,14 @@ tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty)
setup.span.y_flags = 0;
setup.span.right[0] = 0;
setup.span.right[1] = 0;
- /* setup.span.z_mode = tri_z_mode( setup.ctx ); */
- /* init_constant_attribs( setup ); */
-
- if (setup.oneoverarea < 0.0) {
- /* emaj on left:
- */
+ if (setup.oneOverArea < 0.0) {
+ /* emaj on left */
subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
}
else {
- /* emaj on right:
- */
+ /* emaj on right */
subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
}
diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c
index 5e26d1b9057..7a4e7051d20 100644
--- a/src/gallium/drivers/i915simple/i915_debug.c
+++ b/src/gallium/drivers/i915simple/i915_debug.c
@@ -210,6 +210,7 @@ BITS(
PRINTF(stream, ": 0x%x\n", ((dw) & himask) >> (lo));
}
+#ifdef DEBUG
#define MBZ( dw, hi, lo) do { \
unsigned x = (dw) >> (lo); \
unsigned lomask = (1 << (lo)) - 1; \
@@ -217,6 +218,10 @@ BITS(
himask = (1UL << (hi)) - 1; \
assert ((x & himask & ~lomask) == 0); \
} while (0)
+#else
+#define MBZ( dw, hi, lo) do { \
+} while (0)
+#endif
static void
FLAG(
diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c
index 34b4a846c11..43d62c51765 100644
--- a/src/gallium/drivers/i915simple/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c
@@ -144,7 +144,7 @@ src_vector(struct i915_fp_compile *p,
const struct tgsi_full_src_register *source)
{
uint index = source->SrcRegister.Index;
- uint src, sem_name, sem_ind;
+ uint src = 0, sem_name, sem_ind;
switch (source->SrcRegister.File) {
case TGSI_FILE_TEMPORARY:
diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c
index bd87217063c..2f5459af67f 100644
--- a/src/gallium/drivers/i915simple/i915_texture.c
+++ b/src/gallium/drivers/i915simple/i915_texture.c
@@ -178,7 +178,9 @@ i915_displaytarget_layout(struct i915_texture *tex)
if (tex->base.width[0] >= 128) {
tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
+#if 0 /* used for tiled display targets */
tex->tiled = 1;
+#endif
} else {
tex->stride = round_up(tex->base.nblocksx[0] * pt->block.size, 64);
tex->total_nblocksy = tex->base.nblocksy[0];
@@ -206,11 +208,10 @@ i945_miptree_layout_2d( struct i915_texture *tex )
unsigned nblocksx = pt->nblocksx[0];
unsigned nblocksy = pt->nblocksy[0];
-#if 0 /* used for tiled display targets */
- if (pt->last_level == 0 && pt->block.size == 4)
+ /* used for tiled display targets */
+ if (0)
if (i915_displaytarget_layout(tex))
return;
-#endif
tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
diff --git a/src/gallium/drivers/nouveau/nouveau_bo.h b/src/gallium/drivers/nouveau/nouveau_bo.h
new file mode 100644
index 00000000000..65b138283c4
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_bo.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2007 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NOUVEAU_BO_H__
+#define __NOUVEAU_BO_H__
+
+/* Relocation/Buffer type flags */
+#define NOUVEAU_BO_VRAM (1 << 0)
+#define NOUVEAU_BO_GART (1 << 1)
+#define NOUVEAU_BO_RD (1 << 2)
+#define NOUVEAU_BO_WR (1 << 3)
+#define NOUVEAU_BO_RDWR (NOUVEAU_BO_RD | NOUVEAU_BO_WR)
+#define NOUVEAU_BO_MAP (1 << 4)
+#define NOUVEAU_BO_PIN (1 << 5)
+#define NOUVEAU_BO_LOW (1 << 6)
+#define NOUVEAU_BO_HIGH (1 << 7)
+#define NOUVEAU_BO_OR (1 << 8)
+#define NOUVEAU_BO_LOCAL (1 << 9)
+#define NOUVEAU_BO_TILED (1 << 10)
+#define NOUVEAU_BO_ZTILE (1 << 11)
+#define NOUVEAU_BO_DUMMY (1 << 31)
+
+struct nouveau_bo {
+ struct nouveau_device *device;
+ uint64_t handle;
+
+ uint64_t size;
+ void *map;
+
+ uint32_t flags;
+ uint64_t offset;
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_channel.h b/src/gallium/drivers/nouveau/nouveau_channel.h
new file mode 100644
index 00000000000..cd99a676bdc
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_channel.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2007 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NOUVEAU_CHANNEL_H__
+#define __NOUVEAU_CHANNEL_H__
+
+struct nouveau_channel {
+ struct nouveau_device *device;
+ int id;
+
+ struct nouveau_pushbuf *pushbuf;
+
+ struct nouveau_grobj *nullobj;
+ struct nouveau_grobj *vram;
+ struct nouveau_grobj *gart;
+
+ void *user_private;
+ void (*hang_notify)(struct nouveau_channel *);
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h
new file mode 100644
index 00000000000..3df3d7b2b83
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_class.h
@@ -0,0 +1,8006 @@
+/*************************************************************************
+
+ Autogenerated file, do not edit !
+
+**************************************************************************
+
+ Copyright (C) 2006-2008 :
+ Dmitry Baryshkov,
+ Laurent Carlier,
+ Matthieu Castet,
+ Dawid Gajownik,
+ Jeremy Kolb,
+ Stephane Loeuillet,
+ Patrice Mandin,
+ Stephane Marchesin,
+ Serge Martin,
+ Sylvain Munaut,
+ Simon Raffeiner,
+ Ben Skeggs,
+ Erik Waling,
+ koala_br,
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*************************************************************************/
+
+
+#ifndef NOUVEAU_REG_H
+#define NOUVEAU_REG_H 1
+
+
+#define NV01_ROOT 0x00000001
+
+
+
+#define NV01_CONTEXT_DMA 0x00000002
+
+
+
+#define NV01_DEVICE 0x00000003
+
+
+
+#define NV01_TIMER 0x00000004
+
+#define NV01_TIMER_SYNCHRONIZE 0x00000100
+#define NV01_TIMER_STOP_ALARM 0x00000104
+#define NV01_TIMER_DMA_NOTIFY 0x00000180
+#define NV01_TIMER_TIME(x) (0x00000300+((x)*4))
+#define NV01_TIMER_TIME__SIZE 0x00000002
+#define NV01_TIMER_ALARM_NOTIFY 0x00000308
+
+
+#define NV_IMAGE_STENCIL 0x00000010
+
+#define NV_IMAGE_STENCIL_NOTIFY 0x00000104
+#define NV_IMAGE_STENCIL_DMA_NOTIFY 0x00000180
+#define NV_IMAGE_STENCIL_IMAGE_OUTPUT 0x00000200
+#define NV_IMAGE_STENCIL_IMAGE_INPUT(x) (0x00000204+((x)*4))
+#define NV_IMAGE_STENCIL_IMAGE_INPUT__SIZE 0x00000002
+
+
+#define NV_IMAGE_BLEND_AND 0x00000011
+
+#define NV_IMAGE_BLEND_AND_NOP 0x00000100
+#define NV_IMAGE_BLEND_AND_NOTIFY 0x00000104
+#define NV_IMAGE_BLEND_AND_DMA_NOTIFY 0x00000180
+#define NV_IMAGE_BLEND_AND_IMAGE_OUTPUT 0x00000200
+#define NV_IMAGE_BLEND_AND_BETA_INPUT 0x00000204
+#define NV_IMAGE_BLEND_AND_IMAGE_INPUT 0x00000208
+
+
+#define NV01_CONTEXT_BETA1 0x00000012
+
+#define NV01_CONTEXT_BETA1_NOP 0x00000100
+#define NV01_CONTEXT_BETA1_NOTIFY 0x00000104
+#define NV01_CONTEXT_BETA1_DMA_NOTIFY 0x00000180
+#define NV01_CONTEXT_BETA1_BETA_1D31 0x00000300
+
+
+#define NV_IMAGE_ROP_AND 0x00000013
+
+#define NV_IMAGE_ROP_AND_NOTIFY 0x00000104
+#define NV_IMAGE_ROP_AND_DMA_NOTIFY 0x00000180
+#define NV_IMAGE_ROP_AND_IMAGE_OUTPUT 0x00000200
+#define NV_IMAGE_ROP_AND_ROP_INPUT 0x00000204
+#define NV_IMAGE_ROP_AND_IMAGE_INPUT(x) (0x00000208+((x)*4))
+#define NV_IMAGE_ROP_AND_IMAGE_INPUT__SIZE 0x00000002
+
+
+#define NV_IMAGE_COLOR_KEY 0x00000015
+
+
+
+#define NV01_CONTEXT_COLOR_KEY 0x00000017
+
+#define NV01_CONTEXT_COLOR_KEY_NOP 0x00000100
+#define NV01_CONTEXT_COLOR_KEY_NOTIFY 0x00000104
+#define NV01_CONTEXT_COLOR_KEY_DMA_NOTIFY 0x00000180
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT 0x00000300
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16A8Y8 0x00000001
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X24Y8 0x00000002
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16A1R5G5B5 0x00000003
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X17R5G5B5 0x00000004
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_A8R8G8B8 0x00000005
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X8R8G8B8 0x00000006
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_A16Y16 0x00000007
+#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16Y16 0x00000008
+#define NV01_CONTEXT_COLOR_KEY_COLOR 0x00000304
+
+
+#define NV01_CONTEXT_PATTERN 0x00000018
+
+#define NV01_CONTEXT_PATTERN_NOP 0x00000100
+#define NV01_CONTEXT_PATTERN_NOTIFY 0x00000104
+#define NV01_CONTEXT_PATTERN_DMA_NOTIFY 0x00000180
+#define NV01_CONTEXT_PATTERN_COLOR_FORMAT 0x00000300
+#define NV01_CONTEXT_PATTERN_MONOCHROME_FORMAT 0x00000304
+#define NV01_CONTEXT_PATTERN_SHAPE 0x00000308
+#define NV01_CONTEXT_PATTERN_COLOR(x) (0x00000310+((x)*4))
+#define NV01_CONTEXT_PATTERN_COLOR__SIZE 0x00000002
+#define NV01_CONTEXT_PATTERN_PATTERN(x) (0x00000318+((x)*4))
+#define NV01_CONTEXT_PATTERN_PATTERN__SIZE 0x00000002
+
+
+#define NV01_CONTEXT_CLIP_RECTANGLE 0x00000019
+
+#define NV01_CONTEXT_CLIP_RECTANGLE_NOP 0x00000100
+#define NV01_CONTEXT_CLIP_RECTANGLE_NOTIFY 0x00000104
+#define NV01_CONTEXT_CLIP_RECTANGLE_DMA_NOTIFY 0x00000180
+#define NV01_CONTEXT_CLIP_RECTANGLE_POINT 0x00000300
+#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_X_SHIFT 0
+#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_X_MASK 0x0000ffff
+#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y_SHIFT 16
+#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y_MASK 0xffff0000
+#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE 0x00000304
+#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_W_SHIFT 0
+#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_W_MASK 0x0000ffff
+#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_H_SHIFT 16
+#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_H_MASK 0xffff0000
+
+
+#define NV01_RENDER_SOLID_LINE 0x0000001c
+
+#define NV01_RENDER_SOLID_LINE_NOP 0x00000100
+#define NV01_RENDER_SOLID_LINE_NOTIFY 0x00000104
+#define NV01_RENDER_SOLID_LINE_PATCH 0x0000010c
+#define NV01_RENDER_SOLID_LINE_DMA_NOTIFY 0x00000180
+#define NV01_RENDER_SOLID_LINE_CLIP_RECTANGLE 0x00000184
+#define NV01_RENDER_SOLID_LINE_PATTERN 0x00000188
+#define NV01_RENDER_SOLID_LINE_ROP 0x0000018c
+#define NV01_RENDER_SOLID_LINE_BETA1 0x00000190
+#define NV01_RENDER_SOLID_LINE_SURFACE 0x00000194
+#define NV01_RENDER_SOLID_LINE_OPERATION 0x000002fc
+#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY_AND 0x00000000
+#define NV01_RENDER_SOLID_LINE_OPERATION_ROP_AND 0x00000001
+#define NV01_RENDER_SOLID_LINE_OPERATION_BLEND_AND 0x00000002
+#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY 0x00000003
+#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV01_RENDER_SOLID_LINE_OPERATION_BLEND_PREMULT 0x00000005
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT 0x00000300
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16A8Y8 0x00000001
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X24Y8 0x00000002
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16A1R5G5B5 0x00000003
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X17R5G5B5 0x00000004
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_A8R8G8B8 0x00000005
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X8R8G8B8 0x00000006
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_A16Y16 0x00000007
+#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16Y16 0x00000008
+#define NV01_RENDER_SOLID_LINE_COLOR 0x00000304
+#define NV01_RENDER_SOLID_LINE_LINE_POINT0(x) (0x00000400+((x)*8))
+#define NV01_RENDER_SOLID_LINE_LINE_POINT0__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_LINE_POINT0_X_SHIFT 0
+#define NV01_RENDER_SOLID_LINE_LINE_POINT0_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_LINE_LINE_POINT0_Y_SHIFT 16
+#define NV01_RENDER_SOLID_LINE_LINE_POINT0_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_LINE_LINE_POINT1(x) (0x00000404+((x)*8))
+#define NV01_RENDER_SOLID_LINE_LINE_POINT1__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_LINE_POINT1_X_SHIFT 0
+#define NV01_RENDER_SOLID_LINE_LINE_POINT1_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_LINE_LINE_POINT1_Y_SHIFT 16
+#define NV01_RENDER_SOLID_LINE_LINE_POINT1_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X(x) (0x00000480+((x)*16))
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y(x) (0x00000484+((x)*16))
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X(x) (0x00000488+((x)*16))
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y(x) (0x0000048c+((x)*16))
+#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_POLYLINE(x) (0x00000500+((x)*4))
+#define NV01_RENDER_SOLID_LINE_POLYLINE__SIZE 0x00000020
+#define NV01_RENDER_SOLID_LINE_POLYLINE_X_SHIFT 0
+#define NV01_RENDER_SOLID_LINE_POLYLINE_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_LINE_POLYLINE_Y_SHIFT 16
+#define NV01_RENDER_SOLID_LINE_POLYLINE_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X(x) (0x00000580+((x)*8))
+#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y(x) (0x00000584+((x)*8))
+#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR(x) (0x00000600+((x)*8))
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT(x) (0x00000604+((x)*8))
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT__SIZE 0x00000010
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_X_SHIFT 0
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_Y_SHIFT 16
+#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_Y_MASK 0xffff0000
+
+
+#define NV01_RENDER_SOLID_TRIANGLE 0x0000001d
+
+#define NV01_RENDER_SOLID_TRIANGLE_NOP 0x00000100
+#define NV01_RENDER_SOLID_TRIANGLE_NOTIFY 0x00000104
+#define NV01_RENDER_SOLID_TRIANGLE_PATCH 0x0000010c
+#define NV01_RENDER_SOLID_TRIANGLE_DMA_NOTIFY 0x00000180
+#define NV01_RENDER_SOLID_TRIANGLE_CLIP_RECTANGLE 0x00000184
+#define NV01_RENDER_SOLID_TRIANGLE_PATTERN 0x00000188
+#define NV01_RENDER_SOLID_TRIANGLE_ROP 0x0000018c
+#define NV01_RENDER_SOLID_TRIANGLE_BETA1 0x00000190
+#define NV01_RENDER_SOLID_TRIANGLE_SURFACE 0x00000194
+#define NV01_RENDER_SOLID_TRIANGLE_OPERATION 0x000002fc
+#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY_AND 0x00000000
+#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_ROP_AND 0x00000001
+#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_BLEND_AND 0x00000002
+#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY 0x00000003
+#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_BLEND_PREMULT 0x00000005
+#define NV01_RENDER_SOLID_TRIANGLE_COLOR_FORMAT 0x00000300
+#define NV01_RENDER_SOLID_TRIANGLE_COLOR 0x00000304
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0 0x00000310
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1 0x00000314
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2 0x00000318
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT0_X 0x00000320
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT0_Y 0x00000324
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT1_X 0x00000328
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT1_Y 0x0000032c
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT2_X 0x00000330
+#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT2_Y 0x00000334
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH(x) (0x00000400+((x)*4))
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH__SIZE 0x00000020
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X(x) (0x00000480+((x)*8))
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X__SIZE 0x00000010
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y(x) (0x00000484+((x)*8))
+#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y__SIZE 0x00000010
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR(x) (0x00000500+((x)*16))
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR__SIZE 0x00000008
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0(x) (0x00000504+((x)*16))
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0__SIZE 0x00000008
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1(x) (0x00000508+((x)*16))
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1__SIZE 0x00000008
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2(x) (0x0000050c+((x)*16))
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2__SIZE 0x00000008
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR(x) (0x00000580+((x)*8))
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR__SIZE 0x00000010
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT(x) (0x00000584+((x)*8))
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT__SIZE 0x00000010
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_X_SHIFT 0
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_Y_SHIFT 16
+#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_Y_MASK 0xffff0000
+
+
+#define NV01_RENDER_SOLID_RECTANGLE 0x0000001e
+
+#define NV01_RENDER_SOLID_RECTANGLE_NOP 0x00000100
+#define NV01_RENDER_SOLID_RECTANGLE_NOTIFY 0x00000104
+#define NV01_RENDER_SOLID_RECTANGLE_PATCH 0x0000010c
+#define NV01_RENDER_SOLID_RECTANGLE_DMA_NOTIFY 0x00000180
+#define NV01_RENDER_SOLID_RECTANGLE_CLIP_RECTANGLE 0x00000184
+#define NV01_RENDER_SOLID_RECTANGLE_PATTERN 0x00000188
+#define NV01_RENDER_SOLID_RECTANGLE_ROP 0x0000018c
+#define NV01_RENDER_SOLID_RECTANGLE_BETA1 0x00000190
+#define NV01_RENDER_SOLID_RECTANGLE_SURFACE 0x00000194
+#define NV01_RENDER_SOLID_RECTANGLE_OPERATION 0x000002fc
+#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY_AND 0x00000000
+#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_ROP_AND 0x00000001
+#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_BLEND_AND 0x00000002
+#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY 0x00000003
+#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_BLEND_PREMULT 0x00000005
+#define NV01_RENDER_SOLID_RECTANGLE_COLOR_FORMAT 0x00000300
+#define NV01_RENDER_SOLID_RECTANGLE_COLOR 0x00000304
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT(x) (0x00000400+((x)*8))
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT__SIZE 0x00000010
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_X_SHIFT 0
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_X_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_Y_SHIFT 16
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_Y_MASK 0xffff0000
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE(x) (0x00000404+((x)*8))
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE__SIZE 0x00000010
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_W_SHIFT 0
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_W_MASK 0x0000ffff
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_H_SHIFT 16
+#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_H_MASK 0xffff0000
+
+
+#define NV01_IMAGE_BLIT 0x0000001f
+
+#define NV01_IMAGE_BLIT_NOP 0x00000100
+#define NV01_IMAGE_BLIT_NOTIFY 0x00000104
+#define NV01_IMAGE_BLIT_PATCH 0x0000010c
+#define NV01_IMAGE_BLIT_DMA_NOTIFY 0x00000180
+#define NV01_IMAGE_BLIT_COLOR_KEY 0x00000184
+#define NV01_IMAGE_BLIT_CLIP_RECTANGLE 0x00000188
+#define NV01_IMAGE_BLIT_PATTERN 0x0000018c
+#define NV01_IMAGE_BLIT_ROP 0x00000190
+#define NV01_IMAGE_BLIT_BETA1 0x00000194
+#define NV01_IMAGE_BLIT_SURFACE 0x0000019c
+#define NV01_IMAGE_BLIT_OPERATION 0x000002fc
+#define NV01_IMAGE_BLIT_IMAGE_INPUT 0x00000204
+#define NV01_IMAGE_BLIT_POINT_IN 0x00000300
+#define NV01_IMAGE_BLIT_POINT_IN_X_SHIFT 0
+#define NV01_IMAGE_BLIT_POINT_IN_X_MASK 0x0000ffff
+#define NV01_IMAGE_BLIT_POINT_IN_Y_SHIFT 16
+#define NV01_IMAGE_BLIT_POINT_IN_Y_MASK 0xffff0000
+#define NV01_IMAGE_BLIT_POINT_OUT 0x00000304
+#define NV01_IMAGE_BLIT_POINT_OUT_X_SHIFT 0
+#define NV01_IMAGE_BLIT_POINT_OUT_X_MASK 0x0000ffff
+#define NV01_IMAGE_BLIT_POINT_OUT_Y_SHIFT 16
+#define NV01_IMAGE_BLIT_POINT_OUT_Y_MASK 0xffff0000
+#define NV01_IMAGE_BLIT_SIZE 0x00000308
+#define NV01_IMAGE_BLIT_SIZE_W_SHIFT 0
+#define NV01_IMAGE_BLIT_SIZE_W_MASK 0x0000ffff
+#define NV01_IMAGE_BLIT_SIZE_H_SHIFT 16
+#define NV01_IMAGE_BLIT_SIZE_H_MASK 0xffff0000
+
+
+#define NV01_IMAGE_FROM_CPU 0x00000021
+
+#define NV01_IMAGE_FROM_CPU_NOP 0x00000100
+#define NV01_IMAGE_FROM_CPU_NOTIFY 0x00000104
+#define NV01_IMAGE_FROM_CPU_PATCH 0x0000010c
+#define NV01_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180
+#define NV01_IMAGE_FROM_CPU_COLOR_KEY 0x00000184
+#define NV01_IMAGE_FROM_CPU_CLIP_RECTANGLE 0x00000188
+#define NV01_IMAGE_FROM_CPU_PATTERN 0x0000018c
+#define NV01_IMAGE_FROM_CPU_ROP 0x00000190
+#define NV01_IMAGE_FROM_CPU_BETA1 0x00000194
+#define NV01_IMAGE_FROM_CPU_SURFACE 0x00000198
+#define NV01_IMAGE_FROM_CPU_OPERATION 0x000002fc
+#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY_AND 0x00000000
+#define NV01_IMAGE_FROM_CPU_OPERATION_ROP_AND 0x00000001
+#define NV01_IMAGE_FROM_CPU_OPERATION_BLEND_AND 0x00000002
+#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY 0x00000003
+#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV01_IMAGE_FROM_CPU_OPERATION_BLEND_PREMULT 0x00000005
+#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT 0x00000300
+#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_Y8 0x00000001
+#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_A1R5G5B5 0x00000002
+#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_X1R5G5B5 0x00000003
+#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_A8R8G8B8 0x00000004
+#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_X8R8G8B8 0x00000005
+#define NV01_IMAGE_FROM_CPU_POINT 0x00000304
+#define NV01_IMAGE_FROM_CPU_POINT_X_SHIFT 0
+#define NV01_IMAGE_FROM_CPU_POINT_X_MASK 0x0000ffff
+#define NV01_IMAGE_FROM_CPU_POINT_Y_SHIFT 16
+#define NV01_IMAGE_FROM_CPU_POINT_Y_MASK 0xffff0000
+#define NV01_IMAGE_FROM_CPU_SIZE_OUT 0x00000308
+#define NV01_IMAGE_FROM_CPU_SIZE_OUT_W_SHIFT 0
+#define NV01_IMAGE_FROM_CPU_SIZE_OUT_W_MASK 0x0000ffff
+#define NV01_IMAGE_FROM_CPU_SIZE_OUT_H_SHIFT 16
+#define NV01_IMAGE_FROM_CPU_SIZE_OUT_H_MASK 0xffff0000
+#define NV01_IMAGE_FROM_CPU_SIZE_IN 0x0000030c
+#define NV01_IMAGE_FROM_CPU_SIZE_IN_W_SHIFT 0
+#define NV01_IMAGE_FROM_CPU_SIZE_IN_W_MASK 0x0000ffff
+#define NV01_IMAGE_FROM_CPU_SIZE_IN_H_SHIFT 16
+#define NV01_IMAGE_FROM_CPU_SIZE_IN_H_MASK 0xffff0000
+#define NV01_IMAGE_FROM_CPU_COLOR(x) (0x00000400+((x)*4))
+#define NV01_IMAGE_FROM_CPU_COLOR__SIZE 0x00000020
+
+
+#define NV01_NULL 0x00000030
+
+
+
+#define NV03_STRETCHED_IMAGE_FROM_CPU 0x00000036
+
+#define NV03_STRETCHED_IMAGE_FROM_CPU_NOP 0x00000100
+#define NV03_STRETCHED_IMAGE_FROM_CPU_NOTIFY 0x00000104
+#define NV03_STRETCHED_IMAGE_FROM_CPU_PATCH 0x0000010c
+#define NV03_STRETCHED_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180
+#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR_KEY 0x00000184
+#define NV03_STRETCHED_IMAGE_FROM_CPU_PATTERN 0x00000188
+#define NV03_STRETCHED_IMAGE_FROM_CPU_ROP 0x0000018c
+#define NV03_STRETCHED_IMAGE_FROM_CPU_BETA1 0x00000190
+#define NV03_STRETCHED_IMAGE_FROM_CPU_SURFACE 0x00000194
+#define NV03_STRETCHED_IMAGE_FROM_CPU_OPERATION 0x000002fc
+#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR_FORMAT 0x00000300
+#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN 0x00000304
+#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_W_SHIFT 0
+#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_W_MASK 0x0000ffff
+#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_H_SHIFT 16
+#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_H_MASK 0xffff0000
+#define NV03_STRETCHED_IMAGE_FROM_CPU_DX_DU 0x00000308
+#define NV03_STRETCHED_IMAGE_FROM_CPU_DY_DV 0x0000030c
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT 0x00000310
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_X_SHIFT 0
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_X_MASK 0x0000ffff
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_Y_SHIFT 16
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_Y_MASK 0xffff0000
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE 0x00000314
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_W_SHIFT 0
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_W_MASK 0x0000ffff
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_H_SHIFT 16
+#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_H_MASK 0xffff0000
+#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4 0x00000318
+#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_X_SHIFT 0
+#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_X_MASK 0x0000ffff
+#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_Y_SHIFT 16
+#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_Y_MASK 0xffff0000
+#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR(x) (0x00000400+((x)*4))
+#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR__SIZE 0x00000020
+
+
+#define NV03_SCALED_IMAGE_FROM_MEMORY 0x00000037
+
+#define NV03_SCALED_IMAGE_FROM_MEMORY_NOP 0x00000100
+#define NV03_SCALED_IMAGE_FROM_MEMORY_NOTIFY 0x00000104
+#define NV03_SCALED_IMAGE_FROM_MEMORY_DMA_NOTIFY 0x00000180
+#define NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE 0x00000184
+#define NV03_SCALED_IMAGE_FROM_MEMORY_PATTERN 0x00000188
+#define NV03_SCALED_IMAGE_FROM_MEMORY_ROP 0x0000018c
+#define NV03_SCALED_IMAGE_FROM_MEMORY_BETA1 0x00000190
+#define NV03_SCALED_IMAGE_FROM_MEMORY_SURFACE 0x00000194
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT 0x00000300
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5 0x00000001
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X1R5G5B5 0x00000002
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8 0x00000003
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8 0x00000004
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_V8YB8U8YA8 0x00000005
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_YB8V8YA8U8 0x00000006
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5 0x00000007
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8 0x00000008
+#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_AY8 0x00000009
+#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION 0x00000304
+#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_AND 0x00000000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_ROP_AND 0x00000001
+#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_AND 0x00000002
+#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY 0x00000003
+#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_PREMULT 0x00000005
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT 0x00000308
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_SHIFT 0
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_MASK 0x0000ffff
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT 16
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_MASK 0xffff0000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE 0x0000030c
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_SHIFT 0
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_MASK 0x0000ffff
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT 16
+#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_MASK 0xffff0000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT 0x00000310
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_X_SHIFT 0
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_X_MASK 0x0000ffff
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_Y_SHIFT 16
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_Y_MASK 0xffff0000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE 0x00000314
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_W_SHIFT 0
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_W_MASK 0x0000ffff
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_H_SHIFT 16
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_H_MASK 0xffff0000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_DELTA_DU_DX 0x00000318
+#define NV03_SCALED_IMAGE_FROM_MEMORY_DELTA_DV_DY 0x0000031c
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE 0x00000400
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_W_SHIFT 0
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_W_MASK 0x0000ffff
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_H_SHIFT 16
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_H_MASK 0xffff0000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT 0x00000404
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_PITCH_SHIFT 0
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_PITCH_MASK 0x0000ffff
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_SHIFT 16
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_MASK 0x00ff0000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_CENTER 0x00010000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_CORNER 0x00020000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_INTERPOLATOR_SHIFT 24
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_INTERPOLATOR_MASK 0xff000000
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_OFFSET 0x00000408
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT 0x0000040c
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_U_SHIFT 0
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_U_MASK 0x0000ffff
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_V_SHIFT 16
+#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_V_MASK 0xffff0000
+
+
+#define NV04_DVD_SUBPICTURE 0x00000038
+
+#define NV04_DVD_SUBPICTURE_NOP 0x00000100
+#define NV04_DVD_SUBPICTURE_NOTIFY 0x00000104
+#define NV04_DVD_SUBPICTURE_WAIT_FOR_IDLE 0x00000108
+#define NV04_DVD_SUBPICTURE_DMA_NOTIFY 0x00000180
+#define NV04_DVD_SUBPICTURE_DMA_OVERLAY 0x00000184
+#define NV04_DVD_SUBPICTURE_DMA_IMAGEIN 0x00000188
+#define NV04_DVD_SUBPICTURE_DMA_IMAGEOUT 0x0000018c
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT 0x00000300
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_X_SHIFT 0
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_X_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_Y_SHIFT 16
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_Y_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE 0x00000304
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_W_SHIFT 0
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_W_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_H_SHIFT 16
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_H_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT 0x00000308
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_PITCH_SHIFT 0
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_PITCH_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_COLOR_SHIFT 16
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_COLOR_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_IMAGEOUT_OFFSET 0x0000030c
+#define NV04_DVD_SUBPICTURE_IMAGEIN_DELTA_DU_DX 0x00000310
+#define NV04_DVD_SUBPICTURE_IMAGEIN_DELTA_DV_DY 0x00000314
+#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE 0x00000318
+#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_W_SHIFT 0
+#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_W_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_H_SHIFT 16
+#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_H_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT 0x0000031c
+#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_PITCH_SHIFT 0
+#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_PITCH_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_COLOR_SHIFT 16
+#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_COLOR_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_IMAGEIN_OFFSET 0x00000320
+#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT 0x00000324
+#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_U_SHIFT 0
+#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_U_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_V_SHIFT 16
+#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_V_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_OVERLAY_DELTA_DU_DX 0x00000328
+#define NV04_DVD_SUBPICTURE_OVERLAY_DELTA_DV_DY 0x0000032c
+#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE 0x00000330
+#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_W_SHIFT 0
+#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_W_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_H_SHIFT 16
+#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_H_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT 0x00000334
+#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_PITCH_SHIFT 0
+#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_PITCH_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_COLOR_SHIFT 16
+#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_COLOR_MASK 0xffff0000
+#define NV04_DVD_SUBPICTURE_OVERLAY_OFFSET 0x00000338
+#define NV04_DVD_SUBPICTURE_OVERLAY_POINT 0x0000033c
+#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_U_SHIFT 0
+#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_U_MASK 0x0000ffff
+#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_V_SHIFT 16
+#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_V_MASK 0xffff0000
+
+
+#define NV04_MEMORY_TO_MEMORY_FORMAT 0x00000039
+
+#define NV04_MEMORY_TO_MEMORY_FORMAT_NOP 0x00000100
+#define NV04_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000104
+#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY 0x00000180
+#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN 0x00000184
+#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_OUT 0x00000188
+#define NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN 0x0000030c
+#define NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT 0x00000310
+#define NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN 0x00000314
+#define NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT 0x00000318
+#define NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN 0x0000031c
+#define NV04_MEMORY_TO_MEMORY_FORMAT_LINE_COUNT 0x00000320
+#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT 0x00000324
+#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_INPUT_INC_SHIFT 0
+#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_INPUT_INC_MASK 0x0000000f
+#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_OUTPUT_INC_SHIFT 8
+#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_OUTPUT_INC_MASK 0x00000f00
+#define NV04_MEMORY_TO_MEMORY_FORMAT_BUF_NOTIFY 0x00000328
+
+
+#define NV01_MEMORY_LOCAL_BANKED 0x0000003d
+
+
+
+#define NV01_MAPPING_SYSTEM 0x0000003e
+
+
+
+#define NV03_MEMORY_LOCAL_CURSOR 0x0000003f
+
+
+
+#define NV01_MEMORY_LOCAL_LINEAR 0x00000040
+
+
+
+#define NV01_MAPPING_LOCAL 0x00000041
+
+
+
+#define NV04_CONTEXT_SURFACES_2D 0x00000042
+
+#define NV04_CONTEXT_SURFACES_2D_NOP 0x00000100
+#define NV04_CONTEXT_SURFACES_2D_NOTIFY 0x00000104
+#define NV04_CONTEXT_SURFACES_2D_PM_TRIGGER 0x00000140
+#define NV04_CONTEXT_SURFACES_2D_DMA_NOTIFY 0x00000180
+#define NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE 0x00000184
+#define NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_DESTIN 0x00000188
+#define NV04_CONTEXT_SURFACES_2D_FORMAT 0x00000300
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y8 0x00000001
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_Z1R5G5B5 0x00000002
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_X1R5G5B5 0x00000003
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5 0x00000004
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y16 0x00000005
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_Z8R8G8B8 0x00000006
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_X8R8G8B8 0x00000007
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1A7R8G8B8_Z1A7R8G8B8 0x00000008
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1A7R8G8B8_X1A7R8G8B8 0x00000009
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8 0x0000000a
+#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y32 0x0000000b
+#define NV04_CONTEXT_SURFACES_2D_PITCH 0x00000304
+#define NV04_CONTEXT_SURFACES_2D_PITCH_SOURCE_SHIFT 0
+#define NV04_CONTEXT_SURFACES_2D_PITCH_SOURCE_MASK 0x0000ffff
+#define NV04_CONTEXT_SURFACES_2D_PITCH_DESTIN_SHIFT 16
+#define NV04_CONTEXT_SURFACES_2D_PITCH_DESTIN_MASK 0xffff0000
+#define NV04_CONTEXT_SURFACES_2D_OFFSET_SOURCE 0x00000308
+#define NV04_CONTEXT_SURFACES_2D_OFFSET_DESTIN 0x0000030c
+
+
+#define NV03_CONTEXT_ROP 0x00000043
+
+#define NV03_CONTEXT_ROP_NOP 0x00000100
+#define NV03_CONTEXT_ROP_NOTIFY 0x00000104
+#define NV03_CONTEXT_ROP_DMA_NOTIFY 0x00000180
+#define NV03_CONTEXT_ROP_ROP 0x00000300
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_SHIFT 0
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_MASK 0x0000000f
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_CLEAR 0x00000000
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NOR 0x00000001
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND_INVERTED 0x00000002
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_COPY_INVERTED 0x00000003
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND_REVERSE 0x00000004
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_INVERT 0x00000005
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_XOR 0x00000006
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NAND 0x00000007
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND 0x00000008
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_EQUI 0x00000009
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NOOP 0x0000000a
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR_INVERTED 0x0000000b
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_COPY 0x0000000c
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR_REVERSE 0x0000000d
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR 0x0000000e
+#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_SET 0x0000000f
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_SHIFT 4
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_MASK 0x000000f0
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_CLEAR 0x00000000
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NOR 0x00000010
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND_INVERTED 0x00000020
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_COPY_INVERTED 0x00000030
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND_REVERSE 0x00000040
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_INVERT 0x00000050
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_XOR 0x00000060
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NAND 0x00000070
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND 0x00000080
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_EQUI 0x00000090
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NOOP 0x000000a0
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR_INVERTED 0x000000b0
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_COPY 0x000000c0
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR_REVERSE 0x000000d0
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR 0x000000e0
+#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_SET 0x000000f0
+
+
+#define NV04_IMAGE_PATTERN 0x00000044
+
+#define NV04_IMAGE_PATTERN_NOP 0x00000100
+#define NV04_IMAGE_PATTERN_NOTIFY 0x00000104
+#define NV04_IMAGE_PATTERN_DMA_NOTIFY 0x00000180
+#define NV04_IMAGE_PATTERN_COLOR_FORMAT 0x00000300
+#define NV04_IMAGE_PATTERN_COLOR_FORMAT_A16R5G6B5 0x00000001
+#define NV04_IMAGE_PATTERN_COLOR_FORMAT_X16A1R5G5B5 0x00000002
+#define NV04_IMAGE_PATTERN_COLOR_FORMAT_A8R8G8B8 0x00000003
+#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT 0x00000304
+#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_CGA6 0x00000001
+#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_LE 0x00000002
+#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE 0x00000308
+#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_8X8 0x00000000
+#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_64X1 0x00000001
+#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_1X64 0x00000002
+#define NV04_IMAGE_PATTERN_PATTERN_SELECT 0x0000030c
+#define NV04_IMAGE_PATTERN_PATTERN_SELECT_MONO 0x00000001
+#define NV04_IMAGE_PATTERN_PATTERN_SELECT_COLOR 0x00000002
+#define NV04_IMAGE_PATTERN_MONOCHROME_COLOR0 0x00000310
+#define NV04_IMAGE_PATTERN_MONOCHROME_COLOR1 0x00000314
+#define NV04_IMAGE_PATTERN_MONOCHROME_PATTERN0 0x00000318
+#define NV04_IMAGE_PATTERN_MONOCHROME_PATTERN1 0x0000031c
+#define NV04_IMAGE_PATTERN_PATTERN_Y8(x) (0x00000400+((x)*4))
+#define NV04_IMAGE_PATTERN_PATTERN_Y8__SIZE 0x00000010
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y0_SHIFT 0
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y0_MASK 0x000000ff
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y1_SHIFT 8
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y1_MASK 0x0000ff00
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y2_SHIFT 16
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y2_MASK 0x00ff0000
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y3_SHIFT 24
+#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y3_MASK 0xff000000
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5(x) (0x00000500+((x)*4))
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5__SIZE 0x00000020
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B0_SHIFT 0
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B0_MASK 0x0000001f
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G0_SHIFT 5
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G0_MASK 0x000007e0
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R0_SHIFT 11
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R0_MASK 0x0000f800
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B1_SHIFT 16
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B1_MASK 0x001f0000
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G1_SHIFT 21
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G1_MASK 0x07e00000
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R1_SHIFT 27
+#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R1_MASK 0xf8000000
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5(x) (0x00000600+((x)*4))
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5__SIZE 0x00000020
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B0_SHIFT 0
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B0_MASK 0x0000001f
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G0_SHIFT 5
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G0_MASK 0x000003e0
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R0_SHIFT 10
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R0_MASK 0x00007c00
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B1_SHIFT 16
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B1_MASK 0x001f0000
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G1_SHIFT 21
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G1_MASK 0x03e00000
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R1_SHIFT 26
+#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R1_MASK 0x7c000000
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8(x) (0x00000700+((x)*4))
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8__SIZE 0x00000040
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_B_SHIFT 0
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_B_MASK 0x000000ff
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_G_SHIFT 8
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_G_MASK 0x0000ff00
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_R_SHIFT 16
+#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_R_MASK 0x00ff0000
+
+
+#define NV03_VIDEO_LUT_CURSOR_DAC 0x00000046
+
+#define NV03_VIDEO_LUT_CURSOR_DAC_SYNCHRONIZE 0x00000100
+#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_IMAGE 0x00000104
+#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_CURSOR 0x00000108
+#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_DAC 0x0000010c
+#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_NOTIFY 0x00000180
+#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_IMAGE(x) (0x00000184+((x)*4))
+#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_IMAGE__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_LUT(x) (0x0000018c+((x)*4))
+#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_LUT__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_CURSOR(x) (0x00000194+((x)*4))
+#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_CURSOR__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_GET 0x000002fc
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_OFFSET(x) (0x00000300+((x)*8))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_OFFSET__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT(x) (0x00000304+((x)*8))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_PITCH_SHIFT 0
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_PITCH_MASK 0x0000ffff
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_COLOR_SHIFT 16
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_COLOR_MASK 0x0fff0000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_NOTIFY_SHIFT 28
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_NOTIFY_MASK 0xf0000000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_OFFSET(x) (0x00000340+((x)*12))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_OFFSET__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT(x) (0x00000344+((x)*12))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_X_SHIFT 0
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_X_MASK 0x0000ffff
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_Y_SHIFT 16
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_Y_MASK 0xffff0000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_FORMAT(x) (0x00000348+((x)*12))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_FORMAT__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A 0x00000358
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_X_SHIFT 0
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_X_MASK 0x0000ffff
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_Y_SHIFT 16
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_Y_MASK 0xffff0000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE(x) (0x00000380+((x)*16))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_W_SHIFT 0
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_W_MASK 0x0000ffff
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_H_SHIFT 16
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_H_MASK 0xffff0000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC(x) (0x00000384+((x)*16))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_START_SHIFT 0
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_START_MASK 0x0000ffff
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_WIDTH_SHIFT 16
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_WIDTH_MASK 0x0fff0000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_POLARITY_SHIFT 28
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_POLARITY_MASK 0xf0000000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC(x) (0x00000388+((x)*16))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_START_SHIFT 0
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_START_MASK 0x0000ffff
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_WIDTH_SHIFT 16
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_WIDTH_MASK 0x0fff0000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_POLARITY_SHIFT 28
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_POLARITY_MASK 0xf0000000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE(x) (0x0000038c+((x)*16))
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE__SIZE 0x00000002
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_WIDTH_SHIFT 0
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_WIDTH_MASK 0x0000ffff
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_HEIGHT_SHIFT 16
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_HEIGHT_MASK 0x0fff0000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_NOTIFY_SHIFT 28
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_NOTIFY_MASK 0xf0000000
+#define NV03_VIDEO_LUT_CURSOR_DAC_SET_PIXEL_CLOCK 0x000003a0
+
+
+#define NV03_DX3_TEXTURED_TRIANGLE 0x00000048
+
+#define NV03_DX3_TEXTURED_TRIANGLE_NOP 0x00000100
+#define NV03_DX3_TEXTURED_TRIANGLE_NOTIFY 0x00000104
+#define NV03_DX3_TEXTURED_TRIANGLE_PATCH 0x0000010c
+#define NV03_DX3_TEXTURED_TRIANGLE_DMA_NOTIFY 0x00000180
+#define NV03_DX3_TEXTURED_TRIANGLE_DMA_TEXTURE 0x00000184
+#define NV03_DX3_TEXTURED_TRIANGLE_CLIP_RECTANGLE 0x00000188
+#define NV03_DX3_TEXTURED_TRIANGLE_SURFACE 0x0000018c
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_OFFSET 0x00000304
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT 0x00000308
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_MASK_SHIFT 0
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_MASK_MASK 0x0000ffff
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_ENABLE_SHIFT 16
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_ENABLE_MASK 0x000f0000
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_SHIFT 20
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_MASK 0x00f00000
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MIN_SHIFT 24
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MIN_MASK 0x0f000000
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MAX_SHIFT 28
+#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MAX_MASK 0xf0000000
+#define NV03_DX3_TEXTURED_TRIANGLE_FILTER 0x0000030c
+#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_X_SHIFT 0
+#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_X_MASK 0x0000001f
+#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_Y_SHIFT 8
+#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_Y_MASK 0x00001f00
+#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SIZE_ADJUST_SHIFT 16
+#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SIZE_ADJUST_MASK 0x00ff0000
+#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR 0x00000310
+#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_B_SHIFT 0
+#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_B_MASK 0x000000ff
+#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_G_SHIFT 8
+#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_G_MASK 0x0000ff00
+#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_R_SHIFT 16
+#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_R_MASK 0x00ff0000
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT 0x00000314
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_INTERPOLATOR_SHIFT 0
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_INTERPOLATOR_MASK 0x0000000f
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_U_SHIFT 4
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_U_MASK 0x00000030
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_V_SHIFT 6
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_V_MASK 0x000000c0
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SOURCE_COLOR_SHIFT 8
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SOURCE_COLOR_MASK 0x00000f00
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_CULLING_SHIFT 12
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_CULLING_MASK 0x00007000
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_PERSPECTIVE_ENABLE (1 << 15)
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_FUNC_SHIFT 16
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_FUNC_MASK 0x000f0000
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_WRITE_ENABLE_SHIFT 20
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_WRITE_ENABLE_MASK 0x00f00000
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_COLOR_WRITE_ENABLE_SHIFT 24
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_COLOR_WRITE_ENABLE_MASK 0x07000000
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_ROP_SHIFT 27
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_ROP_MASK 0x18000000
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_BETA (1 << 29)
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_DST_BLEND (1 << 30)
+#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SRC_BLEND (1 << 31)
+#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL 0x00000318
+#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_REF_SHIFT 0
+#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_REF_MASK 0x000000ff
+#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_FUNC_SHIFT 8
+#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_FUNC_MASK 0xffffff00
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR(x) (0x00001000+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR__SIZE 0x00000040
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I0_SHIFT 0
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I0_MASK 0x0000000f
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I1_SHIFT 4
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I1_MASK 0x000000f0
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I2_SHIFT 8
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I2_MASK 0x00000f00
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I3_SHIFT 12
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I3_MASK 0x0000f000
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I4_SHIFT 16
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I4_MASK 0x000f0000
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I5_SHIFT 20
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I5_MASK 0x00f00000
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_FOG_SHIFT 24
+#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_FOG_MASK 0xff000000
+#define NV03_DX3_TEXTURED_TRIANGLE_COLOR(x) (0x00001004+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_COLOR__SIZE 0x00000040
+#define NV03_DX3_TEXTURED_TRIANGLE_X(x) (0x00001008+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_X__SIZE 0x00000040
+#define NV03_DX3_TEXTURED_TRIANGLE_Y(x) (0x0000100c+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_Y__SIZE 0x00000040
+#define NV03_DX3_TEXTURED_TRIANGLE_Z(x) (0x00001010+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_Z__SIZE 0x00000040
+#define NV03_DX3_TEXTURED_TRIANGLE_M(x) (0x00001014+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_M__SIZE 0x00000040
+#define NV03_DX3_TEXTURED_TRIANGLE_U(x) (0x00001018+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_U__SIZE 0x00000040
+#define NV03_DX3_TEXTURED_TRIANGLE_V(x) (0x0000101c+((x)*32))
+#define NV03_DX3_TEXTURED_TRIANGLE_V__SIZE 0x00000040
+
+
+#define NV04_GDI_RECTANGLE_TEXT 0x0000004a
+
+#define NV04_GDI_RECTANGLE_TEXT_NOP 0x00000100
+#define NV04_GDI_RECTANGLE_TEXT_NOTIFY 0x00000104
+#define NV04_GDI_RECTANGLE_TEXT_PATCH 0x0000010c
+#define NV04_GDI_RECTANGLE_TEXT_PM_TRIGGER 0x00000140
+#define NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY 0x00000180
+#define NV04_GDI_RECTANGLE_TEXT_DMA_FONTS 0x00000184
+#define NV04_GDI_RECTANGLE_TEXT_PATTERN 0x00000188
+#define NV04_GDI_RECTANGLE_TEXT_ROP 0x0000018c
+#define NV04_GDI_RECTANGLE_TEXT_BETA1 0x00000190
+#define NV04_GDI_RECTANGLE_TEXT_BETA4 0x00000194
+#define NV04_GDI_RECTANGLE_TEXT_SURFACE 0x00000198
+#define NV04_GDI_RECTANGLE_TEXT_OPERATION 0x000002fc
+#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY_AND 0x00000000
+#define NV04_GDI_RECTANGLE_TEXT_OPERATION_ROP_AND 0x00000001
+#define NV04_GDI_RECTANGLE_TEXT_OPERATION_BLEND_AND 0x00000002
+#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY 0x00000003
+#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV04_GDI_RECTANGLE_TEXT_OPERATION_BLEND_PREMULT 0x00000005
+#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT 0x00000300
+#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5 0x00000001
+#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_X16A1R5G5B5 0x00000002
+#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8 0x00000003
+#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT 0x00000304
+#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_CGA6 0x00000001
+#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE 0x00000002
+#define NV04_GDI_RECTANGLE_TEXT_COLOR1_A 0x000003fc
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(x) (0x00000400+((x)*8))
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT__SIZE 0x00000020
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE(x) (0x00000404+((x)*8))
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE__SIZE 0x00000020
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0 0x000005f4
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_L_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_L_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_T_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_T_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1 0x000005f8
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_R_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_R_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_B_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_B_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_COLOR1_B 0x000005fc
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0(x) (0x00000600+((x)*8))
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0__SIZE 0x00000020
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1(x) (0x00000604+((x)*8))
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1__SIZE 0x00000020
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0 0x000007ec
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1 0x000007f0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_COLOR1_C 0x000007f4
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_C 0x000007f8
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_W_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_W_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_H_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_H_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_POINT_C 0x000007fc
+#define NV04_GDI_RECTANGLE_TEXT_POINT_C_X_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_POINT_C_X_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_POINT_C_Y_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_POINT_C_Y_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C(x) (0x00000800+((x)*4))
+#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__SIZE 0x00000080
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0 0x00000be4
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1 0x00000be8
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_COLOR0_E 0x00000bec
+#define NV04_GDI_RECTANGLE_TEXT_COLOR1_E 0x00000bf0
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E 0x00000bf4
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E 0x00000bf8
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_POINT_E 0x00000bfc
+#define NV04_GDI_RECTANGLE_TEXT_POINT_E_X_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_POINT_E_X_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_POINT_E_Y_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_POINT_E_Y_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E(x) (0x00000c00+((x)*4))
+#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__SIZE 0x00000080
+#define NV04_GDI_RECTANGLE_TEXT_FONT_F 0x00000ff0
+#define NV04_GDI_RECTANGLE_TEXT_FONT_F_OFFSET_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_FONT_F_OFFSET_MASK 0x0fffffff
+#define NV04_GDI_RECTANGLE_TEXT_FONT_F_PITCH_SHIFT 28
+#define NV04_GDI_RECTANGLE_TEXT_FONT_F_PITCH_MASK 0xf0000000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0 0x00000ff4
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_L_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_L_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_T_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_T_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1 0x00000ff8
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_R_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_R_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_B_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_B_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_COLOR1_F 0x00000ffc
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F(x) (0x00001000+((x)*4))
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F__SIZE 0x00000100
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_INDEX_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_INDEX_MASK 0x000000ff
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_X_SHIFT 8
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_X_MASK 0x000fff00
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_Y_SHIFT 20
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_Y_MASK 0xfff00000
+#define NV04_GDI_RECTANGLE_TEXT_FONT_G 0x000017f0
+#define NV04_GDI_RECTANGLE_TEXT_FONT_G_OFFSET_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_FONT_G_OFFSET_MASK 0x0fffffff
+#define NV04_GDI_RECTANGLE_TEXT_FONT_G_PITCH_SHIFT 28
+#define NV04_GDI_RECTANGLE_TEXT_FONT_G_PITCH_MASK 0xf0000000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0 0x000017f4
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_L_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_L_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_T_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_T_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1 0x000017f8
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_R_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_R_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_B_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_B_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_COLOR1_G 0x000017fc
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT(x) (0x00001800+((x)*8))
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT__SIZE 0x00000100
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_X_SHIFT 0
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_X_MASK 0x0000ffff
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_Y_SHIFT 16
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_Y_MASK 0xffff0000
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX(x) (0x00001804+((x)*8))
+#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX__SIZE 0x00000100
+
+
+#define NV03_GDI_RECTANGLE_TEXT 0x0000004b
+
+#define NV03_GDI_RECTANGLE_TEXT_NOP 0x00000100
+#define NV03_GDI_RECTANGLE_TEXT_NOTIFY 0x00000104
+#define NV03_GDI_RECTANGLE_TEXT_DMA_NOTIFY 0x00000180
+#define NV03_GDI_RECTANGLE_TEXT_PATTERN 0x00000184
+#define NV03_GDI_RECTANGLE_TEXT_ROP 0x00000188
+#define NV03_GDI_RECTANGLE_TEXT_BETA1 0x0000018c
+#define NV03_GDI_RECTANGLE_TEXT_SURFACE 0x00000190
+#define NV03_GDI_RECTANGLE_TEXT_OPERATION 0x000002fc
+#define NV03_GDI_RECTANGLE_TEXT_COLOR_FORMAT 0x00000300
+#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT 0x00000304
+#define NV03_GDI_RECTANGLE_TEXT_COLOR1_A 0x000003fc
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT 0x00000400
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE 0x00000404
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B 0x000007f4
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_L_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_L_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_T_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_T_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B 0x000007f8
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_R_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_R_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_B_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_B_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_COLOR1_B 0x000007fc
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0 0x00000800
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1 0x00000804
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0 0x00000bec
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1 0x00000bf0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_COLOR1_C 0x00000bf4
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_C 0x00000bf8
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_W_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_W_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_H_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_H_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_POINT_C 0x00000bfc
+#define NV03_GDI_RECTANGLE_TEXT_POINT_C_X_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_POINT_C_X_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_POINT_C_Y_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_POINT_C_Y_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C(x) (0x00000c00+((x)*4))
+#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__SIZE 0x00000020
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0 0x00000fe8
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_L_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_L_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_T_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_T_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1 0x00000fec
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_R_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_R_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_B_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_B_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_COLOR1_D 0x00000ff0
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D 0x00000ff4
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_W_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_W_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_H_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_H_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D 0x00000ff8
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_W_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_W_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_H_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_H_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_POINT_D 0x00000ffc
+#define NV03_GDI_RECTANGLE_TEXT_POINT_D_X_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_POINT_D_X_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_POINT_D_Y_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_POINT_D_Y_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D(x) (0x00001000+((x)*4))
+#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D__SIZE 0x00000020
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0 0x000013e4
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1 0x000013e8
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_COLOR0_E 0x000013ec
+#define NV03_GDI_RECTANGLE_TEXT_COLOR1_E 0x000013f0
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E 0x000013f4
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E 0x000013f8
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_POINT_E 0x000013fc
+#define NV03_GDI_RECTANGLE_TEXT_POINT_E_X_SHIFT 0
+#define NV03_GDI_RECTANGLE_TEXT_POINT_E_X_MASK 0x0000ffff
+#define NV03_GDI_RECTANGLE_TEXT_POINT_E_Y_SHIFT 16
+#define NV03_GDI_RECTANGLE_TEXT_POINT_E_Y_MASK 0xffff0000
+#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E(x) (0x00001400+((x)*4))
+#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__SIZE 0x00000020
+
+
+#define NV04_SWIZZLED_SURFACE 0x00000052
+
+#define NV04_SWIZZLED_SURFACE_NOP 0x00000100
+#define NV04_SWIZZLED_SURFACE_NOTIFY 0x00000104
+#define NV04_SWIZZLED_SURFACE_DMA_NOTIFY 0x00000180
+#define NV04_SWIZZLED_SURFACE_DMA_IMAGE 0x00000184
+#define NV04_SWIZZLED_SURFACE_FORMAT 0x00000300
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_SHIFT 0
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_MASK 0x000000ff
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y8 0x00000001
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1R5G5B5_Z1R5G5B5 0x00000002
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1R5G5B5_X1R5G5B5 0x00000003
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_R5G6B5 0x00000004
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y16 0x00000005
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X8R8G8B8_Z8R8G8B8 0x00000006
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X8R8G8B8_X8R8G8B8 0x00000007
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1A7R8G8B8_Z1A7R8G8B8 0x00000008
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1A7R8G8B8_X1A7R8G8B8 0x00000009
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_A8R8G8B8 0x0000000a
+#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y32 0x0000000b
+#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT 16
+#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_MASK 0x00ff0000
+#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT 24
+#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_MASK 0xff000000
+#define NV04_SWIZZLED_SURFACE_OFFSET 0x00000304
+
+
+#define NV04_CONTEXT_SURFACES_3D 0x00000053
+
+#define NV04_CONTEXT_SURFACES_3D_NOP 0x00000100
+#define NV04_CONTEXT_SURFACES_3D_NOTIFY 0x00000104
+#define NV04_CONTEXT_SURFACES_3D_DMA_NOTIFY 0x00000180
+#define NV04_CONTEXT_SURFACES_3D_DMA_COLOR 0x00000184
+#define NV04_CONTEXT_SURFACES_3D_DMA_ZETA 0x00000188
+#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL 0x000002f8
+#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_X_SHIFT 0
+#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_X_MASK 0x0000ffff
+#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_W_SHIFT 16
+#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_W_MASK 0xffff0000
+#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL 0x000002fc
+#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_Y_SHIFT 0
+#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_Y_MASK 0x0000ffff
+#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_H_SHIFT 16
+#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_H_MASK 0xffff0000
+#define NV04_CONTEXT_SURFACES_3D_FORMAT 0x00000300
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_SHIFT 0
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_MASK 0x000000ff
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1R5G5B5_Z1R5G5B5 0x00000001
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1R5G5B5_X1R5G5B5 0x00000002
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_R5G6B5 0x00000003
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X8R8G8B8_Z8R8G8B8 0x00000004
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X8R8G8B8_X8R8G8B8 0x00000005
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1A7R8G8B8_Z1A7R8G8B8 0x00000006
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1A7R8G8B8_X1A7R8G8B8 0x00000007
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_A8R8G8B8 0x00000008
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_SHIFT 8
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_MASK 0x0000ff00
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_PITCH 0x00000100
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_SWIZZLE 0x00000200
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_U_SHIFT 16
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_U_MASK 0x00ff0000
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_V_SHIFT 24
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_V_MASK 0xff000000
+#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE 0x00000304
+#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_W_SHIFT 0
+#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_W_MASK 0x0000ffff
+#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_H_SHIFT 16
+#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_H_MASK 0xffff0000
+#define NV04_CONTEXT_SURFACES_3D_PITCH 0x00000308
+#define NV04_CONTEXT_SURFACES_3D_PITCH_COLOR_SHIFT 0
+#define NV04_CONTEXT_SURFACES_3D_PITCH_COLOR_MASK 0x0000ffff
+#define NV04_CONTEXT_SURFACES_3D_PITCH_ZETA_SHIFT 16
+#define NV04_CONTEXT_SURFACES_3D_PITCH_ZETA_MASK 0xffff0000
+#define NV04_CONTEXT_SURFACES_3D_OFFSET_COLOR 0x0000030c
+#define NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA 0x00000310
+
+
+#define NV04_DX5_TEXTURED_TRIANGLE 0x00000054
+
+#define NV04_DX5_TEXTURED_TRIANGLE_NOP 0x00000100
+#define NV04_DX5_TEXTURED_TRIANGLE_NOTIFY 0x00000104
+#define NV04_DX5_TEXTURED_TRIANGLE_DMA_NOTIFY 0x00000180
+#define NV04_DX5_TEXTURED_TRIANGLE_DMA_A 0x00000184
+#define NV04_DX5_TEXTURED_TRIANGLE_DMA_B 0x00000188
+#define NV04_DX5_TEXTURED_TRIANGLE_SURFACE 0x0000018c
+#define NV04_DX5_TEXTURED_TRIANGLE_COLORKEY 0x00000300
+#define NV04_DX5_TEXTURED_TRIANGLE_OFFSET 0x00000304
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT 0x00000308
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_DMA_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_DMA_MASK 0x00000003
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_KEY_MATCH_SHIFT 2
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_KEY_MATCH_MASK 0x0000000c
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_SHIFT 4
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_MASK 0x00000030
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CENTER 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER 0x00000020
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_SHIFT 6
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_MASK 0x000000c0
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CENTER 0x00000040
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER 0x00000080
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_MASK 0x00000f00
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_Y8 0x00000100
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A1R5G5B5 0x00000200
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_X1R5G5B5 0x00000300
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A4R4G4B4 0x00000400
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_R5G6B5 0x00000500
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A8R8G8B8 0x00000600
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_X8R8G8B8 0x00000700
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT 12
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_MASK 0x0000f000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_MASK 0x000f0000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT 20
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_MASK 0x00f00000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT 24
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MASK 0x07000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT 0x01000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT 0x02000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE 0x03000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER 0x04000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP 0x05000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_WRAPU (1 << 27)
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT 28
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_MASK 0x70000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_REPEAT 0x10000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_MIRRORED_REPEAT 0x20000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE 0x30000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_BORDER 0x40000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP 0x50000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_WRAPV (1 << 31)
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER 0x0000030c
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_X_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_X_MASK 0x000000ff
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_Y_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_Y_MASK 0x00007f00
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_DITHER_ENABLE (1 << 15)
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_LODBIAS_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_LODBIAS_MASK 0x00ff0000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_SHIFT 24
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_MASK 0x07000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST 0x01000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR 0x02000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x03000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x04000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x05000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x06000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE (1 << 27)
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_SHIFT 28
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_MASK 0x70000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST 0x10000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR 0x20000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE (1 << 31)
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND 0x00000310
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_MASK 0x0000000f
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_MASK_BIT_SHIFT 4
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_MASK_BIT_MASK 0x00000030
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_SHIFT 6
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_MASK 0x000000c0
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT 0x00000040
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD 0x00000080
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_PHONG 0x000000c0
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_MASK 0x00000f00
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SPECULAR_ENABLE_SHIFT 12
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SPECULAR_ENABLE_MASK 0x0000f000
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_FOG_ENABLE_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_FOG_ENABLE_MASK 0x000f0000
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_ALPHA_ENABLE_SHIFT 20
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_ALPHA_ENABLE_MASK 0x00f00000
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SRC_SHIFT 24
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SRC_MASK 0x0f000000
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_DST_SHIFT 28
+#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_DST_MASK 0xf0000000
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL 0x00000314
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_REF_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_REF_MASK 0x000000ff
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_MASK 0x00000f00
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE (1 << 12)
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN (1 << 13)
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT 14
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_MASK 0x0000c000
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_MASK 0x000f0000
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT 20
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_MASK 0x00300000
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE (1 << 22)
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE (1 << 23)
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT 24
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_MASK 0x3f000000
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT 30
+#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_MASK 0xc0000000
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR 0x00000318
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_B_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_B_MASK 0x000000ff
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_G_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_G_MASK 0x0000ff00
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_R_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_R_MASK 0x00ff0000
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_A_SHIFT 24
+#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_A_MASK 0xff000000
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(x) (0x00000400+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SY(x) (0x00000404+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SY__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SZ(x) (0x00000408+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SZ__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_RHW(x) (0x0000040c+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_RHW__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR(x) (0x00000410+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_B_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_B_MASK 0x000000ff
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_G_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_G_MASK 0x0000ff00
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_R_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_R_MASK 0x00ff0000
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_A_SHIFT 24
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_A_MASK 0xff000000
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR(x) (0x00000414+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_B_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_B_MASK 0x000000ff
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_G_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_G_MASK 0x0000ff00
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_R_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_R_MASK 0x00ff0000
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_FOG_SHIFT 24
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_FOG_MASK 0xff000000
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TU(x) (0x00000418+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TU__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TV(x) (0x0000041c+((x)*32))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TV__SIZE 0x00000010
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(x) (0x00000600+((x)*4))
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE__SIZE 0x00000040
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I0_SHIFT 0
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I0_MASK 0x0000000f
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I1_SHIFT 4
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I1_MASK 0x000000f0
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I2_SHIFT 8
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I2_MASK 0x00000f00
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I3_SHIFT 12
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I3_MASK 0x0000f000
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I4_SHIFT 16
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I4_MASK 0x000f0000
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I5_SHIFT 20
+#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I5_MASK 0x00f00000
+
+
+#define NV04_DX6_MULTITEX_TRIANGLE 0x00000055
+
+#define NV04_DX6_MULTITEX_TRIANGLE_NOP 0x00000100
+#define NV04_DX6_MULTITEX_TRIANGLE_NOTIFY 0x00000104
+#define NV04_DX6_MULTITEX_TRIANGLE_DMA_NOTIFY 0x00000180
+#define NV04_DX6_MULTITEX_TRIANGLE_DMA_A 0x00000184
+#define NV04_DX6_MULTITEX_TRIANGLE_DMA_B 0x00000188
+#define NV04_DX6_MULTITEX_TRIANGLE_SURFACE 0x0000018c
+#define NV04_DX6_MULTITEX_TRIANGLE_OFFSET(x) (0x00000308+((x)*4))
+#define NV04_DX6_MULTITEX_TRIANGLE_OFFSET__SIZE 0x00000002
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT(x) (0x00000310+((x)*4))
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT__SIZE 0x00000002
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_DMA_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_DMA_MASK 0x0000000f
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_ZOH_SHIFT 4
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_ZOH_MASK 0x00000030
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_FOH_SHIFT 6
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_FOH_MASK 0x000000c0
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_COLOR_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_COLOR_MASK 0x00000f00
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT 12
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_MIPMAP_LEVELS_MASK 0x0000f000
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_U_MASK 0x000f0000
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT 20
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_V_MASK 0x00f00000
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSU_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSU_MASK 0x07000000
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_WRAPU (1 << 27)
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSV_SHIFT 28
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSV_MASK 0x70000000
+#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_WRAPV (1 << 31)
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER(x) (0x00000318+((x)*4))
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER__SIZE 0x00000002
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_X_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_X_MASK 0x000000ff
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_Y_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_Y_MASK 0x00007f00
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_DITHER_ENABLE (1 << 15)
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_LODBIAS_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_LODBIAS_MASK 0x00ff0000
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MINIFY_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MINIFY_MASK 0x07000000
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE (1 << 27)
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MAGNIFY_SHIFT 28
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MAGNIFY_MASK 0x70000000
+#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE (1 << 31)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA 0x00000320
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE0 (1 << 0)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA0 (1 << 1)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT0_SHIFT 2
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT0_MASK 0x000000fc
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE1 (1 << 8)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA1 (1 << 9)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT1_SHIFT 10
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT1_MASK 0x0000fc00
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE2 (1 << 16)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA2 (1 << 17)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT2_SHIFT 18
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT2_MASK 0x00fc0000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE3 (1 << 24)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA3 (1 << 25)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT3_SHIFT 26
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT3_MASK 0x1c000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_OPERATION_SHIFT 29
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_OPERATION_MASK 0xe0000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR 0x00000324
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE0 (1 << 0)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA0 (1 << 1)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT0_SHIFT 2
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT0_MASK 0x000000fc
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE1 (1 << 8)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA1 (1 << 9)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT1_SHIFT 10
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT1_MASK 0x0000fc00
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE2 (1 << 16)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA2 (1 << 17)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT2_SHIFT 18
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT2_MASK 0x00fc0000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE3 (1 << 24)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA3 (1 << 25)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT3_SHIFT 26
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT3_MASK 0x1c000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_OPERATION_SHIFT 29
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_OPERATION_MASK 0xe0000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA 0x0000032c
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE0 (1 << 0)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA0 (1 << 1)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT0_SHIFT 2
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT0_MASK 0x000000fc
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE1 (1 << 8)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA1 (1 << 9)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT1_SHIFT 10
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT1_MASK 0x0000fc00
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE2 (1 << 16)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA2 (1 << 17)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT2_SHIFT 18
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT2_MASK 0x00fc0000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE3 (1 << 24)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA3 (1 << 25)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT3_SHIFT 26
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT3_MASK 0x1c000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_OPERATION_SHIFT 29
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_OPERATION_MASK 0xe0000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR 0x00000330
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE0 (1 << 0)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA0 (1 << 1)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT0_SHIFT 2
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT0_MASK 0x000000fc
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE1 (1 << 8)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA1 (1 << 9)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT1_SHIFT 10
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT1_MASK 0x0000fc00
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE2 (1 << 16)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA2 (1 << 17)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT2_SHIFT 18
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT2_MASK 0x00fc0000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE3 (1 << 24)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA3 (1 << 25)
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT3_SHIFT 26
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT3_MASK 0x1c000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_OPERATION_SHIFT 29
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_OPERATION_MASK 0xe0000000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR 0x00000334
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_B_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_B_MASK 0x000000ff
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_G_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_G_MASK 0x0000ff00
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_R_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_R_MASK 0x00ff0000
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_A_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_A_MASK 0xff000000
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND 0x00000338
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_MASK_BIT_SHIFT 4
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_MASK_BIT_MASK 0x00000030
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_SHIFT 6
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_MASK 0x000000c0
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_MASK 0x00000f00
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SPECULAR_ENABLE_SHIFT 12
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SPECULAR_ENABLE_MASK 0x0000f000
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_FOG_ENABLE_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_FOG_ENABLE_MASK 0x000f0000
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_ALPHA_ENABLE_SHIFT 20
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_ALPHA_ENABLE_MASK 0x00f00000
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SRC_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SRC_MASK 0x0f000000
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_DST_SHIFT 28
+#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_DST_MASK 0xf0000000
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0 0x0000033c
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_REF_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_REF_MASK 0x000000ff
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_MASK 0x00000f00
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_TEST_ENABLE (1 << 12)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ORIGIN (1 << 13)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_ENABLE_SHIFT 14
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_ENABLE_MASK 0x0000c000
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_MASK 0x000f0000
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_SHIFT 20
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_MASK 0x00300000
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_DITHER_ENABLE (1 << 22)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_PERSPECTIVE_ENABLE (1 << 23)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_WRITE_ENABLE (1 << 24)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_STENCIL_WRITE_ENABLE (1 << 25)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_WRITE_ENABLE (1 << 26)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_RED_WRITE_ENABLE (1 << 27)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_GREEN_WRITE_ENABLE (1 << 28)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_BLUE_WRITE_ENABLE (1 << 29)
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FORMAT_SHIFT 30
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FORMAT_MASK 0xc0000000
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1 0x00000340
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_TEST_ENABLE_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_TEST_ENABLE_MASK 0x0000000f
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_FUNC_SHIFT 4
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_FUNC_MASK 0x000000f0
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_REF_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_REF_MASK 0x0000ff00
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_READ_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_READ_MASK 0x00ff0000
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_WRITE_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_WRITE_MASK 0xff000000
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2 0x00000344
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_FAIL_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_FAIL_MASK 0x0000000f
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZFAIL_SHIFT 4
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZFAIL_MASK 0x000000f0
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZPASS_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZPASS_MASK 0x00000f00
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR 0x00000348
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_B_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_B_MASK 0x000000ff
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_G_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_G_MASK 0x0000ff00
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_R_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_R_MASK 0x00ff0000
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_A_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_A_MASK 0xff000000
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SX(x) (0x00000400+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SX__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SY(x) (0x00000404+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SY__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SZ(x) (0x00000408+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SZ__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_RHW(x) (0x0000040c+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_RHW__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR(x) (0x00000410+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_B_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_B_MASK 0x000000ff
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_G_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_G_MASK 0x0000ff00
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_R_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_R_MASK 0x00ff0000
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_A_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_A_MASK 0xff000000
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR(x) (0x00000414+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_B_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_B_MASK 0x000000ff
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_G_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_G_MASK 0x0000ff00
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_R_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_R_MASK 0x00ff0000
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_FOG_SHIFT 24
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_FOG_MASK 0xff000000
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU0(x) (0x00000418+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU0__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV0(x) (0x0000041c+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV0__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU1(x) (0x00000420+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU1__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV1(x) (0x00000424+((x)*40))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV1__SIZE 0x00000008
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE(x) (0x00000540+((x)*4))
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE__SIZE 0x00000030
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I0_SHIFT 0
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I0_MASK 0x0000000f
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I1_SHIFT 4
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I1_MASK 0x000000f0
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I2_SHIFT 8
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I2_MASK 0x00000f00
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I3_SHIFT 12
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I3_MASK 0x0000f000
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I4_SHIFT 16
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I4_MASK 0x000f0000
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I5_SHIFT 20
+#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I5_MASK 0x00f00000
+
+
+#define NV10_DX5_TEXTURED_TRIANGLE 0x00000094
+
+
+
+#define NV10TCL 0x00000056
+
+#define NV10TCL_NOP 0x00000100
+#define NV10TCL_NOTIFY 0x00000104
+#define NV10TCL_DMA_NOTIFY 0x00000180
+#define NV10TCL_DMA_IN_MEMORY0 0x00000184
+#define NV10TCL_DMA_IN_MEMORY1 0x00000188
+#define NV10TCL_DMA_VTXBUF0 0x0000018c
+#define NV10TCL_DMA_IN_MEMORY2 0x00000194
+#define NV10TCL_DMA_IN_MEMORY3 0x00000198
+#define NV10TCL_RT_HORIZ 0x00000200
+#define NV10TCL_RT_HORIZ_X_SHIFT 0
+#define NV10TCL_RT_HORIZ_X_MASK 0x0000ffff
+#define NV10TCL_RT_HORIZ_W_SHIFT 16
+#define NV10TCL_RT_HORIZ_W_MASK 0xffff0000
+#define NV10TCL_RT_VERT 0x00000204
+#define NV10TCL_RT_VERT_Y_SHIFT 0
+#define NV10TCL_RT_VERT_Y_MASK 0x0000ffff
+#define NV10TCL_RT_VERT_H_SHIFT 16
+#define NV10TCL_RT_VERT_H_MASK 0xffff0000
+#define NV10TCL_RT_FORMAT 0x00000208
+#define NV10TCL_RT_FORMAT_TYPE_SHIFT 8
+#define NV10TCL_RT_FORMAT_TYPE_MASK 0x00000f00
+#define NV10TCL_RT_FORMAT_TYPE_LINEAR 0x00000100
+#define NV10TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200
+#define NV10TCL_RT_FORMAT_COLOR_SHIFT 0
+#define NV10TCL_RT_FORMAT_COLOR_MASK 0x0000001f
+#define NV10TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003
+#define NV10TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005
+#define NV10TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008
+#define NV10TCL_RT_FORMAT_COLOR_B8 0x00000009
+#define NV10TCL_RT_FORMAT_COLOR_UNKNOWN 0x0000000d
+#define NV10TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f
+#define NV10TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010
+#define NV10TCL_RT_PITCH 0x0000020c
+#define NV10TCL_RT_PITCH_COLOR_PITCH_SHIFT 0
+#define NV10TCL_RT_PITCH_COLOR_PITCH_MASK 0x0000ffff
+#define NV10TCL_RT_PITCH_ZETA_PITCH_SHIFT 16
+#define NV10TCL_RT_PITCH_ZETA_PITCH_MASK 0xffff0000
+#define NV10TCL_COLOR_OFFSET 0x00000210
+#define NV10TCL_ZETA_OFFSET 0x00000214
+#define NV10TCL_TX_OFFSET(x) (0x00000218+((x)*4))
+#define NV10TCL_TX_OFFSET__SIZE 0x00000002
+#define NV10TCL_TX_FORMAT(x) (0x00000220+((x)*4))
+#define NV10TCL_TX_FORMAT__SIZE 0x00000002
+#define NV10TCL_TX_FORMAT_DMA0 (1 << 0)
+#define NV10TCL_TX_FORMAT_DMA1 (1 << 1)
+#define NV10TCL_TX_FORMAT_CUBE_MAP (1 << 2)
+#define NV10TCL_TX_FORMAT_FORMAT_SHIFT 7
+#define NV10TCL_TX_FORMAT_FORMAT_MASK 0x00000780
+#define NV10TCL_TX_FORMAT_FORMAT_L8 0x00000000
+#define NV10TCL_TX_FORMAT_FORMAT_A8 0x00000080
+#define NV10TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000100
+#define NV10TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000180
+#define NV10TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000200
+#define NV10TCL_TX_FORMAT_FORMAT_R5G6B5 0x00000280
+#define NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000300
+#define NV10TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000380
+#define NV10TCL_TX_FORMAT_FORMAT_INDEX8 0x00000580
+#define NV10TCL_TX_FORMAT_FORMAT_DXT1 0x00000600
+#define NV10TCL_TX_FORMAT_FORMAT_DXT3 0x00000700
+#define NV10TCL_TX_FORMAT_FORMAT_DXT5 0x00000780
+#define NV10TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00000800
+#define NV10TCL_TX_FORMAT_FORMAT_R5G6B5_RECT 0x00000880
+#define NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00000900
+#define NV10TCL_TX_FORMAT_FORMAT_L8_RECT 0x00000980
+#define NV10TCL_TX_FORMAT_FORMAT_A8L8 0x00000d00
+#define NV10TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00000d80
+#define NV10TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00000e80
+#define NV10TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00000f00
+#define NV10TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00001000
+#define NV10TCL_TX_FORMAT_FORMAT_DSDT 0x00001400
+#define NV10TCL_TX_FORMAT_FORMAT_A16 0x00001900
+#define NV10TCL_TX_FORMAT_FORMAT_HILO16 0x00001980
+#define NV10TCL_TX_FORMAT_FORMAT_A16_RECT 0x00001a80
+#define NV10TCL_TX_FORMAT_FORMAT_HILO16_RECT 0x00001b00
+#define NV10TCL_TX_FORMAT_FORMAT_HILO8 0x00002200
+#define NV10TCL_TX_FORMAT_FORMAT_SIGNED_HILO8 0x00002280
+#define NV10TCL_TX_FORMAT_FORMAT_HILO8_RECT 0x00002300
+#define NV10TCL_TX_FORMAT_FORMAT_SIGNED_HILO8_RECT 0x00002380
+#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00002500
+#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00002580
+#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00002600
+#define NV10TCL_TX_FORMAT_NPOT (1 << 11)
+#define NV10TCL_TX_FORMAT_MIPMAP (1 << 15)
+#define NV10TCL_TX_FORMAT_BASE_SIZE_U_SHIFT 16
+#define NV10TCL_TX_FORMAT_BASE_SIZE_U_MASK 0x000f0000
+#define NV10TCL_TX_FORMAT_BASE_SIZE_V_SHIFT 20
+#define NV10TCL_TX_FORMAT_BASE_SIZE_V_MASK 0x00f00000
+#define NV10TCL_TX_FORMAT_WRAP_S_SHIFT 24
+#define NV10TCL_TX_FORMAT_WRAP_S_MASK 0x0f000000
+#define NV10TCL_TX_FORMAT_WRAP_S_REPEAT 0x01000000
+#define NV10TCL_TX_FORMAT_WRAP_S_MIRRORED_REPEAT 0x02000000
+#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE 0x03000000
+#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_BORDER 0x04000000
+#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP 0x05000000
+#define NV10TCL_TX_FORMAT_WRAP_T_SHIFT 28
+#define NV10TCL_TX_FORMAT_WRAP_T_MASK 0xf0000000
+#define NV10TCL_TX_FORMAT_WRAP_T_REPEAT 0x10000000
+#define NV10TCL_TX_FORMAT_WRAP_T_MIRRORED_REPEAT 0x20000000
+#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP_TO_EDGE 0x30000000
+#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP_TO_BORDER 0x40000000
+#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP 0x50000000
+#define NV10TCL_TX_ENABLE(x) (0x00000228+((x)*4))
+#define NV10TCL_TX_ENABLE__SIZE 0x00000002
+#define NV10TCL_TX_ENABLE_ANISOTROPY_SHIFT 4
+#define NV10TCL_TX_ENABLE_ANISOTROPY_MASK 0x00000030
+#define NV10TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT 14
+#define NV10TCL_TX_ENABLE_MIPMAP_MAX_LOD_MASK 0x0003c000
+#define NV10TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT 26
+#define NV10TCL_TX_ENABLE_MIPMAP_MIN_LOD_MASK 0x3c000000
+#define NV10TCL_TX_ENABLE_ENABLE (1 << 30)
+#define NV10TCL_TX_NPOT_PITCH(x) (0x00000230+((x)*4))
+#define NV10TCL_TX_NPOT_PITCH__SIZE 0x00000002
+#define NV10TCL_TX_NPOT_PITCH_PITCH_SHIFT 16
+#define NV10TCL_TX_NPOT_PITCH_PITCH_MASK 0xffff0000
+#define NV10TCL_TX_NPOT_SIZE(x) (0x00000240+((x)*4))
+#define NV10TCL_TX_NPOT_SIZE__SIZE 0x00000002
+#define NV10TCL_TX_NPOT_SIZE_H_SHIFT 0
+#define NV10TCL_TX_NPOT_SIZE_H_MASK 0x0000ffff
+#define NV10TCL_TX_NPOT_SIZE_W_SHIFT 16
+#define NV10TCL_TX_NPOT_SIZE_W_MASK 0xffff0000
+#define NV10TCL_TX_FILTER(x) (0x00000248+((x)*4))
+#define NV10TCL_TX_FILTER__SIZE 0x00000002
+#define NV10TCL_TX_FILTER_LOD_BIAS_SHIFT 8
+#define NV10TCL_TX_FILTER_LOD_BIAS_MASK 0x00000f00
+#define NV10TCL_TX_FILTER_MINIFY_SHIFT 24
+#define NV10TCL_TX_FILTER_MINIFY_MASK 0x0f000000
+#define NV10TCL_TX_FILTER_MINIFY_NEAREST 0x01000000
+#define NV10TCL_TX_FILTER_MINIFY_LINEAR 0x02000000
+#define NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x03000000
+#define NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x04000000
+#define NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x05000000
+#define NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x06000000
+#define NV10TCL_TX_FILTER_MAGNIFY_SHIFT 28
+#define NV10TCL_TX_FILTER_MAGNIFY_MASK 0xf0000000
+#define NV10TCL_TX_FILTER_MAGNIFY_NEAREST 0x10000000
+#define NV10TCL_TX_FILTER_MAGNIFY_LINEAR 0x20000000
+#define NV10TCL_TX_PALETTE_OFFSET(x) (0x00000250+((x)*4))
+#define NV10TCL_TX_PALETTE_OFFSET__SIZE 0x00000002
+#define NV10TCL_RC_IN_ALPHA(x) (0x00000260+((x)*4))
+#define NV10TCL_RC_IN_ALPHA__SIZE 0x00000002
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_SHIFT 0
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_MASK 0x0000000f
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_FOG 0x00000003
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE0_NV 0x0000000c
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE1_NV 0x0000000d
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV10TCL_RC_IN_ALPHA_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE (1 << 4)
+#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000
+#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SHIFT 5
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_MASK 0x000000e0
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_SHIFT 8
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_MASK 0x00000f00
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_FOG 0x00000300
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE0_NV 0x00000c00
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE1_NV 0x00000d00
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV10TCL_RC_IN_ALPHA_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE (1 << 12)
+#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000
+#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SHIFT 13
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_MASK 0x0000e000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_SHIFT 16
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_MASK 0x000f0000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_FOG 0x00030000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE0_NV 0x000c0000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE1_NV 0x000d0000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV10TCL_RC_IN_ALPHA_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE (1 << 20)
+#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000
+#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SHIFT 21
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_MASK 0x00e00000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_SHIFT 24
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_MASK 0x0f000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_FOG 0x03000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE0_NV 0x0c000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE1_NV 0x0d000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV10TCL_RC_IN_ALPHA_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE (1 << 28)
+#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000
+#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SHIFT 29
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_MASK 0xe0000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV10TCL_RC_IN_RGB(x) (0x00000268+((x)*4))
+#define NV10TCL_RC_IN_RGB__SIZE 0x00000002
+#define NV10TCL_RC_IN_RGB_D_INPUT_SHIFT 0
+#define NV10TCL_RC_IN_RGB_D_INPUT_MASK 0x0000000f
+#define NV10TCL_RC_IN_RGB_D_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV10TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV10TCL_RC_IN_RGB_D_INPUT_FOG 0x00000003
+#define NV10TCL_RC_IN_RGB_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV10TCL_RC_IN_RGB_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV10TCL_RC_IN_RGB_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV10TCL_RC_IN_RGB_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE0_NV 0x0000000c
+#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE1_NV 0x0000000d
+#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV10TCL_RC_IN_RGB_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE (1 << 4)
+#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV10TCL_RC_IN_RGB_D_MAPPING_SHIFT 5
+#define NV10TCL_RC_IN_RGB_D_MAPPING_MASK 0x000000e0
+#define NV10TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV10TCL_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV10TCL_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV10TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV10TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV10TCL_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV10TCL_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV10TCL_RC_IN_RGB_C_INPUT_SHIFT 8
+#define NV10TCL_RC_IN_RGB_C_INPUT_MASK 0x00000f00
+#define NV10TCL_RC_IN_RGB_C_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV10TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV10TCL_RC_IN_RGB_C_INPUT_FOG 0x00000300
+#define NV10TCL_RC_IN_RGB_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV10TCL_RC_IN_RGB_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV10TCL_RC_IN_RGB_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV10TCL_RC_IN_RGB_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE0_NV 0x00000c00
+#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE1_NV 0x00000d00
+#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV10TCL_RC_IN_RGB_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE (1 << 12)
+#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_SHIFT 13
+#define NV10TCL_RC_IN_RGB_C_MAPPING_MASK 0x0000e000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV10TCL_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV10TCL_RC_IN_RGB_B_INPUT_SHIFT 16
+#define NV10TCL_RC_IN_RGB_B_INPUT_MASK 0x000f0000
+#define NV10TCL_RC_IN_RGB_B_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV10TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV10TCL_RC_IN_RGB_B_INPUT_FOG 0x00030000
+#define NV10TCL_RC_IN_RGB_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV10TCL_RC_IN_RGB_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV10TCL_RC_IN_RGB_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV10TCL_RC_IN_RGB_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE0_NV 0x000c0000
+#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE1_NV 0x000d0000
+#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV10TCL_RC_IN_RGB_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE (1 << 20)
+#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_SHIFT 21
+#define NV10TCL_RC_IN_RGB_B_MAPPING_MASK 0x00e00000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV10TCL_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV10TCL_RC_IN_RGB_A_INPUT_SHIFT 24
+#define NV10TCL_RC_IN_RGB_A_INPUT_MASK 0x0f000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_FOG 0x03000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE0_NV 0x0c000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE1_NV 0x0d000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV10TCL_RC_IN_RGB_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE (1 << 28)
+#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_SHIFT 29
+#define NV10TCL_RC_IN_RGB_A_MAPPING_MASK 0xe0000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV10TCL_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV10TCL_RC_COLOR(x) (0x00000270+((x)*4))
+#define NV10TCL_RC_COLOR__SIZE 0x00000002
+#define NV10TCL_RC_COLOR_B_SHIFT 0
+#define NV10TCL_RC_COLOR_B_MASK 0x000000ff
+#define NV10TCL_RC_COLOR_G_SHIFT 8
+#define NV10TCL_RC_COLOR_G_MASK 0x0000ff00
+#define NV10TCL_RC_COLOR_R_SHIFT 16
+#define NV10TCL_RC_COLOR_R_MASK 0x00ff0000
+#define NV10TCL_RC_COLOR_A_SHIFT 24
+#define NV10TCL_RC_COLOR_A_MASK 0xff000000
+#define NV10TCL_RC_OUT_ALPHA(x) (0x00000278+((x)*4))
+#define NV10TCL_RC_OUT_ALPHA__SIZE 0x00000002
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SHIFT 0
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_MASK 0x0000000f
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0_ARB 0x00000008
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1_ARB 0x00000009
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_NV 0x0000000c
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE1_NV 0x0000000d
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F_NV 0x0000000f
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SHIFT 4
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_MASK 0x000000f0
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0_ARB 0x00000080
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1_ARB 0x00000090
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_NV 0x000000c0
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE1_NV 0x000000d0
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0
+#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F_NV 0x000000f0
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SHIFT 8
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_MASK 0x00000f00
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0_ARB 0x00000800
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1_ARB 0x00000900
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_NV 0x00000c00
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1_NV 0x00000d00
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00
+#define NV10TCL_RC_OUT_ALPHA_CD_DOT_PRODUCT (1 << 12)
+#define NV10TCL_RC_OUT_ALPHA_AB_DOT_PRODUCT (1 << 13)
+#define NV10TCL_RC_OUT_ALPHA_MUX_SUM (1 << 14)
+#define NV10TCL_RC_OUT_ALPHA_BIAS (1 << 15)
+#define NV10TCL_RC_OUT_ALPHA_BIAS_NONE 0x00000000
+#define NV10TCL_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000
+#define NV10TCL_RC_OUT_ALPHA_SCALE_SHIFT 17
+#define NV10TCL_RC_OUT_ALPHA_SCALE_MASK 0x00000000
+#define NV10TCL_RC_OUT_ALPHA_SCALE_NONE 0x00000000
+#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO_NV 0x00020000
+#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR_NV 0x00040000
+#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000
+#define NV10TCL_RC_OUT_RGB(x) (0x00000280+((x)*4))
+#define NV10TCL_RC_OUT_RGB__SIZE 0x00000002
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SHIFT 0
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_MASK 0x0000000f
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE0_ARB 0x00000008
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE1_ARB 0x00000009
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_NV 0x0000000c
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE1_NV 0x0000000d
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F_NV 0x0000000f
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SHIFT 4
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_MASK 0x000000f0
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE0_ARB 0x00000080
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE1_ARB 0x00000090
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_NV 0x000000c0
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE1_NV 0x000000d0
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0
+#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F_NV 0x000000f0
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SHIFT 8
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_MASK 0x00000f00
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0_ARB 0x00000800
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1_ARB 0x00000900
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_NV 0x00000c00
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE1_NV 0x00000d00
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00
+#define NV10TCL_RC_OUT_RGB_CD_DOT_PRODUCT (1 << 12)
+#define NV10TCL_RC_OUT_RGB_AB_DOT_PRODUCT (1 << 13)
+#define NV10TCL_RC_OUT_RGB_MUX_SUM (1 << 14)
+#define NV10TCL_RC_OUT_RGB_BIAS (1 << 15)
+#define NV10TCL_RC_OUT_RGB_BIAS_NONE 0x00000000
+#define NV10TCL_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000
+#define NV10TCL_RC_OUT_RGB_SCALE_SHIFT 17
+#define NV10TCL_RC_OUT_RGB_SCALE_MASK 0x00000000
+#define NV10TCL_RC_OUT_RGB_SCALE_NONE 0x00000000
+#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_TWO_NV 0x00020000
+#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_FOUR_NV 0x00040000
+#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000
+#define NV10TCL_RC_OUT_RGB_OPERATION_SHIFT 27
+#define NV10TCL_RC_OUT_RGB_OPERATION_MASK 0x38000000
+#define NV10TCL_RC_FINAL0 0x00000288
+#define NV10TCL_RC_FINAL0_D_INPUT_SHIFT 0
+#define NV10TCL_RC_FINAL0_D_INPUT_MASK 0x0000000f
+#define NV10TCL_RC_FINAL0_D_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV10TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV10TCL_RC_FINAL0_D_INPUT_FOG 0x00000003
+#define NV10TCL_RC_FINAL0_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV10TCL_RC_FINAL0_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV10TCL_RC_FINAL0_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV10TCL_RC_FINAL0_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV10TCL_RC_FINAL0_D_INPUT_SPARE0_NV 0x0000000c
+#define NV10TCL_RC_FINAL0_D_INPUT_SPARE1_NV 0x0000000d
+#define NV10TCL_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV10TCL_RC_FINAL0_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE (1 << 4)
+#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV10TCL_RC_FINAL0_D_MAPPING_SHIFT 5
+#define NV10TCL_RC_FINAL0_D_MAPPING_MASK 0x000000e0
+#define NV10TCL_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV10TCL_RC_FINAL0_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV10TCL_RC_FINAL0_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV10TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV10TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV10TCL_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV10TCL_RC_FINAL0_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV10TCL_RC_FINAL0_C_INPUT_SHIFT 8
+#define NV10TCL_RC_FINAL0_C_INPUT_MASK 0x00000f00
+#define NV10TCL_RC_FINAL0_C_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV10TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV10TCL_RC_FINAL0_C_INPUT_FOG 0x00000300
+#define NV10TCL_RC_FINAL0_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV10TCL_RC_FINAL0_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV10TCL_RC_FINAL0_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV10TCL_RC_FINAL0_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV10TCL_RC_FINAL0_C_INPUT_SPARE0_NV 0x00000c00
+#define NV10TCL_RC_FINAL0_C_INPUT_SPARE1_NV 0x00000d00
+#define NV10TCL_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV10TCL_RC_FINAL0_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE (1 << 12)
+#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV10TCL_RC_FINAL0_C_MAPPING_SHIFT 13
+#define NV10TCL_RC_FINAL0_C_MAPPING_MASK 0x0000e000
+#define NV10TCL_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV10TCL_RC_FINAL0_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV10TCL_RC_FINAL0_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV10TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV10TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV10TCL_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV10TCL_RC_FINAL0_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV10TCL_RC_FINAL0_B_INPUT_SHIFT 16
+#define NV10TCL_RC_FINAL0_B_INPUT_MASK 0x000f0000
+#define NV10TCL_RC_FINAL0_B_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV10TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV10TCL_RC_FINAL0_B_INPUT_FOG 0x00030000
+#define NV10TCL_RC_FINAL0_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV10TCL_RC_FINAL0_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV10TCL_RC_FINAL0_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV10TCL_RC_FINAL0_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV10TCL_RC_FINAL0_B_INPUT_SPARE0_NV 0x000c0000
+#define NV10TCL_RC_FINAL0_B_INPUT_SPARE1_NV 0x000d0000
+#define NV10TCL_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV10TCL_RC_FINAL0_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE (1 << 20)
+#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV10TCL_RC_FINAL0_B_MAPPING_SHIFT 21
+#define NV10TCL_RC_FINAL0_B_MAPPING_MASK 0x00e00000
+#define NV10TCL_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV10TCL_RC_FINAL0_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV10TCL_RC_FINAL0_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV10TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV10TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV10TCL_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV10TCL_RC_FINAL0_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV10TCL_RC_FINAL0_A_INPUT_SHIFT 24
+#define NV10TCL_RC_FINAL0_A_INPUT_MASK 0x0f000000
+#define NV10TCL_RC_FINAL0_A_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV10TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV10TCL_RC_FINAL0_A_INPUT_FOG 0x03000000
+#define NV10TCL_RC_FINAL0_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV10TCL_RC_FINAL0_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV10TCL_RC_FINAL0_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV10TCL_RC_FINAL0_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV10TCL_RC_FINAL0_A_INPUT_SPARE0_NV 0x0c000000
+#define NV10TCL_RC_FINAL0_A_INPUT_SPARE1_NV 0x0d000000
+#define NV10TCL_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV10TCL_RC_FINAL0_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE (1 << 28)
+#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_SHIFT 29
+#define NV10TCL_RC_FINAL0_A_MAPPING_MASK 0xe0000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV10TCL_RC_FINAL0_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV10TCL_RC_FINAL1 0x0000028c
+#define NV10TCL_RC_FINAL1_COLOR_SUM_CLAMP (1 << 7)
+#define NV10TCL_RC_FINAL1_G_INPUT_SHIFT 8
+#define NV10TCL_RC_FINAL1_G_INPUT_MASK 0x00000f00
+#define NV10TCL_RC_FINAL1_G_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV10TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV10TCL_RC_FINAL1_G_INPUT_FOG 0x00000300
+#define NV10TCL_RC_FINAL1_G_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV10TCL_RC_FINAL1_G_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV10TCL_RC_FINAL1_G_INPUT_TEXTURE0_ARB 0x00000800
+#define NV10TCL_RC_FINAL1_G_INPUT_TEXTURE1_ARB 0x00000900
+#define NV10TCL_RC_FINAL1_G_INPUT_SPARE0_NV 0x00000c00
+#define NV10TCL_RC_FINAL1_G_INPUT_SPARE1_NV 0x00000d00
+#define NV10TCL_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV10TCL_RC_FINAL1_G_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE (1 << 12)
+#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV10TCL_RC_FINAL1_G_MAPPING_SHIFT 13
+#define NV10TCL_RC_FINAL1_G_MAPPING_MASK 0x0000e000
+#define NV10TCL_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV10TCL_RC_FINAL1_G_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV10TCL_RC_FINAL1_G_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV10TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV10TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV10TCL_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV10TCL_RC_FINAL1_G_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV10TCL_RC_FINAL1_F_INPUT_SHIFT 16
+#define NV10TCL_RC_FINAL1_F_INPUT_MASK 0x000f0000
+#define NV10TCL_RC_FINAL1_F_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV10TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV10TCL_RC_FINAL1_F_INPUT_FOG 0x00030000
+#define NV10TCL_RC_FINAL1_F_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV10TCL_RC_FINAL1_F_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV10TCL_RC_FINAL1_F_INPUT_TEXTURE0_ARB 0x00080000
+#define NV10TCL_RC_FINAL1_F_INPUT_TEXTURE1_ARB 0x00090000
+#define NV10TCL_RC_FINAL1_F_INPUT_SPARE0_NV 0x000c0000
+#define NV10TCL_RC_FINAL1_F_INPUT_SPARE1_NV 0x000d0000
+#define NV10TCL_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV10TCL_RC_FINAL1_F_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE (1 << 20)
+#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV10TCL_RC_FINAL1_F_MAPPING_SHIFT 21
+#define NV10TCL_RC_FINAL1_F_MAPPING_MASK 0x00e00000
+#define NV10TCL_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV10TCL_RC_FINAL1_F_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV10TCL_RC_FINAL1_F_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV10TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV10TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV10TCL_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV10TCL_RC_FINAL1_F_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV10TCL_RC_FINAL1_E_INPUT_SHIFT 24
+#define NV10TCL_RC_FINAL1_E_INPUT_MASK 0x0f000000
+#define NV10TCL_RC_FINAL1_E_INPUT_ZERO 0x00000000
+#define NV10TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV10TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV10TCL_RC_FINAL1_E_INPUT_FOG 0x03000000
+#define NV10TCL_RC_FINAL1_E_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV10TCL_RC_FINAL1_E_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV10TCL_RC_FINAL1_E_INPUT_TEXTURE0_ARB 0x08000000
+#define NV10TCL_RC_FINAL1_E_INPUT_TEXTURE1_ARB 0x09000000
+#define NV10TCL_RC_FINAL1_E_INPUT_SPARE0_NV 0x0c000000
+#define NV10TCL_RC_FINAL1_E_INPUT_SPARE1_NV 0x0d000000
+#define NV10TCL_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV10TCL_RC_FINAL1_E_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE (1 << 28)
+#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000
+#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_SHIFT 29
+#define NV10TCL_RC_FINAL1_E_MAPPING_MASK 0xe0000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV10TCL_RC_FINAL1_E_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV10TCL_LIGHT_MODEL 0x00000294
+#define NV10TCL_LIGHT_MODEL_COLOR_CONTROL (1 << 1)
+#define NV10TCL_LIGHT_MODEL_LOCAL_VIEWER (1 << 16)
+#define NV10TCL_COLOR_MATERIAL_ENABLE 0x00000298
+#define NV10TCL_COLOR_MATERIAL_ENABLE_SPECULAR (1 << 0)
+#define NV10TCL_COLOR_MATERIAL_ENABLE_DIFFUSE (1 << 1)
+#define NV10TCL_COLOR_MATERIAL_ENABLE_AMBIENT (1 << 2)
+#define NV10TCL_COLOR_MATERIAL_ENABLE_EMISSION (1 << 3)
+#define NV10TCL_FOG_MODE 0x0000029c
+#define NV10TCL_FOG_MODE_EXP 0x00000800
+#define NV10TCL_FOG_MODE_EXP_2 0x00000802
+#define NV10TCL_FOG_MODE_EXP2 0x00000803
+#define NV10TCL_FOG_MODE_LINEAR 0x00000804
+#define NV10TCL_FOG_MODE_LINEAR_2 0x00002601
+#define NV10TCL_FOG_COORD_DIST 0x000002a0
+#define NV10TCL_FOG_COORD_DIST_COORD_FALSE 0x00000000
+#define NV10TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_RADIAL_NV 0x00000001
+#define NV10TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_PLANE_ABSOLUTE_NV 0x00000002
+#define NV10TCL_FOG_COORD_DIST_COORD_FOG 0x00000003
+#define NV10TCL_FOG_ENABLE 0x000002a4
+#define NV10TCL_FOG_COLOR 0x000002a8
+#define NV10TCL_FOG_COLOR_R_SHIFT 0
+#define NV10TCL_FOG_COLOR_R_MASK 0x000000ff
+#define NV10TCL_FOG_COLOR_G_SHIFT 8
+#define NV10TCL_FOG_COLOR_G_MASK 0x0000ff00
+#define NV10TCL_FOG_COLOR_B_SHIFT 16
+#define NV10TCL_FOG_COLOR_B_MASK 0x00ff0000
+#define NV10TCL_FOG_COLOR_A_SHIFT 24
+#define NV10TCL_FOG_COLOR_A_MASK 0xff000000
+#define NV10TCL_VIEWPORT_CLIP_MODE 0x000002b4
+#define NV10TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*4))
+#define NV10TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
+#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_L_SHIFT 0
+#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_L_MASK 0x000007ff
+#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_LEFT_ENABLE (1 << 11)
+#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_R_SHIFT 16
+#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_R_MASK 0x07ff0000
+#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_RIGHT_ENABLE (1 << 27)
+#define NV10TCL_VIEWPORT_CLIP_VERT(x) (0x000002e0+((x)*4))
+#define NV10TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
+#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_T_SHIFT 0
+#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_T_MASK 0x000007ff
+#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_TOP_ENABLE (1 << 11)
+#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_B_SHIFT 16
+#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_B_MASK 0x07ff0000
+#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_BOTTOM_ENABLE (1 << 27)
+#define NV10TCL_ALPHA_FUNC_ENABLE 0x00000300
+#define NV10TCL_BLEND_FUNC_ENABLE 0x00000304
+#define NV10TCL_CULL_FACE_ENABLE 0x00000308
+#define NV10TCL_DEPTH_TEST_ENABLE 0x0000030c
+#define NV10TCL_DITHER_ENABLE 0x00000310
+#define NV10TCL_LIGHTING_ENABLE 0x00000314
+#define NV10TCL_POINT_PARAMETERS_ENABLE 0x00000318
+#define NV10TCL_POINT_SMOOTH_ENABLE 0x0000031c
+#define NV10TCL_LINE_SMOOTH_ENABLE 0x00000320
+#define NV10TCL_POLYGON_SMOOTH_ENABLE 0x00000324
+#define NV10TCL_VERTEX_WEIGHT_ENABLE 0x00000328
+#define NV10TCL_STENCIL_ENABLE 0x0000032c
+#define NV10TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000330
+#define NV10TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000334
+#define NV10TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000338
+#define NV10TCL_ALPHA_FUNC_FUNC 0x0000033c
+#define NV10TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200
+#define NV10TCL_ALPHA_FUNC_FUNC_LESS 0x00000201
+#define NV10TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202
+#define NV10TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203
+#define NV10TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204
+#define NV10TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204
+#define NV10TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV10TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206
+#define NV10TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207
+#define NV10TCL_ALPHA_FUNC_REF 0x00000340
+#define NV10TCL_BLEND_FUNC_SRC 0x00000344
+#define NV10TCL_BLEND_FUNC_SRC_ZERO 0x00000000
+#define NV10TCL_BLEND_FUNC_SRC_ONE 0x00000001
+#define NV10TCL_BLEND_FUNC_SRC_SRC_COLOR 0x00000300
+#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV10TCL_BLEND_FUNC_SRC_SRC_ALPHA 0x00000302
+#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV10TCL_BLEND_FUNC_SRC_DST_ALPHA 0x00000304
+#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV10TCL_BLEND_FUNC_SRC_DST_COLOR 0x00000306
+#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_COLOR 0x00000307
+#define NV10TCL_BLEND_FUNC_SRC_SRC_ALPHA_SATURATE 0x00000308
+#define NV10TCL_BLEND_FUNC_SRC_CONSTANT_COLOR 0x00008001
+#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV10TCL_BLEND_FUNC_SRC_CONSTANT_ALPHA 0x00008003
+#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV10TCL_BLEND_FUNC_DST 0x00000348
+#define NV10TCL_BLEND_FUNC_DST_ZERO 0x00000000
+#define NV10TCL_BLEND_FUNC_DST_ONE 0x00000001
+#define NV10TCL_BLEND_FUNC_DST_SRC_COLOR 0x00000300
+#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV10TCL_BLEND_FUNC_DST_SRC_ALPHA 0x00000302
+#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV10TCL_BLEND_FUNC_DST_DST_ALPHA 0x00000304
+#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV10TCL_BLEND_FUNC_DST_DST_COLOR 0x00000306
+#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_DST_COLOR 0x00000307
+#define NV10TCL_BLEND_FUNC_DST_SRC_ALPHA_SATURATE 0x00000308
+#define NV10TCL_BLEND_FUNC_DST_CONSTANT_COLOR 0x00008001
+#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV10TCL_BLEND_FUNC_DST_CONSTANT_ALPHA 0x00008003
+#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV10TCL_BLEND_COLOR 0x0000034c
+#define NV10TCL_BLEND_COLOR_B_SHIFT 0
+#define NV10TCL_BLEND_COLOR_B_MASK 0x000000ff
+#define NV10TCL_BLEND_COLOR_G_SHIFT 8
+#define NV10TCL_BLEND_COLOR_G_MASK 0x0000ff00
+#define NV10TCL_BLEND_COLOR_R_SHIFT 16
+#define NV10TCL_BLEND_COLOR_R_MASK 0x00ff0000
+#define NV10TCL_BLEND_COLOR_A_SHIFT 24
+#define NV10TCL_BLEND_COLOR_A_MASK 0xff000000
+#define NV10TCL_BLEND_EQUATION 0x00000350
+#define NV10TCL_BLEND_EQUATION_FUNC_ADD 0x00008006
+#define NV10TCL_BLEND_EQUATION_MIN 0x00008007
+#define NV10TCL_BLEND_EQUATION_MAX 0x00008008
+#define NV10TCL_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a
+#define NV10TCL_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NV10TCL_DEPTH_FUNC 0x00000354
+#define NV10TCL_DEPTH_FUNC_NEVER 0x00000200
+#define NV10TCL_DEPTH_FUNC_LESS 0x00000201
+#define NV10TCL_DEPTH_FUNC_EQUAL 0x00000202
+#define NV10TCL_DEPTH_FUNC_LEQUAL 0x00000203
+#define NV10TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV10TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV10TCL_DEPTH_FUNC_NOTEQUAL 0x00000205
+#define NV10TCL_DEPTH_FUNC_GEQUAL 0x00000206
+#define NV10TCL_DEPTH_FUNC_ALWAYS 0x00000207
+#define NV10TCL_COLOR_MASK 0x00000358
+#define NV10TCL_COLOR_MASK_B (1 << 0)
+#define NV10TCL_COLOR_MASK_G (1 << 8)
+#define NV10TCL_COLOR_MASK_R (1 << 16)
+#define NV10TCL_COLOR_MASK_A (1 << 24)
+#define NV10TCL_DEPTH_WRITE_ENABLE 0x0000035c
+#define NV10TCL_STENCIL_MASK 0x00000360
+#define NV10TCL_STENCIL_FUNC_FUNC 0x00000364
+#define NV10TCL_STENCIL_FUNC_FUNC_NEVER 0x00000200
+#define NV10TCL_STENCIL_FUNC_FUNC_LESS 0x00000201
+#define NV10TCL_STENCIL_FUNC_FUNC_EQUAL 0x00000202
+#define NV10TCL_STENCIL_FUNC_FUNC_LEQUAL 0x00000203
+#define NV10TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204
+#define NV10TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204
+#define NV10TCL_STENCIL_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV10TCL_STENCIL_FUNC_FUNC_GEQUAL 0x00000206
+#define NV10TCL_STENCIL_FUNC_FUNC_ALWAYS 0x00000207
+#define NV10TCL_STENCIL_FUNC_REF 0x00000368
+#define NV10TCL_STENCIL_FUNC_MASK 0x0000036c
+#define NV10TCL_STENCIL_OP_FAIL 0x00000370
+#define NV10TCL_STENCIL_OP_FAIL_ZERO 0x00000000
+#define NV10TCL_STENCIL_OP_FAIL_INVERT 0x0000150a
+#define NV10TCL_STENCIL_OP_FAIL_KEEP 0x00001e00
+#define NV10TCL_STENCIL_OP_FAIL_REPLACE 0x00001e01
+#define NV10TCL_STENCIL_OP_FAIL_INCR 0x00001e02
+#define NV10TCL_STENCIL_OP_FAIL_DECR 0x00001e03
+#define NV10TCL_STENCIL_OP_FAIL_INCR_WRAP 0x00008507
+#define NV10TCL_STENCIL_OP_FAIL_DECR_WRAP 0x00008508
+#define NV10TCL_STENCIL_OP_ZFAIL 0x00000374
+#define NV10TCL_STENCIL_OP_ZFAIL_ZERO 0x00000000
+#define NV10TCL_STENCIL_OP_ZFAIL_INVERT 0x0000150a
+#define NV10TCL_STENCIL_OP_ZFAIL_KEEP 0x00001e00
+#define NV10TCL_STENCIL_OP_ZFAIL_REPLACE 0x00001e01
+#define NV10TCL_STENCIL_OP_ZFAIL_INCR 0x00001e02
+#define NV10TCL_STENCIL_OP_ZFAIL_DECR 0x00001e03
+#define NV10TCL_STENCIL_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV10TCL_STENCIL_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV10TCL_STENCIL_OP_ZPASS 0x00000378
+#define NV10TCL_STENCIL_OP_ZPASS_ZERO 0x00000000
+#define NV10TCL_STENCIL_OP_ZPASS_INVERT 0x0000150a
+#define NV10TCL_STENCIL_OP_ZPASS_KEEP 0x00001e00
+#define NV10TCL_STENCIL_OP_ZPASS_REPLACE 0x00001e01
+#define NV10TCL_STENCIL_OP_ZPASS_INCR 0x00001e02
+#define NV10TCL_STENCIL_OP_ZPASS_DECR 0x00001e03
+#define NV10TCL_STENCIL_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV10TCL_STENCIL_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV10TCL_SHADE_MODEL 0x0000037c
+#define NV10TCL_SHADE_MODEL_FLAT 0x00001d00
+#define NV10TCL_SHADE_MODEL_SMOOTH 0x00001d01
+#define NV10TCL_LINE_WIDTH 0x00000380
+#define NV10TCL_POLYGON_OFFSET_FACTOR 0x00000384
+#define NV10TCL_POLYGON_OFFSET_UNITS 0x00000388
+#define NV10TCL_POLYGON_MODE_FRONT 0x0000038c
+#define NV10TCL_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NV10TCL_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NV10TCL_POLYGON_MODE_FRONT_FILL 0x00001b02
+#define NV10TCL_POLYGON_MODE_BACK 0x00000390
+#define NV10TCL_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NV10TCL_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NV10TCL_POLYGON_MODE_BACK_FILL 0x00001b02
+#define NV10TCL_DEPTH_RANGE_NEAR 0x00000394
+#define NV10TCL_DEPTH_RANGE_FAR 0x00000398
+#define NV10TCL_CULL_FACE 0x0000039c
+#define NV10TCL_CULL_FACE_FRONT 0x00000404
+#define NV10TCL_CULL_FACE_BACK 0x00000405
+#define NV10TCL_CULL_FACE_FRONT_AND_BACK 0x00000408
+#define NV10TCL_FRONT_FACE 0x000003a0
+#define NV10TCL_FRONT_FACE_CW 0x00000900
+#define NV10TCL_FRONT_FACE_CCW 0x00000901
+#define NV10TCL_NORMALIZE_ENABLE 0x000003a4
+#define NV10TCL_COLOR_MATERIAL_R 0x000003a8
+#define NV10TCL_COLOR_MATERIAL_G 0x000003ac
+#define NV10TCL_COLOR_MATERIAL_B 0x000003b0
+#define NV10TCL_COLOR_MATERIAL_A 0x000003b4
+#define NV10TCL_COLOR_CONTROL 0x000003b8
+#define NV10TCL_ENABLED_LIGHTS 0x000003bc
+#define NV10TCL_ENABLED_LIGHTS_LIGHT0 (1 << 0)
+#define NV10TCL_ENABLED_LIGHTS_LIGHT1 (1 << 2)
+#define NV10TCL_ENABLED_LIGHTS_LIGHT2 (1 << 4)
+#define NV10TCL_ENABLED_LIGHTS_LIGHT3 (1 << 6)
+#define NV10TCL_ENABLED_LIGHTS_LIGHT4 (1 << 8)
+#define NV10TCL_ENABLED_LIGHTS_LIGHT5 (1 << 10)
+#define NV10TCL_ENABLED_LIGHTS_LIGHT6 (1 << 12)
+#define NV10TCL_ENABLED_LIGHTS_LIGHT7 (1 << 14)
+#define NV10TCL_TX_GEN_S(x) (0x000003c0+((x)*16))
+#define NV10TCL_TX_GEN_S__SIZE 0x00000002
+#define NV10TCL_TX_GEN_S_FALSE 0x00000000
+#define NV10TCL_TX_GEN_S_EYE_LINEAR 0x00002400
+#define NV10TCL_TX_GEN_S_OBJECT_LINEAR 0x00002401
+#define NV10TCL_TX_GEN_S_SPHERE_MAP 0x00002402
+#define NV10TCL_TX_GEN_S_NORMAL_MAP 0x00008511
+#define NV10TCL_TX_GEN_S_REFLECTION_MAP 0x00008512
+#define NV10TCL_TX_GEN_T(x) (0x000003c4+((x)*16))
+#define NV10TCL_TX_GEN_T__SIZE 0x00000002
+#define NV10TCL_TX_GEN_T_FALSE 0x00000000
+#define NV10TCL_TX_GEN_T_EYE_LINEAR 0x00002400
+#define NV10TCL_TX_GEN_T_OBJECT_LINEAR 0x00002401
+#define NV10TCL_TX_GEN_T_SPHERE_MAP 0x00002402
+#define NV10TCL_TX_GEN_T_NORMAL_MAP 0x00008511
+#define NV10TCL_TX_GEN_T_REFLECTION_MAP 0x00008512
+#define NV10TCL_TX_GEN_R(x) (0x000003c8+((x)*16))
+#define NV10TCL_TX_GEN_R__SIZE 0x00000002
+#define NV10TCL_TX_GEN_R_FALSE 0x00000000
+#define NV10TCL_TX_GEN_R_EYE_LINEAR 0x00002400
+#define NV10TCL_TX_GEN_R_OBJECT_LINEAR 0x00002401
+#define NV10TCL_TX_GEN_R_SPHERE_MAP 0x00002402
+#define NV10TCL_TX_GEN_R_NORMAL_MAP 0x00008511
+#define NV10TCL_TX_GEN_R_REFLECTION_MAP 0x00008512
+#define NV10TCL_TX_GEN_Q(x) (0x000003cc+((x)*16))
+#define NV10TCL_TX_GEN_Q__SIZE 0x00000002
+#define NV10TCL_TX_GEN_Q_FALSE 0x00000000
+#define NV10TCL_TX_GEN_Q_EYE_LINEAR 0x00002400
+#define NV10TCL_TX_GEN_Q_OBJECT_LINEAR 0x00002401
+#define NV10TCL_TX_GEN_Q_SPHERE_MAP 0x00002402
+#define NV10TCL_TX_GEN_Q_NORMAL_MAP 0x00008511
+#define NV10TCL_TX_GEN_Q_REFLECTION_MAP 0x00008512
+#define NV10TCL_TX_MATRIX_ENABLE(x) (0x000003e0+((x)*4))
+#define NV10TCL_TX_MATRIX_ENABLE__SIZE 0x00000002
+#define NV10TCL_VIEW_MATRIX_ENABLE 0x000003e8
+#define NV10TCL_VIEW_MATRIX_ENABLE_MODELVIEW1 (1 << 0)
+#define NV10TCL_VIEW_MATRIX_ENABLE_MODELVIEW0 (1 << 1)
+#define NV10TCL_VIEW_MATRIX_ENABLE_PROJECTION (1 << 2)
+#define NV10TCL_POINT_SIZE 0x000003ec
+#define NV10TCL_MODELVIEW0_MATRIX(x) (0x00000400+((x)*4))
+#define NV10TCL_MODELVIEW0_MATRIX__SIZE 0x00000010
+#define NV10TCL_MODELVIEW1_MATRIX(x) (0x00000440+((x)*4))
+#define NV10TCL_MODELVIEW1_MATRIX__SIZE 0x00000010
+#define NV10TCL_INVERSE_MODELVIEW0_MATRIX(x) (0x00000480+((x)*4))
+#define NV10TCL_INVERSE_MODELVIEW0_MATRIX__SIZE 0x00000010
+#define NV10TCL_INVERSE_MODELVIEW1_MATRIX(x) (0x000004c0+((x)*4))
+#define NV10TCL_INVERSE_MODELVIEW1_MATRIX__SIZE 0x00000010
+#define NV10TCL_PROJECTION_MATRIX(x) (0x00000500+((x)*4))
+#define NV10TCL_PROJECTION_MATRIX__SIZE 0x00000010
+#define NV10TCL_TX0_MATRIX(x) (0x00000540+((x)*4))
+#define NV10TCL_TX0_MATRIX__SIZE 0x00000010
+#define NV10TCL_TX1_MATRIX(x) (0x00000580+((x)*4))
+#define NV10TCL_TX1_MATRIX__SIZE 0x00000010
+#define NV10TCL_CLIP_PLANE_A(x) (0x00000600+((x)*16))
+#define NV10TCL_CLIP_PLANE_A__SIZE 0x00000008
+#define NV10TCL_CLIP_PLANE_B(x) (0x00000604+((x)*16))
+#define NV10TCL_CLIP_PLANE_B__SIZE 0x00000008
+#define NV10TCL_CLIP_PLANE_C(x) (0x00000608+((x)*16))
+#define NV10TCL_CLIP_PLANE_C__SIZE 0x00000008
+#define NV10TCL_CLIP_PLANE_D(x) (0x0000060c+((x)*16))
+#define NV10TCL_CLIP_PLANE_D__SIZE 0x00000008
+#define NV10TCL_FOG_EQUATION_CONSTANT 0x00000680
+#define NV10TCL_FOG_EQUATION_LINEAR 0x00000684
+#define NV10TCL_FOG_EQUATION_QUADRATIC 0x00000688
+#define NV10TCL_FRONT_MATERIAL_SHININESS(x) (0x000006a0+((x)*4))
+#define NV10TCL_FRONT_MATERIAL_SHININESS__SIZE 0x00000006
+#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000006c4
+#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000006c8
+#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000006cc
+#define NV10TCL_VIEWPORT_SCALE_X 0x000006e8
+#define NV10TCL_VIEWPORT_SCALE_Y 0x000006ec
+#define NV10TCL_VIEWPORT_SCALE_Z 0x000006f0
+#define NV10TCL_VIEWPORT_SCALE_W 0x000006f4
+#define NV10TCL_POINT_PARAMETER(x) (0x000006f8+((x)*4))
+#define NV10TCL_POINT_PARAMETER__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(x) (0x00000800+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(x) (0x00000804+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(x) (0x00000808+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(x) (0x0000080c+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(x) (0x00000810+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(x) (0x00000814+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(x) (0x00000818+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(x) (0x0000081c+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G__SIZE 0x00000008
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(x) (0x00000820+((x)*128))
+#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B__SIZE 0x00000008
+#define NV10TCL_LIGHT_HALF_VECTOR_X(x) (0x00000828+((x)*128))
+#define NV10TCL_LIGHT_HALF_VECTOR_X__SIZE 0x00000008
+#define NV10TCL_LIGHT_HALF_VECTOR_Y(x) (0x0000082c+((x)*128))
+#define NV10TCL_LIGHT_HALF_VECTOR_Y__SIZE 0x00000008
+#define NV10TCL_LIGHT_HALF_VECTOR_Z(x) (0x00000830+((x)*128))
+#define NV10TCL_LIGHT_HALF_VECTOR_Z__SIZE 0x00000008
+#define NV10TCL_LIGHT_DIRECTION_X(x) (0x00000834+((x)*128))
+#define NV10TCL_LIGHT_DIRECTION_X__SIZE 0x00000008
+#define NV10TCL_LIGHT_DIRECTION_Y(x) (0x00000838+((x)*128))
+#define NV10TCL_LIGHT_DIRECTION_Y__SIZE 0x00000008
+#define NV10TCL_LIGHT_DIRECTION_Z(x) (0x0000083c+((x)*128))
+#define NV10TCL_LIGHT_DIRECTION_Z__SIZE 0x00000008
+#define NV10TCL_LIGHT_SPOT_CUTOFF_A(x) (0x00000840+((x)*128))
+#define NV10TCL_LIGHT_SPOT_CUTOFF_A__SIZE 0x00000008
+#define NV10TCL_LIGHT_SPOT_CUTOFF_B(x) (0x00000844+((x)*128))
+#define NV10TCL_LIGHT_SPOT_CUTOFF_B__SIZE 0x00000008
+#define NV10TCL_LIGHT_SPOT_CUTOFF_C(x) (0x00000848+((x)*128))
+#define NV10TCL_LIGHT_SPOT_CUTOFF_C__SIZE 0x00000008
+#define NV10TCL_LIGHT_SPOT_DIR_X(x) (0x0000084c+((x)*128))
+#define NV10TCL_LIGHT_SPOT_DIR_X__SIZE 0x00000008
+#define NV10TCL_LIGHT_SPOT_DIR_Y(x) (0x00000850+((x)*128))
+#define NV10TCL_LIGHT_SPOT_DIR_Y__SIZE 0x00000008
+#define NV10TCL_LIGHT_SPOT_DIR_Z(x) (0x00000854+((x)*128))
+#define NV10TCL_LIGHT_SPOT_DIR_Z__SIZE 0x00000008
+#define NV10TCL_LIGHT_SPOT_CUTOFF_D(x) (0x00000858+((x)*128))
+#define NV10TCL_LIGHT_SPOT_CUTOFF_D__SIZE 0x00000008
+#define NV10TCL_LIGHT_POSITION_X(x) (0x0000085c+((x)*128))
+#define NV10TCL_LIGHT_POSITION_X__SIZE 0x00000008
+#define NV10TCL_LIGHT_POSITION_Y(x) (0x00000860+((x)*128))
+#define NV10TCL_LIGHT_POSITION_Y__SIZE 0x00000008
+#define NV10TCL_LIGHT_POSITION_Z(x) (0x00000864+((x)*128))
+#define NV10TCL_LIGHT_POSITION_Z__SIZE 0x00000008
+#define NV10TCL_LIGHT_ATTENUATION_CONSTANT(x) (0x00000868+((x)*128))
+#define NV10TCL_LIGHT_ATTENUATION_CONSTANT__SIZE 0x00000008
+#define NV10TCL_LIGHT_ATTENUATION_LINEAR(x) (0x0000086c+((x)*128))
+#define NV10TCL_LIGHT_ATTENUATION_LINEAR__SIZE 0x00000008
+#define NV10TCL_LIGHT_ATTENUATION_QUADRATIC(x) (0x00000870+((x)*128))
+#define NV10TCL_LIGHT_ATTENUATION_QUADRATIC__SIZE 0x00000008
+#define NV10TCL_VERTEX_POS_3F_X 0x00000c00
+#define NV10TCL_VERTEX_POS_3F_Y 0x00000c04
+#define NV10TCL_VERTEX_POS_3F_Z 0x00000c08
+#define NV10TCL_VERTEX_POS_4F_X 0x00000c18
+#define NV10TCL_VERTEX_POS_4F_Y 0x00000c1c
+#define NV10TCL_VERTEX_POS_4F_Z 0x00000c20
+#define NV10TCL_VERTEX_POS_4F_W 0x00000c24
+#define NV10TCL_VERTEX_NOR_3F_X 0x00000c30
+#define NV10TCL_VERTEX_NOR_3F_Y 0x00000c34
+#define NV10TCL_VERTEX_NOR_3F_Z 0x00000c38
+#define NV10TCL_VERTEX_NOR_3I_XY 0x00000c40
+#define NV10TCL_VERTEX_NOR_3I_XY_X_SHIFT 0
+#define NV10TCL_VERTEX_NOR_3I_XY_X_MASK 0x0000ffff
+#define NV10TCL_VERTEX_NOR_3I_XY_Y_SHIFT 16
+#define NV10TCL_VERTEX_NOR_3I_XY_Y_MASK 0xffff0000
+#define NV10TCL_VERTEX_NOR_3I_Z 0x00000c44
+#define NV10TCL_VERTEX_NOR_3I_Z_Z_SHIFT 0
+#define NV10TCL_VERTEX_NOR_3I_Z_Z_MASK 0x0000ffff
+#define NV10TCL_VERTEX_COL_4F_R 0x00000c50
+#define NV10TCL_VERTEX_COL_4F_G 0x00000c54
+#define NV10TCL_VERTEX_COL_4F_B 0x00000c58
+#define NV10TCL_VERTEX_COL_4F_A 0x00000c5c
+#define NV10TCL_VERTEX_COL_3F_R 0x00000c60
+#define NV10TCL_VERTEX_COL_3F_G 0x00000c64
+#define NV10TCL_VERTEX_COL_3F_B 0x00000c68
+#define NV10TCL_VERTEX_COL_4I 0x00000c6c
+#define NV10TCL_VERTEX_COL_4I_R_SHIFT 0
+#define NV10TCL_VERTEX_COL_4I_R_MASK 0x000000ff
+#define NV10TCL_VERTEX_COL_4I_G_SHIFT 8
+#define NV10TCL_VERTEX_COL_4I_G_MASK 0x0000ff00
+#define NV10TCL_VERTEX_COL_4I_B_SHIFT 16
+#define NV10TCL_VERTEX_COL_4I_B_MASK 0x00ff0000
+#define NV10TCL_VERTEX_COL_4I_A_SHIFT 24
+#define NV10TCL_VERTEX_COL_4I_A_MASK 0xff000000
+#define NV10TCL_VERTEX_COL2_3F_R 0x00000c80
+#define NV10TCL_VERTEX_COL2_3F_G 0x00000c84
+#define NV10TCL_VERTEX_COL2_3F_B 0x00000c88
+#define NV10TCL_VERTEX_COL2_3I 0x00000c8c
+#define NV10TCL_VERTEX_COL2_3I_R_SHIFT 0
+#define NV10TCL_VERTEX_COL2_3I_R_MASK 0x000000ff
+#define NV10TCL_VERTEX_COL2_3I_G_SHIFT 8
+#define NV10TCL_VERTEX_COL2_3I_G_MASK 0x0000ff00
+#define NV10TCL_VERTEX_COL2_3I_B_SHIFT 16
+#define NV10TCL_VERTEX_COL2_3I_B_MASK 0x00ff0000
+#define NV10TCL_VERTEX_TX0_2F_S 0x00000c90
+#define NV10TCL_VERTEX_TX0_2F_T 0x00000c94
+#define NV10TCL_VERTEX_TX0_2I 0x00000c98
+#define NV10TCL_VERTEX_TX0_2I_S_SHIFT 0
+#define NV10TCL_VERTEX_TX0_2I_S_MASK 0x0000ffff
+#define NV10TCL_VERTEX_TX0_2I_T_SHIFT 16
+#define NV10TCL_VERTEX_TX0_2I_T_MASK 0xffff0000
+#define NV10TCL_VERTEX_TX0_4F_S 0x00000ca0
+#define NV10TCL_VERTEX_TX0_4F_T 0x00000ca4
+#define NV10TCL_VERTEX_TX0_4F_R 0x00000ca8
+#define NV10TCL_VERTEX_TX0_4F_Q 0x00000cac
+#define NV10TCL_VERTEX_TX0_4I_ST 0x00000cb0
+#define NV10TCL_VERTEX_TX0_4I_ST_S_SHIFT 0
+#define NV10TCL_VERTEX_TX0_4I_ST_S_MASK 0x0000ffff
+#define NV10TCL_VERTEX_TX0_4I_ST_T_SHIFT 16
+#define NV10TCL_VERTEX_TX0_4I_ST_T_MASK 0xffff0000
+#define NV10TCL_VERTEX_TX0_4I_RQ 0x00000cb4
+#define NV10TCL_VERTEX_TX0_4I_RQ_R_SHIFT 0
+#define NV10TCL_VERTEX_TX0_4I_RQ_R_MASK 0x0000ffff
+#define NV10TCL_VERTEX_TX0_4I_RQ_Q_SHIFT 16
+#define NV10TCL_VERTEX_TX0_4I_RQ_Q_MASK 0xffff0000
+#define NV10TCL_VERTEX_TX1_2F_S 0x00000cb8
+#define NV10TCL_VERTEX_TX1_2F_T 0x00000cbc
+#define NV10TCL_VERTEX_TX1_2I 0x00000cc0
+#define NV10TCL_VERTEX_TX1_2I_S_SHIFT 0
+#define NV10TCL_VERTEX_TX1_2I_S_MASK 0x0000ffff
+#define NV10TCL_VERTEX_TX1_2I_T_SHIFT 16
+#define NV10TCL_VERTEX_TX1_2I_T_MASK 0xffff0000
+#define NV10TCL_VERTEX_TX1_4F_S 0x00000cc8
+#define NV10TCL_VERTEX_TX1_4F_T 0x00000ccc
+#define NV10TCL_VERTEX_TX1_4F_R 0x00000cd0
+#define NV10TCL_VERTEX_TX1_4F_Q 0x00000cd4
+#define NV10TCL_VERTEX_TX1_4I_ST 0x00000cd8
+#define NV10TCL_VERTEX_TX1_4I_ST_S_SHIFT 0
+#define NV10TCL_VERTEX_TX1_4I_ST_S_MASK 0x0000ffff
+#define NV10TCL_VERTEX_TX1_4I_ST_T_SHIFT 16
+#define NV10TCL_VERTEX_TX1_4I_ST_T_MASK 0xffff0000
+#define NV10TCL_VERTEX_TX1_4I_RQ 0x00000cdc
+#define NV10TCL_VERTEX_TX1_4I_RQ_R_SHIFT 0
+#define NV10TCL_VERTEX_TX1_4I_RQ_R_MASK 0x0000ffff
+#define NV10TCL_VERTEX_TX1_4I_RQ_Q_SHIFT 16
+#define NV10TCL_VERTEX_TX1_4I_RQ_Q_MASK 0xffff0000
+#define NV10TCL_VERTEX_FOG_1F 0x00000ce0
+#define NV10TCL_VERTEX_WGH_1F 0x00000ce4
+#define NV10TCL_EDGEFLAG_ENABLE 0x00000cec
+#define NV10TCL_VERTEX_ARRAY_VALIDATE 0x00000cf0
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(x) (0x00000d00+((x)*8))
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET__SIZE 0x00000008
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(x) (0x00000d04+((x)*8))
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT__SIZE 0x00000008
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_POS 0x00000d00
+#define NV10TCL_VERTEX_ARRAY_FORMAT_POS 0x00000d04
+#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_COL 0x00000d08
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL 0x00000d0c
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_COL2 0x00000d10
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2 0x00000d14
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_TX0 0x00000d18
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0 0x00000d1c
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_TX1 0x00000d20
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1 0x00000d24
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_NOR 0x00000d28
+#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR 0x00000d2c
+#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_WGH 0x00000d30
+#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH 0x00000d34
+#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_ARRAY_OFFSET_FOG 0x00000d38
+#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG 0x00000d3c
+#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_TYPE_SHIFT 0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_TYPE_MASK 0x0000000f
+#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_FIELDS_SHIFT 4
+#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_FIELDS_MASK 0x000000f0
+#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_STRIDE_SHIFT 8
+#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_STRIDE_MASK 0x0000ff00
+#define NV10TCL_VERTEX_BEGIN_END 0x00000dfc
+#define NV10TCL_VERTEX_BEGIN_END_STOP 0x00000000
+#define NV10TCL_VERTEX_BEGIN_END_POINTS 0x00000001
+#define NV10TCL_VERTEX_BEGIN_END_LINES 0x00000002
+#define NV10TCL_VERTEX_BEGIN_END_LINE_LOOP 0x00000003
+#define NV10TCL_VERTEX_BEGIN_END_LINE_STRIP 0x00000004
+#define NV10TCL_VERTEX_BEGIN_END_TRIANGLES 0x00000005
+#define NV10TCL_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006
+#define NV10TCL_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007
+#define NV10TCL_VERTEX_BEGIN_END_QUADS 0x00000008
+#define NV10TCL_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009
+#define NV10TCL_VERTEX_BEGIN_END_POLYGON 0x0000000a
+#define NV10TCL_VB_ELEMENT_U16 0x00000e00
+#define NV10TCL_VB_ELEMENT_U16_I0_SHIFT 0
+#define NV10TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff
+#define NV10TCL_VB_ELEMENT_U16_I1_SHIFT 16
+#define NV10TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000
+#define NV10TCL_VB_ELEMENT_U32 0x00001100
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END 0x000013fc
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_STOP 0x00000000
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_POINTS 0x00000001
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINES 0x00000002
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINE_LOOP 0x00000003
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINE_STRIP 0x00000004
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLES 0x00000005
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLE_STRIP 0x00000006
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLE_FAN 0x00000007
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_QUADS 0x00000008
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_QUAD_STRIP 0x00000009
+#define NV10TCL_VERTEX_BUFFER_BEGIN_END_POLYGON 0x0000000a
+#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS 0x00001400
+#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_FIRST_SHIFT 0
+#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_FIRST_MASK 0x0000ffff
+#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_LAST_SHIFT 24
+#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_LAST_MASK 0xff000000
+#define NV10TCL_VERTEX_ARRAY_DATA 0x00001800
+
+
+#define NV04_CONTEXT_COLOR_KEY 0x00000057
+
+
+
+#define NV03_CONTEXT_SURFACES_2D 0x00000058
+
+#define NV03_CONTEXT_SURFACES_2D_SYNCHRONIZE 0x00000100
+#define NV03_CONTEXT_SURFACES_2D_DMA_NOTIFY 0x00000180
+#define NV03_CONTEXT_SURFACES_2D_DMA_SOURCE 0x00000184
+#define NV03_CONTEXT_SURFACES_2D_DMA_DESTIN 0x00000188
+#define NV03_CONTEXT_SURFACES_2D_COLOR_FORMAT 0x00000300
+#define NV03_CONTEXT_SURFACES_2D_PITCH 0x00000304
+#define NV03_CONTEXT_SURFACES_2D_PITCH_SOURCE_SHIFT 0
+#define NV03_CONTEXT_SURFACES_2D_PITCH_SOURCE_MASK 0x0000ffff
+#define NV03_CONTEXT_SURFACES_2D_PITCH_DESTIN_SHIFT 16
+#define NV03_CONTEXT_SURFACES_2D_PITCH_DESTIN_MASK 0xffff0000
+#define NV03_CONTEXT_SURFACES_2D_OFFSET_SOURCE 0x00000308
+#define NV03_CONTEXT_SURFACES_2D_OFFSET_DESTIN 0x0000030c
+
+
+#define NV03_CONTEXT_SURFACES_3D 0x0000005a
+
+#define NV03_CONTEXT_SURFACES_3D_SYNCHRONIZE 0x00000100
+#define NV03_CONTEXT_SURFACES_3D_DMA_NOTIFY 0x00000180
+#define NV03_CONTEXT_SURFACES_3D_DMA_SURFACE 0x00000184
+#define NV03_CONTEXT_SURFACES_3D_PITCH 0x00000300
+#define NV03_CONTEXT_SURFACES_3D_OFFSET_COLOR 0x00000304
+#define NV03_CONTEXT_SURFACES_3D_OFFSET_ZETA 0x00000308
+
+
+#define NV04_RENDER_SOLID_LINE 0x0000005c
+
+#define NV04_RENDER_SOLID_LINE_SURFACE 0x00000198
+
+
+#define NV04_RENDER_SOLID_TRIANGLE 0x0000005d
+
+
+
+#define NV04_RENDER_SOLID_RECTANGLE 0x0000005e
+
+#define NV04_RENDER_SOLID_RECTANGLE_SURFACE 0x00000198
+
+
+#define NV04_IMAGE_BLIT 0x0000005f
+
+#define NV04_IMAGE_BLIT_NOP 0x00000100
+#define NV04_IMAGE_BLIT_NOTIFY 0x00000104
+#define NV04_IMAGE_BLIT_DMA_NOTIFY 0x00000180
+#define NV04_IMAGE_BLIT_COLOR_KEY 0x00000184
+#define NV04_IMAGE_BLIT_CLIP_RECTANGLE 0x00000188
+#define NV04_IMAGE_BLIT_PATTERN 0x0000018c
+#define NV04_IMAGE_BLIT_ROP 0x00000190
+#define NV04_IMAGE_BLIT_BETA4 0x00000198
+#define NV04_IMAGE_BLIT_SURFACE 0x0000019c
+#define NV04_IMAGE_BLIT_OPERATION 0x000002fc
+#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY_AND 0x00000000
+#define NV04_IMAGE_BLIT_OPERATION_ROP_AND 0x00000001
+#define NV04_IMAGE_BLIT_OPERATION_BLEND_AND 0x00000002
+#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY 0x00000003
+#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV04_IMAGE_BLIT_OPERATION_BLEND_PREMULT 0x00000005
+
+
+#define NV04_INDEXED_IMAGE_FROM_CPU 0x00000060
+
+#define NV04_INDEXED_IMAGE_FROM_CPU_NOP 0x00000100
+#define NV04_INDEXED_IMAGE_FROM_CPU_NOTIFY 0x00000104
+#define NV04_INDEXED_IMAGE_FROM_CPU_PATCH 0x0000010c
+#define NV04_INDEXED_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180
+#define NV04_INDEXED_IMAGE_FROM_CPU_DMA_LUT 0x00000184
+#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR_FORMAT 0x000003e8
+#define NV04_INDEXED_IMAGE_FROM_CPU_INDEX_FORMAT 0x000003ec
+#define NV04_INDEXED_IMAGE_FROM_CPU_LUT_OFFSET 0x000003f0
+#define NV04_INDEXED_IMAGE_FROM_CPU_POINT 0x000003f4
+#define NV04_INDEXED_IMAGE_FROM_CPU_SIZE_OUT 0x000003f8
+#define NV04_INDEXED_IMAGE_FROM_CPU_SIZE_IN 0x000003fc
+#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR 0x00000400
+
+
+#define NV04_IMAGE_FROM_CPU 0x00000061
+
+#define NV04_IMAGE_FROM_CPU_BETA4 0x00000198
+#define NV04_IMAGE_FROM_CPU_SURFACE 0x0000019c
+
+
+#define NV10_CONTEXT_SURFACES_2D 0x00000062
+
+
+
+#define NV05_SCALED_IMAGE_FROM_MEMORY 0x00000063
+
+#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION 0x000002fc
+#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_DITHER 0x00000000
+#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE 0x00000001
+#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_SUBTR_TRUNCATE 0x00000002
+
+
+#define NV01_IMAGE_SRCCOPY_AND 0x00000064
+
+#define NV01_IMAGE_SRCCOPY_AND_NOTIFY 0x00000104
+#define NV01_IMAGE_SRCCOPY_AND_DMA_NOTIFY 0x00000180
+#define NV01_IMAGE_SRCCOPY_AND_IMAGE_OUTPUT 0x00000200
+#define NV01_IMAGE_SRCCOPY_AND_IMAGE_INPUT 0x00000204
+
+
+#define NV05_INDEXED_IMAGE_FROM_CPU 0x00000064
+
+#define NV05_INDEXED_IMAGE_FROM_CPU_COLOR_KEY 0x00000188
+#define NV05_INDEXED_IMAGE_FROM_CPU_CLIP_RECTANGLE 0x0000018c
+#define NV05_INDEXED_IMAGE_FROM_CPU_PATTERN 0x00000190
+#define NV05_INDEXED_IMAGE_FROM_CPU_ROP 0x00000194
+#define NV05_INDEXED_IMAGE_FROM_CPU_BETA1 0x00000198
+#define NV05_INDEXED_IMAGE_FROM_CPU_BETA4 0x0000019c
+#define NV05_INDEXED_IMAGE_FROM_CPU_SURFACE 0x000001a0
+#define NV05_INDEXED_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000003e0
+#define NV05_INDEXED_IMAGE_FROM_CPU_OPERATION 0x000003e4
+#define NV05_INDEXED_IMAGE_FROM_CPU_INDICES 0x00000400
+
+
+#define NV05_IMAGE_FROM_CPU 0x00000065
+
+#define NV05_IMAGE_FROM_CPU_BETA4 0x00000198
+#define NV05_IMAGE_FROM_CPU_SURFACE 0x0000019c
+
+
+#define NV05_STRETCHED_IMAGE_FROM_CPU 0x00000066
+
+#define NV05_STRETCHED_IMAGE_FROM_CPU_BETA4 0x00000194
+#define NV05_STRETCHED_IMAGE_FROM_CPU_SURFACE 0x00000198
+#define NV05_STRETCHED_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000002f8
+
+
+#define NV04_IMAGE_BLEND_PREMULT 0x00000067
+
+#define NV04_IMAGE_BLEND_PREMULT_NOP 0x00000100
+#define NV04_IMAGE_BLEND_PREMULT_NOTIFY 0x00000104
+#define NV04_IMAGE_BLEND_PREMULT_DMA_NOTIFY 0x00000180
+#define NV04_IMAGE_BLEND_PREMULT_IMAGE_OUTPUT 0x00000200
+#define NV04_IMAGE_BLEND_PREMULT_BETA_INPUT 0x00000204
+#define NV04_IMAGE_BLEND_PREMULT_IMAGE_INPUT 0x00000208
+
+
+#define NV03_CHANNEL_PIO 0x0000006a
+
+
+
+#define NV03_CHANNEL_DMA 0x0000006b
+
+
+
+#define NV04_BETA_SOLID 0x00000072
+
+#define NV04_BETA_SOLID_NOP 0x00000100
+#define NV04_BETA_SOLID_NOTIFY 0x00000104
+#define NV04_BETA_SOLID_DMA_NOTIFY 0x00000180
+#define NV04_BETA_SOLID_BETA_OUTPUT 0x00000200
+#define NV04_BETA_SOLID_BETA_FACTOR 0x00000300
+
+
+#define NV04_STRETCHED_IMAGE_FROM_CPU 0x00000076
+
+
+
+#define NV04_SCALED_IMAGE_FROM_MEMORY 0x00000077
+
+#define NV04_SCALED_IMAGE_FROM_MEMORY_NOP 0x00000100
+#define NV04_SCALED_IMAGE_FROM_MEMORY_NOTIFY 0x00000104
+#define NV04_SCALED_IMAGE_FROM_MEMORY_DMA_NOTIFY 0x00000180
+#define NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE 0x00000184
+#define NV04_SCALED_IMAGE_FROM_MEMORY_PATTERN 0x00000188
+#define NV04_SCALED_IMAGE_FROM_MEMORY_ROP 0x0000018c
+#define NV04_SCALED_IMAGE_FROM_MEMORY_BETA1 0x00000190
+#define NV04_SCALED_IMAGE_FROM_MEMORY_BETA4 0x00000194
+#define NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE 0x00000198
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION 0x000002fc
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_DITHER 0x00000000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE 0x00000001
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_SUBTR_TRUNCATE 0x00000002
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT 0x00000300
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5 0x00000001
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X1R5G5B5 0x00000002
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8 0x00000003
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8 0x00000004
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_V8YB8U8YA8 0x00000005
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_YB8V8YA8U8 0x00000006
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5 0x00000007
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8 0x00000008
+#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_AY8 0x00000009
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION 0x00000304
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_AND 0x00000000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_ROP_AND 0x00000001
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_AND 0x00000002
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY 0x00000003
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_PREMULT 0x00000005
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT 0x00000308
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_SHIFT 0
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_MASK 0x0000ffff
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT 16
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_MASK 0xffff0000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE 0x0000030c
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_SHIFT 0
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_MASK 0x0000ffff
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT 16
+#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_MASK 0xffff0000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT 0x00000310
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_X_SHIFT 0
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_X_MASK 0x0000ffff
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT 16
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_MASK 0xffff0000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE 0x00000314
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_W_SHIFT 0
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_W_MASK 0x0000ffff
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT 16
+#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_MASK 0xffff0000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_DU_DX 0x00000318
+#define NV04_SCALED_IMAGE_FROM_MEMORY_DV_DY 0x0000031c
+#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE 0x00000400
+#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_W_SHIFT 0
+#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_W_MASK 0x0000ffff
+#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT 16
+#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_MASK 0xffff0000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT 0x00000404
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_PITCH_SHIFT 0
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_PITCH_MASK 0x0000ffff
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_SHIFT 16
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_MASK 0x00ff0000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER 0x00010000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CORNER 0x00020000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_SHIFT 24
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_MASK 0xff000000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE 0x00000000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_BILINEAR 0x01000000
+#define NV04_SCALED_IMAGE_FROM_MEMORY_ADDRESS 0x00000408
+#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT 0x0000040c
+#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_X_SHIFT 0
+#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_X_MASK 0x0000ffff
+#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_Y_SHIFT 16
+#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_Y_MASK 0xffff0000
+
+
+#define NV10_TEXTURE_FROM_CPU 0x0000007b
+
+#define NV10_TEXTURE_FROM_CPU_NOP 0x00000100
+#define NV10_TEXTURE_FROM_CPU_NOTIFY 0x00000104
+#define NV10_TEXTURE_FROM_CPU_WAIT_FOR_IDLE 0x00000108
+#define NV10_TEXTURE_FROM_CPU_PM_TRIGGER 0x00000140
+#define NV10_TEXTURE_FROM_CPU_DMA_NOTIFY 0x00000180
+#define NV10_TEXTURE_FROM_CPU_SURFACE 0x00000184
+#define NV10_TEXTURE_FROM_CPU_COLOR_FORMAT 0x00000300
+#define NV10_TEXTURE_FROM_CPU_POINT 0x00000304
+#define NV10_TEXTURE_FROM_CPU_POINT_X_SHIFT 0
+#define NV10_TEXTURE_FROM_CPU_POINT_X_MASK 0x0000ffff
+#define NV10_TEXTURE_FROM_CPU_POINT_Y_SHIFT 16
+#define NV10_TEXTURE_FROM_CPU_POINT_Y_MASK 0xffff0000
+#define NV10_TEXTURE_FROM_CPU_SIZE 0x00000308
+#define NV10_TEXTURE_FROM_CPU_SIZE_W_SHIFT 0
+#define NV10_TEXTURE_FROM_CPU_SIZE_W_MASK 0x0000ffff
+#define NV10_TEXTURE_FROM_CPU_SIZE_H_SHIFT 16
+#define NV10_TEXTURE_FROM_CPU_SIZE_H_MASK 0xffff0000
+#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL 0x0000030c
+#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_X_SHIFT 0
+#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_X_MASK 0x0000ffff
+#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_W_SHIFT 16
+#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_W_MASK 0xffff0000
+#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL 0x00000310
+#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_Y_SHIFT 0
+#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_Y_MASK 0x0000ffff
+#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_H_SHIFT 16
+#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_H_MASK 0xffff0000
+#define NV10_TEXTURE_FROM_CPU_COLOR(x) (0x00000400+((x)*4))
+#define NV10_TEXTURE_FROM_CPU_COLOR__SIZE 0x00000700
+
+
+#define NV10_VIDEO_DISPLAY 0x0000007c
+
+
+
+#define NV10_DVD_SUBPICTURE 0x00000088
+
+
+
+#define NV10_SCALED_IMAGE_FROM_MEMORY 0x00000089
+
+#define NV10_SCALED_IMAGE_FROM_MEMORY_WAIT_FOR_IDLE 0x00000108
+
+
+#define NV10_IMAGE_FROM_CPU 0x0000008a
+
+#define NV10_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000002f8
+
+
+#define NV10_CONTEXT_SURFACES_3D 0x00000093
+
+
+
+#define NV10_DX5_TEXTURE_TRIANGLE 0x00000094
+
+
+
+#define NV10_DX6_MULTI_TEXTURE_TRIANGLE 0x00000095
+
+
+
+#define NV11TCL 0x00000096
+
+#define NV11TCL_COLOR_LOGIC_OP_ENABLE 0x00000d40
+#define NV11TCL_COLOR_LOGIC_OP_OP 0x00000d44
+#define NV11TCL_COLOR_LOGIC_OP_OP_CLEAR 0x00001500
+#define NV11TCL_COLOR_LOGIC_OP_OP_AND 0x00001501
+#define NV11TCL_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502
+#define NV11TCL_COLOR_LOGIC_OP_OP_COPY 0x00001503
+#define NV11TCL_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504
+#define NV11TCL_COLOR_LOGIC_OP_OP_NOOP 0x00001505
+#define NV11TCL_COLOR_LOGIC_OP_OP_XOR 0x00001506
+#define NV11TCL_COLOR_LOGIC_OP_OP_OR 0x00001507
+#define NV11TCL_COLOR_LOGIC_OP_OP_NOR 0x00001508
+#define NV11TCL_COLOR_LOGIC_OP_OP_EQUIV 0x00001509
+#define NV11TCL_COLOR_LOGIC_OP_OP_INVERT 0x0000150a
+#define NV11TCL_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b
+#define NV11TCL_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c
+#define NV11TCL_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d
+#define NV11TCL_COLOR_LOGIC_OP_OP_NAND 0x0000150e
+#define NV11TCL_COLOR_LOGIC_OP_OP_SET 0x0000150f
+
+
+#define NV20TCL 0x00000097
+
+#define NV20TCL_NOP 0x00000100
+#define NV20TCL_NOTIFY 0x00000104
+#define NV20TCL_DMA_NOTIFY 0x00000180
+#define NV20TCL_DMA_TEXTURE0 0x00000184
+#define NV20TCL_DMA_TEXTURE1 0x00000188
+#define NV20TCL_DMA_COLOR 0x00000194
+#define NV20TCL_DMA_ZETA 0x00000198
+#define NV20TCL_DMA_VTXBUF0 0x0000019c
+#define NV20TCL_DMA_VTXBUF1 0x000001a0
+#define NV20TCL_DMA_FENCE 0x000001a4
+#define NV20TCL_DMA_QUERY 0x000001a8
+#define NV20TCL_RT_HORIZ 0x00000200
+#define NV20TCL_RT_HORIZ_X_SHIFT 0
+#define NV20TCL_RT_HORIZ_X_MASK 0x0000ffff
+#define NV20TCL_RT_HORIZ_W_SHIFT 16
+#define NV20TCL_RT_HORIZ_W_MASK 0xffff0000
+#define NV20TCL_RT_VERT 0x00000204
+#define NV20TCL_RT_VERT_Y_SHIFT 0
+#define NV20TCL_RT_VERT_Y_MASK 0x0000ffff
+#define NV20TCL_RT_VERT_H_SHIFT 16
+#define NV20TCL_RT_VERT_H_MASK 0xffff0000
+#define NV20TCL_RT_FORMAT 0x00000208
+#define NV20TCL_RT_FORMAT_TYPE_SHIFT 8
+#define NV20TCL_RT_FORMAT_TYPE_MASK 0x00000f00
+#define NV20TCL_RT_FORMAT_TYPE_LINEAR 0x00000100
+#define NV20TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200
+#define NV20TCL_RT_FORMAT_COLOR_SHIFT 0
+#define NV20TCL_RT_FORMAT_COLOR_MASK 0x0000001f
+#define NV20TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003
+#define NV20TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005
+#define NV20TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008
+#define NV20TCL_RT_FORMAT_COLOR_B8 0x00000009
+#define NV20TCL_RT_FORMAT_COLOR_UNKNOWN 0x0000000d
+#define NV20TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f
+#define NV20TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010
+#define NV20TCL_RT_PITCH 0x0000020c
+#define NV20TCL_RT_PITCH_COLOR_PITCH_SHIFT 0
+#define NV20TCL_RT_PITCH_COLOR_PITCH_MASK 0x0000ffff
+#define NV20TCL_RT_PITCH_ZETA_PITCH_SHIFT 16
+#define NV20TCL_RT_PITCH_ZETA_PITCH_MASK 0xffff0000
+#define NV20TCL_COLOR_OFFSET 0x00000210
+#define NV20TCL_ZETA_OFFSET 0x00000214
+#define NV20TCL_RC_IN_ALPHA(x) (0x00000260+((x)*4))
+#define NV20TCL_RC_IN_ALPHA__SIZE 0x00000008
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_SHIFT 0
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_MASK 0x0000000f
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_FOG 0x00000003
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_SPARE0_NV 0x0000000c
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_SPARE1_NV 0x0000000d
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV20TCL_RC_IN_ALPHA_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV20TCL_RC_IN_ALPHA_D_COMPONENT_USAGE (1 << 4)
+#define NV20TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000
+#define NV20TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_SHIFT 5
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_MASK 0x000000e0
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV20TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_SHIFT 8
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_MASK 0x00000f00
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_FOG 0x00000300
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_SPARE0_NV 0x00000c00
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_SPARE1_NV 0x00000d00
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV20TCL_RC_IN_ALPHA_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV20TCL_RC_IN_ALPHA_C_COMPONENT_USAGE (1 << 12)
+#define NV20TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000
+#define NV20TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_SHIFT 13
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_MASK 0x0000e000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV20TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_SHIFT 16
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_MASK 0x000f0000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_FOG 0x00030000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_SPARE0_NV 0x000c0000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_SPARE1_NV 0x000d0000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV20TCL_RC_IN_ALPHA_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV20TCL_RC_IN_ALPHA_B_COMPONENT_USAGE (1 << 20)
+#define NV20TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000
+#define NV20TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_SHIFT 21
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_MASK 0x00e00000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV20TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_SHIFT 24
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_MASK 0x0f000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_FOG 0x03000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_SPARE0_NV 0x0c000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_SPARE1_NV 0x0d000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV20TCL_RC_IN_ALPHA_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV20TCL_RC_IN_ALPHA_A_COMPONENT_USAGE (1 << 28)
+#define NV20TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000
+#define NV20TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_SHIFT 29
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_MASK 0xe0000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV20TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV20TCL_RC_FINAL0 0x00000288
+#define NV20TCL_RC_FINAL0_D_INPUT_SHIFT 0
+#define NV20TCL_RC_FINAL0_D_INPUT_MASK 0x0000000f
+#define NV20TCL_RC_FINAL0_D_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV20TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV20TCL_RC_FINAL0_D_INPUT_FOG 0x00000003
+#define NV20TCL_RC_FINAL0_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV20TCL_RC_FINAL0_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV20TCL_RC_FINAL0_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV20TCL_RC_FINAL0_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV20TCL_RC_FINAL0_D_INPUT_SPARE0_NV 0x0000000c
+#define NV20TCL_RC_FINAL0_D_INPUT_SPARE1_NV 0x0000000d
+#define NV20TCL_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV20TCL_RC_FINAL0_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV20TCL_RC_FINAL0_D_COMPONENT_USAGE (1 << 4)
+#define NV20TCL_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV20TCL_RC_FINAL0_D_MAPPING_SHIFT 5
+#define NV20TCL_RC_FINAL0_D_MAPPING_MASK 0x000000e0
+#define NV20TCL_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV20TCL_RC_FINAL0_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV20TCL_RC_FINAL0_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV20TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV20TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV20TCL_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV20TCL_RC_FINAL0_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV20TCL_RC_FINAL0_C_INPUT_SHIFT 8
+#define NV20TCL_RC_FINAL0_C_INPUT_MASK 0x00000f00
+#define NV20TCL_RC_FINAL0_C_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV20TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV20TCL_RC_FINAL0_C_INPUT_FOG 0x00000300
+#define NV20TCL_RC_FINAL0_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV20TCL_RC_FINAL0_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV20TCL_RC_FINAL0_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV20TCL_RC_FINAL0_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV20TCL_RC_FINAL0_C_INPUT_SPARE0_NV 0x00000c00
+#define NV20TCL_RC_FINAL0_C_INPUT_SPARE1_NV 0x00000d00
+#define NV20TCL_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV20TCL_RC_FINAL0_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV20TCL_RC_FINAL0_C_COMPONENT_USAGE (1 << 12)
+#define NV20TCL_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV20TCL_RC_FINAL0_C_MAPPING_SHIFT 13
+#define NV20TCL_RC_FINAL0_C_MAPPING_MASK 0x0000e000
+#define NV20TCL_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV20TCL_RC_FINAL0_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV20TCL_RC_FINAL0_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV20TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV20TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV20TCL_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV20TCL_RC_FINAL0_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV20TCL_RC_FINAL0_B_INPUT_SHIFT 16
+#define NV20TCL_RC_FINAL0_B_INPUT_MASK 0x000f0000
+#define NV20TCL_RC_FINAL0_B_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV20TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV20TCL_RC_FINAL0_B_INPUT_FOG 0x00030000
+#define NV20TCL_RC_FINAL0_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV20TCL_RC_FINAL0_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV20TCL_RC_FINAL0_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV20TCL_RC_FINAL0_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV20TCL_RC_FINAL0_B_INPUT_SPARE0_NV 0x000c0000
+#define NV20TCL_RC_FINAL0_B_INPUT_SPARE1_NV 0x000d0000
+#define NV20TCL_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV20TCL_RC_FINAL0_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV20TCL_RC_FINAL0_B_COMPONENT_USAGE (1 << 20)
+#define NV20TCL_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV20TCL_RC_FINAL0_B_MAPPING_SHIFT 21
+#define NV20TCL_RC_FINAL0_B_MAPPING_MASK 0x00e00000
+#define NV20TCL_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV20TCL_RC_FINAL0_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV20TCL_RC_FINAL0_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV20TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV20TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV20TCL_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV20TCL_RC_FINAL0_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV20TCL_RC_FINAL0_A_INPUT_SHIFT 24
+#define NV20TCL_RC_FINAL0_A_INPUT_MASK 0x0f000000
+#define NV20TCL_RC_FINAL0_A_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV20TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV20TCL_RC_FINAL0_A_INPUT_FOG 0x03000000
+#define NV20TCL_RC_FINAL0_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV20TCL_RC_FINAL0_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV20TCL_RC_FINAL0_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV20TCL_RC_FINAL0_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV20TCL_RC_FINAL0_A_INPUT_SPARE0_NV 0x0c000000
+#define NV20TCL_RC_FINAL0_A_INPUT_SPARE1_NV 0x0d000000
+#define NV20TCL_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV20TCL_RC_FINAL0_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV20TCL_RC_FINAL0_A_COMPONENT_USAGE (1 << 28)
+#define NV20TCL_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_SHIFT 29
+#define NV20TCL_RC_FINAL0_A_MAPPING_MASK 0xe0000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV20TCL_RC_FINAL0_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV20TCL_RC_FINAL1 0x0000028c
+#define NV20TCL_RC_FINAL1_COLOR_SUM_CLAMP (1 << 7)
+#define NV20TCL_RC_FINAL1_G_INPUT_SHIFT 8
+#define NV20TCL_RC_FINAL1_G_INPUT_MASK 0x00000f00
+#define NV20TCL_RC_FINAL1_G_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV20TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV20TCL_RC_FINAL1_G_INPUT_FOG 0x00000300
+#define NV20TCL_RC_FINAL1_G_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV20TCL_RC_FINAL1_G_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV20TCL_RC_FINAL1_G_INPUT_TEXTURE0_ARB 0x00000800
+#define NV20TCL_RC_FINAL1_G_INPUT_TEXTURE1_ARB 0x00000900
+#define NV20TCL_RC_FINAL1_G_INPUT_SPARE0_NV 0x00000c00
+#define NV20TCL_RC_FINAL1_G_INPUT_SPARE1_NV 0x00000d00
+#define NV20TCL_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV20TCL_RC_FINAL1_G_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV20TCL_RC_FINAL1_G_COMPONENT_USAGE (1 << 12)
+#define NV20TCL_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV20TCL_RC_FINAL1_G_MAPPING_SHIFT 13
+#define NV20TCL_RC_FINAL1_G_MAPPING_MASK 0x0000e000
+#define NV20TCL_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV20TCL_RC_FINAL1_G_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV20TCL_RC_FINAL1_G_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV20TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV20TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV20TCL_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV20TCL_RC_FINAL1_G_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV20TCL_RC_FINAL1_F_INPUT_SHIFT 16
+#define NV20TCL_RC_FINAL1_F_INPUT_MASK 0x000f0000
+#define NV20TCL_RC_FINAL1_F_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV20TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV20TCL_RC_FINAL1_F_INPUT_FOG 0x00030000
+#define NV20TCL_RC_FINAL1_F_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV20TCL_RC_FINAL1_F_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV20TCL_RC_FINAL1_F_INPUT_TEXTURE0_ARB 0x00080000
+#define NV20TCL_RC_FINAL1_F_INPUT_TEXTURE1_ARB 0x00090000
+#define NV20TCL_RC_FINAL1_F_INPUT_SPARE0_NV 0x000c0000
+#define NV20TCL_RC_FINAL1_F_INPUT_SPARE1_NV 0x000d0000
+#define NV20TCL_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV20TCL_RC_FINAL1_F_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV20TCL_RC_FINAL1_F_COMPONENT_USAGE (1 << 20)
+#define NV20TCL_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV20TCL_RC_FINAL1_F_MAPPING_SHIFT 21
+#define NV20TCL_RC_FINAL1_F_MAPPING_MASK 0x00e00000
+#define NV20TCL_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV20TCL_RC_FINAL1_F_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV20TCL_RC_FINAL1_F_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV20TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV20TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV20TCL_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV20TCL_RC_FINAL1_F_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV20TCL_RC_FINAL1_E_INPUT_SHIFT 24
+#define NV20TCL_RC_FINAL1_E_INPUT_MASK 0x0f000000
+#define NV20TCL_RC_FINAL1_E_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV20TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV20TCL_RC_FINAL1_E_INPUT_FOG 0x03000000
+#define NV20TCL_RC_FINAL1_E_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV20TCL_RC_FINAL1_E_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV20TCL_RC_FINAL1_E_INPUT_TEXTURE0_ARB 0x08000000
+#define NV20TCL_RC_FINAL1_E_INPUT_TEXTURE1_ARB 0x09000000
+#define NV20TCL_RC_FINAL1_E_INPUT_SPARE0_NV 0x0c000000
+#define NV20TCL_RC_FINAL1_E_INPUT_SPARE1_NV 0x0d000000
+#define NV20TCL_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV20TCL_RC_FINAL1_E_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV20TCL_RC_FINAL1_E_COMPONENT_USAGE (1 << 28)
+#define NV20TCL_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_SHIFT 29
+#define NV20TCL_RC_FINAL1_E_MAPPING_MASK 0xe0000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV20TCL_RC_FINAL1_E_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV20TCL_LIGHT_CONTROL 0x00000294
+#define NV20TCL_FOG_MODE 0x0000029c
+#define NV20TCL_FOG_MODE_EXP 0x00000800
+#define NV20TCL_FOG_MODE_EXP_2 0x00000802
+#define NV20TCL_FOG_MODE_EXP2 0x00000803
+#define NV20TCL_FOG_MODE_LINEAR 0x00000804
+#define NV20TCL_FOG_MODE_LINEAR_2 0x00002601
+#define NV20TCL_FOG_COORD_DIST 0x000002a0
+#define NV20TCL_FOG_COORD_DIST_COORD_FALSE 0x00000000
+#define NV20TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_RADIAL_NV 0x00000001
+#define NV20TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_PLANE_ABSOLUTE_NV 0x00000002
+#define NV20TCL_FOG_COORD_DIST_COORD_FOG 0x00000003
+#define NV20TCL_FOG_ENABLE 0x000002a4
+#define NV20TCL_FOG_COLOR 0x000002a8
+#define NV20TCL_FOG_COLOR_R_SHIFT 0
+#define NV20TCL_FOG_COLOR_R_MASK 0x000000ff
+#define NV20TCL_FOG_COLOR_G_SHIFT 8
+#define NV20TCL_FOG_COLOR_G_MASK 0x0000ff00
+#define NV20TCL_FOG_COLOR_B_SHIFT 16
+#define NV20TCL_FOG_COLOR_B_MASK 0x00ff0000
+#define NV20TCL_FOG_COLOR_A_SHIFT 24
+#define NV20TCL_FOG_COLOR_A_MASK 0xff000000
+#define NV20TCL_VIEWPORT_CLIP_MODE 0x000002b4
+#define NV20TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*4))
+#define NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
+#define NV20TCL_VIEWPORT_CLIP_VERT(x) (0x000002e0+((x)*4))
+#define NV20TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
+#define NV20TCL_ALPHA_FUNC_ENABLE 0x00000300
+#define NV20TCL_BLEND_FUNC_ENABLE 0x00000304
+#define NV20TCL_CULL_FACE_ENABLE 0x00000308
+#define NV20TCL_DEPTH_TEST_ENABLE 0x0000030c
+#define NV20TCL_DITHER_ENABLE 0x00000310
+#define NV20TCL_LIGHTING_ENABLE 0x00000314
+#define NV20TCL_POINT_PARAMETERS_ENABLE 0x00000318
+#define NV20TCL_POINT_SMOOTH_ENABLE 0x0000031c
+#define NV20TCL_LINE_SMOOTH_ENABLE 0x00000320
+#define NV20TCL_POLYGON_SMOOTH_ENABLE 0x00000324
+#define NV20TCL_STENCIL_ENABLE 0x0000032c
+#define NV20TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000330
+#define NV20TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000334
+#define NV20TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000338
+#define NV20TCL_ALPHA_FUNC_FUNC 0x0000033c
+#define NV20TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200
+#define NV20TCL_ALPHA_FUNC_FUNC_LESS 0x00000201
+#define NV20TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202
+#define NV20TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203
+#define NV20TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204
+#define NV20TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204
+#define NV20TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV20TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206
+#define NV20TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207
+#define NV20TCL_ALPHA_FUNC_REF 0x00000340
+#define NV20TCL_BLEND_FUNC_SRC 0x00000344
+#define NV20TCL_BLEND_FUNC_SRC_ZERO 0x00000000
+#define NV20TCL_BLEND_FUNC_SRC_ONE 0x00000001
+#define NV20TCL_BLEND_FUNC_SRC_SRC_COLOR 0x00000300
+#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV20TCL_BLEND_FUNC_SRC_SRC_ALPHA 0x00000302
+#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV20TCL_BLEND_FUNC_SRC_DST_ALPHA 0x00000304
+#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV20TCL_BLEND_FUNC_SRC_DST_COLOR 0x00000306
+#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_COLOR 0x00000307
+#define NV20TCL_BLEND_FUNC_SRC_SRC_ALPHA_SATURATE 0x00000308
+#define NV20TCL_BLEND_FUNC_SRC_CONSTANT_COLOR 0x00008001
+#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV20TCL_BLEND_FUNC_SRC_CONSTANT_ALPHA 0x00008003
+#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV20TCL_BLEND_FUNC_DST 0x00000348
+#define NV20TCL_BLEND_FUNC_DST_ZERO 0x00000000
+#define NV20TCL_BLEND_FUNC_DST_ONE 0x00000001
+#define NV20TCL_BLEND_FUNC_DST_SRC_COLOR 0x00000300
+#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV20TCL_BLEND_FUNC_DST_SRC_ALPHA 0x00000302
+#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV20TCL_BLEND_FUNC_DST_DST_ALPHA 0x00000304
+#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV20TCL_BLEND_FUNC_DST_DST_COLOR 0x00000306
+#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_DST_COLOR 0x00000307
+#define NV20TCL_BLEND_FUNC_DST_SRC_ALPHA_SATURATE 0x00000308
+#define NV20TCL_BLEND_FUNC_DST_CONSTANT_COLOR 0x00008001
+#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV20TCL_BLEND_FUNC_DST_CONSTANT_ALPHA 0x00008003
+#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV20TCL_BLEND_COLOR 0x0000034c
+#define NV20TCL_BLEND_COLOR_B_SHIFT 0
+#define NV20TCL_BLEND_COLOR_B_MASK 0x000000ff
+#define NV20TCL_BLEND_COLOR_G_SHIFT 8
+#define NV20TCL_BLEND_COLOR_G_MASK 0x0000ff00
+#define NV20TCL_BLEND_COLOR_R_SHIFT 16
+#define NV20TCL_BLEND_COLOR_R_MASK 0x00ff0000
+#define NV20TCL_BLEND_COLOR_A_SHIFT 24
+#define NV20TCL_BLEND_COLOR_A_MASK 0xff000000
+#define NV20TCL_BLEND_EQUATION 0x00000350
+#define NV20TCL_BLEND_EQUATION_FUNC_ADD 0x00008006
+#define NV20TCL_BLEND_EQUATION_MIN 0x00008007
+#define NV20TCL_BLEND_EQUATION_MAX 0x00008008
+#define NV20TCL_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a
+#define NV20TCL_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NV20TCL_DEPTH_FUNC 0x00000354
+#define NV20TCL_DEPTH_FUNC_NEVER 0x00000200
+#define NV20TCL_DEPTH_FUNC_LESS 0x00000201
+#define NV20TCL_DEPTH_FUNC_EQUAL 0x00000202
+#define NV20TCL_DEPTH_FUNC_LEQUAL 0x00000203
+#define NV20TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV20TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV20TCL_DEPTH_FUNC_NOTEQUAL 0x00000205
+#define NV20TCL_DEPTH_FUNC_GEQUAL 0x00000206
+#define NV20TCL_DEPTH_FUNC_ALWAYS 0x00000207
+#define NV20TCL_COLOR_MASK 0x00000358
+#define NV20TCL_COLOR_MASK_B (1 << 0)
+#define NV20TCL_COLOR_MASK_G (1 << 8)
+#define NV20TCL_COLOR_MASK_R (1 << 16)
+#define NV20TCL_COLOR_MASK_A (1 << 24)
+#define NV20TCL_DEPTH_WRITE_ENABLE 0x0000035c
+#define NV20TCL_STENCIL_MASK 0x00000360
+#define NV20TCL_STENCIL_FUNC_FUNC 0x00000364
+#define NV20TCL_STENCIL_FUNC_FUNC_NEVER 0x00000200
+#define NV20TCL_STENCIL_FUNC_FUNC_LESS 0x00000201
+#define NV20TCL_STENCIL_FUNC_FUNC_EQUAL 0x00000202
+#define NV20TCL_STENCIL_FUNC_FUNC_LEQUAL 0x00000203
+#define NV20TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204
+#define NV20TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204
+#define NV20TCL_STENCIL_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV20TCL_STENCIL_FUNC_FUNC_GEQUAL 0x00000206
+#define NV20TCL_STENCIL_FUNC_FUNC_ALWAYS 0x00000207
+#define NV20TCL_STENCIL_FUNC_REF 0x00000368
+#define NV20TCL_STENCIL_FUNC_MASK 0x0000036c
+#define NV20TCL_STENCIL_OP_FAIL 0x00000370
+#define NV20TCL_STENCIL_OP_FAIL_ZERO 0x00000000
+#define NV20TCL_STENCIL_OP_FAIL_INVERT 0x0000150a
+#define NV20TCL_STENCIL_OP_FAIL_KEEP 0x00001e00
+#define NV20TCL_STENCIL_OP_FAIL_REPLACE 0x00001e01
+#define NV20TCL_STENCIL_OP_FAIL_INCR 0x00001e02
+#define NV20TCL_STENCIL_OP_FAIL_DECR 0x00001e03
+#define NV20TCL_STENCIL_OP_FAIL_INCR_WRAP 0x00008507
+#define NV20TCL_STENCIL_OP_FAIL_DECR_WRAP 0x00008508
+#define NV20TCL_STENCIL_OP_ZFAIL 0x00000374
+#define NV20TCL_STENCIL_OP_ZFAIL_ZERO 0x00000000
+#define NV20TCL_STENCIL_OP_ZFAIL_INVERT 0x0000150a
+#define NV20TCL_STENCIL_OP_ZFAIL_KEEP 0x00001e00
+#define NV20TCL_STENCIL_OP_ZFAIL_REPLACE 0x00001e01
+#define NV20TCL_STENCIL_OP_ZFAIL_INCR 0x00001e02
+#define NV20TCL_STENCIL_OP_ZFAIL_DECR 0x00001e03
+#define NV20TCL_STENCIL_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV20TCL_STENCIL_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV20TCL_STENCIL_OP_ZPASS 0x00000378
+#define NV20TCL_STENCIL_OP_ZPASS_ZERO 0x00000000
+#define NV20TCL_STENCIL_OP_ZPASS_INVERT 0x0000150a
+#define NV20TCL_STENCIL_OP_ZPASS_KEEP 0x00001e00
+#define NV20TCL_STENCIL_OP_ZPASS_REPLACE 0x00001e01
+#define NV20TCL_STENCIL_OP_ZPASS_INCR 0x00001e02
+#define NV20TCL_STENCIL_OP_ZPASS_DECR 0x00001e03
+#define NV20TCL_STENCIL_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV20TCL_STENCIL_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV20TCL_SHADE_MODEL 0x0000037c
+#define NV20TCL_SHADE_MODEL_FLAT 0x00001d00
+#define NV20TCL_SHADE_MODEL_SMOOTH 0x00001d01
+#define NV20TCL_LINE_WIDTH 0x00000380
+#define NV20TCL_POLYGON_OFFSET_FACTOR 0x00000384
+#define NV20TCL_POLYGON_OFFSET_UNITS 0x00000388
+#define NV20TCL_POLYGON_MODE_FRONT 0x0000038c
+#define NV20TCL_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NV20TCL_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NV20TCL_POLYGON_MODE_FRONT_FILL 0x00001b02
+#define NV20TCL_POLYGON_MODE_BACK 0x00000390
+#define NV20TCL_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NV20TCL_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NV20TCL_POLYGON_MODE_BACK_FILL 0x00001b02
+#define NV20TCL_DEPTH_RANGE_NEAR 0x00000394
+#define NV20TCL_DEPTH_RANGE_FAR 0x00000398
+#define NV20TCL_CULL_FACE 0x0000039c
+#define NV20TCL_CULL_FACE_FRONT 0x00000404
+#define NV20TCL_CULL_FACE_BACK 0x00000405
+#define NV20TCL_CULL_FACE_FRONT_AND_BACK 0x00000408
+#define NV20TCL_FRONT_FACE 0x000003a0
+#define NV20TCL_FRONT_FACE_CW 0x00000900
+#define NV20TCL_FRONT_FACE_CCW 0x00000901
+#define NV20TCL_NORMALIZE_ENABLE 0x000003a4
+#define NV20TCL_COLOR_MATERIAL_FRONT_R 0x000003a8
+#define NV20TCL_COLOR_MATERIAL_FRONT_G 0x000003ac
+#define NV20TCL_COLOR_MATERIAL_FRONT_B 0x000003b0
+#define NV20TCL_COLOR_MATERIAL_FRONT_A 0x000003b4
+#define NV20TCL_SEPARATE_SPECULAR_ENABLE 0x000003b8
+#define NV20TCL_ENABLED_LIGHTS 0x000003bc
+#define NV20TCL_TX_GEN_S(x) (0x000003c0+((x)*16))
+#define NV20TCL_TX_GEN_S__SIZE 0x00000004
+#define NV20TCL_TX_GEN_S_FALSE 0x00000000
+#define NV20TCL_TX_GEN_S_EYE_LINEAR 0x00002400
+#define NV20TCL_TX_GEN_S_OBJECT_LINEAR 0x00002401
+#define NV20TCL_TX_GEN_S_SPHERE_MAP 0x00002402
+#define NV20TCL_TX_GEN_S_NORMAL_MAP 0x00008511
+#define NV20TCL_TX_GEN_S_REFLECTION_MAP 0x00008512
+#define NV20TCL_TX_GEN_T(x) (0x000003c4+((x)*16))
+#define NV20TCL_TX_GEN_T__SIZE 0x00000004
+#define NV20TCL_TX_GEN_T_FALSE 0x00000000
+#define NV20TCL_TX_GEN_T_EYE_LINEAR 0x00002400
+#define NV20TCL_TX_GEN_T_OBJECT_LINEAR 0x00002401
+#define NV20TCL_TX_GEN_T_SPHERE_MAP 0x00002402
+#define NV20TCL_TX_GEN_T_NORMAL_MAP 0x00008511
+#define NV20TCL_TX_GEN_T_REFLECTION_MAP 0x00008512
+#define NV20TCL_TX_GEN_R(x) (0x000003c8+((x)*16))
+#define NV20TCL_TX_GEN_R__SIZE 0x00000004
+#define NV20TCL_TX_GEN_R_FALSE 0x00000000
+#define NV20TCL_TX_GEN_R_EYE_LINEAR 0x00002400
+#define NV20TCL_TX_GEN_R_OBJECT_LINEAR 0x00002401
+#define NV20TCL_TX_GEN_R_SPHERE_MAP 0x00002402
+#define NV20TCL_TX_GEN_R_NORMAL_MAP 0x00008511
+#define NV20TCL_TX_GEN_R_REFLECTION_MAP 0x00008512
+#define NV20TCL_TX_GEN_Q(x) (0x000003cc+((x)*16))
+#define NV20TCL_TX_GEN_Q__SIZE 0x00000004
+#define NV20TCL_TX_GEN_Q_FALSE 0x00000000
+#define NV20TCL_TX_GEN_Q_EYE_LINEAR 0x00002400
+#define NV20TCL_TX_GEN_Q_OBJECT_LINEAR 0x00002401
+#define NV20TCL_TX_GEN_Q_SPHERE_MAP 0x00002402
+#define NV20TCL_TX_GEN_Q_NORMAL_MAP 0x00008511
+#define NV20TCL_TX_GEN_Q_REFLECTION_MAP 0x00008512
+#define NV20TCL_TX_MATRIX_ENABLE(x) (0x00000420+((x)*4))
+#define NV20TCL_TX_MATRIX_ENABLE__SIZE 0x00000004
+#define NV20TCL_POINT_SIZE 0x0000043c
+#define NV20TCL_MODELVIEW0_MATRIX(x) (0x00000480+((x)*4))
+#define NV20TCL_MODELVIEW0_MATRIX__SIZE 0x00000010
+#define NV20TCL_MODELVIEW1_MATRIX(x) (0x000004c0+((x)*4))
+#define NV20TCL_MODELVIEW1_MATRIX__SIZE 0x00000010
+#define NV20TCL_MODELVIEW2_MATRIX(x) (0x00000500+((x)*4))
+#define NV20TCL_MODELVIEW2_MATRIX__SIZE 0x00000010
+#define NV20TCL_MODELVIEW3_MATRIX(x) (0x00000540+((x)*4))
+#define NV20TCL_MODELVIEW3_MATRIX__SIZE 0x00000010
+#define NV20TCL_INVERSE_MODELVIEW0_MATRIX(x) (0x00000580+((x)*4))
+#define NV20TCL_INVERSE_MODELVIEW0_MATRIX__SIZE 0x00000010
+#define NV20TCL_INVERSE_MODELVIEW1_MATRIX(x) (0x000005c0+((x)*4))
+#define NV20TCL_INVERSE_MODELVIEW1_MATRIX__SIZE 0x00000010
+#define NV20TCL_INVERSE_MODELVIEW2_MATRIX(x) (0x00000600+((x)*4))
+#define NV20TCL_INVERSE_MODELVIEW2_MATRIX__SIZE 0x00000010
+#define NV20TCL_INVERSE_MODELVIEW3_MATRIX(x) (0x00000640+((x)*4))
+#define NV20TCL_INVERSE_MODELVIEW3_MATRIX__SIZE 0x00000010
+#define NV20TCL_PROJECTION_MATRIX(x) (0x00000680+((x)*4))
+#define NV20TCL_PROJECTION_MATRIX__SIZE 0x00000010
+#define NV20TCL_TX0_MATRIX(x) (0x000006c0+((x)*4))
+#define NV20TCL_TX0_MATRIX__SIZE 0x00000010
+#define NV20TCL_TX1_MATRIX(x) (0x00000700+((x)*4))
+#define NV20TCL_TX1_MATRIX__SIZE 0x00000010
+#define NV20TCL_TX2_MATRIX(x) (0x00000740+((x)*4))
+#define NV20TCL_TX2_MATRIX__SIZE 0x00000010
+#define NV20TCL_TX3_MATRIX(x) (0x00000780+((x)*4))
+#define NV20TCL_TX3_MATRIX__SIZE 0x00000010
+#define NV20TCL_TX0_CLIP_PLANE_A(x) (0x00000840+((x)*16))
+#define NV20TCL_TX0_CLIP_PLANE_A__SIZE 0x00000004
+#define NV20TCL_TX0_CLIP_PLANE_B(x) (0x00000844+((x)*16))
+#define NV20TCL_TX0_CLIP_PLANE_B__SIZE 0x00000004
+#define NV20TCL_TX0_CLIP_PLANE_C(x) (0x00000848+((x)*16))
+#define NV20TCL_TX0_CLIP_PLANE_C__SIZE 0x00000004
+#define NV20TCL_TX0_CLIP_PLANE_D(x) (0x0000084c+((x)*16))
+#define NV20TCL_TX0_CLIP_PLANE_D__SIZE 0x00000004
+#define NV20TCL_TX1_CLIP_PLANE_A(x) (0x00000880+((x)*16))
+#define NV20TCL_TX1_CLIP_PLANE_A__SIZE 0x00000004
+#define NV20TCL_TX1_CLIP_PLANE_B(x) (0x00000884+((x)*16))
+#define NV20TCL_TX1_CLIP_PLANE_B__SIZE 0x00000004
+#define NV20TCL_TX1_CLIP_PLANE_C(x) (0x00000888+((x)*16))
+#define NV20TCL_TX1_CLIP_PLANE_C__SIZE 0x00000004
+#define NV20TCL_TX1_CLIP_PLANE_D(x) (0x0000088c+((x)*16))
+#define NV20TCL_TX1_CLIP_PLANE_D__SIZE 0x00000004
+#define NV20TCL_TX2_CLIP_PLANE_A(x) (0x000008c0+((x)*16))
+#define NV20TCL_TX2_CLIP_PLANE_A__SIZE 0x00000004
+#define NV20TCL_TX2_CLIP_PLANE_B(x) (0x000008c4+((x)*16))
+#define NV20TCL_TX2_CLIP_PLANE_B__SIZE 0x00000004
+#define NV20TCL_TX2_CLIP_PLANE_C(x) (0x000008c8+((x)*16))
+#define NV20TCL_TX2_CLIP_PLANE_C__SIZE 0x00000004
+#define NV20TCL_TX2_CLIP_PLANE_D(x) (0x000008cc+((x)*16))
+#define NV20TCL_TX2_CLIP_PLANE_D__SIZE 0x00000004
+#define NV20TCL_TX3_CLIP_PLANE_A(x) (0x00000900+((x)*16))
+#define NV20TCL_TX3_CLIP_PLANE_A__SIZE 0x00000004
+#define NV20TCL_TX3_CLIP_PLANE_B(x) (0x00000904+((x)*16))
+#define NV20TCL_TX3_CLIP_PLANE_B__SIZE 0x00000004
+#define NV20TCL_TX3_CLIP_PLANE_C(x) (0x00000908+((x)*16))
+#define NV20TCL_TX3_CLIP_PLANE_C__SIZE 0x00000004
+#define NV20TCL_TX3_CLIP_PLANE_D(x) (0x0000090c+((x)*16))
+#define NV20TCL_TX3_CLIP_PLANE_D__SIZE 0x00000004
+#define NV20TCL_FOG_EQUATION_CONSTANT 0x000009c0
+#define NV20TCL_FOG_EQUATION_LINEAR 0x000009c4
+#define NV20TCL_FOG_EQUATION_QUADRATIC 0x000009c8
+#define NV20TCL_FRONT_MATERIAL_SHININESS(x) (0x000009e0+((x)*4))
+#define NV20TCL_FRONT_MATERIAL_SHININESS__SIZE 0x00000006
+#define NV20TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x00000a10
+#define NV20TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x00000a14
+#define NV20TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x00000a18
+#define NV20TCL_VIEWPORT_SCALE0_X 0x00000a20
+#define NV20TCL_VIEWPORT_SCALE0_Y 0x00000a24
+#define NV20TCL_VIEWPORT_SCALE0_Z 0x00000a28
+#define NV20TCL_VIEWPORT_SCALE0_W 0x00000a2c
+#define NV20TCL_POINT_PARAMETER(x) (0x00000a30+((x)*4))
+#define NV20TCL_POINT_PARAMETER__SIZE 0x00000008
+#define NV20TCL_RC_CONSTANT_COLOR0(x) (0x00000a60+((x)*4))
+#define NV20TCL_RC_CONSTANT_COLOR0__SIZE 0x00000008
+#define NV20TCL_RC_CONSTANT_COLOR0_B_SHIFT 0
+#define NV20TCL_RC_CONSTANT_COLOR0_B_MASK 0x000000ff
+#define NV20TCL_RC_CONSTANT_COLOR0_G_SHIFT 8
+#define NV20TCL_RC_CONSTANT_COLOR0_G_MASK 0x0000ff00
+#define NV20TCL_RC_CONSTANT_COLOR0_R_SHIFT 16
+#define NV20TCL_RC_CONSTANT_COLOR0_R_MASK 0x00ff0000
+#define NV20TCL_RC_CONSTANT_COLOR0_A_SHIFT 24
+#define NV20TCL_RC_CONSTANT_COLOR0_A_MASK 0xff000000
+#define NV20TCL_RC_CONSTANT_COLOR1(x) (0x00000a80+((x)*4))
+#define NV20TCL_RC_CONSTANT_COLOR1__SIZE 0x00000008
+#define NV20TCL_RC_CONSTANT_COLOR1_B_SHIFT 0
+#define NV20TCL_RC_CONSTANT_COLOR1_B_MASK 0x000000ff
+#define NV20TCL_RC_CONSTANT_COLOR1_G_SHIFT 8
+#define NV20TCL_RC_CONSTANT_COLOR1_G_MASK 0x0000ff00
+#define NV20TCL_RC_CONSTANT_COLOR1_R_SHIFT 16
+#define NV20TCL_RC_CONSTANT_COLOR1_R_MASK 0x00ff0000
+#define NV20TCL_RC_CONSTANT_COLOR1_A_SHIFT 24
+#define NV20TCL_RC_CONSTANT_COLOR1_A_MASK 0xff000000
+#define NV20TCL_RC_OUT_ALPHA(x) (0x00000aa0+((x)*4))
+#define NV20TCL_RC_OUT_ALPHA__SIZE 0x00000008
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SHIFT 0
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_MASK 0x0000000f
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0_ARB 0x00000008
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1_ARB 0x00000009
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_NV 0x0000000c
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE1_NV 0x0000000d
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F_NV 0x0000000f
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SHIFT 4
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_MASK 0x000000f0
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0_ARB 0x00000080
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1_ARB 0x00000090
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_NV 0x000000c0
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE1_NV 0x000000d0
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0
+#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F_NV 0x000000f0
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SHIFT 8
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_MASK 0x00000f00
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0_ARB 0x00000800
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1_ARB 0x00000900
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_NV 0x00000c00
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1_NV 0x00000d00
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00
+#define NV20TCL_RC_OUT_ALPHA_CD_DOT_PRODUCT (1 << 12)
+#define NV20TCL_RC_OUT_ALPHA_AB_DOT_PRODUCT (1 << 13)
+#define NV20TCL_RC_OUT_ALPHA_MUX_SUM (1 << 14)
+#define NV20TCL_RC_OUT_ALPHA_BIAS (1 << 15)
+#define NV20TCL_RC_OUT_ALPHA_BIAS_NONE 0x00000000
+#define NV20TCL_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000
+#define NV20TCL_RC_OUT_ALPHA_SCALE_SHIFT 17
+#define NV20TCL_RC_OUT_ALPHA_SCALE_MASK 0x00000000
+#define NV20TCL_RC_OUT_ALPHA_SCALE_NONE 0x00000000
+#define NV20TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO_NV 0x00020000
+#define NV20TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR_NV 0x00040000
+#define NV20TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000
+#define NV20TCL_RC_IN_RGB(x) (0x00000ac0+((x)*4))
+#define NV20TCL_RC_IN_RGB__SIZE 0x00000008
+#define NV20TCL_RC_IN_RGB_D_INPUT_SHIFT 0
+#define NV20TCL_RC_IN_RGB_D_INPUT_MASK 0x0000000f
+#define NV20TCL_RC_IN_RGB_D_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV20TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV20TCL_RC_IN_RGB_D_INPUT_FOG 0x00000003
+#define NV20TCL_RC_IN_RGB_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV20TCL_RC_IN_RGB_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV20TCL_RC_IN_RGB_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV20TCL_RC_IN_RGB_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV20TCL_RC_IN_RGB_D_INPUT_SPARE0_NV 0x0000000c
+#define NV20TCL_RC_IN_RGB_D_INPUT_SPARE1_NV 0x0000000d
+#define NV20TCL_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV20TCL_RC_IN_RGB_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV20TCL_RC_IN_RGB_D_COMPONENT_USAGE (1 << 4)
+#define NV20TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV20TCL_RC_IN_RGB_D_MAPPING_SHIFT 5
+#define NV20TCL_RC_IN_RGB_D_MAPPING_MASK 0x000000e0
+#define NV20TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV20TCL_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV20TCL_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV20TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV20TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV20TCL_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV20TCL_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV20TCL_RC_IN_RGB_C_INPUT_SHIFT 8
+#define NV20TCL_RC_IN_RGB_C_INPUT_MASK 0x00000f00
+#define NV20TCL_RC_IN_RGB_C_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV20TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV20TCL_RC_IN_RGB_C_INPUT_FOG 0x00000300
+#define NV20TCL_RC_IN_RGB_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV20TCL_RC_IN_RGB_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV20TCL_RC_IN_RGB_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV20TCL_RC_IN_RGB_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV20TCL_RC_IN_RGB_C_INPUT_SPARE0_NV 0x00000c00
+#define NV20TCL_RC_IN_RGB_C_INPUT_SPARE1_NV 0x00000d00
+#define NV20TCL_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV20TCL_RC_IN_RGB_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV20TCL_RC_IN_RGB_C_COMPONENT_USAGE (1 << 12)
+#define NV20TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_SHIFT 13
+#define NV20TCL_RC_IN_RGB_C_MAPPING_MASK 0x0000e000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV20TCL_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV20TCL_RC_IN_RGB_B_INPUT_SHIFT 16
+#define NV20TCL_RC_IN_RGB_B_INPUT_MASK 0x000f0000
+#define NV20TCL_RC_IN_RGB_B_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV20TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV20TCL_RC_IN_RGB_B_INPUT_FOG 0x00030000
+#define NV20TCL_RC_IN_RGB_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV20TCL_RC_IN_RGB_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV20TCL_RC_IN_RGB_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV20TCL_RC_IN_RGB_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV20TCL_RC_IN_RGB_B_INPUT_SPARE0_NV 0x000c0000
+#define NV20TCL_RC_IN_RGB_B_INPUT_SPARE1_NV 0x000d0000
+#define NV20TCL_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV20TCL_RC_IN_RGB_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV20TCL_RC_IN_RGB_B_COMPONENT_USAGE (1 << 20)
+#define NV20TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_SHIFT 21
+#define NV20TCL_RC_IN_RGB_B_MAPPING_MASK 0x00e00000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV20TCL_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV20TCL_RC_IN_RGB_A_INPUT_SHIFT 24
+#define NV20TCL_RC_IN_RGB_A_INPUT_MASK 0x0f000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_ZERO 0x00000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_FOG 0x03000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_SPARE0_NV 0x0c000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_SPARE1_NV 0x0d000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV20TCL_RC_IN_RGB_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV20TCL_RC_IN_RGB_A_COMPONENT_USAGE (1 << 28)
+#define NV20TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000
+#define NV20TCL_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_SHIFT 29
+#define NV20TCL_RC_IN_RGB_A_MAPPING_MASK 0xe0000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV20TCL_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV20TCL_VIEWPORT_SCALE1_X 0x00000af0
+#define NV20TCL_VIEWPORT_SCALE1_Y 0x00000af4
+#define NV20TCL_VIEWPORT_SCALE1_Z 0x00000af8
+#define NV20TCL_VIEWPORT_SCALE1_W 0x00000afc
+#define NV20TCL_VP_UPLOAD_INST(x) (0x00000b00+((x)*4))
+#define NV20TCL_VP_UPLOAD_INST__SIZE 0x00000004
+#define NV20TCL_VP_UPLOAD_CONST(x) (0x00000b80+((x)*4))
+#define NV20TCL_VP_UPLOAD_CONST__SIZE 0x00000004
+#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_R(x) (0x00000c00+((x)*64))
+#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008
+#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_G(x) (0x00000c04+((x)*64))
+#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008
+#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_B(x) (0x00000c08+((x)*64))
+#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(x) (0x00001000+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(x) (0x00001004+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(x) (0x00001008+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(x) (0x0000100c+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(x) (0x00001010+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(x) (0x00001014+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(x) (0x00001018+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(x) (0x0000101c+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G__SIZE 0x00000008
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(x) (0x00001020+((x)*128))
+#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B__SIZE 0x00000008
+#define NV20TCL_LIGHT_HALF_VECTOR_X(x) (0x00001028+((x)*128))
+#define NV20TCL_LIGHT_HALF_VECTOR_X__SIZE 0x00000008
+#define NV20TCL_LIGHT_HALF_VECTOR_Y(x) (0x0000102c+((x)*128))
+#define NV20TCL_LIGHT_HALF_VECTOR_Y__SIZE 0x00000008
+#define NV20TCL_LIGHT_HALF_VECTOR_Z(x) (0x00001030+((x)*128))
+#define NV20TCL_LIGHT_HALF_VECTOR_Z__SIZE 0x00000008
+#define NV20TCL_LIGHT_DIRECTION_X(x) (0x00001034+((x)*128))
+#define NV20TCL_LIGHT_DIRECTION_X__SIZE 0x00000008
+#define NV20TCL_LIGHT_DIRECTION_Y(x) (0x00001038+((x)*128))
+#define NV20TCL_LIGHT_DIRECTION_Y__SIZE 0x00000008
+#define NV20TCL_LIGHT_DIRECTION_Z(x) (0x0000103c+((x)*128))
+#define NV20TCL_LIGHT_DIRECTION_Z__SIZE 0x00000008
+#define NV20TCL_LIGHT_POSITION_X(x) (0x0000105c+((x)*128))
+#define NV20TCL_LIGHT_POSITION_X__SIZE 0x00000008
+#define NV20TCL_LIGHT_POSITION_Y(x) (0x00001060+((x)*128))
+#define NV20TCL_LIGHT_POSITION_Y__SIZE 0x00000008
+#define NV20TCL_LIGHT_POSITION_Z(x) (0x00001064+((x)*128))
+#define NV20TCL_LIGHT_POSITION_Z__SIZE 0x00000008
+#define NV20TCL_LIGHT_CONSTANT_ATTENUATION(x) (0x00001068+((x)*128))
+#define NV20TCL_LIGHT_CONSTANT_ATTENUATION__SIZE 0x00000008
+#define NV20TCL_LIGHT_LINEAR_ATTENUATION(x) (0x0000106c+((x)*128))
+#define NV20TCL_LIGHT_LINEAR_ATTENUATION__SIZE 0x00000008
+#define NV20TCL_LIGHT_QUADRATIC_ATTENUATION(x) (0x00001070+((x)*128))
+#define NV20TCL_LIGHT_QUADRATIC_ATTENUATION__SIZE 0x00000008
+#define NV20TCL_POLYGON_STIPPLE_ENABLE 0x0000147c
+#define NV20TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4))
+#define NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020
+#define NV20TCL_VERTEX_POS_3F_X 0x00001500
+#define NV20TCL_VERTEX_POS_3F_Y 0x00001504
+#define NV20TCL_VERTEX_POS_3F_Z 0x00001508
+#define NV20TCL_VERTEX_POS_4F_X 0x00001518
+#define NV20TCL_VERTEX_POS_4F_Y 0x0000151c
+#define NV20TCL_VERTEX_POS_4F_Z 0x00001520
+#define NV20TCL_VERTEX_POS_3I_XY 0x00001528
+#define NV20TCL_VERTEX_POS_3I_XY_X_SHIFT 0
+#define NV20TCL_VERTEX_POS_3I_XY_X_MASK 0x0000ffff
+#define NV20TCL_VERTEX_POS_3I_XY_Y_SHIFT 16
+#define NV20TCL_VERTEX_POS_3I_XY_Y_MASK 0xffff0000
+#define NV20TCL_VERTEX_POS_3I_Z 0x0000152c
+#define NV20TCL_VERTEX_POS_3I_Z_Z_SHIFT 0
+#define NV20TCL_VERTEX_POS_3I_Z_Z_MASK 0x0000ffff
+#define NV20TCL_VERTEX_NOR_3F_X 0x00001530
+#define NV20TCL_VERTEX_NOR_3F_Y 0x00001534
+#define NV20TCL_VERTEX_NOR_3F_Z 0x00001538
+#define NV20TCL_VERTEX_NOR_3I_XY 0x00001540
+#define NV20TCL_VERTEX_NOR_3I_XY_X_SHIFT 0
+#define NV20TCL_VERTEX_NOR_3I_XY_X_MASK 0x0000ffff
+#define NV20TCL_VERTEX_NOR_3I_XY_Y_SHIFT 16
+#define NV20TCL_VERTEX_NOR_3I_XY_Y_MASK 0xffff0000
+#define NV20TCL_VERTEX_NOR_3I_Z 0x00001544
+#define NV20TCL_VERTEX_NOR_3I_Z_Z_SHIFT 0
+#define NV20TCL_VERTEX_NOR_3I_Z_Z_MASK 0x0000ffff
+#define NV20TCL_VERTEX_COL_4F_X 0x00001550
+#define NV20TCL_VERTEX_COL_4F_Y 0x00001554
+#define NV20TCL_VERTEX_COL_4F_Z 0x00001558
+#define NV20TCL_VERTEX_COL_4F_W 0x0000155c
+#define NV20TCL_VERTEX_COL_3F_X 0x00001560
+#define NV20TCL_VERTEX_COL_3F_Y 0x00001564
+#define NV20TCL_VERTEX_COL_3F_Z 0x00001568
+#define NV20TCL_VERTEX_COL_4I 0x0000156c
+#define NV20TCL_VERTEX_COL_4I_R_SHIFT 0
+#define NV20TCL_VERTEX_COL_4I_R_MASK 0x000000ff
+#define NV20TCL_VERTEX_COL_4I_G_SHIFT 8
+#define NV20TCL_VERTEX_COL_4I_G_MASK 0x0000ff00
+#define NV20TCL_VERTEX_COL_4I_B_SHIFT 16
+#define NV20TCL_VERTEX_COL_4I_B_MASK 0x00ff0000
+#define NV20TCL_VERTEX_COL_4I_A_SHIFT 24
+#define NV20TCL_VERTEX_COL_4I_A_MASK 0xff000000
+#define NV20TCL_VERTEX_COL2_3F_X 0x00001580
+#define NV20TCL_VERTEX_COL2_3F_Y 0x00001584
+#define NV20TCL_VERTEX_COL2_3F_Z 0x00001588
+#define NV20TCL_VERTEX_COL2_4I 0x0000158c
+#define NV20TCL_VERTEX_COL2_4I_R_SHIFT 0
+#define NV20TCL_VERTEX_COL2_4I_R_MASK 0x000000ff
+#define NV20TCL_VERTEX_COL2_4I_G_SHIFT 8
+#define NV20TCL_VERTEX_COL2_4I_G_MASK 0x0000ff00
+#define NV20TCL_VERTEX_COL2_4I_B_SHIFT 16
+#define NV20TCL_VERTEX_COL2_4I_B_MASK 0x00ff0000
+#define NV20TCL_VERTEX_COL2_4I_A_SHIFT 24
+#define NV20TCL_VERTEX_COL2_4I_A_MASK 0xff000000
+#define NV20TCL_VERTEX_TX0_2F_S 0x00001590
+#define NV20TCL_VERTEX_TX0_2F_T 0x00001594
+#define NV20TCL_VERTEX_TX0_2I 0x00001598
+#define NV20TCL_VERTEX_TX0_2I_S_SHIFT 0
+#define NV20TCL_VERTEX_TX0_2I_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX0_2I_T_SHIFT 16
+#define NV20TCL_VERTEX_TX0_2I_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX0_4F_S 0x000015a0
+#define NV20TCL_VERTEX_TX0_4F_T 0x000015a4
+#define NV20TCL_VERTEX_TX0_4F_R 0x000015a8
+#define NV20TCL_VERTEX_TX0_4F_Q 0x000015ac
+#define NV20TCL_VERTEX_TX0_4I_ST 0x000015b0
+#define NV20TCL_VERTEX_TX0_4I_ST_S_SHIFT 0
+#define NV20TCL_VERTEX_TX0_4I_ST_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX0_4I_ST_T_SHIFT 16
+#define NV20TCL_VERTEX_TX0_4I_ST_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX0_4I_RQ 0x000015b4
+#define NV20TCL_VERTEX_TX0_4I_RQ_R_SHIFT 0
+#define NV20TCL_VERTEX_TX0_4I_RQ_R_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX0_4I_RQ_Q_SHIFT 16
+#define NV20TCL_VERTEX_TX0_4I_RQ_Q_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX1_2F_S 0x000015b8
+#define NV20TCL_VERTEX_TX1_2F_T 0x000015bc
+#define NV20TCL_VERTEX_TX1_2I 0x000015c0
+#define NV20TCL_VERTEX_TX1_2I_S_SHIFT 0
+#define NV20TCL_VERTEX_TX1_2I_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX1_2I_T_SHIFT 16
+#define NV20TCL_VERTEX_TX1_2I_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX1_4F_S 0x000015c8
+#define NV20TCL_VERTEX_TX1_4F_T 0x000015cc
+#define NV20TCL_VERTEX_TX1_4F_R 0x000015d0
+#define NV20TCL_VERTEX_TX1_4F_Q 0x000015d4
+#define NV20TCL_VERTEX_TX1_4I_ST 0x000015d8
+#define NV20TCL_VERTEX_TX1_4I_ST_S_SHIFT 0
+#define NV20TCL_VERTEX_TX1_4I_ST_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX1_4I_ST_T_SHIFT 16
+#define NV20TCL_VERTEX_TX1_4I_ST_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX1_4I_RQ 0x000015dc
+#define NV20TCL_VERTEX_TX1_4I_RQ_R_SHIFT 0
+#define NV20TCL_VERTEX_TX1_4I_RQ_R_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX1_4I_RQ_Q_SHIFT 16
+#define NV20TCL_VERTEX_TX1_4I_RQ_Q_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX2_2F_S 0x000015e0
+#define NV20TCL_VERTEX_TX2_2F_T 0x000015e4
+#define NV20TCL_VERTEX_TX2_2I 0x000015e8
+#define NV20TCL_VERTEX_TX2_2I_S_SHIFT 0
+#define NV20TCL_VERTEX_TX2_2I_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX2_2I_T_SHIFT 16
+#define NV20TCL_VERTEX_TX2_2I_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX2_4F_S 0x000015f0
+#define NV20TCL_VERTEX_TX2_4F_T 0x000015f4
+#define NV20TCL_VERTEX_TX2_4F_R 0x000015f8
+#define NV20TCL_VERTEX_TX2_4F_Q 0x000015fc
+#define NV20TCL_VERTEX_TX2_4I_ST 0x00001600
+#define NV20TCL_VERTEX_TX2_4I_ST_S_SHIFT 0
+#define NV20TCL_VERTEX_TX2_4I_ST_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX2_4I_ST_T_SHIFT 16
+#define NV20TCL_VERTEX_TX2_4I_ST_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX2_4I_RQ 0x00001604
+#define NV20TCL_VERTEX_TX2_4I_RQ_R_SHIFT 0
+#define NV20TCL_VERTEX_TX2_4I_RQ_R_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX2_4I_RQ_Q_SHIFT 16
+#define NV20TCL_VERTEX_TX2_4I_RQ_Q_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX3_2F_S 0x00001608
+#define NV20TCL_VERTEX_TX3_2F_T 0x0000160c
+#define NV20TCL_VERTEX_TX3_2I 0x00001610
+#define NV20TCL_VERTEX_TX3_2I_S_SHIFT 0
+#define NV20TCL_VERTEX_TX3_2I_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX3_2I_T_SHIFT 16
+#define NV20TCL_VERTEX_TX3_2I_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX3_4F_S 0x00001620
+#define NV20TCL_VERTEX_TX3_4F_T 0x00001624
+#define NV20TCL_VERTEX_TX3_4F_R 0x00001628
+#define NV20TCL_VERTEX_TX3_4F_Q 0x0000162c
+#define NV20TCL_VERTEX_TX3_4I_ST 0x00001630
+#define NV20TCL_VERTEX_TX3_4I_ST_S_SHIFT 0
+#define NV20TCL_VERTEX_TX3_4I_ST_S_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX3_4I_ST_T_SHIFT 16
+#define NV20TCL_VERTEX_TX3_4I_ST_T_MASK 0xffff0000
+#define NV20TCL_VERTEX_TX3_4I_RQ 0x00001634
+#define NV20TCL_VERTEX_TX3_4I_RQ_R_SHIFT 0
+#define NV20TCL_VERTEX_TX3_4I_RQ_R_MASK 0x0000ffff
+#define NV20TCL_VERTEX_TX3_4I_RQ_Q_SHIFT 16
+#define NV20TCL_VERTEX_TX3_4I_RQ_Q_MASK 0xffff0000
+#define NV20TCL_VERTEX_FOG_1F 0x00001698
+#define NV20TCL_EDGEFLAG_ENABLE 0x000016bc
+#define NV20TCL_VTXBUF_ADDRESS(x) (0x00001720+((x)*4))
+#define NV20TCL_VTXBUF_ADDRESS__SIZE 0x00000010
+#define NV20TCL_VTXBUF_ADDRESS_DMA1 (1 << 31)
+#define NV20TCL_VTXBUF_ADDRESS_OFFSET_SHIFT 0
+#define NV20TCL_VTXBUF_ADDRESS_OFFSET_MASK 0x0fffffff
+#define NV20TCL_VTXFMT(x) (0x00001760+((x)*4))
+#define NV20TCL_VTXFMT__SIZE 0x00000010
+#define NV20TCL_VTXFMT_TYPE_SHIFT 0
+#define NV20TCL_VTXFMT_TYPE_MASK 0x0000000f
+#define NV20TCL_VTXFMT_TYPE_FLOAT 0x00000002
+#define NV20TCL_VTXFMT_TYPE_UBYTE 0x00000004
+#define NV20TCL_VTXFMT_TYPE_USHORT 0x00000005
+#define NV20TCL_VTXFMT_SIZE_SHIFT 4
+#define NV20TCL_VTXFMT_SIZE_MASK 0x000000f0
+#define NV20TCL_VTXFMT_STRIDE_SHIFT 8
+#define NV20TCL_VTXFMT_STRIDE_MASK 0x0000ff00
+#define NV20TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000017a0
+#define NV20TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000017a4
+#define NV20TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000017a8
+#define NV20TCL_COLOR_MATERIAL_BACK_A 0x000017ac
+#define NV20TCL_COLOR_MATERIAL_BACK_R 0x000017b0
+#define NV20TCL_COLOR_MATERIAL_BACK_G 0x000017b4
+#define NV20TCL_COLOR_MATERIAL_BACK_B 0x000017b8
+#define NV20TCL_COLOR_LOGIC_OP_ENABLE 0x000017bc
+#define NV20TCL_COLOR_LOGIC_OP_OP 0x000017c0
+#define NV20TCL_COLOR_LOGIC_OP_OP_CLEAR 0x00001500
+#define NV20TCL_COLOR_LOGIC_OP_OP_AND 0x00001501
+#define NV20TCL_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502
+#define NV20TCL_COLOR_LOGIC_OP_OP_COPY 0x00001503
+#define NV20TCL_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504
+#define NV20TCL_COLOR_LOGIC_OP_OP_NOOP 0x00001505
+#define NV20TCL_COLOR_LOGIC_OP_OP_XOR 0x00001506
+#define NV20TCL_COLOR_LOGIC_OP_OP_OR 0x00001507
+#define NV20TCL_COLOR_LOGIC_OP_OP_NOR 0x00001508
+#define NV20TCL_COLOR_LOGIC_OP_OP_EQUIV 0x00001509
+#define NV20TCL_COLOR_LOGIC_OP_OP_INVERT 0x0000150a
+#define NV20TCL_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b
+#define NV20TCL_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c
+#define NV20TCL_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d
+#define NV20TCL_COLOR_LOGIC_OP_OP_NAND 0x0000150e
+#define NV20TCL_COLOR_LOGIC_OP_OP_SET 0x0000150f
+#define NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE 0x000017c4
+#define NV20TCL_TX_SHADER_CULL_MODE 0x000017f8
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_S (1 << 0)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_S_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_S_LESS 0x00000001
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_T (1 << 1)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_T_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_T_LESS 0x00000002
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_R (1 << 2)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_R_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_R_LESS 0x00000004
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_Q (1 << 3)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_Q_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX0_Q_LESS 0x00000008
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_S (1 << 4)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_S_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_S_LESS 0x00000010
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_T (1 << 5)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_T_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_T_LESS 0x00000020
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_R (1 << 6)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_R_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_R_LESS 0x00000040
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_Q (1 << 7)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_Q_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX1_Q_LESS 0x00000080
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_S (1 << 8)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_S_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_S_LESS 0x00000100
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_T (1 << 9)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_T_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_T_LESS 0x00000200
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_R (1 << 10)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_R_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_R_LESS 0x00000400
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_Q (1 << 11)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_Q_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX2_Q_LESS 0x00000800
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_S (1 << 12)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_S_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_S_LESS 0x00001000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_T (1 << 13)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_T_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_T_LESS 0x00002000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_R (1 << 14)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_R_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_R_LESS 0x00004000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_Q (1 << 15)
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_Q_GEQUAL 0x00000000
+#define NV20TCL_TX_SHADER_CULL_MODE_TX3_Q_LESS 0x00008000
+#define NV20TCL_VERTEX_BEGIN_END 0x000017fc
+#define NV20TCL_VERTEX_BEGIN_END_STOP 0x00000000
+#define NV20TCL_VERTEX_BEGIN_END_POINTS 0x00000001
+#define NV20TCL_VERTEX_BEGIN_END_LINES 0x00000002
+#define NV20TCL_VERTEX_BEGIN_END_LINE_LOOP 0x00000003
+#define NV20TCL_VERTEX_BEGIN_END_LINE_STRIP 0x00000004
+#define NV20TCL_VERTEX_BEGIN_END_TRIANGLES 0x00000005
+#define NV20TCL_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006
+#define NV20TCL_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007
+#define NV20TCL_VERTEX_BEGIN_END_QUADS 0x00000008
+#define NV20TCL_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009
+#define NV20TCL_VERTEX_BEGIN_END_POLYGON 0x0000000a
+#define NV20TCL_VB_ELEMENT_U16 0x00001800
+#define NV20TCL_VB_ELEMENT_U16_I0_SHIFT 0
+#define NV20TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff
+#define NV20TCL_VB_ELEMENT_U16_I1_SHIFT 16
+#define NV20TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000
+#define NV20TCL_VB_VERTEX_BATCH 0x00001810
+#define NV20TCL_VB_VERTEX_BATCH_OFFSET_SHIFT 0
+#define NV20TCL_VB_VERTEX_BATCH_OFFSET_MASK 0x00ffffff
+#define NV20TCL_VB_VERTEX_BATCH_COUNT_SHIFT 24
+#define NV20TCL_VB_VERTEX_BATCH_COUNT_MASK 0xff000000
+#define NV20TCL_VERTEX_DATA 0x00001818
+#define NV20TCL_TX_SHADER_CONST_EYE_X 0x0000181c
+#define NV20TCL_TX_SHADER_CONST_EYE_Y 0x00001820
+#define NV20TCL_TX_SHADER_CONST_EYE_Z 0x00001824
+#define NV20TCL_VTX_ATTR_4F_X(x) (0x00001a00+((x)*16))
+#define NV20TCL_VTX_ATTR_4F_X__SIZE 0x00000010
+#define NV20TCL_VTX_ATTR_4F_Y(x) (0x00001a04+((x)*16))
+#define NV20TCL_VTX_ATTR_4F_Y__SIZE 0x00000010
+#define NV20TCL_VTX_ATTR_4F_Z(x) (0x00001a08+((x)*16))
+#define NV20TCL_VTX_ATTR_4F_Z__SIZE 0x00000010
+#define NV20TCL_VTX_ATTR_4F_W(x) (0x00001a0c+((x)*16))
+#define NV20TCL_VTX_ATTR_4F_W__SIZE 0x00000010
+#define NV20TCL_TX_OFFSET(x) (0x00001b00+((x)*64))
+#define NV20TCL_TX_OFFSET__SIZE 0x00000004
+#define NV20TCL_TX_FORMAT(x) (0x00001b04+((x)*64))
+#define NV20TCL_TX_FORMAT__SIZE 0x00000004
+#define NV20TCL_TX_FORMAT_DMA0 (1 << 0)
+#define NV20TCL_TX_FORMAT_DMA1 (1 << 1)
+#define NV20TCL_TX_FORMAT_CUBIC (1 << 2)
+#define NV20TCL_TX_FORMAT_NO_BORDER (1 << 3)
+#define NV20TCL_TX_FORMAT_DIMS_SHIFT 4
+#define NV20TCL_TX_FORMAT_DIMS_MASK 0x000000f0
+#define NV20TCL_TX_FORMAT_DIMS_1D 0x00000010
+#define NV20TCL_TX_FORMAT_DIMS_2D 0x00000020
+#define NV20TCL_TX_FORMAT_DIMS_3D 0x00000030
+#define NV20TCL_TX_FORMAT_FORMAT_SHIFT 8
+#define NV20TCL_TX_FORMAT_FORMAT_MASK 0x0000ff00
+#define NV20TCL_TX_FORMAT_FORMAT_L8 0x00000000
+#define NV20TCL_TX_FORMAT_FORMAT_A8 0x00000100
+#define NV20TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000200
+#define NV20TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000300
+#define NV20TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000400
+#define NV20TCL_TX_FORMAT_FORMAT_R5G6B5 0x00000500
+#define NV20TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000600
+#define NV20TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000700
+#define NV20TCL_TX_FORMAT_FORMAT_INDEX8 0x00000b00
+#define NV20TCL_TX_FORMAT_FORMAT_DXT1 0x00000c00
+#define NV20TCL_TX_FORMAT_FORMAT_DXT3 0x00000e00
+#define NV20TCL_TX_FORMAT_FORMAT_DXT5 0x00000f00
+#define NV20TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00001000
+#define NV20TCL_TX_FORMAT_FORMAT_R5G6B5_RECT 0x00001100
+#define NV20TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00001200
+#define NV20TCL_TX_FORMAT_FORMAT_L8_RECT 0x00001300
+#define NV20TCL_TX_FORMAT_FORMAT_A8L8 0x00001a00
+#define NV20TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00001b00
+#define NV20TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00001d00
+#define NV20TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00001e00
+#define NV20TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00002000
+#define NV20TCL_TX_FORMAT_FORMAT_DSDT 0x00002800
+#define NV20TCL_TX_FORMAT_FORMAT_A16 0x00003200
+#define NV20TCL_TX_FORMAT_FORMAT_HILO16 0x00003300
+#define NV20TCL_TX_FORMAT_FORMAT_A16_RECT 0x00003500
+#define NV20TCL_TX_FORMAT_FORMAT_HILO16_RECT 0x00003600
+#define NV20TCL_TX_FORMAT_FORMAT_HILO8 0x00004400
+#define NV20TCL_TX_FORMAT_FORMAT_SIGNED_HILO8 0x00004500
+#define NV20TCL_TX_FORMAT_FORMAT_HILO8_RECT 0x00004600
+#define NV20TCL_TX_FORMAT_FORMAT_SIGNED_HILO8_RECT 0x00004700
+#define NV20TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00004a00
+#define NV20TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00004b00
+#define NV20TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00004c00
+#define NV20TCL_TX_FORMAT_MIPMAP (1 << 19)
+#define NV20TCL_TX_FORMAT_BASE_SIZE_U_SHIFT 20
+#define NV20TCL_TX_FORMAT_BASE_SIZE_U_MASK 0x00f00000
+#define NV20TCL_TX_FORMAT_BASE_SIZE_V_SHIFT 24
+#define NV20TCL_TX_FORMAT_BASE_SIZE_V_MASK 0x0f000000
+#define NV20TCL_TX_FORMAT_BASE_SIZE_W_SHIFT 28
+#define NV20TCL_TX_FORMAT_BASE_SIZE_W_MASK 0xf0000000
+#define NV20TCL_TX_WRAP(x) (0x00001b08+((x)*64))
+#define NV20TCL_TX_WRAP__SIZE 0x00000004
+#define NV20TCL_TX_WRAP_S_SHIFT 0
+#define NV20TCL_TX_WRAP_S_MASK 0x000000ff
+#define NV20TCL_TX_WRAP_S_REPEAT 0x00000001
+#define NV20TCL_TX_WRAP_S_MIRRORED_REPEAT 0x00000002
+#define NV20TCL_TX_WRAP_S_CLAMP_TO_EDGE 0x00000003
+#define NV20TCL_TX_WRAP_S_CLAMP_TO_BORDER 0x00000004
+#define NV20TCL_TX_WRAP_S_CLAMP 0x00000005
+#define NV20TCL_TX_WRAP_T_SHIFT 8
+#define NV20TCL_TX_WRAP_T_MASK 0x00000f00
+#define NV20TCL_TX_WRAP_T_REPEAT 0x00000100
+#define NV20TCL_TX_WRAP_T_MIRRORED_REPEAT 0x00000200
+#define NV20TCL_TX_WRAP_T_CLAMP_TO_EDGE 0x00000300
+#define NV20TCL_TX_WRAP_T_CLAMP_TO_BORDER 0x00000400
+#define NV20TCL_TX_WRAP_T_CLAMP 0x00000500
+#define NV20TCL_TX_WRAP_R_SHIFT 16
+#define NV20TCL_TX_WRAP_R_MASK 0x000f0000
+#define NV20TCL_TX_WRAP_R_REPEAT 0x00010000
+#define NV20TCL_TX_WRAP_R_MIRRORED_REPEAT 0x00020000
+#define NV20TCL_TX_WRAP_R_CLAMP_TO_EDGE 0x00030000
+#define NV20TCL_TX_WRAP_R_CLAMP_TO_BORDER 0x00040000
+#define NV20TCL_TX_WRAP_R_CLAMP 0x00050000
+#define NV20TCL_TX_ENABLE(x) (0x00001b0c+((x)*64))
+#define NV20TCL_TX_ENABLE__SIZE 0x00000004
+#define NV20TCL_TX_ENABLE_ANISO_SHIFT 4
+#define NV20TCL_TX_ENABLE_ANISO_MASK 0x00000030
+#define NV20TCL_TX_ENABLE_ANISO_NONE 0x00000000
+#define NV20TCL_TX_ENABLE_ANISO_2X 0x00000010
+#define NV20TCL_TX_ENABLE_ANISO_4X 0x00000020
+#define NV20TCL_TX_ENABLE_ANISO_8X 0x00000030
+#define NV20TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT 14
+#define NV20TCL_TX_ENABLE_MIPMAP_MAX_LOD_MASK 0x0003c000
+#define NV20TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT 26
+#define NV20TCL_TX_ENABLE_MIPMAP_MIN_LOD_MASK 0x3c000000
+#define NV20TCL_TX_ENABLE_ENABLE (1 << 30)
+#define NV20TCL_TX_SWIZZLE(x) (0x00001b10+((x)*64))
+#define NV20TCL_TX_SWIZZLE__SIZE 0x00000004
+#define NV20TCL_TX_SWIZZLE_RECT_PITCH_SHIFT 16
+#define NV20TCL_TX_SWIZZLE_RECT_PITCH_MASK 0xffff0000
+#define NV20TCL_TX_FILTER(x) (0x00001b14+((x)*64))
+#define NV20TCL_TX_FILTER__SIZE 0x00000004
+#define NV20TCL_TX_FILTER_LOD_BIAS_SHIFT 8
+#define NV20TCL_TX_FILTER_LOD_BIAS_MASK 0x00000f00
+#define NV20TCL_TX_FILTER_MINIFY_SHIFT 16
+#define NV20TCL_TX_FILTER_MINIFY_MASK 0x000f0000
+#define NV20TCL_TX_FILTER_MINIFY_NEAREST 0x00010000
+#define NV20TCL_TX_FILTER_MINIFY_LINEAR 0x00020000
+#define NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x00030000
+#define NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x00040000
+#define NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x00050000
+#define NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x00060000
+#define NV20TCL_TX_FILTER_MAGNIFY_SHIFT 24
+#define NV20TCL_TX_FILTER_MAGNIFY_MASK 0x0f000000
+#define NV20TCL_TX_FILTER_MAGNIFY_NEAREST 0x01000000
+#define NV20TCL_TX_FILTER_MAGNIFY_LINEAR 0x02000000
+#define NV20TCL_TX_NPOT_SIZE(x) (0x00001b1c+((x)*64))
+#define NV20TCL_TX_NPOT_SIZE__SIZE 0x00000004
+#define NV20TCL_TX_NPOT_SIZE_H_SHIFT 0
+#define NV20TCL_TX_NPOT_SIZE_H_MASK 0x0000ffff
+#define NV20TCL_TX_NPOT_SIZE_W_SHIFT 16
+#define NV20TCL_TX_NPOT_SIZE_W_MASK 0xffff0000
+#define NV20TCL_TX_PALETTE_OFFSET(x) (0x00001b20+((x)*64))
+#define NV20TCL_TX_PALETTE_OFFSET__SIZE 0x00000004
+#define NV20TCL_TX_BORDER_COLOR(x) (0x00001b24+((x)*64))
+#define NV20TCL_TX_BORDER_COLOR__SIZE 0x00000004
+#define NV20TCL_TX_BORDER_COLOR_B_SHIFT 0
+#define NV20TCL_TX_BORDER_COLOR_B_MASK 0x000000ff
+#define NV20TCL_TX_BORDER_COLOR_G_SHIFT 8
+#define NV20TCL_TX_BORDER_COLOR_G_MASK 0x0000ff00
+#define NV20TCL_TX_BORDER_COLOR_R_SHIFT 16
+#define NV20TCL_TX_BORDER_COLOR_R_MASK 0x00ff0000
+#define NV20TCL_TX_BORDER_COLOR_A_SHIFT 24
+#define NV20TCL_TX_BORDER_COLOR_A_MASK 0xff000000
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX00(x) (0x00001b28+((x)*64))
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX00__SIZE 0x00000004
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX01(x) (0x00001b2c+((x)*64))
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX01__SIZE 0x00000004
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX11(x) (0x00001b30+((x)*64))
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX11__SIZE 0x00000004
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX10(x) (0x00001b34+((x)*64))
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX10__SIZE 0x00000004
+#define NV20TCL_DEPTH_UNK17D8 0x00001d78
+#define NV20TCL_DEPTH_UNK17D8_CLAMP_SHIFT 4
+#define NV20TCL_DEPTH_UNK17D8_CLAMP_MASK 0x000000f0
+#define NV20TCL_MULTISAMPLE_CONTROL 0x00001d7c
+#define NV20TCL_CLEAR_DEPTH_VALUE 0x00001d8c
+#define NV20TCL_CLEAR_VALUE 0x00001d90
+#define NV20TCL_CLEAR_BUFFERS 0x00001d94
+#define NV20TCL_CLEAR_BUFFERS_COLOR_A (1 << 7)
+#define NV20TCL_CLEAR_BUFFERS_COLOR_B (1 << 6)
+#define NV20TCL_CLEAR_BUFFERS_COLOR_G (1 << 5)
+#define NV20TCL_CLEAR_BUFFERS_COLOR_R (1 << 4)
+#define NV20TCL_CLEAR_BUFFERS_STENCIL (1 << 1)
+#define NV20TCL_CLEAR_BUFFERS_DEPTH (1 << 0)
+#define NV20TCL_RC_COLOR0 0x00001e20
+#define NV20TCL_RC_COLOR0_B_SHIFT 0
+#define NV20TCL_RC_COLOR0_B_MASK 0x000000ff
+#define NV20TCL_RC_COLOR0_G_SHIFT 8
+#define NV20TCL_RC_COLOR0_G_MASK 0x0000ff00
+#define NV20TCL_RC_COLOR0_R_SHIFT 16
+#define NV20TCL_RC_COLOR0_R_MASK 0x00ff0000
+#define NV20TCL_RC_COLOR0_A_SHIFT 24
+#define NV20TCL_RC_COLOR0_A_MASK 0xff000000
+#define NV20TCL_RC_COLOR1 0x00001e24
+#define NV20TCL_RC_COLOR1_B_SHIFT 0
+#define NV20TCL_RC_COLOR1_B_MASK 0x000000ff
+#define NV20TCL_RC_COLOR1_G_SHIFT 8
+#define NV20TCL_RC_COLOR1_G_MASK 0x0000ff00
+#define NV20TCL_RC_COLOR1_R_SHIFT 16
+#define NV20TCL_RC_COLOR1_R_MASK 0x00ff0000
+#define NV20TCL_RC_COLOR1_A_SHIFT 24
+#define NV20TCL_RC_COLOR1_A_MASK 0xff000000
+#define NV20TCL_BACK_MATERIAL_SHININESS(x) (0x00001e28+((x)*4))
+#define NV20TCL_BACK_MATERIAL_SHININESS__SIZE 0x00000006
+#define NV20TCL_RC_OUT_RGB(x) (0x00001e40+((x)*4))
+#define NV20TCL_RC_OUT_RGB__SIZE 0x00000008
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SHIFT 0
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_MASK 0x0000000f
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE0_ARB 0x00000008
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE1_ARB 0x00000009
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_NV 0x0000000c
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SPARE1_NV 0x0000000d
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F_NV 0x0000000f
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SHIFT 4
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_MASK 0x000000f0
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE0_ARB 0x00000080
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE1_ARB 0x00000090
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_NV 0x000000c0
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SPARE1_NV 0x000000d0
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0
+#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F_NV 0x000000f0
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SHIFT 8
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_MASK 0x00000f00
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0_ARB 0x00000800
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1_ARB 0x00000900
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_NV 0x00000c00
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE1_NV 0x00000d00
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00
+#define NV20TCL_RC_OUT_RGB_CD_DOT_PRODUCT (1 << 12)
+#define NV20TCL_RC_OUT_RGB_AB_DOT_PRODUCT (1 << 13)
+#define NV20TCL_RC_OUT_RGB_MUX_SUM (1 << 14)
+#define NV20TCL_RC_OUT_RGB_BIAS (1 << 15)
+#define NV20TCL_RC_OUT_RGB_BIAS_NONE 0x00000000
+#define NV20TCL_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000
+#define NV20TCL_RC_OUT_RGB_SCALE_SHIFT 17
+#define NV20TCL_RC_OUT_RGB_SCALE_MASK 0x00000000
+#define NV20TCL_RC_OUT_RGB_SCALE_NONE 0x00000000
+#define NV20TCL_RC_OUT_RGB_SCALE_SCALE_BY_TWO_NV 0x00020000
+#define NV20TCL_RC_OUT_RGB_SCALE_SCALE_BY_FOUR_NV 0x00040000
+#define NV20TCL_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000
+#define NV20TCL_RC_ENABLE 0x00001e60
+#define NV20TCL_RC_ENABLE_NUM_COMBINERS_SHIFT 0
+#define NV20TCL_RC_ENABLE_NUM_COMBINERS_MASK 0x0000000f
+#define NV20TCL_TX_RCOMP 0x00001e6c
+#define NV20TCL_TX_RCOMP_NEVER 0x00000000
+#define NV20TCL_TX_RCOMP_GREATER 0x00000001
+#define NV20TCL_TX_RCOMP_EQUAL 0x00000002
+#define NV20TCL_TX_RCOMP_GEQUAL 0x00000003
+#define NV20TCL_TX_RCOMP_LESS 0x00000004
+#define NV20TCL_TX_RCOMP_NOTEQUAL 0x00000005
+#define NV20TCL_TX_RCOMP_LEQUAL 0x00000006
+#define NV20TCL_TX_RCOMP_ALWAYS 0x00000007
+#define NV20TCL_TX_SHADER_OP 0x00001e70
+#define NV20TCL_TX_SHADER_OP_TX0_SHIFT 0
+#define NV20TCL_TX_SHADER_OP_TX0_MASK 0x0000001f
+#define NV20TCL_TX_SHADER_OP_TX0_NONE 0x00000000
+#define NV20TCL_TX_SHADER_OP_TX0_TEXTURE_2D 0x00000001
+#define NV20TCL_TX_SHADER_OP_TX0_PASS_THROUGH 0x00000004
+#define NV20TCL_TX_SHADER_OP_TX0_CULL_FRAGMENT 0x00000005
+#define NV20TCL_TX_SHADER_OP_TX0_OFFSET_TEXTURE_2D 0x00000006
+#define NV20TCL_TX_SHADER_OP_TX0_DOT_PRODUCT_TEXTURE_2D 0x00000009
+#define NV20TCL_TX_SHADER_OP_TX0_DOT_PRODUCT_DEPTH_REPLACE 0x0000000a
+#define NV20TCL_TX_SHADER_OP_TX0_DEPENDANT_AR_TEXTURE_2D 0x0000000f
+#define NV20TCL_TX_SHADER_OP_TX0_DEPENDANT_GB_TEXTURE_2D 0x00000010
+#define NV20TCL_TX_SHADER_OP_TX0_DOT_PRODUCT 0x00000011
+#define NV20TCL_TX_SHADER_OP_TX1_SHIFT 5
+#define NV20TCL_TX_SHADER_OP_TX1_MASK 0x000003e0
+#define NV20TCL_TX_SHADER_OP_TX1_NONE 0x00000000
+#define NV20TCL_TX_SHADER_OP_TX1_TEXTURE_2D 0x00000020
+#define NV20TCL_TX_SHADER_OP_TX1_PASS_THROUGH 0x00000080
+#define NV20TCL_TX_SHADER_OP_TX1_CULL_FRAGMENT 0x000000a0
+#define NV20TCL_TX_SHADER_OP_TX1_OFFSET_TEXTURE_2D 0x000000c0
+#define NV20TCL_TX_SHADER_OP_TX1_DOT_PRODUCT_TEXTURE_2D 0x00000120
+#define NV20TCL_TX_SHADER_OP_TX1_DOT_PRODUCT_DEPTH_REPLACE 0x00000140
+#define NV20TCL_TX_SHADER_OP_TX1_DEPENDANT_AR_TEXTURE_2D 0x000001e0
+#define NV20TCL_TX_SHADER_OP_TX1_DEPENDANT_GB_TEXTURE_2D 0x00000200
+#define NV20TCL_TX_SHADER_OP_TX1_DOT_PRODUCT 0x00000220
+#define NV20TCL_TX_SHADER_OP_TX2_SHIFT 10
+#define NV20TCL_TX_SHADER_OP_TX2_MASK 0x00007c00
+#define NV20TCL_TX_SHADER_OP_TX2_NONE 0x00000000
+#define NV20TCL_TX_SHADER_OP_TX2_TEXTURE_2D 0x00000400
+#define NV20TCL_TX_SHADER_OP_TX2_PASS_THROUGH 0x00001000
+#define NV20TCL_TX_SHADER_OP_TX2_CULL_FRAGMENT 0x00001400
+#define NV20TCL_TX_SHADER_OP_TX2_OFFSET_TEXTURE_2D 0x00001800
+#define NV20TCL_TX_SHADER_OP_TX2_DOT_PRODUCT_TEXTURE_2D 0x00002400
+#define NV20TCL_TX_SHADER_OP_TX2_DOT_PRODUCT_DEPTH_REPLACE 0x00002800
+#define NV20TCL_TX_SHADER_OP_TX2_DEPENDANT_AR_TEXTURE_2D 0x00003c00
+#define NV20TCL_TX_SHADER_OP_TX2_DEPENDANT_GB_TEXTURE_2D 0x00004000
+#define NV20TCL_TX_SHADER_OP_TX2_DOT_PRODUCT 0x00004400
+#define NV20TCL_TX_SHADER_OP_TX3_SHIFT 15
+#define NV20TCL_TX_SHADER_OP_TX3_MASK 0x000f8000
+#define NV20TCL_TX_SHADER_OP_TX3_NONE 0x00000000
+#define NV20TCL_TX_SHADER_OP_TX3_TEXTURE_2D 0x00008000
+#define NV20TCL_TX_SHADER_OP_TX3_PASS_THROUGH 0x00020000
+#define NV20TCL_TX_SHADER_OP_TX3_CULL_FRAGMENT 0x00028000
+#define NV20TCL_TX_SHADER_OP_TX3_OFFSET_TEXTURE_2D 0x00030000
+#define NV20TCL_TX_SHADER_OP_TX3_DOT_PRODUCT_TEXTURE_2D 0x00048000
+#define NV20TCL_TX_SHADER_OP_TX3_DOT_PRODUCT_DEPTH_REPLACE 0x00050000
+#define NV20TCL_TX_SHADER_OP_TX3_DEPENDANT_AR_TEXTURE_2D 0x00078000
+#define NV20TCL_TX_SHADER_OP_TX3_DEPENDANT_GB_TEXTURE_2D 0x00080000
+#define NV20TCL_TX_SHADER_OP_TX3_DOT_PRODUCT 0x00088000
+#define NV20TCL_TX_SHADER_DOTMAPPING 0x00001e74
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX0_SHIFT 0
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX0_MASK 0x0000000f
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX1_SHIFT 4
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX1_MASK 0x000000f0
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX2_SHIFT 8
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX2_MASK 0x00000f00
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX3_SHIFT 12
+#define NV20TCL_TX_SHADER_DOTMAPPING_TX3_MASK 0x0000f000
+#define NV20TCL_TX_SHADER_PREVIOUS 0x00001e78
+#define NV20TCL_TX_SHADER_PREVIOUS_TX0_SHIFT 8
+#define NV20TCL_TX_SHADER_PREVIOUS_TX0_MASK 0x00000f00
+#define NV20TCL_TX_SHADER_PREVIOUS_TX1_SHIFT 12
+#define NV20TCL_TX_SHADER_PREVIOUS_TX1_MASK 0x0000f000
+#define NV20TCL_TX_SHADER_PREVIOUS_TX2_SHIFT 16
+#define NV20TCL_TX_SHADER_PREVIOUS_TX2_MASK 0x00030000
+#define NV20TCL_TX_SHADER_PREVIOUS_TX3_SHIFT 20
+#define NV20TCL_TX_SHADER_PREVIOUS_TX3_MASK 0x00300000
+#define NV20TCL_ENGINE 0x00001e94
+#define NV20TCL_ENGINE_VP (1 << 1)
+#define NV20TCL_ENGINE_FIXED (1 << 2)
+#define NV20TCL_VP_UPLOAD_FROM_ID 0x00001e9c
+#define NV20TCL_VP_START_FROM_ID 0x00001ea0
+#define NV20TCL_VP_UPLOAD_CONST_ID 0x00001ea4
+#define NV20TCL_VIEWPORT_TRANSLATE_X 0x00001f00
+#define NV20TCL_VIEWPORT_TRANSLATE_Y 0x00001f04
+#define NV20TCL_VIEWPORT_TRANSLATE_Z 0x00001f08
+#define NV20TCL_VIEWPORT_TRANSLATE_W 0x00001f0c
+
+
+#define NV17TCL 0x00000099
+
+#define NV17TCL_DMA_IN_MEMORY4 0x000001ac
+#define NV17TCL_DMA_IN_MEMORY5 0x000001b0
+#define NV17TCL_COLOR_MASK_ENABLE 0x000002bc
+#define NV17TCL_LMA_DEPTH_BUFFER_PITCH 0x00000d5c
+#define NV17TCL_LMA_DEPTH_BUFFER_OFFSET 0x00000d60
+#define NV17TCL_LMA_DEPTH_FILL_VALUE 0x00000d68
+#define NV17TCL_LMA_DEPTH_BUFFER_CLEAR 0x00000d6c
+#define NV17TCL_LMA_DEPTH_ENABLE 0x00001658
+
+
+#define NV20_SWIZZLED_SURFACE 0x0000009e
+
+
+
+#define NV12_IMAGE_BLIT 0x0000009f
+
+
+
+#define NV30_CONTEXT_SURFACES_2D 0x00000362
+
+
+
+#define NV30_STRETCHED_IMAGE_FROM_CPU 0x00000366
+
+
+
+#define NV30_TEXTURE_FROM_CPU 0x0000037b
+
+
+
+#define NV30_SCALED_IMAGE_FROM_MEMORY 0x00000389
+
+
+
+#define NV30_IMAGE_FROM_CPU 0x0000038a
+
+
+
+#define NV30TCL 0x00000397
+
+
+
+#define NV30_SWIZZLED_SURFACE 0x0000039e
+
+
+
+#define NV35TCL 0x00000497
+
+
+
+#define NV25TCL 0x00000597
+
+#define NV25TCL_DMA_IN_MEMORY4 0x0000019c
+#define NV25TCL_DMA_IN_MEMORY5 0x000001a0
+#define NV25TCL_DMA_IN_MEMORY8 0x000001ac
+#define NV25TCL_DMA_IN_MEMORY9 0x000001b0
+
+
+#define NV34TCL 0x00000697
+
+#define NV34TCL_NOP 0x00000100
+#define NV34TCL_NOTIFY 0x00000104
+#define NV34TCL_DMA_NOTIFY 0x00000180
+#define NV34TCL_DMA_TEXTURE0 0x00000184
+#define NV34TCL_DMA_TEXTURE1 0x00000188
+#define NV34TCL_DMA_COLOR1 0x0000018c
+#define NV34TCL_DMA_COLOR0 0x00000194
+#define NV34TCL_DMA_ZETA 0x00000198
+#define NV34TCL_DMA_VTXBUF0 0x0000019c
+#define NV34TCL_DMA_VTXBUF1 0x000001a0
+#define NV34TCL_DMA_FENCE 0x000001a4
+#define NV34TCL_DMA_QUERY 0x000001a8
+#define NV34TCL_DMA_IN_MEMORY7 0x000001ac
+#define NV34TCL_DMA_IN_MEMORY8 0x000001b0
+#define NV34TCL_RT_HORIZ 0x00000200
+#define NV34TCL_RT_HORIZ_X_SHIFT 0
+#define NV34TCL_RT_HORIZ_X_MASK 0x0000ffff
+#define NV34TCL_RT_HORIZ_W_SHIFT 16
+#define NV34TCL_RT_HORIZ_W_MASK 0xffff0000
+#define NV34TCL_RT_VERT 0x00000204
+#define NV34TCL_RT_VERT_Y_SHIFT 0
+#define NV34TCL_RT_VERT_Y_MASK 0x0000ffff
+#define NV34TCL_RT_VERT_H_SHIFT 16
+#define NV34TCL_RT_VERT_H_MASK 0xffff0000
+#define NV34TCL_RT_FORMAT 0x00000208
+#define NV34TCL_RT_FORMAT_TYPE_SHIFT 8
+#define NV34TCL_RT_FORMAT_TYPE_MASK 0x00000f00
+#define NV34TCL_RT_FORMAT_TYPE_LINEAR 0x00000100
+#define NV34TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200
+#define NV34TCL_RT_FORMAT_ZETA_SHIFT 5
+#define NV34TCL_RT_FORMAT_ZETA_MASK 0x000000e0
+#define NV34TCL_RT_FORMAT_ZETA_Z16 0x00000020
+#define NV34TCL_RT_FORMAT_ZETA_Z24S8 0x00000040
+#define NV34TCL_RT_FORMAT_COLOR_SHIFT 0
+#define NV34TCL_RT_FORMAT_COLOR_MASK 0x0000001f
+#define NV34TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003
+#define NV34TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005
+#define NV34TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008
+#define NV34TCL_RT_FORMAT_COLOR_B8 0x00000009
+#define NV34TCL_RT_FORMAT_COLOR_UNKNOWN 0x0000000d
+#define NV34TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f
+#define NV34TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010
+#define NV34TCL_COLOR0_PITCH 0x0000020c
+#define NV34TCL_COLOR0_PITCH_COLOR0_SHIFT 0
+#define NV34TCL_COLOR0_PITCH_COLOR0_MASK 0x0000ffff
+#define NV34TCL_COLOR0_PITCH_ZETA_SHIFT 16
+#define NV34TCL_COLOR0_PITCH_ZETA_MASK 0xffff0000
+#define NV34TCL_COLOR0_OFFSET 0x00000210
+#define NV34TCL_ZETA_OFFSET 0x00000214
+#define NV34TCL_COLOR1_OFFSET 0x00000218
+#define NV34TCL_COLOR1_PITCH 0x0000021c
+#define NV34TCL_RT_ENABLE 0x00000220
+#define NV34TCL_RT_ENABLE_MRT (1 << 4)
+#define NV34TCL_RT_ENABLE_COLOR1 (1 << 1)
+#define NV34TCL_RT_ENABLE_COLOR0 (1 << 0)
+#define NV34TCL_LMA_DEPTH_PITCH 0x0000022c
+#define NV34TCL_LMA_DEPTH_OFFSET 0x00000230
+#define NV34TCL_TX_UNITS_ENABLE 0x0000023c
+#define NV34TCL_TX_UNITS_ENABLE_TX0 (1 << 0)
+#define NV34TCL_TX_UNITS_ENABLE_TX1 (1 << 1)
+#define NV34TCL_TX_UNITS_ENABLE_TX2 (1 << 2)
+#define NV34TCL_TX_UNITS_ENABLE_TX3 (1 << 3)
+#define NV34TCL_TX_UNITS_ENABLE_TX4 (1 << 4)
+#define NV34TCL_TX_UNITS_ENABLE_TX5 (1 << 5)
+#define NV34TCL_TX_UNITS_ENABLE_TX6 (1 << 6)
+#define NV34TCL_TX_UNITS_ENABLE_TX7 (1 << 7)
+#define NV34TCL_TX_MATRIX_ENABLE(x) (0x00000240+((x)*4))
+#define NV34TCL_TX_MATRIX_ENABLE__SIZE 0x00000008
+#define NV34TCL_VIEWPORT_TX_ORIGIN 0x000002b8
+#define NV34TCL_VIEWPORT_TX_ORIGIN_X_SHIFT 0
+#define NV34TCL_VIEWPORT_TX_ORIGIN_X_MASK 0x0000ffff
+#define NV34TCL_VIEWPORT_TX_ORIGIN_Y_SHIFT 16
+#define NV34TCL_VIEWPORT_TX_ORIGIN_Y_MASK 0xffff0000
+#define NV34TCL_VIEWPORT_CLIP_MODE 0x000002bc
+#define NV34TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*8))
+#define NV34TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
+#define NV34TCL_VIEWPORT_CLIP_HORIZ_L_SHIFT 0
+#define NV34TCL_VIEWPORT_CLIP_HORIZ_L_MASK 0x0000ffff
+#define NV34TCL_VIEWPORT_CLIP_HORIZ_R_SHIFT 16
+#define NV34TCL_VIEWPORT_CLIP_HORIZ_R_MASK 0xffff0000
+#define NV34TCL_VIEWPORT_CLIP_VERT(x) (0x000002c4+((x)*8))
+#define NV34TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
+#define NV34TCL_VIEWPORT_CLIP_VERT_T_SHIFT 0
+#define NV34TCL_VIEWPORT_CLIP_VERT_T_MASK 0x0000ffff
+#define NV34TCL_VIEWPORT_CLIP_VERT_D_SHIFT 16
+#define NV34TCL_VIEWPORT_CLIP_VERT_D_MASK 0xffff0000
+#define NV34TCL_DITHER_ENABLE 0x00000300
+#define NV34TCL_ALPHA_FUNC_ENABLE 0x00000304
+#define NV34TCL_ALPHA_FUNC_FUNC 0x00000308
+#define NV34TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200
+#define NV34TCL_ALPHA_FUNC_FUNC_LESS 0x00000201
+#define NV34TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202
+#define NV34TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203
+#define NV34TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204
+#define NV34TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204
+#define NV34TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV34TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206
+#define NV34TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207
+#define NV34TCL_ALPHA_FUNC_REF 0x0000030c
+#define NV34TCL_BLEND_FUNC_ENABLE 0x00000310
+#define NV34TCL_BLEND_FUNC_SRC 0x00000314
+#define NV34TCL_BLEND_FUNC_SRC_RGB_SHIFT 0
+#define NV34TCL_BLEND_FUNC_SRC_RGB_MASK 0x0000ffff
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001
+#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV34TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV34TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307
+#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308
+#define NV34TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV34TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003
+#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SHIFT 16
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_MASK 0xffff0000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00010000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x03000000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x03020000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x03040000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x03060000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x03070000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x03080000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x80010000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x80030000
+#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000
+#define NV34TCL_BLEND_FUNC_DST 0x00000318
+#define NV34TCL_BLEND_FUNC_DST_RGB_SHIFT 0
+#define NV34TCL_BLEND_FUNC_DST_RGB_MASK 0x0000ffff
+#define NV34TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000
+#define NV34TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001
+#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300
+#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302
+#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV34TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304
+#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV34TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306
+#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307
+#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308
+#define NV34TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001
+#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV34TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003
+#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_SHIFT 16
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_MASK 0xffff0000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00010000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x03000000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x03020000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x03040000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x03060000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x03070000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x03080000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x80010000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x80030000
+#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000
+#define NV34TCL_BLEND_COLOR 0x0000031c
+#define NV34TCL_BLEND_COLOR_B_SHIFT 0
+#define NV34TCL_BLEND_COLOR_B_MASK 0x000000ff
+#define NV34TCL_BLEND_COLOR_G_SHIFT 8
+#define NV34TCL_BLEND_COLOR_G_MASK 0x0000ff00
+#define NV34TCL_BLEND_COLOR_R_SHIFT 16
+#define NV34TCL_BLEND_COLOR_R_MASK 0x00ff0000
+#define NV34TCL_BLEND_COLOR_A_SHIFT 24
+#define NV34TCL_BLEND_COLOR_A_MASK 0xff000000
+#define NV34TCL_BLEND_EQUATION 0x00000320
+#define NV34TCL_BLEND_EQUATION_FUNC_ADD 0x00008006
+#define NV34TCL_BLEND_EQUATION_MIN 0x00008007
+#define NV34TCL_BLEND_EQUATION_MAX 0x00008008
+#define NV34TCL_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a
+#define NV34TCL_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NV34TCL_COLOR_MASK 0x00000324
+#define NV34TCL_COLOR_MASK_B_SHIFT 0
+#define NV34TCL_COLOR_MASK_B_MASK 0x000000ff
+#define NV34TCL_COLOR_MASK_G_SHIFT 8
+#define NV34TCL_COLOR_MASK_G_MASK 0x0000ff00
+#define NV34TCL_COLOR_MASK_R_SHIFT 16
+#define NV34TCL_COLOR_MASK_R_MASK 0x00ff0000
+#define NV34TCL_COLOR_MASK_A_SHIFT 24
+#define NV34TCL_COLOR_MASK_A_MASK 0xff000000
+#define NV34TCL_STENCIL_BACK_ENABLE 0x00000328
+#define NV34TCL_STENCIL_BACK_MASK 0x0000032c
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC 0x00000330
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
+#define NV34TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
+#define NV34TCL_STENCIL_BACK_FUNC_REF 0x00000334
+#define NV34TCL_STENCIL_BACK_FUNC_MASK 0x00000338
+#define NV34TCL_STENCIL_BACK_OP_FAIL 0x0000033c
+#define NV34TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
+#define NV34TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
+#define NV34TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
+#define NV34TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
+#define NV34TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
+#define NV34TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
+#define NV34TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
+#define NV34TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL 0x00000340
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV34TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV34TCL_STENCIL_BACK_OP_ZPASS 0x00000344
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV34TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV34TCL_STENCIL_FRONT_ENABLE 0x00000348
+#define NV34TCL_STENCIL_FRONT_MASK 0x0000034c
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC 0x00000350
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
+#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
+#define NV34TCL_STENCIL_FRONT_FUNC_REF 0x00000354
+#define NV34TCL_STENCIL_FRONT_FUNC_MASK 0x00000358
+#define NV34TCL_STENCIL_FRONT_OP_FAIL 0x0000035c
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
+#define NV34TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL 0x00000360
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS 0x00000364
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV34TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV34TCL_SHADE_MODEL 0x00000368
+#define NV34TCL_SHADE_MODEL_FLAT 0x00001d00
+#define NV34TCL_SHADE_MODEL_SMOOTH 0x00001d01
+#define NV34TCL_FOG_ENABLE 0x0000036c
+#define NV34TCL_FOG_COLOR 0x00000370
+#define NV34TCL_FOG_COLOR_R_SHIFT 0
+#define NV34TCL_FOG_COLOR_R_MASK 0x000000ff
+#define NV34TCL_FOG_COLOR_G_SHIFT 8
+#define NV34TCL_FOG_COLOR_G_MASK 0x0000ff00
+#define NV34TCL_FOG_COLOR_B_SHIFT 16
+#define NV34TCL_FOG_COLOR_B_MASK 0x00ff0000
+#define NV34TCL_FOG_COLOR_A_SHIFT 24
+#define NV34TCL_FOG_COLOR_A_MASK 0xff000000
+#define NV34TCL_COLOR_LOGIC_OP_ENABLE 0x00000374
+#define NV34TCL_COLOR_LOGIC_OP_OP 0x00000378
+#define NV34TCL_COLOR_LOGIC_OP_OP_CLEAR 0x00001500
+#define NV34TCL_COLOR_LOGIC_OP_OP_AND 0x00001501
+#define NV34TCL_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502
+#define NV34TCL_COLOR_LOGIC_OP_OP_COPY 0x00001503
+#define NV34TCL_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504
+#define NV34TCL_COLOR_LOGIC_OP_OP_NOOP 0x00001505
+#define NV34TCL_COLOR_LOGIC_OP_OP_XOR 0x00001506
+#define NV34TCL_COLOR_LOGIC_OP_OP_OR 0x00001507
+#define NV34TCL_COLOR_LOGIC_OP_OP_NOR 0x00001508
+#define NV34TCL_COLOR_LOGIC_OP_OP_EQUIV 0x00001509
+#define NV34TCL_COLOR_LOGIC_OP_OP_INVERT 0x0000150a
+#define NV34TCL_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b
+#define NV34TCL_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c
+#define NV34TCL_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d
+#define NV34TCL_COLOR_LOGIC_OP_OP_NAND 0x0000150e
+#define NV34TCL_COLOR_LOGIC_OP_OP_SET 0x0000150f
+#define NV34TCL_NORMALIZE_ENABLE 0x0000037c
+#define NV34TCL_COLOR_MATERIAL 0x00000390
+#define NV34TCL_COLOR_MATERIAL_FRONT_EMISSION_ENABLE (1 << 0)
+#define NV34TCL_COLOR_MATERIAL_FRONT_AMBIENT_ENABLE (1 << 2)
+#define NV34TCL_COLOR_MATERIAL_FRONT_DIFFUSE_ENABLE (1 << 4)
+#define NV34TCL_COLOR_MATERIAL_FRONT_SPECULAR_ENABLE (1 << 6)
+#define NV34TCL_COLOR_MATERIAL_BACK_EMISSION_ENABLE (1 << 8)
+#define NV34TCL_COLOR_MATERIAL_BACK_AMBIENT_ENABLE (1 << 10)
+#define NV34TCL_COLOR_MATERIAL_BACK_DIFFUSE_ENABLE (1 << 12)
+#define NV34TCL_COLOR_MATERIAL_BACK_SPECULAR_ENABLE (1 << 14)
+#define NV34TCL_DEPTH_RANGE_NEAR 0x00000394
+#define NV34TCL_DEPTH_RANGE_FAR 0x00000398
+#define NV34TCL_COLOR_MATERIAL_FRONT_R 0x000003a0
+#define NV34TCL_COLOR_MATERIAL_FRONT_G 0x000003a4
+#define NV34TCL_COLOR_MATERIAL_FRONT_B 0x000003a8
+#define NV34TCL_COLOR_MATERIAL_FRONT_A 0x000003b4
+#define NV34TCL_LINE_WIDTH 0x000003b8
+#define NV34TCL_LINE_SMOOTH_ENABLE 0x000003bc
+#define NV34TCL_TX_GEN_S(x) (0x00000400+((x)*16))
+#define NV34TCL_TX_GEN_S__SIZE 0x00000008
+#define NV34TCL_TX_GEN_S_FALSE 0x00000000
+#define NV34TCL_TX_GEN_S_EYE_LINEAR 0x00002400
+#define NV34TCL_TX_GEN_S_OBJECT_LINEAR 0x00002401
+#define NV34TCL_TX_GEN_S_SPHERE_MAP 0x00002402
+#define NV34TCL_TX_GEN_S_NORMAL_MAP 0x00008511
+#define NV34TCL_TX_GEN_S_REFLECTION_MAP 0x00008512
+#define NV34TCL_TX_GEN_T(x) (0x00000404+((x)*16))
+#define NV34TCL_TX_GEN_T__SIZE 0x00000008
+#define NV34TCL_TX_GEN_T_FALSE 0x00000000
+#define NV34TCL_TX_GEN_T_EYE_LINEAR 0x00002400
+#define NV34TCL_TX_GEN_T_OBJECT_LINEAR 0x00002401
+#define NV34TCL_TX_GEN_T_SPHERE_MAP 0x00002402
+#define NV34TCL_TX_GEN_T_NORMAL_MAP 0x00008511
+#define NV34TCL_TX_GEN_T_REFLECTION_MAP 0x00008512
+#define NV34TCL_TX_GEN_R(x) (0x00000408+((x)*16))
+#define NV34TCL_TX_GEN_R__SIZE 0x00000008
+#define NV34TCL_TX_GEN_R_FALSE 0x00000000
+#define NV34TCL_TX_GEN_R_EYE_LINEAR 0x00002400
+#define NV34TCL_TX_GEN_R_OBJECT_LINEAR 0x00002401
+#define NV34TCL_TX_GEN_R_SPHERE_MAP 0x00002402
+#define NV34TCL_TX_GEN_R_NORMAL_MAP 0x00008511
+#define NV34TCL_TX_GEN_R_REFLECTION_MAP 0x00008512
+#define NV34TCL_TX_GEN_Q(x) (0x0000040c+((x)*16))
+#define NV34TCL_TX_GEN_Q__SIZE 0x00000008
+#define NV34TCL_TX_GEN_Q_FALSE 0x00000000
+#define NV34TCL_TX_GEN_Q_EYE_LINEAR 0x00002400
+#define NV34TCL_TX_GEN_Q_OBJECT_LINEAR 0x00002401
+#define NV34TCL_TX_GEN_Q_SPHERE_MAP 0x00002402
+#define NV34TCL_TX_GEN_Q_NORMAL_MAP 0x00008511
+#define NV34TCL_TX_GEN_Q_REFLECTION_MAP 0x00008512
+#define NV34TCL_MODELVIEW_MATRIX(x) (0x00000480+((x)*4))
+#define NV34TCL_MODELVIEW_MATRIX__SIZE 0x00000010
+#define NV34TCL_INVERSE_MODELVIEW_MATRIX(x) (0x00000580+((x)*4))
+#define NV34TCL_INVERSE_MODELVIEW_MATRIX__SIZE 0x0000000c
+#define NV34TCL_PROJECTION_MATRIX(x) (0x00000680+((x)*4))
+#define NV34TCL_PROJECTION_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX0_MATRIX(x) (0x000006c0+((x)*4))
+#define NV34TCL_TX0_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX1_MATRIX(x) (0x00000700+((x)*4))
+#define NV34TCL_TX1_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX2_MATRIX(x) (0x00000740+((x)*4))
+#define NV34TCL_TX2_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX3_MATRIX(x) (0x00000780+((x)*4))
+#define NV34TCL_TX3_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX4_MATRIX(x) (0x000007c0+((x)*4))
+#define NV34TCL_TX4_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX5_MATRIX(x) (0x00000800+((x)*4))
+#define NV34TCL_TX5_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX6_MATRIX(x) (0x00000840+((x)*4))
+#define NV34TCL_TX6_MATRIX__SIZE 0x00000010
+#define NV34TCL_TX7_MATRIX(x) (0x00000880+((x)*4))
+#define NV34TCL_TX7_MATRIX__SIZE 0x00000010
+#define NV34TCL_SCISSOR_HORIZ 0x000008c0
+#define NV34TCL_SCISSOR_HORIZ_X_SHIFT 0
+#define NV34TCL_SCISSOR_HORIZ_X_MASK 0x0000ffff
+#define NV34TCL_SCISSOR_HORIZ_W_SHIFT 16
+#define NV34TCL_SCISSOR_HORIZ_W_MASK 0xffff0000
+#define NV34TCL_SCISSOR_VERT 0x000008c4
+#define NV34TCL_SCISSOR_VERT_Y_SHIFT 0
+#define NV34TCL_SCISSOR_VERT_Y_MASK 0x0000ffff
+#define NV34TCL_SCISSOR_VERT_H_SHIFT 16
+#define NV34TCL_SCISSOR_VERT_H_MASK 0xffff0000
+#define NV34TCL_FOG_COORD_DIST 0x000008c8
+#define NV34TCL_FOG_COORD_DIST_COORD_FALSE 0x00000000
+#define NV34TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_RADIAL_NV 0x00000001
+#define NV34TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_PLANE_ABSOLUTE_NV 0x00000002
+#define NV34TCL_FOG_COORD_DIST_COORD_FOG 0x00000003
+#define NV34TCL_FOG_MODE 0x000008cc
+#define NV34TCL_FOG_MODE_EXP 0x00000800
+#define NV34TCL_FOG_MODE_EXP_2 0x00000802
+#define NV34TCL_FOG_MODE_EXP2 0x00000803
+#define NV34TCL_FOG_MODE_LINEAR 0x00000804
+#define NV34TCL_FOG_MODE_LINEAR_2 0x00002601
+#define NV34TCL_FOG_EQUATION_CONSTANT 0x000008d0
+#define NV34TCL_FOG_EQUATION_LINEAR 0x000008d4
+#define NV34TCL_FOG_EQUATION_QUADRATIC 0x000008d8
+#define NV34TCL_FP_ACTIVE_PROGRAM 0x000008e4
+#define NV34TCL_FP_ACTIVE_PROGRAM_DMA0 (1 << 0)
+#define NV34TCL_FP_ACTIVE_PROGRAM_DMA1 (1 << 1)
+#define NV34TCL_FP_ACTIVE_PROGRAM_OFFSET_SHIFT 2
+#define NV34TCL_FP_ACTIVE_PROGRAM_OFFSET_MASK 0xfffffffc
+#define NV34TCL_RC_COLOR0 0x000008ec
+#define NV34TCL_RC_COLOR0_B_SHIFT 0
+#define NV34TCL_RC_COLOR0_B_MASK 0x000000ff
+#define NV34TCL_RC_COLOR0_G_SHIFT 8
+#define NV34TCL_RC_COLOR0_G_MASK 0x0000ff00
+#define NV34TCL_RC_COLOR0_R_SHIFT 16
+#define NV34TCL_RC_COLOR0_R_MASK 0x00ff0000
+#define NV34TCL_RC_COLOR0_A_SHIFT 24
+#define NV34TCL_RC_COLOR0_A_MASK 0xff000000
+#define NV34TCL_RC_COLOR1 0x000008f0
+#define NV34TCL_RC_COLOR1_B_SHIFT 0
+#define NV34TCL_RC_COLOR1_B_MASK 0x000000ff
+#define NV34TCL_RC_COLOR1_G_SHIFT 8
+#define NV34TCL_RC_COLOR1_G_MASK 0x0000ff00
+#define NV34TCL_RC_COLOR1_R_SHIFT 16
+#define NV34TCL_RC_COLOR1_R_MASK 0x00ff0000
+#define NV34TCL_RC_COLOR1_A_SHIFT 24
+#define NV34TCL_RC_COLOR1_A_MASK 0xff000000
+#define NV34TCL_RC_FINAL0 0x000008f4
+#define NV34TCL_RC_FINAL0_D_INPUT_SHIFT 0
+#define NV34TCL_RC_FINAL0_D_INPUT_MASK 0x0000000f
+#define NV34TCL_RC_FINAL0_D_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV34TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV34TCL_RC_FINAL0_D_INPUT_FOG 0x00000003
+#define NV34TCL_RC_FINAL0_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV34TCL_RC_FINAL0_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV34TCL_RC_FINAL0_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV34TCL_RC_FINAL0_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV34TCL_RC_FINAL0_D_INPUT_SPARE0_NV 0x0000000c
+#define NV34TCL_RC_FINAL0_D_INPUT_SPARE1_NV 0x0000000d
+#define NV34TCL_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV34TCL_RC_FINAL0_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV34TCL_RC_FINAL0_D_COMPONENT_USAGE (1 << 4)
+#define NV34TCL_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV34TCL_RC_FINAL0_D_MAPPING_SHIFT 5
+#define NV34TCL_RC_FINAL0_D_MAPPING_MASK 0x000000e0
+#define NV34TCL_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV34TCL_RC_FINAL0_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV34TCL_RC_FINAL0_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV34TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV34TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV34TCL_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV34TCL_RC_FINAL0_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV34TCL_RC_FINAL0_C_INPUT_SHIFT 8
+#define NV34TCL_RC_FINAL0_C_INPUT_MASK 0x00000f00
+#define NV34TCL_RC_FINAL0_C_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV34TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV34TCL_RC_FINAL0_C_INPUT_FOG 0x00000300
+#define NV34TCL_RC_FINAL0_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV34TCL_RC_FINAL0_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV34TCL_RC_FINAL0_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV34TCL_RC_FINAL0_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV34TCL_RC_FINAL0_C_INPUT_SPARE0_NV 0x00000c00
+#define NV34TCL_RC_FINAL0_C_INPUT_SPARE1_NV 0x00000d00
+#define NV34TCL_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV34TCL_RC_FINAL0_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV34TCL_RC_FINAL0_C_COMPONENT_USAGE (1 << 12)
+#define NV34TCL_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV34TCL_RC_FINAL0_C_MAPPING_SHIFT 13
+#define NV34TCL_RC_FINAL0_C_MAPPING_MASK 0x0000e000
+#define NV34TCL_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV34TCL_RC_FINAL0_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV34TCL_RC_FINAL0_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV34TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV34TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV34TCL_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV34TCL_RC_FINAL0_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV34TCL_RC_FINAL0_B_INPUT_SHIFT 16
+#define NV34TCL_RC_FINAL0_B_INPUT_MASK 0x000f0000
+#define NV34TCL_RC_FINAL0_B_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV34TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV34TCL_RC_FINAL0_B_INPUT_FOG 0x00030000
+#define NV34TCL_RC_FINAL0_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV34TCL_RC_FINAL0_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV34TCL_RC_FINAL0_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV34TCL_RC_FINAL0_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV34TCL_RC_FINAL0_B_INPUT_SPARE0_NV 0x000c0000
+#define NV34TCL_RC_FINAL0_B_INPUT_SPARE1_NV 0x000d0000
+#define NV34TCL_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV34TCL_RC_FINAL0_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV34TCL_RC_FINAL0_B_COMPONENT_USAGE (1 << 20)
+#define NV34TCL_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV34TCL_RC_FINAL0_B_MAPPING_SHIFT 21
+#define NV34TCL_RC_FINAL0_B_MAPPING_MASK 0x00e00000
+#define NV34TCL_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV34TCL_RC_FINAL0_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV34TCL_RC_FINAL0_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV34TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV34TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV34TCL_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV34TCL_RC_FINAL0_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV34TCL_RC_FINAL0_A_INPUT_SHIFT 24
+#define NV34TCL_RC_FINAL0_A_INPUT_MASK 0x0f000000
+#define NV34TCL_RC_FINAL0_A_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV34TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV34TCL_RC_FINAL0_A_INPUT_FOG 0x03000000
+#define NV34TCL_RC_FINAL0_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV34TCL_RC_FINAL0_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV34TCL_RC_FINAL0_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV34TCL_RC_FINAL0_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV34TCL_RC_FINAL0_A_INPUT_SPARE0_NV 0x0c000000
+#define NV34TCL_RC_FINAL0_A_INPUT_SPARE1_NV 0x0d000000
+#define NV34TCL_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV34TCL_RC_FINAL0_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV34TCL_RC_FINAL0_A_COMPONENT_USAGE (1 << 28)
+#define NV34TCL_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_SHIFT 29
+#define NV34TCL_RC_FINAL0_A_MAPPING_MASK 0xe0000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV34TCL_RC_FINAL0_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV34TCL_RC_FINAL1 0x000008f8
+#define NV34TCL_RC_FINAL1_COLOR_SUM_CLAMP (1 << 7)
+#define NV34TCL_RC_FINAL1_G_INPUT_SHIFT 8
+#define NV34TCL_RC_FINAL1_G_INPUT_MASK 0x00000f00
+#define NV34TCL_RC_FINAL1_G_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV34TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV34TCL_RC_FINAL1_G_INPUT_FOG 0x00000300
+#define NV34TCL_RC_FINAL1_G_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV34TCL_RC_FINAL1_G_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV34TCL_RC_FINAL1_G_INPUT_TEXTURE0_ARB 0x00000800
+#define NV34TCL_RC_FINAL1_G_INPUT_TEXTURE1_ARB 0x00000900
+#define NV34TCL_RC_FINAL1_G_INPUT_SPARE0_NV 0x00000c00
+#define NV34TCL_RC_FINAL1_G_INPUT_SPARE1_NV 0x00000d00
+#define NV34TCL_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV34TCL_RC_FINAL1_G_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV34TCL_RC_FINAL1_G_COMPONENT_USAGE (1 << 12)
+#define NV34TCL_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV34TCL_RC_FINAL1_G_MAPPING_SHIFT 13
+#define NV34TCL_RC_FINAL1_G_MAPPING_MASK 0x0000e000
+#define NV34TCL_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV34TCL_RC_FINAL1_G_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV34TCL_RC_FINAL1_G_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV34TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV34TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV34TCL_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV34TCL_RC_FINAL1_G_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV34TCL_RC_FINAL1_F_INPUT_SHIFT 16
+#define NV34TCL_RC_FINAL1_F_INPUT_MASK 0x000f0000
+#define NV34TCL_RC_FINAL1_F_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV34TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV34TCL_RC_FINAL1_F_INPUT_FOG 0x00030000
+#define NV34TCL_RC_FINAL1_F_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV34TCL_RC_FINAL1_F_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV34TCL_RC_FINAL1_F_INPUT_TEXTURE0_ARB 0x00080000
+#define NV34TCL_RC_FINAL1_F_INPUT_TEXTURE1_ARB 0x00090000
+#define NV34TCL_RC_FINAL1_F_INPUT_SPARE0_NV 0x000c0000
+#define NV34TCL_RC_FINAL1_F_INPUT_SPARE1_NV 0x000d0000
+#define NV34TCL_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV34TCL_RC_FINAL1_F_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV34TCL_RC_FINAL1_F_COMPONENT_USAGE (1 << 20)
+#define NV34TCL_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV34TCL_RC_FINAL1_F_MAPPING_SHIFT 21
+#define NV34TCL_RC_FINAL1_F_MAPPING_MASK 0x00e00000
+#define NV34TCL_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV34TCL_RC_FINAL1_F_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV34TCL_RC_FINAL1_F_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV34TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV34TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV34TCL_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV34TCL_RC_FINAL1_F_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV34TCL_RC_FINAL1_E_INPUT_SHIFT 24
+#define NV34TCL_RC_FINAL1_E_INPUT_MASK 0x0f000000
+#define NV34TCL_RC_FINAL1_E_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV34TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV34TCL_RC_FINAL1_E_INPUT_FOG 0x03000000
+#define NV34TCL_RC_FINAL1_E_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV34TCL_RC_FINAL1_E_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV34TCL_RC_FINAL1_E_INPUT_TEXTURE0_ARB 0x08000000
+#define NV34TCL_RC_FINAL1_E_INPUT_TEXTURE1_ARB 0x09000000
+#define NV34TCL_RC_FINAL1_E_INPUT_SPARE0_NV 0x0c000000
+#define NV34TCL_RC_FINAL1_E_INPUT_SPARE1_NV 0x0d000000
+#define NV34TCL_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV34TCL_RC_FINAL1_E_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV34TCL_RC_FINAL1_E_COMPONENT_USAGE (1 << 28)
+#define NV34TCL_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_SHIFT 29
+#define NV34TCL_RC_FINAL1_E_MAPPING_MASK 0xe0000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV34TCL_RC_FINAL1_E_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV34TCL_RC_ENABLE 0x000008fc
+#define NV34TCL_RC_ENABLE_NUM_COMBINERS_SHIFT 0
+#define NV34TCL_RC_ENABLE_NUM_COMBINERS_MASK 0x0000000f
+#define NV34TCL_RC_ENABLE_STAGE_CONSTANT_COLOR0_SHIFT 12
+#define NV34TCL_RC_ENABLE_STAGE_CONSTANT_COLOR0_MASK 0x0000f000
+#define NV34TCL_RC_ENABLE_STAGE_CONSTANT_COLOR1_SHIFT 16
+#define NV34TCL_RC_ENABLE_STAGE_CONSTANT_COLOR1_MASK 0x000f0000
+#define NV34TCL_RC_IN_ALPHA(x) (0x00000900+((x)*32))
+#define NV34TCL_RC_IN_ALPHA__SIZE 0x00000008
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_SHIFT 0
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_MASK 0x0000000f
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_FOG 0x00000003
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_SPARE0_NV 0x0000000c
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_SPARE1_NV 0x0000000d
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV34TCL_RC_IN_ALPHA_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV34TCL_RC_IN_ALPHA_D_COMPONENT_USAGE (1 << 4)
+#define NV34TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000
+#define NV34TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_SHIFT 5
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_MASK 0x000000e0
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV34TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_SHIFT 8
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_MASK 0x00000f00
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_FOG 0x00000300
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_SPARE0_NV 0x00000c00
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_SPARE1_NV 0x00000d00
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV34TCL_RC_IN_ALPHA_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV34TCL_RC_IN_ALPHA_C_COMPONENT_USAGE (1 << 12)
+#define NV34TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000
+#define NV34TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_SHIFT 13
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_MASK 0x0000e000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV34TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_SHIFT 16
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_MASK 0x000f0000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_FOG 0x00030000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_SPARE0_NV 0x000c0000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_SPARE1_NV 0x000d0000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV34TCL_RC_IN_ALPHA_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV34TCL_RC_IN_ALPHA_B_COMPONENT_USAGE (1 << 20)
+#define NV34TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000
+#define NV34TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_SHIFT 21
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_MASK 0x00e00000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV34TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_SHIFT 24
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_MASK 0x0f000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_FOG 0x03000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_SPARE0_NV 0x0c000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_SPARE1_NV 0x0d000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV34TCL_RC_IN_ALPHA_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV34TCL_RC_IN_ALPHA_A_COMPONENT_USAGE (1 << 28)
+#define NV34TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000
+#define NV34TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_SHIFT 29
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_MASK 0xe0000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV34TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV34TCL_RC_IN_RGB(x) (0x00000904+((x)*32))
+#define NV34TCL_RC_IN_RGB__SIZE 0x00000008
+#define NV34TCL_RC_IN_RGB_D_INPUT_SHIFT 0
+#define NV34TCL_RC_IN_RGB_D_INPUT_MASK 0x0000000f
+#define NV34TCL_RC_IN_RGB_D_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV34TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV34TCL_RC_IN_RGB_D_INPUT_FOG 0x00000003
+#define NV34TCL_RC_IN_RGB_D_INPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV34TCL_RC_IN_RGB_D_INPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV34TCL_RC_IN_RGB_D_INPUT_TEXTURE0_ARB 0x00000008
+#define NV34TCL_RC_IN_RGB_D_INPUT_TEXTURE1_ARB 0x00000009
+#define NV34TCL_RC_IN_RGB_D_INPUT_SPARE0_NV 0x0000000c
+#define NV34TCL_RC_IN_RGB_D_INPUT_SPARE1_NV 0x0000000d
+#define NV34TCL_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV34TCL_RC_IN_RGB_D_INPUT_E_TIMES_F_NV 0x0000000f
+#define NV34TCL_RC_IN_RGB_D_COMPONENT_USAGE (1 << 4)
+#define NV34TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010
+#define NV34TCL_RC_IN_RGB_D_MAPPING_SHIFT 5
+#define NV34TCL_RC_IN_RGB_D_MAPPING_MASK 0x000000e0
+#define NV34TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020
+#define NV34TCL_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL_NV 0x00000040
+#define NV34TCL_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE_NV 0x00000060
+#define NV34TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080
+#define NV34TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0
+#define NV34TCL_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0
+#define NV34TCL_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0
+#define NV34TCL_RC_IN_RGB_C_INPUT_SHIFT 8
+#define NV34TCL_RC_IN_RGB_C_INPUT_MASK 0x00000f00
+#define NV34TCL_RC_IN_RGB_C_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV34TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV34TCL_RC_IN_RGB_C_INPUT_FOG 0x00000300
+#define NV34TCL_RC_IN_RGB_C_INPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV34TCL_RC_IN_RGB_C_INPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV34TCL_RC_IN_RGB_C_INPUT_TEXTURE0_ARB 0x00000800
+#define NV34TCL_RC_IN_RGB_C_INPUT_TEXTURE1_ARB 0x00000900
+#define NV34TCL_RC_IN_RGB_C_INPUT_SPARE0_NV 0x00000c00
+#define NV34TCL_RC_IN_RGB_C_INPUT_SPARE1_NV 0x00000d00
+#define NV34TCL_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV34TCL_RC_IN_RGB_C_INPUT_E_TIMES_F_NV 0x00000f00
+#define NV34TCL_RC_IN_RGB_C_COMPONENT_USAGE (1 << 12)
+#define NV34TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_SHIFT 13
+#define NV34TCL_RC_IN_RGB_C_MAPPING_MASK 0x0000e000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL_NV 0x00004000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE_NV 0x00006000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000
+#define NV34TCL_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000
+#define NV34TCL_RC_IN_RGB_B_INPUT_SHIFT 16
+#define NV34TCL_RC_IN_RGB_B_INPUT_MASK 0x000f0000
+#define NV34TCL_RC_IN_RGB_B_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0_NV 0x00010000
+#define NV34TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1_NV 0x00020000
+#define NV34TCL_RC_IN_RGB_B_INPUT_FOG 0x00030000
+#define NV34TCL_RC_IN_RGB_B_INPUT_PRIMARY_COLOR_NV 0x00040000
+#define NV34TCL_RC_IN_RGB_B_INPUT_SECONDARY_COLOR_NV 0x00050000
+#define NV34TCL_RC_IN_RGB_B_INPUT_TEXTURE0_ARB 0x00080000
+#define NV34TCL_RC_IN_RGB_B_INPUT_TEXTURE1_ARB 0x00090000
+#define NV34TCL_RC_IN_RGB_B_INPUT_SPARE0_NV 0x000c0000
+#define NV34TCL_RC_IN_RGB_B_INPUT_SPARE1_NV 0x000d0000
+#define NV34TCL_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000
+#define NV34TCL_RC_IN_RGB_B_INPUT_E_TIMES_F_NV 0x000f0000
+#define NV34TCL_RC_IN_RGB_B_COMPONENT_USAGE (1 << 20)
+#define NV34TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_SHIFT 21
+#define NV34TCL_RC_IN_RGB_B_MAPPING_MASK 0x00e00000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL_NV 0x00400000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE_NV 0x00600000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000
+#define NV34TCL_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000
+#define NV34TCL_RC_IN_RGB_A_INPUT_SHIFT 24
+#define NV34TCL_RC_IN_RGB_A_INPUT_MASK 0x0f000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_ZERO 0x00000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0_NV 0x01000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1_NV 0x02000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_FOG 0x03000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_PRIMARY_COLOR_NV 0x04000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_SECONDARY_COLOR_NV 0x05000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_TEXTURE0_ARB 0x08000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_TEXTURE1_ARB 0x09000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_SPARE0_NV 0x0c000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_SPARE1_NV 0x0d000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000
+#define NV34TCL_RC_IN_RGB_A_INPUT_E_TIMES_F_NV 0x0f000000
+#define NV34TCL_RC_IN_RGB_A_COMPONENT_USAGE (1 << 28)
+#define NV34TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000
+#define NV34TCL_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_SHIFT 29
+#define NV34TCL_RC_IN_RGB_A_MAPPING_MASK 0xe0000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL_NV 0x40000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE_NV 0x60000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000
+#define NV34TCL_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000
+#define NV34TCL_RC_CONSTANT_COLOR0(x) (0x00000908+((x)*32))
+#define NV34TCL_RC_CONSTANT_COLOR0__SIZE 0x00000008
+#define NV34TCL_RC_CONSTANT_COLOR0_B_SHIFT 0
+#define NV34TCL_RC_CONSTANT_COLOR0_B_MASK 0x000000ff
+#define NV34TCL_RC_CONSTANT_COLOR0_G_SHIFT 8
+#define NV34TCL_RC_CONSTANT_COLOR0_G_MASK 0x0000ff00
+#define NV34TCL_RC_CONSTANT_COLOR0_R_SHIFT 16
+#define NV34TCL_RC_CONSTANT_COLOR0_R_MASK 0x00ff0000
+#define NV34TCL_RC_CONSTANT_COLOR0_A_SHIFT 24
+#define NV34TCL_RC_CONSTANT_COLOR0_A_MASK 0xff000000
+#define NV34TCL_RC_CONSTANT_COLOR1(x) (0x0000090c+((x)*32))
+#define NV34TCL_RC_CONSTANT_COLOR1__SIZE 0x00000008
+#define NV34TCL_RC_CONSTANT_COLOR1_B_SHIFT 0
+#define NV34TCL_RC_CONSTANT_COLOR1_B_MASK 0x000000ff
+#define NV34TCL_RC_CONSTANT_COLOR1_G_SHIFT 8
+#define NV34TCL_RC_CONSTANT_COLOR1_G_MASK 0x0000ff00
+#define NV34TCL_RC_CONSTANT_COLOR1_R_SHIFT 16
+#define NV34TCL_RC_CONSTANT_COLOR1_R_MASK 0x00ff0000
+#define NV34TCL_RC_CONSTANT_COLOR1_A_SHIFT 24
+#define NV34TCL_RC_CONSTANT_COLOR1_A_MASK 0xff000000
+#define NV34TCL_RC_OUT_ALPHA(x) (0x00000910+((x)*32))
+#define NV34TCL_RC_OUT_ALPHA__SIZE 0x00000008
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SHIFT 0
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_MASK 0x0000000f
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0_ARB 0x00000008
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1_ARB 0x00000009
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_NV 0x0000000c
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE1_NV 0x0000000d
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F_NV 0x0000000f
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SHIFT 4
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_MASK 0x000000f0
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0_ARB 0x00000080
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1_ARB 0x00000090
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_NV 0x000000c0
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE1_NV 0x000000d0
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0
+#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F_NV 0x000000f0
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SHIFT 8
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_MASK 0x00000f00
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0_ARB 0x00000800
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1_ARB 0x00000900
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_NV 0x00000c00
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1_NV 0x00000d00
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00
+#define NV34TCL_RC_OUT_ALPHA_CD_DOT_PRODUCT (1 << 12)
+#define NV34TCL_RC_OUT_ALPHA_AB_DOT_PRODUCT (1 << 13)
+#define NV34TCL_RC_OUT_ALPHA_MUX_SUM (1 << 14)
+#define NV34TCL_RC_OUT_ALPHA_BIAS (1 << 15)
+#define NV34TCL_RC_OUT_ALPHA_BIAS_NONE 0x00000000
+#define NV34TCL_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000
+#define NV34TCL_RC_OUT_ALPHA_SCALE_SHIFT 17
+#define NV34TCL_RC_OUT_ALPHA_SCALE_MASK 0x00000000
+#define NV34TCL_RC_OUT_ALPHA_SCALE_NONE 0x00000000
+#define NV34TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO_NV 0x00020000
+#define NV34TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR_NV 0x00040000
+#define NV34TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000
+#define NV34TCL_RC_OUT_RGB(x) (0x00000914+((x)*32))
+#define NV34TCL_RC_OUT_RGB__SIZE 0x00000008
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SHIFT 0
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_MASK 0x0000000f
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE0_ARB 0x00000008
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE1_ARB 0x00000009
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_NV 0x0000000c
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SPARE1_NV 0x0000000d
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e
+#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F_NV 0x0000000f
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SHIFT 4
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_MASK 0x000000f0
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE0_ARB 0x00000080
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE1_ARB 0x00000090
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_NV 0x000000c0
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SPARE1_NV 0x000000d0
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0
+#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F_NV 0x000000f0
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SHIFT 8
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_MASK 0x00000f00
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0_ARB 0x00000800
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1_ARB 0x00000900
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_NV 0x00000c00
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE1_NV 0x00000d00
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00
+#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00
+#define NV34TCL_RC_OUT_RGB_CD_DOT_PRODUCT (1 << 12)
+#define NV34TCL_RC_OUT_RGB_AB_DOT_PRODUCT (1 << 13)
+#define NV34TCL_RC_OUT_RGB_MUX_SUM (1 << 14)
+#define NV34TCL_RC_OUT_RGB_BIAS (1 << 15)
+#define NV34TCL_RC_OUT_RGB_BIAS_NONE 0x00000000
+#define NV34TCL_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000
+#define NV34TCL_RC_OUT_RGB_SCALE_SHIFT 17
+#define NV34TCL_RC_OUT_RGB_SCALE_MASK 0x00000000
+#define NV34TCL_RC_OUT_RGB_SCALE_NONE 0x00000000
+#define NV34TCL_RC_OUT_RGB_SCALE_SCALE_BY_TWO_NV 0x00020000
+#define NV34TCL_RC_OUT_RGB_SCALE_SCALE_BY_FOUR_NV 0x00040000
+#define NV34TCL_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000
+#define NV34TCL_VIEWPORT_HORIZ 0x00000a00
+#define NV34TCL_VIEWPORT_HORIZ_X_SHIFT 0
+#define NV34TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff
+#define NV34TCL_VIEWPORT_HORIZ_W_SHIFT 16
+#define NV34TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000
+#define NV34TCL_VIEWPORT_VERT 0x00000a04
+#define NV34TCL_VIEWPORT_VERT_Y_SHIFT 0
+#define NV34TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff
+#define NV34TCL_VIEWPORT_VERT_H_SHIFT 16
+#define NV34TCL_VIEWPORT_VERT_H_MASK 0xffff0000
+#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x00000a10
+#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x00000a14
+#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x00000a18
+#define NV34TCL_VIEWPORT_TRANSLATE_X 0x00000a20
+#define NV34TCL_VIEWPORT_TRANSLATE_Y 0x00000a24
+#define NV34TCL_VIEWPORT_TRANSLATE_Z 0x00000a28
+#define NV34TCL_VIEWPORT_TRANSLATE_W 0x00000a2c
+#define NV34TCL_VIEWPORT_SCALE_X 0x00000a30
+#define NV34TCL_VIEWPORT_SCALE_Y 0x00000a34
+#define NV34TCL_VIEWPORT_SCALE_Z 0x00000a38
+#define NV34TCL_VIEWPORT_SCALE_W 0x00000a3c
+#define NV34TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a60
+#define NV34TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000a64
+#define NV34TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a68
+#define NV34TCL_DEPTH_FUNC 0x00000a6c
+#define NV34TCL_DEPTH_FUNC_NEVER 0x00000200
+#define NV34TCL_DEPTH_FUNC_LESS 0x00000201
+#define NV34TCL_DEPTH_FUNC_EQUAL 0x00000202
+#define NV34TCL_DEPTH_FUNC_LEQUAL 0x00000203
+#define NV34TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV34TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV34TCL_DEPTH_FUNC_NOTEQUAL 0x00000205
+#define NV34TCL_DEPTH_FUNC_GEQUAL 0x00000206
+#define NV34TCL_DEPTH_FUNC_ALWAYS 0x00000207
+#define NV34TCL_DEPTH_WRITE_ENABLE 0x00000a70
+#define NV34TCL_DEPTH_TEST_ENABLE 0x00000a74
+#define NV34TCL_POLYGON_OFFSET_FACTOR 0x00000a78
+#define NV34TCL_POLYGON_OFFSET_UNITS 0x00000a7c
+#define NV34TCL_VTX_ATTR_3I_XY(x) (0x00000a80+((x)*8))
+#define NV34TCL_VTX_ATTR_3I_XY__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_3I_XY_X_SHIFT 0
+#define NV34TCL_VTX_ATTR_3I_XY_X_MASK 0x0000ffff
+#define NV34TCL_VTX_ATTR_3I_XY_Y_SHIFT 16
+#define NV34TCL_VTX_ATTR_3I_XY_Y_MASK 0xffff0000
+#define NV34TCL_VTX_ATTR_3I_Z(x) (0x00000a84+((x)*8))
+#define NV34TCL_VTX_ATTR_3I_Z__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_3I_Z_Z_SHIFT 0
+#define NV34TCL_VTX_ATTR_3I_Z_Z_MASK 0x0000ffff
+#define NV34TCL_VP_UPLOAD_INST(x) (0x00000b80+((x)*4))
+#define NV34TCL_VP_UPLOAD_INST__SIZE 0x00000004
+#define NV34TCL_TX0_CLIP_PLANE_A(x) (0x00000e00+((x)*16))
+#define NV34TCL_TX0_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX0_CLIP_PLANE_B(x) (0x00000e04+((x)*16))
+#define NV34TCL_TX0_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX0_CLIP_PLANE_C(x) (0x00000e08+((x)*16))
+#define NV34TCL_TX0_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX0_CLIP_PLANE_D(x) (0x00000e0c+((x)*16))
+#define NV34TCL_TX0_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_TX1_CLIP_PLANE_A(x) (0x00000e40+((x)*16))
+#define NV34TCL_TX1_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX1_CLIP_PLANE_B(x) (0x00000e44+((x)*16))
+#define NV34TCL_TX1_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX1_CLIP_PLANE_C(x) (0x00000e48+((x)*16))
+#define NV34TCL_TX1_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX1_CLIP_PLANE_D(x) (0x00000e4c+((x)*16))
+#define NV34TCL_TX1_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_TX2_CLIP_PLANE_A(x) (0x00000e80+((x)*16))
+#define NV34TCL_TX2_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX2_CLIP_PLANE_B(x) (0x00000e84+((x)*16))
+#define NV34TCL_TX2_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX2_CLIP_PLANE_C(x) (0x00000e88+((x)*16))
+#define NV34TCL_TX2_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX2_CLIP_PLANE_D(x) (0x00000e8c+((x)*16))
+#define NV34TCL_TX2_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_TX3_CLIP_PLANE_A(x) (0x00000ec0+((x)*16))
+#define NV34TCL_TX3_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX3_CLIP_PLANE_B(x) (0x00000ec4+((x)*16))
+#define NV34TCL_TX3_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX3_CLIP_PLANE_C(x) (0x00000ec8+((x)*16))
+#define NV34TCL_TX3_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX3_CLIP_PLANE_D(x) (0x00000ecc+((x)*16))
+#define NV34TCL_TX3_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_TX4_CLIP_PLANE_A(x) (0x00000f00+((x)*16))
+#define NV34TCL_TX4_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX4_CLIP_PLANE_B(x) (0x00000f04+((x)*16))
+#define NV34TCL_TX4_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX4_CLIP_PLANE_C(x) (0x00000f08+((x)*16))
+#define NV34TCL_TX4_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX4_CLIP_PLANE_D(x) (0x00000f0c+((x)*16))
+#define NV34TCL_TX4_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_TX5_CLIP_PLANE_A(x) (0x00000f40+((x)*16))
+#define NV34TCL_TX5_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX5_CLIP_PLANE_B(x) (0x00000f44+((x)*16))
+#define NV34TCL_TX5_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX5_CLIP_PLANE_C(x) (0x00000f48+((x)*16))
+#define NV34TCL_TX5_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX5_CLIP_PLANE_D(x) (0x00000f4c+((x)*16))
+#define NV34TCL_TX5_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_TX6_CLIP_PLANE_A(x) (0x00000f80+((x)*16))
+#define NV34TCL_TX6_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX6_CLIP_PLANE_B(x) (0x00000f84+((x)*16))
+#define NV34TCL_TX6_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX6_CLIP_PLANE_C(x) (0x00000f88+((x)*16))
+#define NV34TCL_TX6_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX6_CLIP_PLANE_D(x) (0x00000f8c+((x)*16))
+#define NV34TCL_TX6_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_TX7_CLIP_PLANE_A(x) (0x00000fc0+((x)*16))
+#define NV34TCL_TX7_CLIP_PLANE_A__SIZE 0x00000004
+#define NV34TCL_TX7_CLIP_PLANE_B(x) (0x00000fc4+((x)*16))
+#define NV34TCL_TX7_CLIP_PLANE_B__SIZE 0x00000004
+#define NV34TCL_TX7_CLIP_PLANE_C(x) (0x00000fc8+((x)*16))
+#define NV34TCL_TX7_CLIP_PLANE_C__SIZE 0x00000004
+#define NV34TCL_TX7_CLIP_PLANE_D(x) (0x00000fcc+((x)*16))
+#define NV34TCL_TX7_CLIP_PLANE_D__SIZE 0x00000004
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(x) (0x00001000+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(x) (0x00001004+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(x) (0x00001008+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(x) (0x0000100c+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(x) (0x00001010+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(x) (0x00001014+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(x) (0x00001018+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(x) (0x0000101c+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G__SIZE 0x00000008
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(x) (0x00001020+((x)*64))
+#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B__SIZE 0x00000008
+#define NV34TCL_LIGHT_HALF_VECTOR_X(x) (0x00001028+((x)*64))
+#define NV34TCL_LIGHT_HALF_VECTOR_X__SIZE 0x00000008
+#define NV34TCL_LIGHT_HALF_VECTOR_Y(x) (0x0000102c+((x)*64))
+#define NV34TCL_LIGHT_HALF_VECTOR_Y__SIZE 0x00000008
+#define NV34TCL_LIGHT_HALF_VECTOR_Z(x) (0x00001030+((x)*64))
+#define NV34TCL_LIGHT_HALF_VECTOR_Z__SIZE 0x00000008
+#define NV34TCL_LIGHT_DIRECTION_X(x) (0x00001034+((x)*64))
+#define NV34TCL_LIGHT_DIRECTION_X__SIZE 0x00000008
+#define NV34TCL_LIGHT_DIRECTION_Y(x) (0x00001038+((x)*64))
+#define NV34TCL_LIGHT_DIRECTION_Y__SIZE 0x00000008
+#define NV34TCL_LIGHT_DIRECTION_Z(x) (0x0000103c+((x)*64))
+#define NV34TCL_LIGHT_DIRECTION_Z__SIZE 0x00000008
+#define NV34TCL_LIGHT_SPOT_CUTOFF_A(x) (0x00001200+((x)*64))
+#define NV34TCL_LIGHT_SPOT_CUTOFF_A__SIZE 0x00000008
+#define NV34TCL_LIGHT_SPOT_CUTOFF_B(x) (0x00001204+((x)*64))
+#define NV34TCL_LIGHT_SPOT_CUTOFF_B__SIZE 0x00000008
+#define NV34TCL_LIGHT_SPOT_CUTOFF_C(x) (0x00001208+((x)*64))
+#define NV34TCL_LIGHT_SPOT_CUTOFF_C__SIZE 0x00000008
+#define NV34TCL_LIGHT_SPOT_DIR_X(x) (0x0000120c+((x)*64))
+#define NV34TCL_LIGHT_SPOT_DIR_X__SIZE 0x00000008
+#define NV34TCL_LIGHT_SPOT_DIR_Y(x) (0x00001210+((x)*64))
+#define NV34TCL_LIGHT_SPOT_DIR_Y__SIZE 0x00000008
+#define NV34TCL_LIGHT_SPOT_DIR_Z(x) (0x00001214+((x)*64))
+#define NV34TCL_LIGHT_SPOT_DIR_Z__SIZE 0x00000008
+#define NV34TCL_LIGHT_SPOT_CUTOFF_D(x) (0x00001218+((x)*64))
+#define NV34TCL_LIGHT_SPOT_CUTOFF_D__SIZE 0x00000008
+#define NV34TCL_LIGHT_POSITION_X(x) (0x0000121c+((x)*64))
+#define NV34TCL_LIGHT_POSITION_X__SIZE 0x00000008
+#define NV34TCL_LIGHT_POSITION_Y(x) (0x00001220+((x)*64))
+#define NV34TCL_LIGHT_POSITION_Y__SIZE 0x00000008
+#define NV34TCL_LIGHT_POSITION_Z(x) (0x00001224+((x)*64))
+#define NV34TCL_LIGHT_POSITION_Z__SIZE 0x00000008
+#define NV34TCL_LIGHT_ATTENUATION_CONSTANT(x) (0x00001228+((x)*64))
+#define NV34TCL_LIGHT_ATTENUATION_CONSTANT__SIZE 0x00000008
+#define NV34TCL_LIGHT_ATTENUATION_LINEAR(x) (0x0000122c+((x)*64))
+#define NV34TCL_LIGHT_ATTENUATION_LINEAR__SIZE 0x00000008
+#define NV34TCL_LIGHT_ATTENUATION_QUADRATIC(x) (0x00001230+((x)*64))
+#define NV34TCL_LIGHT_ATTENUATION_QUADRATIC__SIZE 0x00000008
+#define NV34TCL_FRONT_MATERIAL_SHININESS(x) (0x00001400+((x)*4))
+#define NV34TCL_FRONT_MATERIAL_SHININESS__SIZE 0x00000006
+#define NV34TCL_ENABLED_LIGHTS 0x00001420
+#define NV34TCL_FP_REG_CONTROL 0x00001450
+#define NV34TCL_FP_REG_CONTROL_UNK1_SHIFT 16
+#define NV34TCL_FP_REG_CONTROL_UNK1_MASK 0xffff0000
+#define NV34TCL_FP_REG_CONTROL_UNK0_SHIFT 0
+#define NV34TCL_FP_REG_CONTROL_UNK0_MASK 0x0000ffff
+#define NV34TCL_VP_CLIP_PLANES_ENABLE 0x00001478
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 (1 << 1)
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 (1 << 5)
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 (1 << 9)
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 (1 << 13)
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4 (1 << 17)
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5 (1 << 21)
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE6 (1 << 25)
+#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE7 (1 << 29)
+#define NV34TCL_POLYGON_STIPPLE_ENABLE 0x0000147c
+#define NV34TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4))
+#define NV34TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020
+#define NV34TCL_VTX_ATTR_3F_X(x) (0x00001500+((x)*16))
+#define NV34TCL_VTX_ATTR_3F_X__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_3F_Y(x) (0x00001504+((x)*16))
+#define NV34TCL_VTX_ATTR_3F_Y__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_3F_Z(x) (0x00001508+((x)*16))
+#define NV34TCL_VTX_ATTR_3F_Z__SIZE 0x00000010
+#define NV34TCL_VP_CLIP_PLANE_A(x) (0x00001600+((x)*16))
+#define NV34TCL_VP_CLIP_PLANE_A__SIZE 0x00000006
+#define NV34TCL_VP_CLIP_PLANE_B(x) (0x00001604+((x)*16))
+#define NV34TCL_VP_CLIP_PLANE_B__SIZE 0x00000006
+#define NV34TCL_VP_CLIP_PLANE_C(x) (0x00001608+((x)*16))
+#define NV34TCL_VP_CLIP_PLANE_C__SIZE 0x00000006
+#define NV34TCL_VP_CLIP_PLANE_D(x) (0x0000160c+((x)*16))
+#define NV34TCL_VP_CLIP_PLANE_D__SIZE 0x00000006
+#define NV34TCL_VTXBUF_ADDRESS(x) (0x00001680+((x)*4))
+#define NV34TCL_VTXBUF_ADDRESS__SIZE 0x00000010
+#define NV34TCL_VTXBUF_ADDRESS_DMA1 (1 << 31)
+#define NV34TCL_VTXBUF_ADDRESS_OFFSET_SHIFT 0
+#define NV34TCL_VTXBUF_ADDRESS_OFFSET_MASK 0x0fffffff
+#define NV34TCL_VTXFMT(x) (0x00001740+((x)*4))
+#define NV34TCL_VTXFMT__SIZE 0x00000010
+#define NV34TCL_VTXFMT_TYPE_SHIFT 0
+#define NV34TCL_VTXFMT_TYPE_MASK 0x0000000f
+#define NV34TCL_VTXFMT_TYPE_FLOAT 0x00000002
+#define NV34TCL_VTXFMT_TYPE_UBYTE 0x00000004
+#define NV34TCL_VTXFMT_TYPE_USHORT 0x00000005
+#define NV34TCL_VTXFMT_SIZE_SHIFT 4
+#define NV34TCL_VTXFMT_SIZE_MASK 0x000000f0
+#define NV34TCL_VTXFMT_STRIDE_SHIFT 8
+#define NV34TCL_VTXFMT_STRIDE_MASK 0x0000ff00
+#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000017a0
+#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000017a4
+#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000017a8
+#define NV34TCL_COLOR_MATERIAL_BACK_R 0x000017b0
+#define NV34TCL_COLOR_MATERIAL_BACK_G 0x000017b4
+#define NV34TCL_COLOR_MATERIAL_BACK_B 0x000017b8
+#define NV34TCL_COLOR_MATERIAL_BACK_A 0x000017c0
+#define NV34TCL_QUERY_RESET 0x000017c8
+#define NV34TCL_QUERY_UNK17CC 0x000017cc
+#define NV34TCL_QUERY_GET 0x00001800
+#define NV34TCL_QUERY_GET_UNK24_SHIFT 24
+#define NV34TCL_QUERY_GET_UNK24_MASK 0xff000000
+#define NV34TCL_QUERY_GET_OFFSET_SHIFT 0
+#define NV34TCL_QUERY_GET_OFFSET_MASK 0x00ffffff
+#define NV34TCL_VERTEX_BEGIN_END 0x00001808
+#define NV34TCL_VERTEX_BEGIN_END_STOP 0x00000000
+#define NV34TCL_VERTEX_BEGIN_END_POINTS 0x00000001
+#define NV34TCL_VERTEX_BEGIN_END_LINES 0x00000002
+#define NV34TCL_VERTEX_BEGIN_END_LINE_LOOP 0x00000003
+#define NV34TCL_VERTEX_BEGIN_END_LINE_STRIP 0x00000004
+#define NV34TCL_VERTEX_BEGIN_END_TRIANGLES 0x00000005
+#define NV34TCL_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006
+#define NV34TCL_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007
+#define NV34TCL_VERTEX_BEGIN_END_QUADS 0x00000008
+#define NV34TCL_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009
+#define NV34TCL_VERTEX_BEGIN_END_POLYGON 0x0000000a
+#define NV34TCL_VB_ELEMENT_U16 0x0000180c
+#define NV34TCL_VB_ELEMENT_U16_I0_SHIFT 0
+#define NV34TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff
+#define NV34TCL_VB_ELEMENT_U16_I1_SHIFT 16
+#define NV34TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000
+#define NV34TCL_VB_ELEMENT_U32 0x00001810
+#define NV34TCL_VB_VERTEX_BATCH 0x00001814
+#define NV34TCL_VB_VERTEX_BATCH_OFFSET_SHIFT 0
+#define NV34TCL_VB_VERTEX_BATCH_OFFSET_MASK 0x00ffffff
+#define NV34TCL_VB_VERTEX_BATCH_COUNT_SHIFT 24
+#define NV34TCL_VB_VERTEX_BATCH_COUNT_MASK 0xff000000
+#define NV34TCL_VERTEX_DATA 0x00001818
+#define NV34TCL_IDXBUF_ADDRESS 0x0000181c
+#define NV34TCL_IDXBUF_FORMAT 0x00001820
+#define NV34TCL_IDXBUF_FORMAT_TYPE_SHIFT 4
+#define NV34TCL_IDXBUF_FORMAT_TYPE_MASK 0x000000f0
+#define NV34TCL_IDXBUF_FORMAT_TYPE_U32 0x00000000
+#define NV34TCL_IDXBUF_FORMAT_TYPE_U16 0x00000010
+#define NV34TCL_IDXBUF_FORMAT_DMA1 (1 << 0)
+#define NV34TCL_VB_INDEX_BATCH 0x00001824
+#define NV34TCL_VB_INDEX_BATCH_COUNT_SHIFT 24
+#define NV34TCL_VB_INDEX_BATCH_COUNT_MASK 0xff000000
+#define NV34TCL_VB_INDEX_BATCH_START_SHIFT 0
+#define NV34TCL_VB_INDEX_BATCH_START_MASK 0x00ffffff
+#define NV34TCL_POLYGON_MODE_FRONT 0x00001828
+#define NV34TCL_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NV34TCL_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NV34TCL_POLYGON_MODE_FRONT_FILL 0x00001b02
+#define NV34TCL_POLYGON_MODE_BACK 0x0000182c
+#define NV34TCL_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NV34TCL_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NV34TCL_POLYGON_MODE_BACK_FILL 0x00001b02
+#define NV34TCL_CULL_FACE 0x00001830
+#define NV34TCL_CULL_FACE_FRONT 0x00000404
+#define NV34TCL_CULL_FACE_BACK 0x00000405
+#define NV34TCL_CULL_FACE_FRONT_AND_BACK 0x00000408
+#define NV34TCL_FRONT_FACE 0x00001834
+#define NV34TCL_FRONT_FACE_CW 0x00000900
+#define NV34TCL_FRONT_FACE_CCW 0x00000901
+#define NV34TCL_POLYGON_SMOOTH_ENABLE 0x00001838
+#define NV34TCL_CULL_FACE_ENABLE 0x0000183c
+#define NV34TCL_TX_PALETTE_OFFSET(x) (0x00001840+((x)*4))
+#define NV34TCL_TX_PALETTE_OFFSET__SIZE 0x00000004
+#define NV34TCL_VTX_ATTR_2F_X(x) (0x00001880+((x)*8))
+#define NV34TCL_VTX_ATTR_2F_X__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_2F_Y(x) (0x00001884+((x)*8))
+#define NV34TCL_VTX_ATTR_2F_Y__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_2I(x) (0x00001900+((x)*4))
+#define NV34TCL_VTX_ATTR_2I__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_2I_X_SHIFT 0
+#define NV34TCL_VTX_ATTR_2I_X_MASK 0x0000ffff
+#define NV34TCL_VTX_ATTR_2I_Y_SHIFT 16
+#define NV34TCL_VTX_ATTR_2I_Y_MASK 0xffff0000
+#define NV34TCL_VTX_ATTR_4UB(x) (0x00001940+((x)*4))
+#define NV34TCL_VTX_ATTR_4UB__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_4UB_X_SHIFT 0
+#define NV34TCL_VTX_ATTR_4UB_X_MASK 0x000000ff
+#define NV34TCL_VTX_ATTR_4UB_Y_SHIFT 8
+#define NV34TCL_VTX_ATTR_4UB_Y_MASK 0x0000ff00
+#define NV34TCL_VTX_ATTR_4UB_Z_SHIFT 16
+#define NV34TCL_VTX_ATTR_4UB_Z_MASK 0x00ff0000
+#define NV34TCL_VTX_ATTR_4UB_W_SHIFT 24
+#define NV34TCL_VTX_ATTR_4UB_W_MASK 0xff000000
+#define NV34TCL_VTX_ATTR_4I_XY(x) (0x00001980+((x)*8))
+#define NV34TCL_VTX_ATTR_4I_XY__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_4I_XY_X_SHIFT 0
+#define NV34TCL_VTX_ATTR_4I_XY_X_MASK 0x0000ffff
+#define NV34TCL_VTX_ATTR_4I_XY_Y_SHIFT 16
+#define NV34TCL_VTX_ATTR_4I_XY_Y_MASK 0xffff0000
+#define NV34TCL_VTX_ATTR_4I_ZW(x) (0x00001984+((x)*8))
+#define NV34TCL_VTX_ATTR_4I_ZW__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_4I_ZW_Z_SHIFT 0
+#define NV34TCL_VTX_ATTR_4I_ZW_Z_MASK 0x0000ffff
+#define NV34TCL_VTX_ATTR_4I_ZW_W_SHIFT 16
+#define NV34TCL_VTX_ATTR_4I_ZW_W_MASK 0xffff0000
+#define NV34TCL_TX_OFFSET(x) (0x00001a00+((x)*32))
+#define NV34TCL_TX_OFFSET__SIZE 0x00000004
+#define NV34TCL_TX_FORMAT(x) (0x00001a04+((x)*32))
+#define NV34TCL_TX_FORMAT__SIZE 0x00000004
+#define NV34TCL_TX_FORMAT_DMA0 (1 << 0)
+#define NV34TCL_TX_FORMAT_DMA1 (1 << 1)
+#define NV34TCL_TX_FORMAT_CUBIC (1 << 2)
+#define NV34TCL_TX_FORMAT_NO_BORDER (1 << 3)
+#define NV34TCL_TX_FORMAT_DIMS_SHIFT 4
+#define NV34TCL_TX_FORMAT_DIMS_MASK 0x000000f0
+#define NV34TCL_TX_FORMAT_DIMS_1D 0x00000010
+#define NV34TCL_TX_FORMAT_DIMS_2D 0x00000020
+#define NV34TCL_TX_FORMAT_DIMS_3D 0x00000030
+#define NV34TCL_TX_FORMAT_FORMAT_SHIFT 8
+#define NV34TCL_TX_FORMAT_FORMAT_MASK 0x0000ff00
+#define NV34TCL_TX_FORMAT_FORMAT_L8 0x00000000
+#define NV34TCL_TX_FORMAT_FORMAT_A8 0x00000100
+#define NV34TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000200
+#define NV34TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000300
+#define NV34TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000400
+#define NV34TCL_TX_FORMAT_FORMAT_R5G6B5 0x00000500
+#define NV34TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000600
+#define NV34TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000700
+#define NV34TCL_TX_FORMAT_FORMAT_INDEX8 0x00000b00
+#define NV34TCL_TX_FORMAT_FORMAT_DXT1 0x00000c00
+#define NV34TCL_TX_FORMAT_FORMAT_DXT3 0x00000e00
+#define NV34TCL_TX_FORMAT_FORMAT_DXT5 0x00000f00
+#define NV34TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00001000
+#define NV34TCL_TX_FORMAT_FORMAT_R5G6B5_RECT 0x00001100
+#define NV34TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00001200
+#define NV34TCL_TX_FORMAT_FORMAT_L8_RECT 0x00001300
+#define NV34TCL_TX_FORMAT_FORMAT_A8L8 0x00001a00
+#define NV34TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00001b00
+#define NV34TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00001d00
+#define NV34TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00001e00
+#define NV34TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00002000
+#define NV34TCL_TX_FORMAT_FORMAT_DSDT 0x00002800
+#define NV34TCL_TX_FORMAT_FORMAT_A16 0x00003200
+#define NV34TCL_TX_FORMAT_FORMAT_HILO16 0x00003300
+#define NV34TCL_TX_FORMAT_FORMAT_A16_RECT 0x00003500
+#define NV34TCL_TX_FORMAT_FORMAT_HILO16_RECT 0x00003600
+#define NV34TCL_TX_FORMAT_FORMAT_HILO8 0x00004400
+#define NV34TCL_TX_FORMAT_FORMAT_SIGNED_HILO8 0x00004500
+#define NV34TCL_TX_FORMAT_FORMAT_HILO8_RECT 0x00004600
+#define NV34TCL_TX_FORMAT_FORMAT_SIGNED_HILO8_RECT 0x00004700
+#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00004a00
+#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00004b00
+#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00004c00
+#define NV34TCL_TX_FORMAT_MIPMAP (1 << 19)
+#define NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT 20
+#define NV34TCL_TX_FORMAT_BASE_SIZE_U_MASK 0x00f00000
+#define NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT 24
+#define NV34TCL_TX_FORMAT_BASE_SIZE_V_MASK 0x0f000000
+#define NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT 28
+#define NV34TCL_TX_FORMAT_BASE_SIZE_W_MASK 0xf0000000
+#define NV34TCL_TX_WRAP(x) (0x00001a08+((x)*32))
+#define NV34TCL_TX_WRAP__SIZE 0x00000004
+#define NV34TCL_TX_WRAP_S_SHIFT 0
+#define NV34TCL_TX_WRAP_S_MASK 0x000000ff
+#define NV34TCL_TX_WRAP_S_REPEAT 0x00000001
+#define NV34TCL_TX_WRAP_S_MIRRORED_REPEAT 0x00000002
+#define NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE 0x00000003
+#define NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER 0x00000004
+#define NV34TCL_TX_WRAP_S_CLAMP 0x00000005
+#define NV34TCL_TX_WRAP_T_SHIFT 8
+#define NV34TCL_TX_WRAP_T_MASK 0x00000f00
+#define NV34TCL_TX_WRAP_T_REPEAT 0x00000100
+#define NV34TCL_TX_WRAP_T_MIRRORED_REPEAT 0x00000200
+#define NV34TCL_TX_WRAP_T_CLAMP_TO_EDGE 0x00000300
+#define NV34TCL_TX_WRAP_T_CLAMP_TO_BORDER 0x00000400
+#define NV34TCL_TX_WRAP_T_CLAMP 0x00000500
+#define NV34TCL_TX_WRAP_EXPAND_NORMAL_SHIFT 12
+#define NV34TCL_TX_WRAP_EXPAND_NORMAL_MASK 0x0000f000
+#define NV34TCL_TX_WRAP_R_SHIFT 16
+#define NV34TCL_TX_WRAP_R_MASK 0x000f0000
+#define NV34TCL_TX_WRAP_R_REPEAT 0x00010000
+#define NV34TCL_TX_WRAP_R_MIRRORED_REPEAT 0x00020000
+#define NV34TCL_TX_WRAP_R_CLAMP_TO_EDGE 0x00030000
+#define NV34TCL_TX_WRAP_R_CLAMP_TO_BORDER 0x00040000
+#define NV34TCL_TX_WRAP_R_CLAMP 0x00050000
+#define NV34TCL_TX_WRAP_RCOMP_SHIFT 28
+#define NV34TCL_TX_WRAP_RCOMP_MASK 0xf0000000
+#define NV34TCL_TX_WRAP_RCOMP_NEVER 0x00000000
+#define NV34TCL_TX_WRAP_RCOMP_GREATER 0x10000000
+#define NV34TCL_TX_WRAP_RCOMP_EQUAL 0x20000000
+#define NV34TCL_TX_WRAP_RCOMP_GEQUAL 0x30000000
+#define NV34TCL_TX_WRAP_RCOMP_LESS 0x40000000
+#define NV34TCL_TX_WRAP_RCOMP_NOTEQUAL 0x50000000
+#define NV34TCL_TX_WRAP_RCOMP_LEQUAL 0x60000000
+#define NV34TCL_TX_WRAP_RCOMP_ALWAYS 0x70000000
+#define NV34TCL_TX_ENABLE(x) (0x00001a0c+((x)*32))
+#define NV34TCL_TX_ENABLE__SIZE 0x00000004
+#define NV34TCL_TX_ENABLE_ANISO_SHIFT 4
+#define NV34TCL_TX_ENABLE_ANISO_MASK 0x00000030
+#define NV34TCL_TX_ENABLE_ANISO_NONE 0x00000000
+#define NV34TCL_TX_ENABLE_ANISO_2X 0x00000010
+#define NV34TCL_TX_ENABLE_ANISO_4X 0x00000020
+#define NV34TCL_TX_ENABLE_ANISO_8X 0x00000030
+#define NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT 14
+#define NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_MASK 0x0003c000
+#define NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT 26
+#define NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_MASK 0x3c000000
+#define NV34TCL_TX_ENABLE_ENABLE (1 << 30)
+#define NV34TCL_TX_SWIZZLE(x) (0x00001a10+((x)*32))
+#define NV34TCL_TX_SWIZZLE__SIZE 0x00000004
+#define NV34TCL_TX_SWIZZLE_S0_X_SHIFT 14
+#define NV34TCL_TX_SWIZZLE_S0_X_MASK 0x0000c000
+#define NV34TCL_TX_SWIZZLE_S0_X_ZERO 0x00000000
+#define NV34TCL_TX_SWIZZLE_S0_X_ONE 0x00004000
+#define NV34TCL_TX_SWIZZLE_S0_X_S1 0x00008000
+#define NV34TCL_TX_SWIZZLE_S0_Y_SHIFT 12
+#define NV34TCL_TX_SWIZZLE_S0_Y_MASK 0x00003000
+#define NV34TCL_TX_SWIZZLE_S0_Y_ZERO 0x00000000
+#define NV34TCL_TX_SWIZZLE_S0_Y_ONE 0x00001000
+#define NV34TCL_TX_SWIZZLE_S0_Y_S1 0x00002000
+#define NV34TCL_TX_SWIZZLE_S0_Z_SHIFT 10
+#define NV34TCL_TX_SWIZZLE_S0_Z_MASK 0x00000c00
+#define NV34TCL_TX_SWIZZLE_S0_Z_ZERO 0x00000000
+#define NV34TCL_TX_SWIZZLE_S0_Z_ONE 0x00000400
+#define NV34TCL_TX_SWIZZLE_S0_Z_S1 0x00000800
+#define NV34TCL_TX_SWIZZLE_S0_W_SHIFT 8
+#define NV34TCL_TX_SWIZZLE_S0_W_MASK 0x00000300
+#define NV34TCL_TX_SWIZZLE_S0_W_ZERO 0x00000000
+#define NV34TCL_TX_SWIZZLE_S0_W_ONE 0x00000100
+#define NV34TCL_TX_SWIZZLE_S0_W_S1 0x00000200
+#define NV34TCL_TX_SWIZZLE_S1_X_SHIFT 6
+#define NV34TCL_TX_SWIZZLE_S1_X_MASK 0x000000c0
+#define NV34TCL_TX_SWIZZLE_S1_X_W 0x00000000
+#define NV34TCL_TX_SWIZZLE_S1_X_Z 0x00000040
+#define NV34TCL_TX_SWIZZLE_S1_X_Y 0x00000080
+#define NV34TCL_TX_SWIZZLE_S1_X_X 0x000000c0
+#define NV34TCL_TX_SWIZZLE_S1_Y_SHIFT 4
+#define NV34TCL_TX_SWIZZLE_S1_Y_MASK 0x00000030
+#define NV34TCL_TX_SWIZZLE_S1_Y_W 0x00000000
+#define NV34TCL_TX_SWIZZLE_S1_Y_Z 0x00000010
+#define NV34TCL_TX_SWIZZLE_S1_Y_Y 0x00000020
+#define NV34TCL_TX_SWIZZLE_S1_Y_X 0x00000030
+#define NV34TCL_TX_SWIZZLE_S1_Z_SHIFT 2
+#define NV34TCL_TX_SWIZZLE_S1_Z_MASK 0x0000000c
+#define NV34TCL_TX_SWIZZLE_S1_Z_W 0x00000000
+#define NV34TCL_TX_SWIZZLE_S1_Z_Z 0x00000004
+#define NV34TCL_TX_SWIZZLE_S1_Z_Y 0x00000008
+#define NV34TCL_TX_SWIZZLE_S1_Z_X 0x0000000c
+#define NV34TCL_TX_SWIZZLE_S1_W_SHIFT 0
+#define NV34TCL_TX_SWIZZLE_S1_W_MASK 0x00000003
+#define NV34TCL_TX_SWIZZLE_S1_W_W 0x00000000
+#define NV34TCL_TX_SWIZZLE_S1_W_Z 0x00000001
+#define NV34TCL_TX_SWIZZLE_S1_W_Y 0x00000002
+#define NV34TCL_TX_SWIZZLE_S1_W_X 0x00000003
+#define NV34TCL_TX_SWIZZLE_RECT_PITCH_SHIFT 16
+#define NV34TCL_TX_SWIZZLE_RECT_PITCH_MASK 0xffff0000
+#define NV34TCL_TX_FILTER(x) (0x00001a14+((x)*32))
+#define NV34TCL_TX_FILTER__SIZE 0x00000004
+#define NV34TCL_TX_FILTER_LOD_BIAS_SHIFT 8
+#define NV34TCL_TX_FILTER_LOD_BIAS_MASK 0x00000f00
+#define NV34TCL_TX_FILTER_MINIFY_SHIFT 16
+#define NV34TCL_TX_FILTER_MINIFY_MASK 0x000f0000
+#define NV34TCL_TX_FILTER_MINIFY_NEAREST 0x00010000
+#define NV34TCL_TX_FILTER_MINIFY_LINEAR 0x00020000
+#define NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x00030000
+#define NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x00040000
+#define NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x00050000
+#define NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x00060000
+#define NV34TCL_TX_FILTER_MAGNIFY_SHIFT 24
+#define NV34TCL_TX_FILTER_MAGNIFY_MASK 0x0f000000
+#define NV34TCL_TX_FILTER_MAGNIFY_NEAREST 0x01000000
+#define NV34TCL_TX_FILTER_MAGNIFY_LINEAR 0x02000000
+#define NV34TCL_TX_FILTER_SIGNED_BLUE (1 << 28)
+#define NV34TCL_TX_FILTER_SIGNED_GREEN (1 << 29)
+#define NV34TCL_TX_FILTER_SIGNED_RED (1 << 30)
+#define NV34TCL_TX_FILTER_SIGNED_ALPHA (1 << 31)
+#define NV34TCL_TX_NPOT_SIZE(x) (0x00001a18+((x)*32))
+#define NV34TCL_TX_NPOT_SIZE__SIZE 0x00000004
+#define NV34TCL_TX_NPOT_SIZE_H_SHIFT 0
+#define NV34TCL_TX_NPOT_SIZE_H_MASK 0x0000ffff
+#define NV34TCL_TX_NPOT_SIZE_W_SHIFT 16
+#define NV34TCL_TX_NPOT_SIZE_W_MASK 0xffff0000
+#define NV34TCL_TX_BORDER_COLOR(x) (0x00001a1c+((x)*32))
+#define NV34TCL_TX_BORDER_COLOR__SIZE 0x00000004
+#define NV34TCL_TX_BORDER_COLOR_B_SHIFT 0
+#define NV34TCL_TX_BORDER_COLOR_B_MASK 0x000000ff
+#define NV34TCL_TX_BORDER_COLOR_G_SHIFT 8
+#define NV34TCL_TX_BORDER_COLOR_G_MASK 0x0000ff00
+#define NV34TCL_TX_BORDER_COLOR_R_SHIFT 16
+#define NV34TCL_TX_BORDER_COLOR_R_MASK 0x00ff0000
+#define NV34TCL_TX_BORDER_COLOR_A_SHIFT 24
+#define NV34TCL_TX_BORDER_COLOR_A_MASK 0xff000000
+#define NV34TCL_VTX_ATTR_4F_X(x) (0x00001c00+((x)*16))
+#define NV34TCL_VTX_ATTR_4F_X__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_4F_Y(x) (0x00001c04+((x)*16))
+#define NV34TCL_VTX_ATTR_4F_Y__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_4F_Z(x) (0x00001c08+((x)*16))
+#define NV34TCL_VTX_ATTR_4F_Z__SIZE 0x00000010
+#define NV34TCL_VTX_ATTR_4F_W(x) (0x00001c0c+((x)*16))
+#define NV34TCL_VTX_ATTR_4F_W__SIZE 0x00000010
+#define NV34TCL_FP_CONTROL 0x00001d60
+#define NV34TCL_FP_CONTROL_USES_KIL (1 << 7)
+#define NV34TCL_FP_CONTROL_USED_REGS_MINUS1_DIV2_SHIFT 0
+#define NV34TCL_FP_CONTROL_USED_REGS_MINUS1_DIV2_MASK 0x0000000f
+#define NV34TCL_DEPTH_UNK17D8 0x00001d78
+#define NV34TCL_DEPTH_UNK17D8_CLAMP_SHIFT 4
+#define NV34TCL_DEPTH_UNK17D8_CLAMP_MASK 0x000000f0
+#define NV34TCL_MULTISAMPLE_CONTROL 0x00001d7c
+#define NV34TCL_MULTISAMPLE_CONTROL_ENABLE (1 << 0)
+#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_ALPHA_TO_COVERAGE (1 << 4)
+#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_ALPHA_TO_ONE (1 << 8)
+#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_COVERAGE_SHIFT 16
+#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_COVERAGE_MASK 0xffff0000
+#define NV34TCL_CLEAR_DEPTH_VALUE 0x00001d8c
+#define NV34TCL_CLEAR_COLOR_VALUE 0x00001d90
+#define NV34TCL_CLEAR_COLOR_VALUE_B_SHIFT 0
+#define NV34TCL_CLEAR_COLOR_VALUE_B_MASK 0x000000ff
+#define NV34TCL_CLEAR_COLOR_VALUE_G_SHIFT 8
+#define NV34TCL_CLEAR_COLOR_VALUE_G_MASK 0x0000ff00
+#define NV34TCL_CLEAR_COLOR_VALUE_R_SHIFT 16
+#define NV34TCL_CLEAR_COLOR_VALUE_R_MASK 0x00ff0000
+#define NV34TCL_CLEAR_COLOR_VALUE_A_SHIFT 24
+#define NV34TCL_CLEAR_COLOR_VALUE_A_MASK 0xff000000
+#define NV34TCL_CLEAR_BUFFERS 0x00001d94
+#define NV34TCL_CLEAR_BUFFERS_COLOR_A (1 << 7)
+#define NV34TCL_CLEAR_BUFFERS_COLOR_B (1 << 6)
+#define NV34TCL_CLEAR_BUFFERS_COLOR_G (1 << 5)
+#define NV34TCL_CLEAR_BUFFERS_COLOR_R (1 << 4)
+#define NV34TCL_CLEAR_BUFFERS_STENCIL (1 << 1)
+#define NV34TCL_CLEAR_BUFFERS_DEPTH (1 << 0)
+#define NV34TCL_DO_VERTICES 0x00001dac
+#define NV34TCL_LINE_STIPPLE_ENABLE 0x00001db4
+#define NV34TCL_LINE_STIPPLE_PATTERN 0x00001db8
+#define NV34TCL_LINE_STIPPLE_PATTERN_FACTOR_SHIFT 0
+#define NV34TCL_LINE_STIPPLE_PATTERN_FACTOR_MASK 0x0000ffff
+#define NV34TCL_LINE_STIPPLE_PATTERN_PATTERN_SHIFT 16
+#define NV34TCL_LINE_STIPPLE_PATTERN_PATTERN_MASK 0xffff0000
+#define NV34TCL_BACK_MATERIAL_SHININESS(x) (0x00001e20+((x)*4))
+#define NV34TCL_BACK_MATERIAL_SHININESS__SIZE 0x00000006
+#define NV34TCL_VTX_ATTR_1F(x) (0x00001e40+((x)*4))
+#define NV34TCL_VTX_ATTR_1F__SIZE 0x00000010
+#define NV34TCL_ENGINE 0x00001e94
+#define NV34TCL_ENGINE_FP (1 << 0)
+#define NV34TCL_ENGINE_VP (1 << 1)
+#define NV34TCL_ENGINE_FIXED (1 << 2)
+#define NV34TCL_VP_UPLOAD_FROM_ID 0x00001e9c
+#define NV34TCL_VP_START_FROM_ID 0x00001ea0
+#define NV34TCL_POINT_PARAMETERS(x) (0x00001ec0+((x)*4))
+#define NV34TCL_POINT_PARAMETERS__SIZE 0x00000008
+#define NV34TCL_POINT_SIZE 0x00001ee0
+#define NV34TCL_POINT_PARAMETERS_ENABLE 0x00001ee4
+#define NV34TCL_POINT_SPRITE 0x00001ee8
+#define NV34TCL_POINT_SPRITE_ENABLE (1 << 0)
+#define NV34TCL_POINT_SPRITE_R_MODE_SHIFT 1
+#define NV34TCL_POINT_SPRITE_R_MODE_MASK 0x00000006
+#define NV34TCL_POINT_SPRITE_R_MODE_ZERO 0x00000000
+#define NV34TCL_POINT_SPRITE_R_MODE_R 0x00000002
+#define NV34TCL_POINT_SPRITE_R_MODE_S 0x00000004
+#define NV34TCL_POINT_SPRITE_COORD_REPLACE (1 << 11)
+#define NV34TCL_VP_UPLOAD_CONST_ID 0x00001efc
+#define NV34TCL_VP_UPLOAD_CONST_X(x) (0x00001f00+((x)*16))
+#define NV34TCL_VP_UPLOAD_CONST_X__SIZE 0x00000004
+#define NV34TCL_VP_UPLOAD_CONST_Y(x) (0x00001f04+((x)*16))
+#define NV34TCL_VP_UPLOAD_CONST_Y__SIZE 0x00000004
+#define NV34TCL_VP_UPLOAD_CONST_Z(x) (0x00001f08+((x)*16))
+#define NV34TCL_VP_UPLOAD_CONST_Z__SIZE 0x00000004
+#define NV34TCL_VP_UPLOAD_CONST_W(x) (0x00001f0c+((x)*16))
+#define NV34TCL_VP_UPLOAD_CONST_W__SIZE 0x00000004
+#define NV34TCL_UNK1f80(x) (0x00001f80+((x)*4))
+#define NV34TCL_UNK1f80__SIZE 0x00000010
+
+
+#define NV40_CONTEXT_SURFACES_2D 0x00003062
+
+
+
+#define NV40_STRETCHED_IMAGE_FROM_CPU 0x00003066
+
+
+
+#define NV40_TEXTURE_FROM_CPU 0x0000307b
+
+
+
+#define NV40_SCALED_IMAGE_FROM_MEMORY 0x00003089
+
+
+
+#define NV40_IMAGE_FROM_CPU 0x0000308a
+
+
+
+#define NV40_SWIZZLED_SURFACE 0x0000309e
+
+
+
+#define NV40TCL 0x00004097
+
+#define NV40TCL_REF_CNT 0x00000050
+#define NV40TCL_NOP 0x00000100
+#define NV40TCL_NOTIFY 0x00000104
+#define NV40TCL_DMA_NOTIFY 0x00000180
+#define NV40TCL_DMA_TEXTURE0 0x00000184
+#define NV40TCL_DMA_TEXTURE1 0x00000188
+#define NV40TCL_DMA_COLOR1 0x0000018c
+#define NV40TCL_DMA_COLOR0 0x00000194
+#define NV40TCL_DMA_ZETA 0x00000198
+#define NV40TCL_DMA_VTXBUF0 0x0000019c
+#define NV40TCL_DMA_VTXBUF1 0x000001a0
+#define NV40TCL_DMA_FENCE 0x000001a4
+#define NV40TCL_DMA_QUERY 0x000001a8
+#define NV40TCL_DMA_UNK01AC 0x000001ac
+#define NV40TCL_DMA_UNK01B0 0x000001b0
+#define NV40TCL_DMA_COLOR2 0x000001b4
+#define NV40TCL_DMA_COLOR3 0x000001b8
+#define NV40TCL_RT_HORIZ 0x00000200
+#define NV40TCL_RT_HORIZ_W_SHIFT 16
+#define NV40TCL_RT_HORIZ_W_MASK 0xffff0000
+#define NV40TCL_RT_HORIZ_X_SHIFT 0
+#define NV40TCL_RT_HORIZ_X_MASK 0x0000ffff
+#define NV40TCL_RT_VERT 0x00000204
+#define NV40TCL_RT_VERT_H_SHIFT 16
+#define NV40TCL_RT_VERT_H_MASK 0xffff0000
+#define NV40TCL_RT_VERT_Y_SHIFT 0
+#define NV40TCL_RT_VERT_Y_MASK 0x0000ffff
+#define NV40TCL_RT_FORMAT 0x00000208
+#define NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT 24
+#define NV40TCL_RT_FORMAT_LOG2_HEIGHT_MASK 0xff000000
+#define NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT 16
+#define NV40TCL_RT_FORMAT_LOG2_WIDTH_MASK 0x00ff0000
+#define NV40TCL_RT_FORMAT_TYPE_SHIFT 8
+#define NV40TCL_RT_FORMAT_TYPE_MASK 0x00000f00
+#define NV40TCL_RT_FORMAT_TYPE_LINEAR 0x00000100
+#define NV40TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200
+#define NV40TCL_RT_FORMAT_ZETA_SHIFT 5
+#define NV40TCL_RT_FORMAT_ZETA_MASK 0x000000e0
+#define NV40TCL_RT_FORMAT_ZETA_Z16 0x00000020
+#define NV40TCL_RT_FORMAT_ZETA_Z24S8 0x00000040
+#define NV40TCL_RT_FORMAT_COLOR_SHIFT 0
+#define NV40TCL_RT_FORMAT_COLOR_MASK 0x0000001f
+#define NV40TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003
+#define NV40TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005
+#define NV40TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008
+#define NV40TCL_RT_FORMAT_COLOR_B8 0x00000009
+#define NV40TCL_RT_FORMAT_COLOR_UNKNOWN 0x0000000d
+#define NV40TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f
+#define NV40TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010
+#define NV40TCL_COLOR0_PITCH 0x0000020c
+#define NV40TCL_COLOR0_OFFSET 0x00000210
+#define NV40TCL_ZETA_OFFSET 0x00000214
+#define NV40TCL_COLOR1_OFFSET 0x00000218
+#define NV40TCL_COLOR1_PITCH 0x0000021c
+#define NV40TCL_RT_ENABLE 0x00000220
+#define NV40TCL_RT_ENABLE_MRT (1 << 4)
+#define NV40TCL_RT_ENABLE_COLOR3 (1 << 3)
+#define NV40TCL_RT_ENABLE_COLOR2 (1 << 2)
+#define NV40TCL_RT_ENABLE_COLOR1 (1 << 1)
+#define NV40TCL_RT_ENABLE_COLOR0 (1 << 0)
+#define NV40TCL_ZETA_PITCH 0x0000022c
+#define NV40TCL_COLOR2_PITCH 0x00000280
+#define NV40TCL_COLOR3_PITCH 0x00000284
+#define NV40TCL_COLOR2_OFFSET 0x00000288
+#define NV40TCL_COLOR3_OFFSET 0x0000028c
+#define NV40TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*8))
+#define NV40TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
+#define NV40TCL_VIEWPORT_CLIP_VERT(x) (0x000002c4+((x)*8))
+#define NV40TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
+#define NV40TCL_DITHER_ENABLE 0x00000300
+#define NV40TCL_ALPHA_TEST_ENABLE 0x00000304
+#define NV40TCL_ALPHA_TEST_FUNC 0x00000308
+#define NV40TCL_ALPHA_TEST_FUNC_NEVER 0x00000200
+#define NV40TCL_ALPHA_TEST_FUNC_LESS 0x00000201
+#define NV40TCL_ALPHA_TEST_FUNC_EQUAL 0x00000202
+#define NV40TCL_ALPHA_TEST_FUNC_LEQUAL 0x00000203
+#define NV40TCL_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NV40TCL_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NV40TCL_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
+#define NV40TCL_ALPHA_TEST_FUNC_GEQUAL 0x00000206
+#define NV40TCL_ALPHA_TEST_FUNC_ALWAYS 0x00000207
+#define NV40TCL_ALPHA_TEST_REF 0x0000030c
+#define NV40TCL_BLEND_ENABLE 0x00000310
+#define NV40TCL_BLEND_FUNC_SRC 0x00000314
+#define NV40TCL_BLEND_FUNC_SRC_RGB_SHIFT 0
+#define NV40TCL_BLEND_FUNC_SRC_RGB_MASK 0x0000ffff
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001
+#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV40TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV40TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307
+#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308
+#define NV40TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV40TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003
+#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SHIFT 16
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_MASK 0xffff0000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00010000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x03000000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x03020000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x03040000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x03060000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x03070000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x03080000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x80010000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x80030000
+#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000
+#define NV40TCL_BLEND_FUNC_DST 0x00000318
+#define NV40TCL_BLEND_FUNC_DST_RGB_SHIFT 0
+#define NV40TCL_BLEND_FUNC_DST_RGB_MASK 0x0000ffff
+#define NV40TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000
+#define NV40TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001
+#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300
+#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302
+#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV40TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304
+#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV40TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306
+#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307
+#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308
+#define NV40TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001
+#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV40TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003
+#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_SHIFT 16
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_MASK 0xffff0000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00010000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x03000000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x03020000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x03040000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x03060000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x03070000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x03080000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x80010000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x80030000
+#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000
+#define NV40TCL_BLEND_COLOR 0x0000031c
+#define NV40TCL_BLEND_EQUATION 0x00000320
+#define NV40TCL_BLEND_EQUATION_RGB_SHIFT 0
+#define NV40TCL_BLEND_EQUATION_RGB_MASK 0x0000ffff
+#define NV40TCL_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NV40TCL_BLEND_EQUATION_RGB_MIN 0x00008007
+#define NV40TCL_BLEND_EQUATION_RGB_MAX 0x00008008
+#define NV40TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NV40TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NV40TCL_BLEND_EQUATION_ALPHA_SHIFT 16
+#define NV40TCL_BLEND_EQUATION_ALPHA_MASK 0xffff0000
+#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x80060000
+#define NV40TCL_BLEND_EQUATION_ALPHA_MIN 0x80070000
+#define NV40TCL_BLEND_EQUATION_ALPHA_MAX 0x80080000
+#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x800a0000
+#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x800b0000
+#define NV40TCL_COLOR_MASK 0x00000324
+#define NV40TCL_COLOR_MASK_BUFFER0_B_SHIFT 0
+#define NV40TCL_COLOR_MASK_BUFFER0_B_MASK 0x000000ff
+#define NV40TCL_COLOR_MASK_BUFFER0_G_SHIFT 8
+#define NV40TCL_COLOR_MASK_BUFFER0_G_MASK 0x0000ff00
+#define NV40TCL_COLOR_MASK_BUFFER0_R_SHIFT 16
+#define NV40TCL_COLOR_MASK_BUFFER0_R_MASK 0x00ff0000
+#define NV40TCL_COLOR_MASK_BUFFER0_A_SHIFT 24
+#define NV40TCL_COLOR_MASK_BUFFER0_A_MASK 0xff000000
+#define NV40TCL_STENCIL_FRONT_ENABLE 0x00000328
+#define NV40TCL_STENCIL_FRONT_MASK 0x0000032c
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC 0x00000330
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
+#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
+#define NV40TCL_STENCIL_FRONT_FUNC_REF 0x00000334
+#define NV40TCL_STENCIL_FRONT_FUNC_MASK 0x00000338
+#define NV40TCL_STENCIL_FRONT_OP_FAIL 0x0000033c
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
+#define NV40TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL 0x00000340
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS 0x00000344
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV40TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV40TCL_STENCIL_BACK_ENABLE 0x00000348
+#define NV40TCL_STENCIL_BACK_MASK 0x0000034c
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC 0x00000350
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
+#define NV40TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
+#define NV40TCL_STENCIL_BACK_FUNC_REF 0x00000354
+#define NV40TCL_STENCIL_BACK_FUNC_MASK 0x00000358
+#define NV40TCL_STENCIL_BACK_OP_FAIL 0x0000035c
+#define NV40TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
+#define NV40TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
+#define NV40TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
+#define NV40TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
+#define NV40TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
+#define NV40TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
+#define NV40TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
+#define NV40TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL 0x00000360
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV40TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV40TCL_STENCIL_BACK_OP_ZPASS 0x00000364
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV40TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV40TCL_SHADE_MODEL 0x00000368
+#define NV40TCL_SHADE_MODEL_FLAT 0x00001d00
+#define NV40TCL_SHADE_MODEL_SMOOTH 0x00001d01
+#define NV40TCL_MRT_COLOR_MASK 0x00000370
+#define NV40TCL_MRT_COLOR_MASK_BUFFER1_A (1 << 4)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER1_R (1 << 5)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER1_G (1 << 6)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER1_B (1 << 7)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER2_A (1 << 8)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER2_R (1 << 9)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER2_G (1 << 10)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER2_B (1 << 11)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER3_A (1 << 12)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER3_R (1 << 13)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER3_G (1 << 14)
+#define NV40TCL_MRT_COLOR_MASK_BUFFER3_B (1 << 15)
+#define NV40TCL_COLOR_LOGIC_OP_ENABLE 0x00000374
+#define NV40TCL_COLOR_LOGIC_OP 0x00000378
+#define NV40TCL_COLOR_LOGIC_OP_CLEAR 0x00001500
+#define NV40TCL_COLOR_LOGIC_OP_AND 0x00001501
+#define NV40TCL_COLOR_LOGIC_OP_AND_REVERSE 0x00001502
+#define NV40TCL_COLOR_LOGIC_OP_COPY 0x00001503
+#define NV40TCL_COLOR_LOGIC_OP_AND_INVERTED 0x00001504
+#define NV40TCL_COLOR_LOGIC_OP_NOOP 0x00001505
+#define NV40TCL_COLOR_LOGIC_OP_XOR 0x00001506
+#define NV40TCL_COLOR_LOGIC_OP_OR 0x00001507
+#define NV40TCL_COLOR_LOGIC_OP_NOR 0x00001508
+#define NV40TCL_COLOR_LOGIC_OP_EQUIV 0x00001509
+#define NV40TCL_COLOR_LOGIC_OP_INVERT 0x0000150a
+#define NV40TCL_COLOR_LOGIC_OP_OR_REVERSE 0x0000150b
+#define NV40TCL_COLOR_LOGIC_OP_COPY_INVERTED 0x0000150c
+#define NV40TCL_COLOR_LOGIC_OP_OR_INVERTED 0x0000150d
+#define NV40TCL_COLOR_LOGIC_OP_NAND 0x0000150e
+#define NV40TCL_COLOR_LOGIC_OP_SET 0x0000150f
+#define NV40TCL_DEPTH_RANGE_NEAR 0x00000394
+#define NV40TCL_DEPTH_RANGE_FAR 0x00000398
+#define NV40TCL_LINE_WIDTH 0x000003b8
+#define NV40TCL_LINE_SMOOTH_ENABLE 0x000003bc
+#define NV40TCL_UNK03C0(x) (0x000003c0+((x)*4))
+#define NV40TCL_UNK03C0__SIZE 0x00000010
+#define NV40TCL_UNK0400(x) (0x00000400+((x)*4))
+#define NV40TCL_UNK0400__SIZE 0x00000010
+#define NV40TCL_UNK0440(x) (0x00000440+((x)*4))
+#define NV40TCL_UNK0440__SIZE 0x00000020
+#define NV40TCL_SCISSOR_HORIZ 0x000008c0
+#define NV40TCL_SCISSOR_HORIZ_X_SHIFT 0
+#define NV40TCL_SCISSOR_HORIZ_X_MASK 0x0000ffff
+#define NV40TCL_SCISSOR_HORIZ_W_SHIFT 16
+#define NV40TCL_SCISSOR_HORIZ_W_MASK 0xffff0000
+#define NV40TCL_SCISSOR_VERT 0x000008c4
+#define NV40TCL_SCISSOR_VERT_Y_SHIFT 0
+#define NV40TCL_SCISSOR_VERT_Y_MASK 0x0000ffff
+#define NV40TCL_SCISSOR_VERT_H_SHIFT 16
+#define NV40TCL_SCISSOR_VERT_H_MASK 0xffff0000
+#define NV40TCL_FOG_MODE 0x000008cc
+#define NV40TCL_FOG_EQUATION_CONSTANT 0x000008d0
+#define NV40TCL_FOG_EQUATION_LINEAR 0x000008d4
+#define NV40TCL_FOG_EQUATION_QUADRATIC 0x000008d8
+#define NV40TCL_FP_ADDRESS 0x000008e4
+#define NV40TCL_FP_ADDRESS_OFFSET_SHIFT 8
+#define NV40TCL_FP_ADDRESS_OFFSET_MASK 0xffffff00
+#define NV40TCL_FP_ADDRESS_DMA1 (1 << 1)
+#define NV40TCL_FP_ADDRESS_DMA0 (1 << 0)
+#define NV40TCL_VIEWPORT_HORIZ 0x00000a00
+#define NV40TCL_VIEWPORT_HORIZ_W_SHIFT 16
+#define NV40TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000
+#define NV40TCL_VIEWPORT_HORIZ_X_SHIFT 0
+#define NV40TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff
+#define NV40TCL_VIEWPORT_VERT 0x00000a04
+#define NV40TCL_VIEWPORT_VERT_H_SHIFT 16
+#define NV40TCL_VIEWPORT_VERT_H_MASK 0xffff0000
+#define NV40TCL_VIEWPORT_VERT_Y_SHIFT 0
+#define NV40TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff
+#define NV40TCL_VIEWPORT_TRANSLATE_X 0x00000a20
+#define NV40TCL_VIEWPORT_TRANSLATE_Y 0x00000a24
+#define NV40TCL_VIEWPORT_TRANSLATE_Z 0x00000a28
+#define NV40TCL_VIEWPORT_TRANSLATE_W 0x00000a2c
+#define NV40TCL_VIEWPORT_SCALE_X 0x00000a30
+#define NV40TCL_VIEWPORT_SCALE_Y 0x00000a34
+#define NV40TCL_VIEWPORT_SCALE_Z 0x00000a38
+#define NV40TCL_VIEWPORT_SCALE_W 0x00000a3c
+#define NV40TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a60
+#define NV40TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000a64
+#define NV40TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a68
+#define NV40TCL_DEPTH_FUNC 0x00000a6c
+#define NV40TCL_DEPTH_FUNC_NEVER 0x00000200
+#define NV40TCL_DEPTH_FUNC_LESS 0x00000201
+#define NV40TCL_DEPTH_FUNC_EQUAL 0x00000202
+#define NV40TCL_DEPTH_FUNC_LEQUAL 0x00000203
+#define NV40TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV40TCL_DEPTH_FUNC_GREATER 0x00000204
+#define NV40TCL_DEPTH_FUNC_NOTEQUAL 0x00000205
+#define NV40TCL_DEPTH_FUNC_GEQUAL 0x00000206
+#define NV40TCL_DEPTH_FUNC_ALWAYS 0x00000207
+#define NV40TCL_DEPTH_WRITE_ENABLE 0x00000a70
+#define NV40TCL_DEPTH_TEST_ENABLE 0x00000a74
+#define NV40TCL_POLYGON_OFFSET_FACTOR 0x00000a78
+#define NV40TCL_POLYGON_OFFSET_UNITS 0x00000a7c
+#define NV40TCL_VTX_ATTR_3I_XY(x) (0x00000a80+((x)*8))
+#define NV40TCL_VTX_ATTR_3I_XY__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_3I_XY_X_SHIFT 0
+#define NV40TCL_VTX_ATTR_3I_XY_X_MASK 0x0000ffff
+#define NV40TCL_VTX_ATTR_3I_XY_Y_SHIFT 16
+#define NV40TCL_VTX_ATTR_3I_XY_Y_MASK 0xffff0000
+#define NV40TCL_VTX_ATTR_3I_Z(x) (0x00000a84+((x)*8))
+#define NV40TCL_VTX_ATTR_3I_Z__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_3I_Z_Z_SHIFT 0
+#define NV40TCL_VTX_ATTR_3I_Z_Z_MASK 0x0000ffff
+#define NV40TCL_UNK0B40(x) (0x00000b40+((x)*4))
+#define NV40TCL_UNK0B40__SIZE 0x00000008
+#define NV40TCL_VP_UPLOAD_INST(x) (0x00000b80+((x)*4))
+#define NV40TCL_VP_UPLOAD_INST__SIZE 0x00000004
+#define NV40TCL_CLIP_PLANE_ENABLE 0x00001478
+#define NV40TCL_CLIP_PLANE_ENABLE_PLANE0 (1 << 1)
+#define NV40TCL_CLIP_PLANE_ENABLE_PLANE1 (1 << 5)
+#define NV40TCL_CLIP_PLANE_ENABLE_PLANE2 (1 << 9)
+#define NV40TCL_CLIP_PLANE_ENABLE_PLANE3 (1 << 13)
+#define NV40TCL_CLIP_PLANE_ENABLE_PLANE4 (1 << 17)
+#define NV40TCL_CLIP_PLANE_ENABLE_PLANE5 (1 << 21)
+#define NV40TCL_POLYGON_STIPPLE_ENABLE 0x0000147c
+#define NV40TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4))
+#define NV40TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020
+#define NV40TCL_VTX_ATTR_3F_X(x) (0x00001500+((x)*16))
+#define NV40TCL_VTX_ATTR_3F_X__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_3F_Y(x) (0x00001504+((x)*16))
+#define NV40TCL_VTX_ATTR_3F_Y__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_3F_Z(x) (0x00001508+((x)*16))
+#define NV40TCL_VTX_ATTR_3F_Z__SIZE 0x00000010
+#define NV40TCL_VTXBUF_ADDRESS(x) (0x00001680+((x)*4))
+#define NV40TCL_VTXBUF_ADDRESS__SIZE 0x00000010
+#define NV40TCL_VTXBUF_ADDRESS_DMA1 (1 << 31)
+#define NV40TCL_VTXBUF_ADDRESS_OFFSET_SHIFT 0
+#define NV40TCL_VTXBUF_ADDRESS_OFFSET_MASK 0x0fffffff
+#define NV40TCL_VTX_CACHE_INVALIDATE 0x00001714
+#define NV40TCL_VTXFMT(x) (0x00001740+((x)*4))
+#define NV40TCL_VTXFMT__SIZE 0x00000010
+#define NV40TCL_VTXFMT_TYPE_SHIFT 0
+#define NV40TCL_VTXFMT_TYPE_MASK 0x0000000f
+#define NV40TCL_VTXFMT_TYPE_FLOAT 0x00000002
+#define NV40TCL_VTXFMT_TYPE_UBYTE 0x00000004
+#define NV40TCL_VTXFMT_TYPE_USHORT 0x00000005
+#define NV40TCL_VTXFMT_SIZE_SHIFT 4
+#define NV40TCL_VTXFMT_SIZE_MASK 0x000000f0
+#define NV40TCL_VTXFMT_STRIDE_SHIFT 8
+#define NV40TCL_VTXFMT_STRIDE_MASK 0x0000ff00
+#define NV40TCL_QUERY_RESET 0x000017c8
+#define NV40TCL_QUERY_UNK17CC 0x000017cc
+#define NV40TCL_QUERY_GET 0x00001800
+#define NV40TCL_QUERY_GET_UNK24_SHIFT 24
+#define NV40TCL_QUERY_GET_UNK24_MASK 0xff000000
+#define NV40TCL_QUERY_GET_OFFSET_SHIFT 0
+#define NV40TCL_QUERY_GET_OFFSET_MASK 0x00ffffff
+#define NV40TCL_BEGIN_END 0x00001808
+#define NV40TCL_BEGIN_END_STOP 0x00000000
+#define NV40TCL_BEGIN_END_POINTS 0x00000001
+#define NV40TCL_BEGIN_END_LINES 0x00000002
+#define NV40TCL_BEGIN_END_LINE_LOOP 0x00000003
+#define NV40TCL_BEGIN_END_LINE_STRIP 0x00000004
+#define NV40TCL_BEGIN_END_TRIANGLES 0x00000005
+#define NV40TCL_BEGIN_END_TRIANGLE_STRIP 0x00000006
+#define NV40TCL_BEGIN_END_TRIANGLE_FAN 0x00000007
+#define NV40TCL_BEGIN_END_QUADS 0x00000008
+#define NV40TCL_BEGIN_END_QUAD_STRIP 0x00000009
+#define NV40TCL_BEGIN_END_POLYGON 0x0000000a
+#define NV40TCL_VB_ELEMENT_U16 0x0000180c
+#define NV40TCL_VB_ELEMENT_U16_1_SHIFT 16
+#define NV40TCL_VB_ELEMENT_U16_1_MASK 0xffff0000
+#define NV40TCL_VB_ELEMENT_U16_0_SHIFT 0
+#define NV40TCL_VB_ELEMENT_U16_0_MASK 0x0000ffff
+#define NV40TCL_VB_ELEMENT_U32 0x00001810
+#define NV40TCL_VB_VERTEX_BATCH 0x00001814
+#define NV40TCL_VB_VERTEX_BATCH_COUNT_SHIFT 24
+#define NV40TCL_VB_VERTEX_BATCH_COUNT_MASK 0xff000000
+#define NV40TCL_VB_VERTEX_BATCH_START_SHIFT 0
+#define NV40TCL_VB_VERTEX_BATCH_START_MASK 0x00ffffff
+#define NV40TCL_VERTEX_DATA 0x00001818
+#define NV40TCL_IDXBUF_ADDRESS 0x0000181c
+#define NV40TCL_IDXBUF_FORMAT 0x00001820
+#define NV40TCL_IDXBUF_FORMAT_TYPE_SHIFT 4
+#define NV40TCL_IDXBUF_FORMAT_TYPE_MASK 0x000000f0
+#define NV40TCL_IDXBUF_FORMAT_TYPE_U32 0x00000000
+#define NV40TCL_IDXBUF_FORMAT_TYPE_U16 0x00000010
+#define NV40TCL_IDXBUF_FORMAT_DMA1 (1 << 0)
+#define NV40TCL_VB_INDEX_BATCH 0x00001824
+#define NV40TCL_VB_INDEX_BATCH_COUNT_SHIFT 24
+#define NV40TCL_VB_INDEX_BATCH_COUNT_MASK 0xff000000
+#define NV40TCL_VB_INDEX_BATCH_START_SHIFT 0
+#define NV40TCL_VB_INDEX_BATCH_START_MASK 0x00ffffff
+#define NV40TCL_POLYGON_MODE_FRONT 0x00001828
+#define NV40TCL_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NV40TCL_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NV40TCL_POLYGON_MODE_FRONT_FILL 0x00001b02
+#define NV40TCL_POLYGON_MODE_BACK 0x0000182c
+#define NV40TCL_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NV40TCL_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NV40TCL_POLYGON_MODE_BACK_FILL 0x00001b02
+#define NV40TCL_CULL_FACE 0x00001830
+#define NV40TCL_CULL_FACE_FRONT 0x00000404
+#define NV40TCL_CULL_FACE_BACK 0x00000405
+#define NV40TCL_CULL_FACE_FRONT_AND_BACK 0x00000408
+#define NV40TCL_FRONT_FACE 0x00001834
+#define NV40TCL_FRONT_FACE_CW 0x00000900
+#define NV40TCL_FRONT_FACE_CCW 0x00000901
+#define NV40TCL_POLYGON_SMOOTH_ENABLE 0x00001838
+#define NV40TCL_CULL_FACE_ENABLE 0x0000183c
+#define NV40TCL_TEX_SIZE1(x) (0x00001840+((x)*4))
+#define NV40TCL_TEX_SIZE1__SIZE 0x00000008
+#define NV40TCL_TEX_SIZE1_DEPTH_SHIFT 20
+#define NV40TCL_TEX_SIZE1_DEPTH_MASK 0xfff00000
+#define NV40TCL_TEX_SIZE1_PITCH_SHIFT 0
+#define NV40TCL_TEX_SIZE1_PITCH_MASK 0x0000ffff
+#define NV40TCL_VTX_ATTR_2F_X(x) (0x00001880+((x)*8))
+#define NV40TCL_VTX_ATTR_2F_X__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_2F_Y(x) (0x00001884+((x)*8))
+#define NV40TCL_VTX_ATTR_2F_Y__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_2I(x) (0x00001900+((x)*4))
+#define NV40TCL_VTX_ATTR_2I__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_2I_X_SHIFT 0
+#define NV40TCL_VTX_ATTR_2I_X_MASK 0x0000ffff
+#define NV40TCL_VTX_ATTR_2I_Y_SHIFT 16
+#define NV40TCL_VTX_ATTR_2I_Y_MASK 0xffff0000
+#define NV40TCL_VTX_ATTR_4UB(x) (0x00001940+((x)*4))
+#define NV40TCL_VTX_ATTR_4UB__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_4UB_X_SHIFT 0
+#define NV40TCL_VTX_ATTR_4UB_X_MASK 0x000000ff
+#define NV40TCL_VTX_ATTR_4UB_Y_SHIFT 8
+#define NV40TCL_VTX_ATTR_4UB_Y_MASK 0x0000ff00
+#define NV40TCL_VTX_ATTR_4UB_Z_SHIFT 16
+#define NV40TCL_VTX_ATTR_4UB_Z_MASK 0x00ff0000
+#define NV40TCL_VTX_ATTR_4UB_W_SHIFT 24
+#define NV40TCL_VTX_ATTR_4UB_W_MASK 0xff000000
+#define NV40TCL_VTX_ATTR_4I_XY(x) (0x00001980+((x)*8))
+#define NV40TCL_VTX_ATTR_4I_XY__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_4I_XY_X_SHIFT 0
+#define NV40TCL_VTX_ATTR_4I_XY_X_MASK 0x0000ffff
+#define NV40TCL_VTX_ATTR_4I_XY_Y_SHIFT 16
+#define NV40TCL_VTX_ATTR_4I_XY_Y_MASK 0xffff0000
+#define NV40TCL_VTX_ATTR_4I_ZW(x) (0x00001984+((x)*8))
+#define NV40TCL_VTX_ATTR_4I_ZW__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_4I_ZW_Z_SHIFT 0
+#define NV40TCL_VTX_ATTR_4I_ZW_Z_MASK 0x0000ffff
+#define NV40TCL_VTX_ATTR_4I_ZW_W_SHIFT 16
+#define NV40TCL_VTX_ATTR_4I_ZW_W_MASK 0xffff0000
+#define NV40TCL_TEX_OFFSET(x) (0x00001a00+((x)*32))
+#define NV40TCL_TEX_OFFSET__SIZE 0x00000010
+#define NV40TCL_TEX_FORMAT(x) (0x00001a04+((x)*32))
+#define NV40TCL_TEX_FORMAT__SIZE 0x00000010
+#define NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT 16
+#define NV40TCL_TEX_FORMAT_MIPMAP_COUNT_MASK 0x000f0000
+#define NV40TCL_TEX_FORMAT_RECT (1 << 14)
+#define NV40TCL_TEX_FORMAT_LINEAR (1 << 13)
+#define NV40TCL_TEX_FORMAT_FORMAT_SHIFT 8
+#define NV40TCL_TEX_FORMAT_FORMAT_MASK 0x00001f00
+#define NV40TCL_TEX_FORMAT_FORMAT_L8 0x00000100
+#define NV40TCL_TEX_FORMAT_FORMAT_A1R5G5B5 0x00000200
+#define NV40TCL_TEX_FORMAT_FORMAT_A4R4G4B4 0x00000300
+#define NV40TCL_TEX_FORMAT_FORMAT_R5G6B5 0x00000400
+#define NV40TCL_TEX_FORMAT_FORMAT_A8R8G8B8 0x00000500
+#define NV40TCL_TEX_FORMAT_FORMAT_DXT1 0x00000600
+#define NV40TCL_TEX_FORMAT_FORMAT_DXT3 0x00000700
+#define NV40TCL_TEX_FORMAT_FORMAT_DXT5 0x00000800
+#define NV40TCL_TEX_FORMAT_FORMAT_A8L8 0x00000b00
+#define NV40TCL_TEX_FORMAT_FORMAT_Z24 0x00001000
+#define NV40TCL_TEX_FORMAT_FORMAT_Z16 0x00001200
+#define NV40TCL_TEX_FORMAT_FORMAT_A16 0x00001400
+#define NV40TCL_TEX_FORMAT_FORMAT_A16L16 0x00001500
+#define NV40TCL_TEX_FORMAT_FORMAT_HILO8 0x00001800
+#define NV40TCL_TEX_FORMAT_FORMAT_RGBA16F 0x00001a00
+#define NV40TCL_TEX_FORMAT_FORMAT_RGBA32F 0x00001b00
+#define NV40TCL_TEX_FORMAT_DIMS_SHIFT 4
+#define NV40TCL_TEX_FORMAT_DIMS_MASK 0x000000f0
+#define NV40TCL_TEX_FORMAT_DIMS_1D 0x00000010
+#define NV40TCL_TEX_FORMAT_DIMS_2D 0x00000020
+#define NV40TCL_TEX_FORMAT_DIMS_3D 0x00000030
+#define NV40TCL_TEX_FORMAT_NO_BORDER (1 << 3)
+#define NV40TCL_TEX_FORMAT_CUBIC (1 << 2)
+#define NV40TCL_TEX_FORMAT_DMA1 (1 << 1)
+#define NV40TCL_TEX_FORMAT_DMA0 (1 << 0)
+#define NV40TCL_TEX_WRAP(x) (0x00001a08+((x)*32))
+#define NV40TCL_TEX_WRAP__SIZE 0x00000010
+#define NV40TCL_TEX_WRAP_S_SHIFT 0
+#define NV40TCL_TEX_WRAP_S_MASK 0x000000ff
+#define NV40TCL_TEX_WRAP_S_REPEAT 0x00000001
+#define NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT 0x00000002
+#define NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE 0x00000003
+#define NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER 0x00000004
+#define NV40TCL_TEX_WRAP_S_CLAMP 0x00000005
+#define NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE 0x00000006
+#define NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER 0x00000007
+#define NV40TCL_TEX_WRAP_S_MIRROR_CLAMP 0x00000008
+#define NV40TCL_TEX_WRAP_T_SHIFT 8
+#define NV40TCL_TEX_WRAP_T_MASK 0x00000f00
+#define NV40TCL_TEX_WRAP_T_REPEAT 0x00000100
+#define NV40TCL_TEX_WRAP_T_MIRRORED_REPEAT 0x00000200
+#define NV40TCL_TEX_WRAP_T_CLAMP_TO_EDGE 0x00000300
+#define NV40TCL_TEX_WRAP_T_CLAMP_TO_BORDER 0x00000400
+#define NV40TCL_TEX_WRAP_T_CLAMP 0x00000500
+#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP_TO_EDGE 0x00000600
+#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP_TO_BORDER 0x00000700
+#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP 0x00000800
+#define NV40TCL_TEX_WRAP_EXPAND_NORMAL_SHIFT 12
+#define NV40TCL_TEX_WRAP_EXPAND_NORMAL_MASK 0x0000f000
+#define NV40TCL_TEX_WRAP_R_SHIFT 16
+#define NV40TCL_TEX_WRAP_R_MASK 0x00ff0000
+#define NV40TCL_TEX_WRAP_R_REPEAT 0x00010000
+#define NV40TCL_TEX_WRAP_R_MIRRORED_REPEAT 0x00020000
+#define NV40TCL_TEX_WRAP_R_CLAMP_TO_EDGE 0x00030000
+#define NV40TCL_TEX_WRAP_R_CLAMP_TO_BORDER 0x00040000
+#define NV40TCL_TEX_WRAP_R_CLAMP 0x00050000
+#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP_TO_EDGE 0x00060000
+#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP_TO_BORDER 0x00070000
+#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP 0x00080000
+#define NV40TCL_TEX_WRAP_RCOMP_SHIFT 28
+#define NV40TCL_TEX_WRAP_RCOMP_MASK 0xf0000000
+#define NV40TCL_TEX_WRAP_RCOMP_NEVER 0x00000000
+#define NV40TCL_TEX_WRAP_RCOMP_GREATER 0x10000000
+#define NV40TCL_TEX_WRAP_RCOMP_EQUAL 0x20000000
+#define NV40TCL_TEX_WRAP_RCOMP_GEQUAL 0x30000000
+#define NV40TCL_TEX_WRAP_RCOMP_LESS 0x40000000
+#define NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL 0x50000000
+#define NV40TCL_TEX_WRAP_RCOMP_LEQUAL 0x60000000
+#define NV40TCL_TEX_WRAP_RCOMP_ALWAYS 0x70000000
+#define NV40TCL_TEX_ENABLE(x) (0x00001a0c+((x)*32))
+#define NV40TCL_TEX_ENABLE__SIZE 0x00000010
+#define NV40TCL_TEX_ENABLE_ENABLE (1 << 31)
+#define NV40TCL_TEX_ENABLE_MIPMAP_MIN_LOD_SHIFT 27
+#define NV40TCL_TEX_ENABLE_MIPMAP_MIN_LOD_MASK 0x38000000
+#define NV40TCL_TEX_ENABLE_MIPMAP_MAX_LOD_SHIFT 15
+#define NV40TCL_TEX_ENABLE_MIPMAP_MAX_LOD_MASK 0x00038000
+#define NV40TCL_TEX_ENABLE_ANISO_SHIFT 4
+#define NV40TCL_TEX_ENABLE_ANISO_MASK 0x000000f0
+#define NV40TCL_TEX_ENABLE_ANISO_NONE 0x00000000
+#define NV40TCL_TEX_ENABLE_ANISO_2X 0x00000010
+#define NV40TCL_TEX_ENABLE_ANISO_4X 0x00000020
+#define NV40TCL_TEX_ENABLE_ANISO_6X 0x00000030
+#define NV40TCL_TEX_ENABLE_ANISO_8X 0x00000040
+#define NV40TCL_TEX_ENABLE_ANISO_10X 0x00000050
+#define NV40TCL_TEX_ENABLE_ANISO_12X 0x00000060
+#define NV40TCL_TEX_ENABLE_ANISO_16X 0x00000070
+#define NV40TCL_TEX_SWIZZLE(x) (0x00001a10+((x)*32))
+#define NV40TCL_TEX_SWIZZLE__SIZE 0x00000010
+#define NV40TCL_TEX_SWIZZLE_S0_X_SHIFT 14
+#define NV40TCL_TEX_SWIZZLE_S0_X_MASK 0x0000c000
+#define NV40TCL_TEX_SWIZZLE_S0_X_ZERO 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S0_X_ONE 0x00004000
+#define NV40TCL_TEX_SWIZZLE_S0_X_S1 0x00008000
+#define NV40TCL_TEX_SWIZZLE_S0_Y_SHIFT 12
+#define NV40TCL_TEX_SWIZZLE_S0_Y_MASK 0x00003000
+#define NV40TCL_TEX_SWIZZLE_S0_Y_ZERO 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S0_Y_ONE 0x00001000
+#define NV40TCL_TEX_SWIZZLE_S0_Y_S1 0x00002000
+#define NV40TCL_TEX_SWIZZLE_S0_Z_SHIFT 10
+#define NV40TCL_TEX_SWIZZLE_S0_Z_MASK 0x00000c00
+#define NV40TCL_TEX_SWIZZLE_S0_Z_ZERO 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S0_Z_ONE 0x00000400
+#define NV40TCL_TEX_SWIZZLE_S0_Z_S1 0x00000800
+#define NV40TCL_TEX_SWIZZLE_S0_W_SHIFT 8
+#define NV40TCL_TEX_SWIZZLE_S0_W_MASK 0x00000300
+#define NV40TCL_TEX_SWIZZLE_S0_W_ZERO 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S0_W_ONE 0x00000100
+#define NV40TCL_TEX_SWIZZLE_S0_W_S1 0x00000200
+#define NV40TCL_TEX_SWIZZLE_S1_X_SHIFT 6
+#define NV40TCL_TEX_SWIZZLE_S1_X_MASK 0x000000c0
+#define NV40TCL_TEX_SWIZZLE_S1_X_W 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S1_X_Z 0x00000040
+#define NV40TCL_TEX_SWIZZLE_S1_X_Y 0x00000080
+#define NV40TCL_TEX_SWIZZLE_S1_X_X 0x000000c0
+#define NV40TCL_TEX_SWIZZLE_S1_Y_SHIFT 4
+#define NV40TCL_TEX_SWIZZLE_S1_Y_MASK 0x00000030
+#define NV40TCL_TEX_SWIZZLE_S1_Y_W 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S1_Y_Z 0x00000010
+#define NV40TCL_TEX_SWIZZLE_S1_Y_Y 0x00000020
+#define NV40TCL_TEX_SWIZZLE_S1_Y_X 0x00000030
+#define NV40TCL_TEX_SWIZZLE_S1_Z_SHIFT 2
+#define NV40TCL_TEX_SWIZZLE_S1_Z_MASK 0x0000000c
+#define NV40TCL_TEX_SWIZZLE_S1_Z_W 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S1_Z_Z 0x00000004
+#define NV40TCL_TEX_SWIZZLE_S1_Z_Y 0x00000008
+#define NV40TCL_TEX_SWIZZLE_S1_Z_X 0x0000000c
+#define NV40TCL_TEX_SWIZZLE_S1_W_SHIFT 0
+#define NV40TCL_TEX_SWIZZLE_S1_W_MASK 0x00000003
+#define NV40TCL_TEX_SWIZZLE_S1_W_W 0x00000000
+#define NV40TCL_TEX_SWIZZLE_S1_W_Z 0x00000001
+#define NV40TCL_TEX_SWIZZLE_S1_W_Y 0x00000002
+#define NV40TCL_TEX_SWIZZLE_S1_W_X 0x00000003
+#define NV40TCL_TEX_FILTER(x) (0x00001a14+((x)*32))
+#define NV40TCL_TEX_FILTER__SIZE 0x00000010
+#define NV40TCL_TEX_FILTER_SIGNED_ALPHA (1 << 31)
+#define NV40TCL_TEX_FILTER_SIGNED_RED (1 << 30)
+#define NV40TCL_TEX_FILTER_SIGNED_GREEN (1 << 29)
+#define NV40TCL_TEX_FILTER_SIGNED_BLUE (1 << 28)
+#define NV40TCL_TEX_FILTER_MIN_SHIFT 16
+#define NV40TCL_TEX_FILTER_MIN_MASK 0x000f0000
+#define NV40TCL_TEX_FILTER_MIN_NEAREST 0x00010000
+#define NV40TCL_TEX_FILTER_MIN_LINEAR 0x00020000
+#define NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST 0x00030000
+#define NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST 0x00040000
+#define NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR 0x00050000
+#define NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR 0x00060000
+#define NV40TCL_TEX_FILTER_MAG_SHIFT 24
+#define NV40TCL_TEX_FILTER_MAG_MASK 0x0f000000
+#define NV40TCL_TEX_FILTER_MAG_NEAREST 0x01000000
+#define NV40TCL_TEX_FILTER_MAG_LINEAR 0x02000000
+#define NV40TCL_TEX_SIZE0(x) (0x00001a18+((x)*32))
+#define NV40TCL_TEX_SIZE0__SIZE 0x00000010
+#define NV40TCL_TEX_SIZE0_H_SHIFT 0
+#define NV40TCL_TEX_SIZE0_H_MASK 0x0000ffff
+#define NV40TCL_TEX_SIZE0_W_SHIFT 16
+#define NV40TCL_TEX_SIZE0_W_MASK 0xffff0000
+#define NV40TCL_TEX_BORDER_COLOR(x) (0x00001a1c+((x)*32))
+#define NV40TCL_TEX_BORDER_COLOR__SIZE 0x00000010
+#define NV40TCL_TEX_BORDER_COLOR_B_SHIFT 0
+#define NV40TCL_TEX_BORDER_COLOR_B_MASK 0x000000ff
+#define NV40TCL_TEX_BORDER_COLOR_G_SHIFT 8
+#define NV40TCL_TEX_BORDER_COLOR_G_MASK 0x0000ff00
+#define NV40TCL_TEX_BORDER_COLOR_R_SHIFT 16
+#define NV40TCL_TEX_BORDER_COLOR_R_MASK 0x00ff0000
+#define NV40TCL_TEX_BORDER_COLOR_A_SHIFT 24
+#define NV40TCL_TEX_BORDER_COLOR_A_MASK 0xff000000
+#define NV40TCL_VTX_ATTR_4F_X(x) (0x00001c00+((x)*16))
+#define NV40TCL_VTX_ATTR_4F_X__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_4F_Y(x) (0x00001c04+((x)*16))
+#define NV40TCL_VTX_ATTR_4F_Y__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_4F_Z(x) (0x00001c08+((x)*16))
+#define NV40TCL_VTX_ATTR_4F_Z__SIZE 0x00000010
+#define NV40TCL_VTX_ATTR_4F_W(x) (0x00001c0c+((x)*16))
+#define NV40TCL_VTX_ATTR_4F_W__SIZE 0x00000010
+#define NV40TCL_FP_CONTROL 0x00001d60
+#define NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT 24
+#define NV40TCL_FP_CONTROL_TEMP_COUNT_MASK 0xff000000
+#define NV40TCL_FP_CONTROL_KIL (1 << 7)
+#define NV40TCL_MULTISAMPLE_CONTROL 0x00001d7c
+#define NV40TCL_CLEAR_VALUE_DEPTH 0x00001d8c
+#define NV40TCL_CLEAR_VALUE_COLOR 0x00001d90
+#define NV40TCL_CLEAR_BUFFERS 0x00001d94
+#define NV40TCL_CLEAR_BUFFERS_COLOR_A (1 << 7)
+#define NV40TCL_CLEAR_BUFFERS_COLOR_B (1 << 6)
+#define NV40TCL_CLEAR_BUFFERS_COLOR_G (1 << 5)
+#define NV40TCL_CLEAR_BUFFERS_COLOR_R (1 << 4)
+#define NV40TCL_CLEAR_BUFFERS_STENCIL (1 << 1)
+#define NV40TCL_CLEAR_BUFFERS_DEPTH (1 << 0)
+#define NV40TCL_LINE_STIPPLE_ENABLE 0x00001db4
+#define NV40TCL_LINE_STIPPLE_PATTERN 0x00001db8
+#define NV40TCL_LINE_STIPPLE_PATTERN_FACTOR_SHIFT 0
+#define NV40TCL_LINE_STIPPLE_PATTERN_FACTOR_MASK 0x0000ffff
+#define NV40TCL_LINE_STIPPLE_PATTERN_PATTERN_SHIFT 16
+#define NV40TCL_LINE_STIPPLE_PATTERN_PATTERN_MASK 0xffff0000
+#define NV40TCL_VTX_ATTR_1F(x) (0x00001e40+((x)*4))
+#define NV40TCL_VTX_ATTR_1F__SIZE 0x00000010
+#define NV40TCL_VP_UPLOAD_FROM_ID 0x00001e9c
+#define NV40TCL_VP_START_FROM_ID 0x00001ea0
+#define NV40TCL_POINT_SIZE 0x00001ee0
+#define NV40TCL_POINT_SPRITE 0x00001ee8
+#define NV40TCL_VP_UPLOAD_CONST_ID 0x00001efc
+#define NV40TCL_VP_UPLOAD_CONST_X(x) (0x00001f00+((x)*16))
+#define NV40TCL_VP_UPLOAD_CONST_X__SIZE 0x00000004
+#define NV40TCL_VP_UPLOAD_CONST_Y(x) (0x00001f04+((x)*16))
+#define NV40TCL_VP_UPLOAD_CONST_Y__SIZE 0x00000004
+#define NV40TCL_VP_UPLOAD_CONST_Z(x) (0x00001f08+((x)*16))
+#define NV40TCL_VP_UPLOAD_CONST_Z__SIZE 0x00000004
+#define NV40TCL_VP_UPLOAD_CONST_W(x) (0x00001f0c+((x)*16))
+#define NV40TCL_VP_UPLOAD_CONST_W__SIZE 0x00000004
+#define NV40TCL_TEX_CACHE_CTL 0x00001fd8
+#define NV40TCL_VP_ATTRIB_EN 0x00001ff0
+#define NV40TCL_VP_RESULT_EN 0x00001ff4
+
+
+#define NV44TCL 0x00004497
+
+
+
+#define NV50_2D 0x0000502d
+
+#define NV50_2D_NOP 0x00000100
+#define NV50_2D_NOTIFY 0x00000104
+#define NV50_2D_DMA_NOTIFY 0x00000180
+#define NV50_2D_DMA_IN_MEMORY0 0x00000184
+#define NV50_2D_DMA_IN_MEMORY1 0x00000188
+#define NV50_2D_DMA_IN_MEMORY2 0x0000018c
+#define NV50_2D_DST_FORMAT 0x00000200
+#define NV50_2D_DST_FORMAT_32BPP 0x000000cf
+#define NV50_2D_DST_FORMAT_24BPP 0x000000e6
+#define NV50_2D_DST_FORMAT_16BPP 0x000000e8
+#define NV50_2D_DST_FORMAT_8BPP 0x000000f3
+#define NV50_2D_DST_FORMAT_15BPP 0x000000f8
+#define NV50_2D_DST_PITCH 0x00000214
+#define NV50_2D_DST_WIDTH 0x00000218
+#define NV50_2D_DST_HEIGHT 0x0000021c
+#define NV50_2D_DST_ADDRESS_HIGH 0x00000220
+#define NV50_2D_DST_ADDRESS_LOW 0x00000224
+#define NV50_2D_SRC_FORMAT 0x00000230
+#define NV50_2D_SRC_FORMAT_32BPP 0x000000cf
+#define NV50_2D_SRC_FORMAT_24BPP 0x000000e6
+#define NV50_2D_SRC_FORMAT_16BPP 0x000000e8
+#define NV50_2D_SRC_FORMAT_8BPP 0x000000f3
+#define NV50_2D_SRC_FORMAT_15BPP 0x000000f8
+#define NV50_2D_SRC_PITCH 0x00000244
+#define NV50_2D_SRC_WIDTH 0x00000248
+#define NV50_2D_SRC_HEIGHT 0x0000024c
+#define NV50_2D_SRC_ADDRESS_HIGH 0x00000250
+#define NV50_2D_SRC_ADDRESS_LOW 0x00000254
+#define NV50_2D_CLIP_X 0x00000280
+#define NV50_2D_CLIP_Y 0x00000284
+#define NV50_2D_CLIP_Z 0x00000288
+#define NV50_2D_CLIP_W 0x0000028c
+#define NV50_2D_ROP 0x000002a0
+#define NV50_2D_OPERATION 0x000002ac
+#define NV50_2D_OPERATION_SRCCOPY_AND 0x00000000
+#define NV50_2D_OPERATION_ROP_AND 0x00000001
+#define NV50_2D_OPERATION_BLEND_AND 0x00000002
+#define NV50_2D_OPERATION_SRCCOPY 0x00000003
+#define NV50_2D_OPERATION_SRCCOPY_PREMULT 0x00000004
+#define NV50_2D_OPERATION_BLEND_PREMULT 0x00000005
+#define NV50_2D_PATTERN_FORMAT 0x000002e8
+#define NV50_2D_PATTERN_FORMAT_16BPP 0x00000000
+#define NV50_2D_PATTERN_FORMAT_15BPP 0x00000001
+#define NV50_2D_PATTERN_FORMAT_32BPP 0x00000002
+#define NV50_2D_PATTERN_FORMAT_8BPP 0x00000003
+#define NV50_2D_PATTERN_COLOR(x) (0x000002f0+((x)*4))
+#define NV50_2D_PATTERN_COLOR__SIZE 0x00000002
+#define NV50_2D_PATTERN_BITMAP(x) (0x000002f8+((x)*4))
+#define NV50_2D_PATTERN_BITMAP__SIZE 0x00000002
+#define NV50_2D_RECT_FORMAT 0x00000584
+#define NV50_2D_RECT_FORMAT_32BPP 0x000000cf
+#define NV50_2D_RECT_FORMAT_24BPP 0x000000e6
+#define NV50_2D_RECT_FORMAT_16BPP 0x000000e8
+#define NV50_2D_RECT_FORMAT_8BPP 0x000000f3
+#define NV50_2D_RECT_FORMAT_15BPP 0x000000f8
+#define NV50_2D_RECT_COLOR 0x00000588
+#define NV50_2D_RECT_X1 0x00000600
+#define NV50_2D_RECT_Y1 0x00000604
+#define NV50_2D_RECT_X2 0x00000608
+#define NV50_2D_RECT_Y2 0x0000060c
+#define NV50_2D_SIFC_UNK0800 0x00000800
+#define NV50_2D_SIFC_FORMAT 0x00000804
+#define NV50_2D_SIFC_FORMAT_32BPP 0x000000cf
+#define NV50_2D_SIFC_FORMAT_24BPP 0x000000e6
+#define NV50_2D_SIFC_FORMAT_16BPP 0x000000e8
+#define NV50_2D_SIFC_FORMAT_8BPP 0x000000f3
+#define NV50_2D_SIFC_FORMAT_15BPP 0x000000f8
+#define NV50_2D_SIFC_WIDTH 0x00000838
+#define NV50_2D_SIFC_HEIGHT 0x0000083c
+#define NV50_2D_SIFC_SCALE_UNK0840 0x00000840
+#define NV50_2D_SIFC_SCALE_UNK0844 0x00000844
+#define NV50_2D_SIFC_SCALE_UNK0848 0x00000848
+#define NV50_2D_SIFC_SCALE_UNK084C 0x0000084c
+#define NV50_2D_SIFC_UNK0850 0x00000850
+#define NV50_2D_SIFC_DST_X 0x00000854
+#define NV50_2D_SIFC_UNK0858 0x00000858
+#define NV50_2D_SIFC_DST_Y 0x0000085c
+#define NV50_2D_SIFC_DATA 0x00000860
+#define NV50_2D_BLIT_DST_X 0x000008b0
+#define NV50_2D_BLIT_DST_Y 0x000008b4
+#define NV50_2D_BLIT_DST_W 0x000008b8
+#define NV50_2D_BLIT_DST_H 0x000008bc
+#define NV50_2D_BLIT_SRC_X 0x000008d4
+#define NV50_2D_BLIT_SRC_Y 0x000008dc
+
+
+#define NV50_MEMORY_TO_MEMORY_FORMAT 0x00005039
+
+#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH 0x00000238
+#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH 0x0000023c
+
+
+#define NV50TCL 0x00005097
+
+#define NV50TCL_NOP 0x00000100
+#define NV50TCL_NOTIFY 0x00000104
+#define NV50TCL_DMA_NOTIFY 0x00000180
+#define NV50TCL_DMA_UNK0(x) (0x00000184+((x)*4))
+#define NV50TCL_DMA_UNK0__SIZE 0x0000000b
+#define NV50TCL_DMA_UNK1(x) (0x000001c0+((x)*4))
+#define NV50TCL_DMA_UNK1__SIZE 0x00000008
+#define NV50TCL_RT_ADDRESS_HIGH(x) (0x00000200+((x)*32))
+#define NV50TCL_RT_ADDRESS_HIGH__SIZE 0x00000008
+#define NV50TCL_RT_ADDRESS_LOW(x) (0x00000204+((x)*32))
+#define NV50TCL_RT_ADDRESS_LOW__SIZE 0x00000008
+#define NV50TCL_RT_FORMAT(x) (0x00000208+((x)*32))
+#define NV50TCL_RT_FORMAT__SIZE 0x00000008
+#define NV50TCL_RT_FORMAT_32BPP 0x000000cf
+#define NV50TCL_RT_FORMAT_24BPP 0x000000e6
+#define NV50TCL_RT_FORMAT_16BPP 0x000000e8
+#define NV50TCL_RT_FORMAT_8BPP 0x000000f3
+#define NV50TCL_RT_FORMAT_15BPP 0x000000f8
+#define NV50TCL_RT_TILE_UNK(x) (0x0000020c+((x)*32))
+#define NV50TCL_RT_TILE_UNK__SIZE 0x00000008
+#define NV50TCL_RT_UNK4(x) (0x00000210+((x)*32))
+#define NV50TCL_RT_UNK4__SIZE 0x00000008
+#define NV50TCL_VTX_ATTR_1F(x) (0x00000300+((x)*4))
+#define NV50TCL_VTX_ATTR_1F__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_2F_X(x) (0x00000380+((x)*8))
+#define NV50TCL_VTX_ATTR_2F_X__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_2F_Y(x) (0x00000384+((x)*8))
+#define NV50TCL_VTX_ATTR_2F_Y__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_3F_X(x) (0x00000400+((x)*16))
+#define NV50TCL_VTX_ATTR_3F_X__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_3F_Y(x) (0x00000404+((x)*16))
+#define NV50TCL_VTX_ATTR_3F_Y__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_3F_Z(x) (0x00000408+((x)*16))
+#define NV50TCL_VTX_ATTR_3F_Z__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_3F_W(x) (0x0000040c+((x)*16))
+#define NV50TCL_VTX_ATTR_3F_W__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4F_X(x) (0x00000500+((x)*16))
+#define NV50TCL_VTX_ATTR_4F_X__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4F_Y(x) (0x00000504+((x)*16))
+#define NV50TCL_VTX_ATTR_4F_Y__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4F_Z(x) (0x00000508+((x)*16))
+#define NV50TCL_VTX_ATTR_4F_Z__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4F_W(x) (0x0000050c+((x)*16))
+#define NV50TCL_VTX_ATTR_4F_W__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_2I(x) (0x00000680+((x)*4))
+#define NV50TCL_VTX_ATTR_2I__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_2I_X_SHIFT 0
+#define NV50TCL_VTX_ATTR_2I_X_MASK 0x0000ffff
+#define NV50TCL_VTX_ATTR_2I_Y_SHIFT 16
+#define NV50TCL_VTX_ATTR_2I_Y_MASK 0xffff0000
+#define NV50TCL_VTX_ATTR_4I_0(x) (0x00000700+((x)*8))
+#define NV50TCL_VTX_ATTR_4I_0__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4I_0_X_SHIFT 0
+#define NV50TCL_VTX_ATTR_4I_0_X_MASK 0x0000ffff
+#define NV50TCL_VTX_ATTR_4I_0_Y_SHIFT 16
+#define NV50TCL_VTX_ATTR_4I_0_Y_MASK 0xffff0000
+#define NV50TCL_VTX_ATTR_4I_1(x) (0x00000704+((x)*8))
+#define NV50TCL_VTX_ATTR_4I_1__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4I_1_Z_SHIFT 0
+#define NV50TCL_VTX_ATTR_4I_1_Z_MASK 0x0000ffff
+#define NV50TCL_VTX_ATTR_4I_1_W_SHIFT 16
+#define NV50TCL_VTX_ATTR_4I_1_W_MASK 0xffff0000
+#define NV50TCL_VTX_ATTR_4NI_0(x) (0x00000780+((x)*8))
+#define NV50TCL_VTX_ATTR_4NI_0__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4NI_0_X_SHIFT 0
+#define NV50TCL_VTX_ATTR_4NI_0_X_MASK 0x0000ffff
+#define NV50TCL_VTX_ATTR_4NI_0_Y_SHIFT 16
+#define NV50TCL_VTX_ATTR_4NI_0_Y_MASK 0xffff0000
+#define NV50TCL_VTX_ATTR_4NI_1(x) (0x00000784+((x)*8))
+#define NV50TCL_VTX_ATTR_4NI_1__SIZE 0x00000010
+#define NV50TCL_VTX_ATTR_4NI_1_Z_SHIFT 0
+#define NV50TCL_VTX_ATTR_4NI_1_Z_MASK 0x0000ffff
+#define NV50TCL_VTX_ATTR_4NI_1_W_SHIFT 16
+#define NV50TCL_VTX_ATTR_4NI_1_W_MASK 0xffff0000
+#define NV50TCL_VERTEX_ARRAY_FORMAT(x) (0x00000900+((x)*16))
+#define NV50TCL_VERTEX_ARRAY_FORMAT__SIZE 0x00000010
+#define NV50TCL_VIEWPORT_UNK0(x) (0x00000a00+((x)*4))
+#define NV50TCL_VIEWPORT_UNK0__SIZE 0x00000003
+#define NV50TCL_VIEWPORT_UNK1(x) (0x00000a0c+((x)*4))
+#define NV50TCL_VIEWPORT_UNK1__SIZE 0x00000003
+#define NV50TCL_VIEWPORT_HORIZ 0x00000c00
+#define NV50TCL_VIEWPORT_HORIZ_X_SHIFT 0
+#define NV50TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff
+#define NV50TCL_VIEWPORT_HORIZ_W_SHIFT 16
+#define NV50TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000
+#define NV50TCL_VIEWPORT_VERT 0x00000c04
+#define NV50TCL_VIEWPORT_VERT_Y_SHIFT 0
+#define NV50TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff
+#define NV50TCL_VIEWPORT_VERT_H_SHIFT 16
+#define NV50TCL_VIEWPORT_VERT_H_MASK 0xffff0000
+#define NV50TCL_DEPTH_RANGE_NEAR 0x00000c08
+#define NV50TCL_DEPTH_RANGE_FAR 0x00000c0c
+#define NV50TCL_VIEWPORT_CLIP_HORIZ(x) (0x00000d00+((x)*8))
+#define NV50TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
+#define NV50TCL_VIEWPORT_CLIP_VERT(x) (0x00000d04+((x)*8))
+#define NV50TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
+#define NV50TCL_VERTEX_BUFFER_FIRST 0x00000d74
+#define NV50TCL_VERTEX_BUFFER_COUNT 0x00000d78
+#define NV50TCL_CLEAR_COLOR(x) (0x00000d80+((x)*4))
+#define NV50TCL_CLEAR_COLOR__SIZE 0x00000004
+#define NV50TCL_CLEAR_DEPTH 0x00000d90
+#define NV50TCL_CLEAR_STENCIL 0x00000da0
+#define NV50TCL_POLYGON_MODE_FRONT 0x00000dac
+#define NV50TCL_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NV50TCL_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NV50TCL_POLYGON_MODE_FRONT_FILL 0x00001b02
+#define NV50TCL_POLYGON_MODE_BACK 0x00000db0
+#define NV50TCL_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NV50TCL_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NV50TCL_POLYGON_MODE_BACK_FILL 0x00001b02
+#define NV50TCL_POLYGON_SMOOTH_ENABLE 0x00000db4
+#define NV50TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0
+#define NV50TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4
+#define NV50TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8
+#define NV50TCL_SCISSOR_HORIZ 0x00000e04
+#define NV50TCL_SCISSOR_HORIZ_L_SHIFT 0
+#define NV50TCL_SCISSOR_HORIZ_L_MASK 0x0000ffff
+#define NV50TCL_SCISSOR_HORIZ_R_SHIFT 16
+#define NV50TCL_SCISSOR_HORIZ_R_MASK 0xffff0000
+#define NV50TCL_SCISSOR_VERT 0x00000e08
+#define NV50TCL_SCISSOR_VERT_T_SHIFT 0
+#define NV50TCL_SCISSOR_VERT_T_MASK 0x0000ffff
+#define NV50TCL_SCISSOR_VERT_B_SHIFT 16
+#define NV50TCL_SCISSOR_VERT_B_MASK 0xffff0000
+#define NV50TCL_CB_ADDR 0x00000f00
+#define NV50TCL_CB_ADDR_ID_SHIFT 8
+#define NV50TCL_CB_ADDR_ID_MASK 0xffffff00
+#define NV50TCL_CB_ADDR_BUFFER_SHIFT 0
+#define NV50TCL_CB_ADDR_BUFFER_MASK 0x000000ff
+#define NV50TCL_CB_DATA(x) (0x00000f04+((x)*4))
+#define NV50TCL_CB_DATA__SIZE 0x00000010
+#define NV50TCL_STENCIL_FRONT_FUNC_REF 0x00000f54
+#define NV50TCL_STENCIL_FRONT_MASK 0x00000f58
+#define NV50TCL_STENCIL_FRONT_FUNC_MASK 0x00000f5c
+#define NV50TCL_GP_ADDRESS_HIGH 0x00000f70
+#define NV50TCL_GP_ADDRESS_LOW 0x00000f74
+#define NV50TCL_VP_ADDRESS_HIGH 0x00000f7c
+#define NV50TCL_VP_ADDRESS_LOW 0x00000f80
+#define NV50TCL_FP_ADDRESS_HIGH 0x00000fa4
+#define NV50TCL_FP_ADDRESS_LOW 0x00000fa8
+#define NV50TCL_ZETA_ADDRESS_HIGH 0x00000fe0
+#define NV50TCL_ZETA_ADDRESS_LOW 0x00000fe4
+#define NV50TCL_UNKFF4 0x00000ff4
+#define NV50TCL_UNKFF4_W_SHIFT 16
+#define NV50TCL_UNKFF4_W_MASK 0xffff0000
+#define NV50TCL_UNKFF8 0x00000ff8
+#define NV50TCL_UNKFF8_H_SHIFT 16
+#define NV50TCL_UNKFF8_H_MASK 0xffff0000
+#define NV50TCL_RT_HORIZ(x) (0x00001240+((x)*8))
+#define NV50TCL_RT_HORIZ__SIZE 0x00000008
+#define NV50TCL_RT_VERT(x) (0x00001244+((x)*8))
+#define NV50TCL_RT_VERT__SIZE 0x00000008
+#define NV50TCL_CB_DEF_ADDRESS_HIGH 0x00001280
+#define NV50TCL_CB_DEF_ADDRESS_LOW 0x00001284
+#define NV50TCL_CB_DEF_SET 0x00001288
+#define NV50TCL_CB_DEF_SET_SIZE_SHIFT 0
+#define NV50TCL_CB_DEF_SET_SIZE_MASK 0x0000ffff
+#define NV50TCL_CB_DEF_SET_BUFFER_SHIFT 16
+#define NV50TCL_CB_DEF_SET_BUFFER_MASK 0xffff0000
+#define NV50TCL_DEPTH_TEST_ENABLE 0x000012cc
+#define NV50TCL_SHADE_MODEL 0x000012d4
+#define NV50TCL_SHADE_MODEL_FLAT 0x00001d00
+#define NV50TCL_SHADE_MODEL_SMOOTH 0x00001d01
+#define NV50TCL_DEPTH_WRITE_ENABLE 0x000012e8
+#define NV50TCL_ALPHA_TEST_ENABLE 0x000012ec
+#define NV50TCL_DEPTH_TEST_FUNC 0x0000130c
+#define NV50TCL_DEPTH_TEST_FUNC_NEVER 0x00000200
+#define NV50TCL_DEPTH_TEST_FUNC_LESS 0x00000201
+#define NV50TCL_DEPTH_TEST_FUNC_EQUAL 0x00000202
+#define NV50TCL_DEPTH_TEST_FUNC_LEQUAL 0x00000203
+#define NV50TCL_DEPTH_TEST_FUNC_GREATER 0x00000204
+#define NV50TCL_DEPTH_TEST_FUNC_GREATER 0x00000204
+#define NV50TCL_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205
+#define NV50TCL_DEPTH_TEST_FUNC_GEQUAL 0x00000206
+#define NV50TCL_DEPTH_TEST_FUNC_ALWAYS 0x00000207
+#define NV50TCL_ALPHA_TEST_REF 0x00001310
+#define NV50TCL_ALPHA_TEST_FUNC 0x00001314
+#define NV50TCL_ALPHA_TEST_FUNC_NEVER 0x00000200
+#define NV50TCL_ALPHA_TEST_FUNC_LESS 0x00000201
+#define NV50TCL_ALPHA_TEST_FUNC_EQUAL 0x00000202
+#define NV50TCL_ALPHA_TEST_FUNC_LEQUAL 0x00000203
+#define NV50TCL_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NV50TCL_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NV50TCL_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
+#define NV50TCL_ALPHA_TEST_FUNC_GEQUAL 0x00000206
+#define NV50TCL_ALPHA_TEST_FUNC_ALWAYS 0x00000207
+#define NV50TCL_BLEND_COLOR(x) (0x0000131c+((x)*4))
+#define NV50TCL_BLEND_COLOR__SIZE 0x00000004
+#define NV50TCL_BLEND_EQUATION_RGB 0x00001340
+#define NV50TCL_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NV50TCL_BLEND_EQUATION_RGB_MIN 0x00008007
+#define NV50TCL_BLEND_EQUATION_RGB_MAX 0x00008008
+#define NV50TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NV50TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NV50TCL_BLEND_FUNC_SRC_RGB 0x00001344
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001
+#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307
+#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308
+#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV50TCL_BLEND_FUNC_DST_RGB 0x00001348
+#define NV50TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001
+#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307
+#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308
+#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV50TCL_BLEND_EQUATION_ALPHA 0x0000134c
+#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NV50TCL_BLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NV50TCL_BLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA 0x00001350
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00000001
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x00000300
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x00000302
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x00000304
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x00000306
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00000307
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00000308
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x00008001
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x00008003
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV50TCL_BLEND_FUNC_DST_ALPHA 0x00001358
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00000001
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x00000300
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x00000302
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x00000304
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x00000306
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x00000307
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x00000308
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x00008001
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x00008003
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV50TCL_BLEND_ENABLE(x) (0x00001360+((x)*4))
+#define NV50TCL_BLEND_ENABLE__SIZE 0x00000008
+#define NV50TCL_STENCIL_BACK_ENABLE 0x00001380
+#define NV50TCL_STENCIL_BACK_OP_FAIL 0x00001384
+#define NV50TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
+#define NV50TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
+#define NV50TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
+#define NV50TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
+#define NV50TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
+#define NV50TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
+#define NV50TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
+#define NV50TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL 0x00001388
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV50TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV50TCL_STENCIL_BACK_OP_ZPASS 0x0000138c
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV50TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC 0x00001390
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
+#define NV50TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
+#define NV50TCL_STENCIL_BACK_FUNC_REF 0x00001394
+#define NV50TCL_STENCIL_BACK_MASK 0x00001398
+#define NV50TCL_STENCIL_BACK_FUNC_MASK 0x0000139c
+#define NV50TCL_LINE_WIDTH 0x000013b0
+#define NV50TCL_VP_START_ID 0x0000140c
+#define NV50TCL_GP_START_ID 0x00001410
+#define NV50TCL_FP_START_ID 0x00001414
+#define NV50TCL_POINT_SIZE 0x00001518
+#define NV50TCL_TSC_ADDRESS_HIGH 0x0000155c
+#define NV50TCL_TSC_ADDRESS_LOW 0x00001560
+#define NV50TCL_POLYGON_OFFSET_FACTOR 0x0000156c
+#define NV50TCL_LINE_SMOOTH_ENABLE 0x00001570
+#define NV50TCL_TIC_ADDRESS_HIGH 0x00001574
+#define NV50TCL_TIC_ADDRESS_LOW 0x00001578
+#define NV50TCL_STENCIL_FRONT_ENABLE 0x00001594
+#define NV50TCL_STENCIL_FRONT_OP_FAIL 0x00001598
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
+#define NV50TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL 0x0000159c
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS 0x000015a0
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV50TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC 0x000015a4
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
+#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
+#define NV50TCL_POLYGON_OFFSET_UNITS 0x000015bc
+#define NV50TCL_VERTEX_BEGIN 0x000015dc
+#define NV50TCL_VERTEX_BEGIN_POINTS 0x00000000
+#define NV50TCL_VERTEX_BEGIN_LINES 0x00000001
+#define NV50TCL_VERTEX_BEGIN_LINE_LOOP 0x00000002
+#define NV50TCL_VERTEX_BEGIN_LINE_STRIP 0x00000003
+#define NV50TCL_VERTEX_BEGIN_TRIANGLES 0x00000004
+#define NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP 0x00000005
+#define NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN 0x00000006
+#define NV50TCL_VERTEX_BEGIN_QUADS 0x00000007
+#define NV50TCL_VERTEX_BEGIN_QUAD_STRIP 0x00000008
+#define NV50TCL_VERTEX_BEGIN_POLYGON 0x00000009
+#define NV50TCL_VERTEX_END 0x000015e0
+#define NV50TCL_VERTEX_DATA 0x00001640
+#define NV50TCL_VP_ATTR_EN_0 0x00001650
+#define NV50TCL_VP_ATTR_EN_0_7_SHIFT 28
+#define NV50TCL_VP_ATTR_EN_0_7_MASK 0xf0000000
+#define NV50TCL_VP_ATTR_EN_0_7_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_7_XNNN 0x10000000
+#define NV50TCL_VP_ATTR_EN_0_7_NYNN 0x20000000
+#define NV50TCL_VP_ATTR_EN_0_7_XYNN 0x30000000
+#define NV50TCL_VP_ATTR_EN_0_7_NNZN 0x40000000
+#define NV50TCL_VP_ATTR_EN_0_7_XNZN 0x50000000
+#define NV50TCL_VP_ATTR_EN_0_7_NYZN 0x60000000
+#define NV50TCL_VP_ATTR_EN_0_7_XYZN 0x70000000
+#define NV50TCL_VP_ATTR_EN_0_7_NNNW 0x80000000
+#define NV50TCL_VP_ATTR_EN_0_7_XNNW 0x90000000
+#define NV50TCL_VP_ATTR_EN_0_7_NYNW 0xa0000000
+#define NV50TCL_VP_ATTR_EN_0_7_XYNW 0xb0000000
+#define NV50TCL_VP_ATTR_EN_0_7_NNZW 0xc0000000
+#define NV50TCL_VP_ATTR_EN_0_7_XNZW 0xd0000000
+#define NV50TCL_VP_ATTR_EN_0_7_NYZW 0xe0000000
+#define NV50TCL_VP_ATTR_EN_0_7_XYZW 0xf0000000
+#define NV50TCL_VP_ATTR_EN_0_6_SHIFT 24
+#define NV50TCL_VP_ATTR_EN_0_6_MASK 0x0f000000
+#define NV50TCL_VP_ATTR_EN_0_6_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_6_XNNN 0x01000000
+#define NV50TCL_VP_ATTR_EN_0_6_NYNN 0x02000000
+#define NV50TCL_VP_ATTR_EN_0_6_XYNN 0x03000000
+#define NV50TCL_VP_ATTR_EN_0_6_NNZN 0x04000000
+#define NV50TCL_VP_ATTR_EN_0_6_XNZN 0x05000000
+#define NV50TCL_VP_ATTR_EN_0_6_NYZN 0x06000000
+#define NV50TCL_VP_ATTR_EN_0_6_XYZN 0x07000000
+#define NV50TCL_VP_ATTR_EN_0_6_NNNW 0x08000000
+#define NV50TCL_VP_ATTR_EN_0_6_XNNW 0x09000000
+#define NV50TCL_VP_ATTR_EN_0_6_NYNW 0x0a000000
+#define NV50TCL_VP_ATTR_EN_0_6_XYNW 0x0b000000
+#define NV50TCL_VP_ATTR_EN_0_6_NNZW 0x0c000000
+#define NV50TCL_VP_ATTR_EN_0_6_XNZW 0x0d000000
+#define NV50TCL_VP_ATTR_EN_0_6_NYZW 0x0e000000
+#define NV50TCL_VP_ATTR_EN_0_6_XYZW 0x0f000000
+#define NV50TCL_VP_ATTR_EN_0_5_SHIFT 20
+#define NV50TCL_VP_ATTR_EN_0_5_MASK 0x00f00000
+#define NV50TCL_VP_ATTR_EN_0_5_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_5_XNNN 0x00100000
+#define NV50TCL_VP_ATTR_EN_0_5_NYNN 0x00200000
+#define NV50TCL_VP_ATTR_EN_0_5_XYNN 0x00300000
+#define NV50TCL_VP_ATTR_EN_0_5_NNZN 0x00400000
+#define NV50TCL_VP_ATTR_EN_0_5_XNZN 0x00500000
+#define NV50TCL_VP_ATTR_EN_0_5_NYZN 0x00600000
+#define NV50TCL_VP_ATTR_EN_0_5_XYZN 0x00700000
+#define NV50TCL_VP_ATTR_EN_0_5_NNNW 0x00800000
+#define NV50TCL_VP_ATTR_EN_0_5_XNNW 0x00900000
+#define NV50TCL_VP_ATTR_EN_0_5_NYNW 0x00a00000
+#define NV50TCL_VP_ATTR_EN_0_5_XYNW 0x00b00000
+#define NV50TCL_VP_ATTR_EN_0_5_NNZW 0x00c00000
+#define NV50TCL_VP_ATTR_EN_0_5_XNZW 0x00d00000
+#define NV50TCL_VP_ATTR_EN_0_5_NYZW 0x00e00000
+#define NV50TCL_VP_ATTR_EN_0_5_XYZW 0x00f00000
+#define NV50TCL_VP_ATTR_EN_0_4_SHIFT 16
+#define NV50TCL_VP_ATTR_EN_0_4_MASK 0x000f0000
+#define NV50TCL_VP_ATTR_EN_0_4_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_4_XNNN 0x00010000
+#define NV50TCL_VP_ATTR_EN_0_4_NYNN 0x00020000
+#define NV50TCL_VP_ATTR_EN_0_4_XYNN 0x00030000
+#define NV50TCL_VP_ATTR_EN_0_4_NNZN 0x00040000
+#define NV50TCL_VP_ATTR_EN_0_4_XNZN 0x00050000
+#define NV50TCL_VP_ATTR_EN_0_4_NYZN 0x00060000
+#define NV50TCL_VP_ATTR_EN_0_4_XYZN 0x00070000
+#define NV50TCL_VP_ATTR_EN_0_4_NNNW 0x00080000
+#define NV50TCL_VP_ATTR_EN_0_4_XNNW 0x00090000
+#define NV50TCL_VP_ATTR_EN_0_4_NYNW 0x000a0000
+#define NV50TCL_VP_ATTR_EN_0_4_XYNW 0x000b0000
+#define NV50TCL_VP_ATTR_EN_0_4_NNZW 0x000c0000
+#define NV50TCL_VP_ATTR_EN_0_4_XNZW 0x000d0000
+#define NV50TCL_VP_ATTR_EN_0_4_NYZW 0x000e0000
+#define NV50TCL_VP_ATTR_EN_0_4_XYZW 0x000f0000
+#define NV50TCL_VP_ATTR_EN_0_3_SHIFT 12
+#define NV50TCL_VP_ATTR_EN_0_3_MASK 0x0000f000
+#define NV50TCL_VP_ATTR_EN_0_3_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_3_XNNN 0x00001000
+#define NV50TCL_VP_ATTR_EN_0_3_NYNN 0x00002000
+#define NV50TCL_VP_ATTR_EN_0_3_XYNN 0x00003000
+#define NV50TCL_VP_ATTR_EN_0_3_NNZN 0x00004000
+#define NV50TCL_VP_ATTR_EN_0_3_XNZN 0x00005000
+#define NV50TCL_VP_ATTR_EN_0_3_NYZN 0x00006000
+#define NV50TCL_VP_ATTR_EN_0_3_XYZN 0x00007000
+#define NV50TCL_VP_ATTR_EN_0_3_NNNW 0x00008000
+#define NV50TCL_VP_ATTR_EN_0_3_XNNW 0x00009000
+#define NV50TCL_VP_ATTR_EN_0_3_NYNW 0x0000a000
+#define NV50TCL_VP_ATTR_EN_0_3_XYNW 0x0000b000
+#define NV50TCL_VP_ATTR_EN_0_3_NNZW 0x0000c000
+#define NV50TCL_VP_ATTR_EN_0_3_XNZW 0x0000d000
+#define NV50TCL_VP_ATTR_EN_0_3_NYZW 0x0000e000
+#define NV50TCL_VP_ATTR_EN_0_3_XYZW 0x0000f000
+#define NV50TCL_VP_ATTR_EN_0_2_SHIFT 8
+#define NV50TCL_VP_ATTR_EN_0_2_MASK 0x00000f00
+#define NV50TCL_VP_ATTR_EN_0_2_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_2_XNNN 0x00000100
+#define NV50TCL_VP_ATTR_EN_0_2_NYNN 0x00000200
+#define NV50TCL_VP_ATTR_EN_0_2_XYNN 0x00000300
+#define NV50TCL_VP_ATTR_EN_0_2_NNZN 0x00000400
+#define NV50TCL_VP_ATTR_EN_0_2_XNZN 0x00000500
+#define NV50TCL_VP_ATTR_EN_0_2_NYZN 0x00000600
+#define NV50TCL_VP_ATTR_EN_0_2_XYZN 0x00000700
+#define NV50TCL_VP_ATTR_EN_0_2_NNNW 0x00000800
+#define NV50TCL_VP_ATTR_EN_0_2_XNNW 0x00000900
+#define NV50TCL_VP_ATTR_EN_0_2_NYNW 0x00000a00
+#define NV50TCL_VP_ATTR_EN_0_2_XYNW 0x00000b00
+#define NV50TCL_VP_ATTR_EN_0_2_NNZW 0x00000c00
+#define NV50TCL_VP_ATTR_EN_0_2_XNZW 0x00000d00
+#define NV50TCL_VP_ATTR_EN_0_2_NYZW 0x00000e00
+#define NV50TCL_VP_ATTR_EN_0_2_XYZW 0x00000f00
+#define NV50TCL_VP_ATTR_EN_0_1_SHIFT 4
+#define NV50TCL_VP_ATTR_EN_0_1_MASK 0x000000f0
+#define NV50TCL_VP_ATTR_EN_0_1_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_1_XNNN 0x00000010
+#define NV50TCL_VP_ATTR_EN_0_1_NYNN 0x00000020
+#define NV50TCL_VP_ATTR_EN_0_1_XYNN 0x00000030
+#define NV50TCL_VP_ATTR_EN_0_1_NNZN 0x00000040
+#define NV50TCL_VP_ATTR_EN_0_1_XNZN 0x00000050
+#define NV50TCL_VP_ATTR_EN_0_1_NYZN 0x00000060
+#define NV50TCL_VP_ATTR_EN_0_1_XYZN 0x00000070
+#define NV50TCL_VP_ATTR_EN_0_1_NNNW 0x00000080
+#define NV50TCL_VP_ATTR_EN_0_1_XNNW 0x00000090
+#define NV50TCL_VP_ATTR_EN_0_1_NYNW 0x000000a0
+#define NV50TCL_VP_ATTR_EN_0_1_XYNW 0x000000b0
+#define NV50TCL_VP_ATTR_EN_0_1_NNZW 0x000000c0
+#define NV50TCL_VP_ATTR_EN_0_1_XNZW 0x000000d0
+#define NV50TCL_VP_ATTR_EN_0_1_NYZW 0x000000e0
+#define NV50TCL_VP_ATTR_EN_0_1_XYZW 0x000000f0
+#define NV50TCL_VP_ATTR_EN_0_0_SHIFT 0
+#define NV50TCL_VP_ATTR_EN_0_0_MASK 0x0000000f
+#define NV50TCL_VP_ATTR_EN_0_0_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_0_0_XNNN 0x00000001
+#define NV50TCL_VP_ATTR_EN_0_0_NYNN 0x00000002
+#define NV50TCL_VP_ATTR_EN_0_0_XYNN 0x00000003
+#define NV50TCL_VP_ATTR_EN_0_0_NNZN 0x00000004
+#define NV50TCL_VP_ATTR_EN_0_0_XNZN 0x00000005
+#define NV50TCL_VP_ATTR_EN_0_0_NYZN 0x00000006
+#define NV50TCL_VP_ATTR_EN_0_0_XYZN 0x00000007
+#define NV50TCL_VP_ATTR_EN_0_0_NNNW 0x00000008
+#define NV50TCL_VP_ATTR_EN_0_0_XNNW 0x00000009
+#define NV50TCL_VP_ATTR_EN_0_0_NYNW 0x0000000a
+#define NV50TCL_VP_ATTR_EN_0_0_XYNW 0x0000000b
+#define NV50TCL_VP_ATTR_EN_0_0_NNZW 0x0000000c
+#define NV50TCL_VP_ATTR_EN_0_0_XNZW 0x0000000d
+#define NV50TCL_VP_ATTR_EN_0_0_NYZW 0x0000000e
+#define NV50TCL_VP_ATTR_EN_0_0_XYZW 0x0000000f
+#define NV50TCL_VP_ATTR_EN_1 0x00001654
+#define NV50TCL_VP_ATTR_EN_1_15_SHIFT 28
+#define NV50TCL_VP_ATTR_EN_1_15_MASK 0xf0000000
+#define NV50TCL_VP_ATTR_EN_1_15_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_15_XNNN 0x10000000
+#define NV50TCL_VP_ATTR_EN_1_15_NYNN 0x20000000
+#define NV50TCL_VP_ATTR_EN_1_15_XYNN 0x30000000
+#define NV50TCL_VP_ATTR_EN_1_15_NNZN 0x40000000
+#define NV50TCL_VP_ATTR_EN_1_15_XNZN 0x50000000
+#define NV50TCL_VP_ATTR_EN_1_15_NYZN 0x60000000
+#define NV50TCL_VP_ATTR_EN_1_15_XYZN 0x70000000
+#define NV50TCL_VP_ATTR_EN_1_15_NNNW 0x80000000
+#define NV50TCL_VP_ATTR_EN_1_15_XNNW 0x90000000
+#define NV50TCL_VP_ATTR_EN_1_15_NYNW 0xa0000000
+#define NV50TCL_VP_ATTR_EN_1_15_XYNW 0xb0000000
+#define NV50TCL_VP_ATTR_EN_1_15_NNZW 0xc0000000
+#define NV50TCL_VP_ATTR_EN_1_15_XNZW 0xd0000000
+#define NV50TCL_VP_ATTR_EN_1_15_NYZW 0xe0000000
+#define NV50TCL_VP_ATTR_EN_1_15_XYZW 0xf0000000
+#define NV50TCL_VP_ATTR_EN_1_14_SHIFT 24
+#define NV50TCL_VP_ATTR_EN_1_14_MASK 0x0f000000
+#define NV50TCL_VP_ATTR_EN_1_14_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_14_XNNN 0x01000000
+#define NV50TCL_VP_ATTR_EN_1_14_NYNN 0x02000000
+#define NV50TCL_VP_ATTR_EN_1_14_XYNN 0x03000000
+#define NV50TCL_VP_ATTR_EN_1_14_NNZN 0x04000000
+#define NV50TCL_VP_ATTR_EN_1_14_XNZN 0x05000000
+#define NV50TCL_VP_ATTR_EN_1_14_NYZN 0x06000000
+#define NV50TCL_VP_ATTR_EN_1_14_XYZN 0x07000000
+#define NV50TCL_VP_ATTR_EN_1_14_NNNW 0x08000000
+#define NV50TCL_VP_ATTR_EN_1_14_XNNW 0x09000000
+#define NV50TCL_VP_ATTR_EN_1_14_NYNW 0x0a000000
+#define NV50TCL_VP_ATTR_EN_1_14_XYNW 0x0b000000
+#define NV50TCL_VP_ATTR_EN_1_14_NNZW 0x0c000000
+#define NV50TCL_VP_ATTR_EN_1_14_XNZW 0x0d000000
+#define NV50TCL_VP_ATTR_EN_1_14_NYZW 0x0e000000
+#define NV50TCL_VP_ATTR_EN_1_14_XYZW 0x0f000000
+#define NV50TCL_VP_ATTR_EN_1_13_SHIFT 20
+#define NV50TCL_VP_ATTR_EN_1_13_MASK 0x00f00000
+#define NV50TCL_VP_ATTR_EN_1_13_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_13_XNNN 0x00100000
+#define NV50TCL_VP_ATTR_EN_1_13_NYNN 0x00200000
+#define NV50TCL_VP_ATTR_EN_1_13_XYNN 0x00300000
+#define NV50TCL_VP_ATTR_EN_1_13_NNZN 0x00400000
+#define NV50TCL_VP_ATTR_EN_1_13_XNZN 0x00500000
+#define NV50TCL_VP_ATTR_EN_1_13_NYZN 0x00600000
+#define NV50TCL_VP_ATTR_EN_1_13_XYZN 0x00700000
+#define NV50TCL_VP_ATTR_EN_1_13_NNNW 0x00800000
+#define NV50TCL_VP_ATTR_EN_1_13_XNNW 0x00900000
+#define NV50TCL_VP_ATTR_EN_1_13_NYNW 0x00a00000
+#define NV50TCL_VP_ATTR_EN_1_13_XYNW 0x00b00000
+#define NV50TCL_VP_ATTR_EN_1_13_NNZW 0x00c00000
+#define NV50TCL_VP_ATTR_EN_1_13_XNZW 0x00d00000
+#define NV50TCL_VP_ATTR_EN_1_13_NYZW 0x00e00000
+#define NV50TCL_VP_ATTR_EN_1_13_XYZW 0x00f00000
+#define NV50TCL_VP_ATTR_EN_1_12_SHIFT 16
+#define NV50TCL_VP_ATTR_EN_1_12_MASK 0x000f0000
+#define NV50TCL_VP_ATTR_EN_1_12_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_12_XNNN 0x00010000
+#define NV50TCL_VP_ATTR_EN_1_12_NYNN 0x00020000
+#define NV50TCL_VP_ATTR_EN_1_12_XYNN 0x00030000
+#define NV50TCL_VP_ATTR_EN_1_12_NNZN 0x00040000
+#define NV50TCL_VP_ATTR_EN_1_12_XNZN 0x00050000
+#define NV50TCL_VP_ATTR_EN_1_12_NYZN 0x00060000
+#define NV50TCL_VP_ATTR_EN_1_12_XYZN 0x00070000
+#define NV50TCL_VP_ATTR_EN_1_12_NNNW 0x00080000
+#define NV50TCL_VP_ATTR_EN_1_12_XNNW 0x00090000
+#define NV50TCL_VP_ATTR_EN_1_12_NYNW 0x000a0000
+#define NV50TCL_VP_ATTR_EN_1_12_XYNW 0x000b0000
+#define NV50TCL_VP_ATTR_EN_1_12_NNZW 0x000c0000
+#define NV50TCL_VP_ATTR_EN_1_12_XNZW 0x000d0000
+#define NV50TCL_VP_ATTR_EN_1_12_NYZW 0x000e0000
+#define NV50TCL_VP_ATTR_EN_1_12_XYZW 0x000f0000
+#define NV50TCL_VP_ATTR_EN_1_11_SHIFT 12
+#define NV50TCL_VP_ATTR_EN_1_11_MASK 0x0000f000
+#define NV50TCL_VP_ATTR_EN_1_11_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_11_XNNN 0x00001000
+#define NV50TCL_VP_ATTR_EN_1_11_NYNN 0x00002000
+#define NV50TCL_VP_ATTR_EN_1_11_XYNN 0x00003000
+#define NV50TCL_VP_ATTR_EN_1_11_NNZN 0x00004000
+#define NV50TCL_VP_ATTR_EN_1_11_XNZN 0x00005000
+#define NV50TCL_VP_ATTR_EN_1_11_NYZN 0x00006000
+#define NV50TCL_VP_ATTR_EN_1_11_XYZN 0x00007000
+#define NV50TCL_VP_ATTR_EN_1_11_NNNW 0x00008000
+#define NV50TCL_VP_ATTR_EN_1_11_XNNW 0x00009000
+#define NV50TCL_VP_ATTR_EN_1_11_NYNW 0x0000a000
+#define NV50TCL_VP_ATTR_EN_1_11_XYNW 0x0000b000
+#define NV50TCL_VP_ATTR_EN_1_11_NNZW 0x0000c000
+#define NV50TCL_VP_ATTR_EN_1_11_XNZW 0x0000d000
+#define NV50TCL_VP_ATTR_EN_1_11_NYZW 0x0000e000
+#define NV50TCL_VP_ATTR_EN_1_11_XYZW 0x0000f000
+#define NV50TCL_VP_ATTR_EN_1_10_SHIFT 8
+#define NV50TCL_VP_ATTR_EN_1_10_MASK 0x00000f00
+#define NV50TCL_VP_ATTR_EN_1_10_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_10_XNNN 0x00000100
+#define NV50TCL_VP_ATTR_EN_1_10_NYNN 0x00000200
+#define NV50TCL_VP_ATTR_EN_1_10_XYNN 0x00000300
+#define NV50TCL_VP_ATTR_EN_1_10_NNZN 0x00000400
+#define NV50TCL_VP_ATTR_EN_1_10_XNZN 0x00000500
+#define NV50TCL_VP_ATTR_EN_1_10_NYZN 0x00000600
+#define NV50TCL_VP_ATTR_EN_1_10_XYZN 0x00000700
+#define NV50TCL_VP_ATTR_EN_1_10_NNNW 0x00000800
+#define NV50TCL_VP_ATTR_EN_1_10_XNNW 0x00000900
+#define NV50TCL_VP_ATTR_EN_1_10_NYNW 0x00000a00
+#define NV50TCL_VP_ATTR_EN_1_10_XYNW 0x00000b00
+#define NV50TCL_VP_ATTR_EN_1_10_NNZW 0x00000c00
+#define NV50TCL_VP_ATTR_EN_1_10_XNZW 0x00000d00
+#define NV50TCL_VP_ATTR_EN_1_10_NYZW 0x00000e00
+#define NV50TCL_VP_ATTR_EN_1_10_XYZW 0x00000f00
+#define NV50TCL_VP_ATTR_EN_1_9_SHIFT 4
+#define NV50TCL_VP_ATTR_EN_1_9_MASK 0x000000f0
+#define NV50TCL_VP_ATTR_EN_1_9_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_9_XNNN 0x00000010
+#define NV50TCL_VP_ATTR_EN_1_9_NYNN 0x00000020
+#define NV50TCL_VP_ATTR_EN_1_9_XYNN 0x00000030
+#define NV50TCL_VP_ATTR_EN_1_9_NNZN 0x00000040
+#define NV50TCL_VP_ATTR_EN_1_9_XNZN 0x00000050
+#define NV50TCL_VP_ATTR_EN_1_9_NYZN 0x00000060
+#define NV50TCL_VP_ATTR_EN_1_9_XYZN 0x00000070
+#define NV50TCL_VP_ATTR_EN_1_9_NNNW 0x00000080
+#define NV50TCL_VP_ATTR_EN_1_9_XNNW 0x00000090
+#define NV50TCL_VP_ATTR_EN_1_9_NYNW 0x000000a0
+#define NV50TCL_VP_ATTR_EN_1_9_XYNW 0x000000b0
+#define NV50TCL_VP_ATTR_EN_1_9_NNZW 0x000000c0
+#define NV50TCL_VP_ATTR_EN_1_9_XNZW 0x000000d0
+#define NV50TCL_VP_ATTR_EN_1_9_NYZW 0x000000e0
+#define NV50TCL_VP_ATTR_EN_1_9_XYZW 0x000000f0
+#define NV50TCL_VP_ATTR_EN_1_8_SHIFT 0
+#define NV50TCL_VP_ATTR_EN_1_8_MASK 0x0000000f
+#define NV50TCL_VP_ATTR_EN_1_8_NONE 0x00000000
+#define NV50TCL_VP_ATTR_EN_1_8_XNNN 0x00000001
+#define NV50TCL_VP_ATTR_EN_1_8_NYNN 0x00000002
+#define NV50TCL_VP_ATTR_EN_1_8_XYNN 0x00000003
+#define NV50TCL_VP_ATTR_EN_1_8_NNZN 0x00000004
+#define NV50TCL_VP_ATTR_EN_1_8_XNZN 0x00000005
+#define NV50TCL_VP_ATTR_EN_1_8_NYZN 0x00000006
+#define NV50TCL_VP_ATTR_EN_1_8_XYZN 0x00000007
+#define NV50TCL_VP_ATTR_EN_1_8_NNNW 0x00000008
+#define NV50TCL_VP_ATTR_EN_1_8_XNNW 0x00000009
+#define NV50TCL_VP_ATTR_EN_1_8_NYNW 0x0000000a
+#define NV50TCL_VP_ATTR_EN_1_8_XYNW 0x0000000b
+#define NV50TCL_VP_ATTR_EN_1_8_NNZW 0x0000000c
+#define NV50TCL_VP_ATTR_EN_1_8_XNZW 0x0000000d
+#define NV50TCL_VP_ATTR_EN_1_8_NYZW 0x0000000e
+#define NV50TCL_VP_ATTR_EN_1_8_XYZW 0x0000000f
+#define NV50TCL_LINE_STIPPLE_ENABLE 0x0000166c
+#define NV50TCL_LINE_STIPPLE_PATTERN 0x00001680
+#define NV50TCL_POLYGON_STIPPLE_ENABLE 0x0000168c
+#define NV50TCL_VP_REG_HPOS 0x000016bc
+#define NV50TCL_VP_REG_HPOS_X_SHIFT 0
+#define NV50TCL_VP_REG_HPOS_X_MASK 0x000000ff
+#define NV50TCL_VP_REG_HPOS_Y_SHIFT 8
+#define NV50TCL_VP_REG_HPOS_Y_MASK 0x0000ff00
+#define NV50TCL_VP_REG_HPOS_Z_SHIFT 16
+#define NV50TCL_VP_REG_HPOS_Z_MASK 0x00ff0000
+#define NV50TCL_VP_REG_HPOS_W_SHIFT 24
+#define NV50TCL_VP_REG_HPOS_W_MASK 0xff000000
+#define NV50TCL_VP_REG_COL0 0x000016c0
+#define NV50TCL_VP_REG_COL0_X_SHIFT 0
+#define NV50TCL_VP_REG_COL0_X_MASK 0x000000ff
+#define NV50TCL_VP_REG_COL0_Y_SHIFT 8
+#define NV50TCL_VP_REG_COL0_Y_MASK 0x0000ff00
+#define NV50TCL_VP_REG_COL0_Z_SHIFT 16
+#define NV50TCL_VP_REG_COL0_Z_MASK 0x00ff0000
+#define NV50TCL_VP_REG_COL0_W_SHIFT 24
+#define NV50TCL_VP_REG_COL0_W_MASK 0xff000000
+#define NV50TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001700+((x)*4))
+#define NV50TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020
+#define NV50TCL_CULL_FACE_ENABLE 0x00001918
+#define NV50TCL_FRONT_FACE 0x0000191c
+#define NV50TCL_FRONT_FACE_CW 0x00000900
+#define NV50TCL_FRONT_FACE_CCW 0x00000901
+#define NV50TCL_CULL_FACE 0x00001920
+#define NV50TCL_CULL_FACE_FRONT 0x00000404
+#define NV50TCL_CULL_FACE_BACK 0x00000405
+#define NV50TCL_CULL_FACE_FRONT_AND_BACK 0x00000408
+#define NV50TCL_LOGIC_OP_ENABLE 0x000019c4
+#define NV50TCL_LOGIC_OP 0x000019c8
+#define NV50TCL_LOGIC_OP_CLEAR 0x00001500
+#define NV50TCL_LOGIC_OP_AND 0x00001501
+#define NV50TCL_LOGIC_OP_AND_REVERSE 0x00001502
+#define NV50TCL_LOGIC_OP_COPY 0x00001503
+#define NV50TCL_LOGIC_OP_AND_INVERTED 0x00001504
+#define NV50TCL_LOGIC_OP_NOOP 0x00001505
+#define NV50TCL_LOGIC_OP_XOR 0x00001506
+#define NV50TCL_LOGIC_OP_OR 0x00001507
+#define NV50TCL_LOGIC_OP_NOR 0x00001508
+#define NV50TCL_LOGIC_OP_EQUIV 0x00001509
+#define NV50TCL_LOGIC_OP_INVERT 0x0000150a
+#define NV50TCL_LOGIC_OP_OR_REVERSE 0x0000150b
+#define NV50TCL_LOGIC_OP_COPY_INVERTED 0x0000150c
+#define NV50TCL_LOGIC_OP_OR_INVERTED 0x0000150d
+#define NV50TCL_LOGIC_OP_NAND 0x0000150e
+#define NV50TCL_LOGIC_OP_SET 0x0000150f
+#define NV50TCL_CLEAR_BUFFERS 0x000019d0
+#define NV50TCL_COLOR_MASK(x) (0x00001a00+((x)*4))
+#define NV50TCL_COLOR_MASK__SIZE 0x00000008
+#define NV50TCL_COLOR_MASK_R_SHIFT 0
+#define NV50TCL_COLOR_MASK_R_MASK 0x0000000f
+#define NV50TCL_COLOR_MASK_G_SHIFT 4
+#define NV50TCL_COLOR_MASK_G_MASK 0x000000f0
+#define NV50TCL_COLOR_MASK_B_SHIFT 8
+#define NV50TCL_COLOR_MASK_B_MASK 0x00000f00
+#define NV50TCL_COLOR_MASK_A_SHIFT 12
+#define NV50TCL_COLOR_MASK_A_MASK 0x0000f000
+
+
+#define NV50_COMPUTE 0x000050c0
+
+#define NV50_COMPUTE_DMA_UNK0 0x000001a0
+#define NV50_COMPUTE_DMA_STATUS 0x000001a4
+#define NV50_COMPUTE_DMA_UNK1 0x000001b8
+#define NV50_COMPUTE_DMA_UNK2 0x000001bc
+#define NV50_COMPUTE_DMA_UNK3 0x000001c0
+#define NV50_COMPUTE_UNK4_HIGH 0x00000210
+#define NV50_COMPUTE_UNK4_LOW 0x00000214
+#define NV50_COMPUTE_UNK5_HIGH 0x00000218
+#define NV50_COMPUTE_UNK5_LOW 0x0000021c
+#define NV50_COMPUTE_UNK6_HIGH 0x00000294
+#define NV50_COMPUTE_UNK6_LOW 0x00000298
+#define NV50_COMPUTE_CONST_BASE_HIGH 0x000002a4
+#define NV50_COMPUTE_CONST_BASE_LO 0x000002a8
+#define NV50_COMPUTE_CONST_SIZE_SEG 0x000002ac
+#define NV50_COMPUTE_REG_COUNT 0x000002c0
+#define NV50_COMPUTE_STATUS_HIGH 0x00000310
+#define NV50_COMPUTE_STATUS_LOW 0x00000314
+#define NV50_COMPUTE_EXECUTE 0x0000031c
+#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374
+#define NV50_COMPUTE_GRIDDIM_YX 0x000003a4
+#define NV50_COMPUTE_SHARED_SIZE 0x000003a8
+#define NV50_COMPUTE_BLOCKDIM_YX 0x000003ac
+#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0
+#define NV50_COMPUTE_CALL_ADDRESS 0x000003b4
+#define NV50_COMPUTE_GLOBAL_BASE_HIGH(x) (0x00000400+((x)*32))
+#define NV50_COMPUTE_GLOBAL_BASE_HIGH__SIZE 0x00000010
+#define NV50_COMPUTE_GLOBAL_BASE_LOW(x) (0x00000404+((x)*32))
+#define NV50_COMPUTE_GLOBAL_BASE_LOW__SIZE 0x00000010
+#define NV50_COMPUTE_GLOBAL_LIMIT_HIGH(x) (0x00000408+((x)*32))
+#define NV50_COMPUTE_GLOBAL_LIMIT_HIGH__SIZE 0x00000010
+#define NV50_COMPUTE_GLOBAL_LIMIT_LOW(x) (0x0000040c+((x)*32))
+#define NV50_COMPUTE_GLOBAL_LIMIT_LOW__SIZE 0x00000010
+#define NV50_COMPUTE_GLOBAL_UNK(x) (0x00000410+((x)*32))
+#define NV50_COMPUTE_GLOBAL_UNK__SIZE 0x00000010
+#define NV50_COMPUTE_USER_PARAM(x) (0x00000600+((x)*4))
+#define NV50_COMPUTE_USER_PARAM__SIZE 0x00000040
+
+
+#define NV54TCL 0x00008297
+
+
+
+#endif /* NOUVEAU_REG_H */
diff --git a/src/gallium/drivers/nouveau/nouveau_device.h b/src/gallium/drivers/nouveau/nouveau_device.h
new file mode 100644
index 00000000000..e25e89fedda
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_device.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2007 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NOUVEAU_DEVICE_H__
+#define __NOUVEAU_DEVICE_H__
+
+struct nouveau_device {
+ unsigned chipset;
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_gldefs.h b/src/gallium/drivers/nouveau/nouveau_gldefs.h
new file mode 100644
index 00000000000..ff97aaa9af0
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_gldefs.h
@@ -0,0 +1,196 @@
+#ifndef __NOUVEAU_GLDEFS_H__
+#define __NOUVEAU_GLDEFS_H__
+
+static INLINE unsigned
+nvgl_blend_func(unsigned factor)
+{
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ZERO:
+ return 0x0000;
+ case PIPE_BLENDFACTOR_ONE:
+ return 0x0001;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return 0x0300;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ return 0x0301;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return 0x0302;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return 0x0303;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return 0x0304;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return 0x0305;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return 0x0306;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return 0x0307;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ return 0x0308;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return 0x8001;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return 0x8002;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return 0x8003;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ return 0x8004;
+ default:
+ return 0x0000;
+ }
+}
+
+static INLINE unsigned
+nvgl_blend_eqn(unsigned func)
+{
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ return 0x8006;
+ case PIPE_BLEND_MIN:
+ return 0x8007;
+ case PIPE_BLEND_MAX:
+ return 0x8008;
+ case PIPE_BLEND_SUBTRACT:
+ return 0x800a;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return 0x800b;
+ default:
+ return 0x8006;
+ }
+}
+
+static INLINE unsigned
+nvgl_logicop_func(unsigned func)
+{
+ switch (func) {
+ case PIPE_LOGICOP_CLEAR:
+ return 0x1500;
+ case PIPE_LOGICOP_NOR:
+ return 0x1508;
+ case PIPE_LOGICOP_AND_INVERTED:
+ return 0x1504;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ return 0x150c;
+ case PIPE_LOGICOP_AND_REVERSE:
+ return 0x1502;
+ case PIPE_LOGICOP_INVERT:
+ return 0x150a;
+ case PIPE_LOGICOP_XOR:
+ return 0x1506;
+ case PIPE_LOGICOP_NAND:
+ return 0x150e;
+ case PIPE_LOGICOP_AND:
+ return 0x1501;
+ case PIPE_LOGICOP_EQUIV:
+ return 0x1509;
+ case PIPE_LOGICOP_NOOP:
+ return 0x1505;
+ case PIPE_LOGICOP_OR_INVERTED:
+ return 0x150d;
+ case PIPE_LOGICOP_COPY:
+ return 0x1503;
+ case PIPE_LOGICOP_OR_REVERSE:
+ return 0x150b;
+ case PIPE_LOGICOP_OR:
+ return 0x1507;
+ case PIPE_LOGICOP_SET:
+ return 0x150f;
+ default:
+ return 0x1505;
+ }
+}
+
+static INLINE unsigned
+nvgl_comparison_op(unsigned op)
+{
+ switch (op) {
+ case PIPE_FUNC_NEVER:
+ return 0x0200;
+ case PIPE_FUNC_LESS:
+ return 0x0201;
+ case PIPE_FUNC_EQUAL:
+ return 0x0202;
+ case PIPE_FUNC_LEQUAL:
+ return 0x0203;
+ case PIPE_FUNC_GREATER:
+ return 0x0204;
+ case PIPE_FUNC_NOTEQUAL:
+ return 0x0205;
+ case PIPE_FUNC_GEQUAL:
+ return 0x0206;
+ case PIPE_FUNC_ALWAYS:
+ return 0x0207;
+ default:
+ return 0x0207;
+ }
+}
+
+static INLINE unsigned
+nvgl_polygon_mode(unsigned mode)
+{
+ switch (mode) {
+ case PIPE_POLYGON_MODE_POINT:
+ return 0x1b00;
+ case PIPE_POLYGON_MODE_LINE:
+ return 0x1b01;
+ case PIPE_POLYGON_MODE_FILL:
+ return 0x1b02;
+ default:
+ return 0x1b02;
+ }
+}
+
+static INLINE unsigned
+nvgl_stencil_op(unsigned op)
+{
+ switch (op) {
+ case PIPE_STENCIL_OP_ZERO:
+ return 0x0000;
+ case PIPE_STENCIL_OP_INVERT:
+ return 0x150a;
+ case PIPE_STENCIL_OP_KEEP:
+ return 0x1e00;
+ case PIPE_STENCIL_OP_REPLACE:
+ return 0x1e01;
+ case PIPE_STENCIL_OP_INCR:
+ return 0x1e02;
+ case PIPE_STENCIL_OP_DECR:
+ return 0x1e03;
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ return 0x8507;
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ return 0x8508;
+ default:
+ return 0x1e00;
+ }
+}
+
+static INLINE unsigned
+nvgl_primitive(unsigned prim) {
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ return 0x0001;
+ case PIPE_PRIM_LINES:
+ return 0x0002;
+ case PIPE_PRIM_LINE_LOOP:
+ return 0x0003;
+ case PIPE_PRIM_LINE_STRIP:
+ return 0x0004;
+ case PIPE_PRIM_TRIANGLES:
+ return 0x0005;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ return 0x0006;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ return 0x0007;
+ case PIPE_PRIM_QUADS:
+ return 0x0008;
+ case PIPE_PRIM_QUAD_STRIP:
+ return 0x0009;
+ case PIPE_PRIM_POLYGON:
+ return 0x000a;
+ default:
+ return 0;
+ }
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_grobj.h b/src/gallium/drivers/nouveau/nouveau_grobj.h
new file mode 100644
index 00000000000..8f5abf90514
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_grobj.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2007 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NOUVEAU_GROBJ_H__
+#define __NOUVEAU_GROBJ_H__
+
+#include "nouveau_channel.h"
+
+struct nouveau_grobj {
+ struct nouveau_channel *channel;
+ int grclass;
+ uint32_t handle;
+ int subc;
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_notifier.h b/src/gallium/drivers/nouveau/nouveau_notifier.h
new file mode 100644
index 00000000000..35adde1e324
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_notifier.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2007 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NOUVEAU_NOTIFIER_H__
+#define __NOUVEAU_NOTIFIER_H__
+
+#define NV_NOTIFIER_SIZE 32
+#define NV_NOTIFY_TIME_0 0x00000000
+#define NV_NOTIFY_TIME_1 0x00000004
+#define NV_NOTIFY_RETURN_VALUE 0x00000008
+#define NV_NOTIFY_STATE 0x0000000C
+#define NV_NOTIFY_STATE_STATUS_MASK 0xFF000000
+#define NV_NOTIFY_STATE_STATUS_SHIFT 24
+#define NV_NOTIFY_STATE_STATUS_COMPLETED 0x00
+#define NV_NOTIFY_STATE_STATUS_IN_PROCESS 0x01
+#define NV_NOTIFY_STATE_ERROR_CODE_MASK 0x0000FFFF
+#define NV_NOTIFY_STATE_ERROR_CODE_SHIFT 0
+
+struct nouveau_notifier {
+ struct nouveau_channel *channel;
+ uint32_t handle;
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h
new file mode 100644
index 00000000000..54ef1c1291e
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_push.h
@@ -0,0 +1,82 @@
+#ifndef __NOUVEAU_PUSH_H__
+#define __NOUVEAU_PUSH_H__
+
+#include "nouveau/nouveau_winsys.h"
+
+#ifndef NOUVEAU_PUSH_CONTEXT
+#error undefined push context
+#endif
+
+#define OUT_RING(data) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ (*pc->nvws->channel->pushbuf->cur++) = (data); \
+} while(0)
+
+#define OUT_RINGp(src,size) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ memcpy(pc->nvws->channel->pushbuf->cur, (src), (size) * 4); \
+ pc->nvws->channel->pushbuf->cur += (size); \
+} while(0)
+
+#define OUT_RINGf(data) do { \
+ union { float v; uint32_t u; } c; \
+ c.v = (data); \
+ OUT_RING(c.u); \
+} while(0)
+
+#define BEGIN_RING(obj,mthd,size) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \
+ pc->nvws->push_flush(pc->nvws, ((size) + 1), NULL); \
+ OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \
+ pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \
+} while(0)
+
+#define BEGIN_RING_NI(obj,mthd,size) do { \
+ BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \
+} while(0)
+
+#define FIRE_RING(fence) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ pc->nvws->push_flush(pc->nvws, 0, fence); \
+} while(0)
+
+#define OUT_RELOC(bo,data,flags,vor,tor) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ pc->nvws->push_reloc(pc->nvws, pc->nvws->channel->pushbuf->cur++, \
+ (bo), (data), (flags), (vor), (tor)); \
+} while(0)
+
+/* Raw data + flags depending on FB/TT buffer */
+#define OUT_RELOCd(bo,data,flags,vor,tor) do { \
+ OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \
+} while(0)
+
+/* FB/TT object handle */
+#define OUT_RELOCo(bo,flags) do { \
+ OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \
+ pc->nvws->channel->vram->handle, \
+ pc->nvws->channel->gart->handle); \
+} while(0)
+
+/* Low 32-bits of offset */
+#define OUT_RELOCl(bo,delta,flags) do { \
+ OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \
+} while(0)
+
+/* High 32-bits of offset */
+#define OUT_RELOCh(bo,delta,flags) do { \
+ OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \
+} while(0)
+
+/* A reloc which'll recombine into a NV_DMA_METHOD packet header */
+#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \
+ pc->nvws->push_flush(pc->nvws->channel, ((size) + 1), NULL); \
+ OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \
+ (flags), 0, 0); \
+ pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \
+} while(0)
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_pushbuf.h b/src/gallium/drivers/nouveau/nouveau_pushbuf.h
new file mode 100644
index 00000000000..19097650982
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_pushbuf.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2007 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NOUVEAU_PUSHBUF_H__
+#define __NOUVEAU_PUSHBUF_H__
+
+struct nouveau_pushbuf {
+ struct nouveau_channel *channel;
+ unsigned remaining;
+ uint32_t *cur;
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_resource.h b/src/gallium/drivers/nouveau/nouveau_resource.h
new file mode 100644
index 00000000000..1af7961d301
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_resource.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2007 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NOUVEAU_RESOURCE_H__
+#define __NOUVEAU_RESOURCE_H__
+
+struct nouveau_resource {
+ struct nouveau_resource *prev;
+ struct nouveau_resource *next;
+
+ int in_use;
+ void *priv;
+
+ unsigned int start;
+ unsigned int size;
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
new file mode 100644
index 00000000000..729988b095e
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h
@@ -0,0 +1,158 @@
+#ifndef __NOUVEAU_STATEOBJ_H__
+#define __NOUVEAU_STATEOBJ_H__
+
+#include "pipe/p_debug.h"
+
+struct nouveau_stateobj_reloc {
+ struct pipe_buffer *bo;
+
+ unsigned offset;
+ unsigned packet;
+
+ unsigned data;
+ unsigned flags;
+ unsigned vor;
+ unsigned tor;
+};
+
+struct nouveau_stateobj {
+ int refcount;
+
+ unsigned *push;
+ struct nouveau_stateobj_reloc *reloc;
+
+ unsigned *cur;
+ unsigned cur_packet;
+ unsigned cur_reloc;
+};
+
+static INLINE struct nouveau_stateobj *
+so_new(unsigned push, unsigned reloc)
+{
+ struct nouveau_stateobj *so;
+
+ so = MALLOC(sizeof(struct nouveau_stateobj));
+ so->refcount = 0;
+ so->push = MALLOC(sizeof(unsigned) * push);
+ so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc);
+
+ so->cur = so->push;
+ so->cur_reloc = so->cur_packet = 0;
+
+ return so;
+}
+
+static INLINE void
+so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso)
+{
+ struct nouveau_stateobj *so = *pso;
+
+ if (ref) {
+ ref->refcount++;
+ }
+
+ if (so && --so->refcount <= 0) {
+ free(so->push);
+ free(so->reloc);
+ free(so);
+ }
+
+ *pso = ref;
+}
+
+static INLINE void
+so_data(struct nouveau_stateobj *so, unsigned data)
+{
+ (*so->cur++) = (data);
+ so->cur_packet += 4;
+}
+
+static INLINE void
+so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size)
+{
+ so->cur_packet += (4 * size);
+ while (size--)
+ (*so->cur++) = (*data++);
+}
+
+static INLINE void
+so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr,
+ unsigned mthd, unsigned size)
+{
+ so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4);
+ so_data(so, (gr->subc << 13) | (size << 18) | mthd);
+}
+
+static INLINE void
+so_reloc(struct nouveau_stateobj *so, struct pipe_buffer *bo,
+ unsigned data, unsigned flags, unsigned vor, unsigned tor)
+{
+ struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++];
+
+ r->bo = bo;
+ r->offset = so->cur - so->push;
+ r->packet = so->cur_packet;
+ r->data = data;
+ r->flags = flags;
+ r->vor = vor;
+ r->tor = tor;
+ so_data(so, data);
+}
+
+static INLINE void
+so_dump(struct nouveau_stateobj *so)
+{
+ unsigned i, nr = so->cur - so->push;
+
+ for (i = 0; i < nr; i++)
+ debug_printf("+0x%04x: 0x%08x\n", i, so->push[i]);
+}
+
+static INLINE void
+so_emit(struct nouveau_winsys *nvws, struct nouveau_stateobj *so)
+{
+ struct nouveau_pushbuf *pb = nvws->channel->pushbuf;
+ unsigned nr, i;
+
+ nr = so->cur - so->push;
+ if (pb->remaining < nr)
+ nvws->push_flush(nvws, nr, NULL);
+ pb->remaining -= nr;
+
+ memcpy(pb->cur, so->push, nr * 4);
+ for (i = 0; i < so->cur_reloc; i++) {
+ struct nouveau_stateobj_reloc *r = &so->reloc[i];
+
+ nvws->push_reloc(nvws, pb->cur + r->offset, r->bo,
+ r->data, r->flags, r->vor, r->tor);
+ }
+ pb->cur += nr;
+}
+
+static INLINE void
+so_emit_reloc_markers(struct nouveau_winsys *nvws, struct nouveau_stateobj *so)
+{
+ struct nouveau_pushbuf *pb = nvws->channel->pushbuf;
+ unsigned i;
+
+ if (!so)
+ return;
+
+ i = so->cur_reloc << 1;
+ if (nvws->channel->pushbuf->remaining < i)
+ nvws->push_flush(nvws, i, NULL);
+ nvws->channel->pushbuf->remaining -= i;
+
+ for (i = 0; i < so->cur_reloc; i++) {
+ struct nouveau_stateobj_reloc *r = &so->reloc[i];
+
+ nvws->push_reloc(nvws, pb->cur++, r->bo, r->packet,
+ (r->flags &
+ (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART)) |
+ NOUVEAU_BO_DUMMY, 0, 0);
+ nvws->push_reloc(nvws, pb->cur++, r->bo, r->data,
+ r->flags | NOUVEAU_BO_DUMMY, r->vor, r->tor);
+ }
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h
new file mode 100644
index 00000000000..a10114beab9
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_util.h
@@ -0,0 +1,91 @@
+#ifndef __NOUVEAU_UTIL_H__
+#define __NOUVEAU_UTIL_H__
+
+/* Determine how many vertices can be pushed into the command stream.
+ * Where the remaining space isn't large enough to represent all verices,
+ * split the buffer at primitive boundaries.
+ *
+ * Returns a count of vertices that can be rendered, and an index to
+ * restart drawing at after a flush.
+ */
+static INLINE unsigned
+nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned *restart)
+{
+ int max, adj = 0;
+
+ max = remaining - overhead;
+ if (max < 0)
+ return 0;
+
+ max *= vpp;
+ if (max >= count)
+ return count;
+
+ switch (mode) {
+ case PIPE_PRIM_POINTS:
+ break;
+ case PIPE_PRIM_LINES:
+ max = max & 1;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ max = max - (max % 3);
+ break;
+ case PIPE_PRIM_QUADS:
+ max = max & 3;
+ break;
+ case PIPE_PRIM_LINE_LOOP:
+ case PIPE_PRIM_LINE_STRIP:
+ if (max < 2)
+ max = 0;
+ adj = 1;
+ break;
+ case PIPE_PRIM_POLYGON:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (max < 3)
+ max = 0;
+ adj = 2;
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ if (max < 4)
+ max = 0;
+ adj = 3;
+ break;
+ default:
+ assert(0);
+ }
+
+ *restart = start + max - adj;
+ return max;
+}
+
+/* Integer base-2 logarithm, rounded towards zero. */
+static INLINE unsigned log2i(unsigned i)
+{
+ unsigned r = 0;
+
+ if (i & 0xffff0000) {
+ i >>= 16;
+ r += 16;
+ }
+ if (i & 0x0000ff00) {
+ i >>= 8;
+ r += 8;
+ }
+ if (i & 0x000000f0) {
+ i >>= 4;
+ r += 4;
+ }
+ if (i & 0x0000000c) {
+ i >>= 2;
+ r += 2;
+ }
+ if (i & 0x00000002) {
+ r += 1;
+ }
+ return r;
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
new file mode 100644
index 00000000000..5535ebb6a99
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -0,0 +1,98 @@
+#ifndef NOUVEAU_WINSYS_H
+#define NOUVEAU_WINSYS_H
+
+#include <stdint.h>
+#include "pipe/p_winsys.h"
+#include "pipe/p_defines.h"
+
+#include "nouveau/nouveau_bo.h"
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_class.h"
+#include "nouveau/nouveau_device.h"
+#include "nouveau/nouveau_grobj.h"
+#include "nouveau/nouveau_notifier.h"
+#include "nouveau/nouveau_resource.h"
+#include "nouveau/nouveau_pushbuf.h"
+
+#define NOUVEAU_CAP_HW_VTXBUF (0xbeef0000)
+#define NOUVEAU_CAP_HW_IDXBUF (0xbeef0001)
+
+#define NOUVEAU_TEXTURE_USAGE_LINEAR (1 << 16)
+
+#define NOUVEAU_BUFFER_USAGE_TEXTURE (1 << 16)
+#define NOUVEAU_BUFFER_USAGE_ZETA (1 << 17)
+
+struct nouveau_winsys {
+ struct nouveau_context *nv;
+
+ struct nouveau_channel *channel;
+
+ int (*res_init)(struct nouveau_resource **heap, unsigned start,
+ unsigned size);
+ int (*res_alloc)(struct nouveau_resource *heap, int size, void *priv,
+ struct nouveau_resource **);
+ void (*res_free)(struct nouveau_resource **);
+
+ int (*push_reloc)(struct nouveau_winsys *, void *ptr,
+ struct pipe_buffer *, uint32_t data,
+ uint32_t flags, uint32_t vor, uint32_t tor);
+ int (*push_flush)(struct nouveau_winsys *, unsigned size,
+ struct pipe_fence_handle **fence);
+
+ int (*grobj_alloc)(struct nouveau_winsys *, int grclass,
+ struct nouveau_grobj **);
+ void (*grobj_free)(struct nouveau_grobj **);
+
+ int (*notifier_alloc)(struct nouveau_winsys *, int count,
+ struct nouveau_notifier **);
+ void (*notifier_free)(struct nouveau_notifier **);
+ void (*notifier_reset)(struct nouveau_notifier *, int id);
+ uint32_t (*notifier_status)(struct nouveau_notifier *, int id);
+ uint32_t (*notifier_retval)(struct nouveau_notifier *, int id);
+ int (*notifier_wait)(struct nouveau_notifier *, int id,
+ int status, int timeout);
+
+ int (*surface_copy)(struct nouveau_winsys *, struct pipe_surface *,
+ unsigned, unsigned, struct pipe_surface *,
+ unsigned, unsigned, unsigned, unsigned);
+ int (*surface_fill)(struct nouveau_winsys *, struct pipe_surface *,
+ unsigned, unsigned, unsigned, unsigned, unsigned);
+};
+
+extern struct pipe_screen *
+nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv04_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv10_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv20_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv20_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv30_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv40_create(struct pipe_screen *, unsigned pctx_id);
+
+extern struct pipe_screen *
+nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *);
+
+extern struct pipe_context *
+nv50_create(struct pipe_screen *, unsigned pctx_id);
+
+#endif
diff --git a/src/gallium/drivers/nv04/Makefile b/src/gallium/drivers/nv04/Makefile
new file mode 100644
index 00000000000..5ea51a2f420
--- /dev/null
+++ b/src/gallium/drivers/nv04/Makefile
@@ -0,0 +1,28 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv04
+
+DRIVER_SOURCES = \
+ nv04_clear.c \
+ nv04_context.c \
+ nv04_fragprog.c \
+ nv04_fragtex.c \
+ nv04_miptree.c \
+ nv04_prim_vbuf.c \
+ nv04_screen.c \
+ nv04_state.c \
+ nv04_state_emit.c \
+ nv04_surface.c \
+ nv04_vbo.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/nv04/nv04_clear.c b/src/gallium/drivers/nv04/nv04_clear.c
new file mode 100644
index 00000000000..01cacd36fe1
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_clear.c
@@ -0,0 +1,12 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv04_context.h"
+
+void
+nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue);
+}
diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c
new file mode 100644
index 00000000000..9f75253363f
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_context.c
@@ -0,0 +1,106 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv04_context.h"
+#include "nv04_screen.h"
+
+static void
+nv04_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ draw_flush(nv04->draw);
+
+ FIRE_RING(fence);
+}
+
+static void
+nv04_destroy(struct pipe_context *pipe)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ if (nv04->draw)
+ draw_destroy(nv04->draw);
+
+ FREE(nv04);
+}
+
+static void
+nv04_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+}
+
+static boolean
+nv04_init_hwctx(struct nv04_context *nv04)
+{
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1);
+ OUT_RING(0);
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1);
+ OUT_RING(0);
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
+ OUT_RING(0x40182800);
+// OUT_RING(1<<20/*no cull*/);
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1);
+// OUT_RING(0x24|(1<<6)|(1<<8));
+ OUT_RING(0x120001a4);
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1);
+ OUT_RING(0x332213a1);
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1);
+ OUT_RING(0x11001010);
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1);
+ OUT_RING(0x0);
+// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1);
+// OUT_RING(SCREEN_OFFSET);
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1);
+ OUT_RING(0xff000000);
+
+
+
+ FIRE_RING (NULL);
+ return TRUE;
+}
+
+struct pipe_context *
+nv04_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+ struct nv04_screen *screen = nv04_screen(pscreen);
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv04_context *nv04;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nv04 = CALLOC(1, sizeof(struct nv04_context));
+ if (!nv04)
+ return NULL;
+ nv04->screen = screen;
+ nv04->pctx_id = pctx_id;
+
+ nv04->nvws = nvws;
+
+ nv04->pipe.winsys = ws;
+ nv04->pipe.screen = pscreen;
+ nv04->pipe.destroy = nv04_destroy;
+ nv04->pipe.set_edgeflags = nv04_set_edgeflags;
+ nv04->pipe.draw_arrays = nv04_draw_arrays;
+ nv04->pipe.draw_elements = nv04_draw_elements;
+ nv04->pipe.clear = nv04_clear;
+ nv04->pipe.flush = nv04_flush;
+
+ nv04_init_surface_functions(nv04);
+ nv04_init_state_functions(nv04);
+
+ nv04->draw = draw_create();
+ assert(nv04->draw);
+ draw_wide_point_threshold(nv04->draw, 0.0);
+ draw_wide_line_threshold(nv04->draw, 0.0);
+ draw_enable_line_stipple(nv04->draw, FALSE);
+ draw_enable_point_sprites(nv04->draw, FALSE);
+ draw_set_rasterize_stage(nv04->draw, nv04_draw_vbuf_stage(nv04));
+
+ nv04_init_hwctx(nv04);
+
+ return &nv04->pipe;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h
new file mode 100644
index 00000000000..3e6a0852702
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_context.h
@@ -0,0 +1,146 @@
+#ifndef __NV04_CONTEXT_H__
+#define __NV04_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv04_screen *ctx = nv04->screen
+#include "nouveau/nouveau_push.h"
+
+#include "nv04_state.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+#include "nv04_screen.h"
+
+#define NV04_NEW_VERTPROG (1 << 1)
+#define NV04_NEW_FRAGPROG (1 << 2)
+#define NV04_NEW_BLEND (1 << 3)
+#define NV04_NEW_RAST (1 << 4)
+#define NV04_NEW_CONTROL (1 << 5)
+#define NV04_NEW_VIEWPORT (1 << 6)
+#define NV04_NEW_SAMPLER (1 << 7)
+
+struct nv04_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv04_screen *screen;
+ unsigned pctx_id;
+
+ struct draw_context *draw;
+
+ int chipset;
+ struct nouveau_notifier *sync;
+
+ uint32_t dirty;
+
+ struct nv04_blend_state *blend;
+ struct nv04_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ struct nv04_fragtex_state fragtex;
+ struct nv04_rasterizer_state *rast;
+ struct nv04_depth_stencil_alpha_state *dsa;
+
+ struct nv04_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned dirty_samplers;
+ unsigned fp_samplers;
+ unsigned vp_samplers;
+
+ uint32_t rt_enable;
+ struct pipe_buffer *rt[4];
+ struct pipe_buffer *zeta;
+
+ struct {
+ struct pipe_buffer *buffer;
+ uint32_t format;
+ } tex[16];
+
+ unsigned vb_enable;
+ struct {
+ struct pipe_buffer *buffer;
+ unsigned delta;
+ } vb[16];
+
+ struct vertex_info vertex_info;
+ struct {
+
+ struct nouveau_resource *exec_heap;
+ struct nouveau_resource *data_heap;
+
+ struct nv04_vertex_program *active;
+
+ struct nv04_vertex_program *current;
+ struct pipe_buffer *constant_buf;
+ } vertprog;
+
+ struct {
+ struct nv04_fragment_program *active;
+
+ struct nv04_fragment_program *current;
+ struct pipe_buffer *constant_buf;
+ } fragprog;
+
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ unsigned num_vertex_buffers;
+ unsigned num_vertex_elements;
+
+ struct pipe_viewport_state viewport;
+};
+
+static INLINE struct nv04_context *
+nv04_context(struct pipe_context *pipe)
+{
+ return (struct nv04_context *)pipe;
+}
+
+extern void nv04_init_state_functions(struct nv04_context *nv04);
+extern void nv04_init_surface_functions(struct nv04_context *nv04);
+extern void nv04_init_miptree_functions(struct pipe_screen *screen);
+
+/* nv04_clear.c */
+extern void nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+/* nv04_draw.c */
+extern struct draw_stage *nv04_draw_render_stage(struct nv04_context *nv04);
+
+/* nv04_fragprog.c */
+extern void nv04_fragprog_bind(struct nv04_context *,
+ struct nv04_fragment_program *);
+extern void nv04_fragprog_destroy(struct nv04_context *,
+ struct nv04_fragment_program *);
+
+/* nv04_fragtex.c */
+extern void nv04_fragtex_bind(struct nv04_context *);
+
+/* nv04_prim_vbuf.c */
+struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 );
+
+/* nv04_state.c and friends */
+extern void nv04_emit_hw_state(struct nv04_context *nv04);
+extern void nv04_state_tex_update(struct nv04_context *nv04);
+
+/* nv04_vbo.c */
+extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv04_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count);
+
+
+#endif
diff --git a/src/gallium/drivers/nv04/nv04_fragprog.c b/src/gallium/drivers/nv04/nv04_fragprog.c
new file mode 100644
index 00000000000..8a2af41fe06
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_fragprog.c
@@ -0,0 +1,21 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv04_context.h"
+
+void
+nv04_fragprog_bind(struct nv04_context *nv04, struct nv04_fragment_program *fp)
+{
+}
+
+void
+nv04_fragprog_destroy(struct nv04_context *nv04,
+ struct nv04_fragment_program *fp)
+{
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c
new file mode 100644
index 00000000000..21f990fd536
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_fragtex.c
@@ -0,0 +1,73 @@
+#include "nv04_context.h"
+#include "nouveau/nouveau_util.h"
+
+#define _(m,tf) \
+{ \
+ PIPE_FORMAT_##m, \
+ NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \
+}
+
+struct nv04_texture_format {
+ uint pipe;
+ int format;
+};
+
+static struct nv04_texture_format
+nv04_texture_formats[] = {
+ _(A8R8G8B8_UNORM, A8R8G8B8),
+ _(X8R8G8B8_UNORM, X8R8G8B8),
+ _(A1R5G5B5_UNORM, A1R5G5B5),
+ _(A4R4G4B4_UNORM, A4R4G4B4),
+ _(L8_UNORM, Y8 ),
+ _(A8_UNORM, Y8 ),
+};
+
+static uint32_t
+nv04_fragtex_format(uint pipe_format)
+{
+ struct nv04_texture_format *tf = nv04_texture_formats;
+ int i;
+
+ for (i=0; i< sizeof(nv04_texture_formats)/sizeof(nv04_texture_formats[0]); i++) {
+ if (tf->pipe == pipe_format)
+ return tf->format;
+ tf++;
+ }
+
+ NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format));
+ return 0;
+}
+
+
+static void
+nv04_fragtex_build(struct nv04_context *nv04, int unit)
+{
+ struct nv04_miptree *nv04mt = nv04->tex_miptree[unit];
+ struct pipe_texture *pt = &nv04mt->base;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_2D:
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return;
+ }
+
+ nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER
+ | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER
+ | nv04_fragtex_format(pt->format)
+ | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT )
+ | ( log2i(pt->width[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT )
+ | ( log2i(pt->height[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT )
+ | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE
+ | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE
+ ;
+}
+
+
+void
+nv04_fragtex_bind(struct nv04_context *nv04)
+{
+ nv04_fragtex_build(nv04, 0);
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c
new file mode 100644
index 00000000000..0cbb91e187b
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_miptree.c
@@ -0,0 +1,138 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv04_context.h"
+#include "nv04_screen.h"
+
+static void
+nv04_miptree_layout(struct nv04_miptree *nv04mt)
+{
+ struct pipe_texture *pt = &nv04mt->base;
+ uint width = pt->width[0], height = pt->height[0];
+ uint offset = 0;
+ int nr_faces, l, f;
+
+ nr_faces = 1;
+
+ for (l = 0; l <= pt->last_level; l++) {
+ pt->width[l] = width;
+ pt->height[l] = height;
+
+ pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+ nv04mt->level[l].pitch = pt->width[0] * pt->block.size;
+ nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63;
+
+ nv04mt->level[l].image_offset =
+ CALLOC(nr_faces, sizeof(unsigned));
+
+ width = MAX2(1, width >> 1);
+ height = MAX2(1, height >> 1);
+
+ }
+
+ for (f = 0; f < nr_faces; f++) {
+ for (l = 0; l <= pt->last_level; l++) {
+ nv04mt->level[l].image_offset[f] = offset;
+ offset += nv04mt->level[l].pitch * pt->height[l];
+ }
+ }
+
+ nv04mt->total_size = offset;
+}
+
+static struct pipe_texture *
+nv04_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct nv04_miptree *mt;
+
+ mt = MALLOC(sizeof(struct nv04_miptree));
+ if (!mt)
+ return NULL;
+ mt->base = *pt;
+ mt->base.refcount = 1;
+ mt->base.screen = screen;
+
+ nv04_miptree_layout(mt);
+
+ mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL,
+ mt->total_size);
+ if (!mt->buffer) {
+ FREE(mt);
+ return NULL;
+ }
+
+ return &mt->base;
+}
+
+static void
+nv04_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt)
+{
+ struct pipe_texture *mt = *pt;
+
+ *pt = NULL;
+ if (--mt->refcount <= 0) {
+ struct nv04_miptree *nv04mt = (struct nv04_miptree *)mt;
+ int l;
+
+ pipe_buffer_reference(screen, &nv04mt->buffer, NULL);
+ for (l = 0; l <= mt->last_level; l++) {
+ if (nv04mt->level[l].image_offset)
+ FREE(nv04mt->level[l].image_offset);
+ }
+ FREE(nv04mt);
+ }
+}
+
+static struct pipe_surface *
+nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv04_miptree *nv04mt = (struct nv04_miptree *)pt;
+ struct pipe_surface *ps;
+
+ ps = ws->surface_alloc(ws);
+ if (!ps)
+ return NULL;
+ pipe_buffer_reference(pscreen, &ps->buffer, nv04mt->buffer);
+ ps->format = pt->format;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->block = pt->block;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = nv04mt->level[level].pitch;
+ ps->refcount = 1;
+ ps->winsys = pscreen->winsys;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ ps->offset = nv04mt->level[level].image_offset[face];
+ } else {
+ ps->offset = nv04mt->level[level].image_offset[0];
+ }
+
+ return ps;
+}
+
+static void
+nv04_miptree_surface_del(struct pipe_screen *pscreen,
+ struct pipe_surface **psurface)
+{
+}
+
+void
+nv04_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv04_miptree_create;
+ pscreen->texture_release = nv04_miptree_release;
+ pscreen->get_tex_surface = nv04_miptree_surface_new;
+ pscreen->tex_surface_release = nv04_miptree_surface_del;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
new file mode 100644
index 00000000000..19979fff795
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
@@ -0,0 +1,309 @@
+
+#include "pipe/p_debug.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_compiler.h"
+
+#include "draw/draw_vbuf.h"
+
+#include "nv04_context.h"
+#include "nv04_state.h"
+
+#define VERTEX_SIZE 40
+#define VERTEX_BUFFER_SIZE (4096*VERTEX_SIZE) // 4096 vertices of 40 bytes each
+
+/**
+ * Primitive renderer for nv04.
+ */
+struct nv04_vbuf_render {
+ struct vbuf_render base;
+
+ struct nv04_context *nv04;
+
+ /** Vertex buffer */
+ unsigned char* buffer;
+
+ /** Vertex size in bytes */
+ unsigned vertex_size;
+
+ /** Current primitive */
+ unsigned prim;
+};
+
+
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct nv04_vbuf_render *
+nv04_vbuf_render( struct vbuf_render *render )
+{
+ assert(render);
+ return (struct nv04_vbuf_render *)render;
+}
+
+
+static const struct vertex_info *
+nv04_vbuf_render_get_vertex_info( struct vbuf_render *render )
+{
+ struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+ struct nv04_context *nv04 = nv04_render->nv04;
+ return &nv04->vertex_info;
+}
+
+
+static void *
+nv04_vbuf_render_allocate_vertices( struct vbuf_render *render,
+ ushort vertex_size,
+ ushort nr_vertices )
+{
+ struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+
+ nv04_render->buffer = (unsigned char*) MALLOC(VERTEX_BUFFER_SIZE);
+ assert(!nv04_render->buffer);
+
+ return nv04_render->buffer;
+}
+
+
+static boolean
+nv04_vbuf_render_set_primitive( struct vbuf_render *render,
+ unsigned prim )
+{
+ struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+
+ if (prim <= PIPE_PRIM_LINE_STRIP)
+ return FALSE;
+
+ nv04_render->prim = prim;
+ return TRUE;
+}
+
+static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5)
+{
+ BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49);
+ OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v3,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v4,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v5,8);
+ OUT_RING(0xFEDCBA);
+}
+
+static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2)
+{
+ BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25);
+ OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
+ OUT_RING(0xFED);
+}
+
+static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3)
+{
+ BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33);
+ OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
+ OUT_RINGp(buffer + VERTEX_SIZE * v3,8);
+ OUT_RING(0xFECEDC);
+}
+
+static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices)
+{
+ unsigned char* buffer = render->buffer;
+ struct nv04_context* nv04 = render->nv04;
+ int i;
+
+ for( i=0; i< nr_indices-5; i+=6)
+ nv04_2triangles(nv04,
+ buffer,
+ indices[i+0],
+ indices[i+1],
+ indices[i+2],
+ indices[i+3],
+ indices[i+4],
+ indices[i+5]
+ );
+ if (i != nr_indices)
+ {
+ nv04_1triangle(nv04,
+ buffer,
+ indices[i+0],
+ indices[i+1],
+ indices[i+2]
+ );
+ i+=3;
+ }
+ if (i != nr_indices)
+ NOUVEAU_ERR("Houston, we have lost some vertices\n");
+}
+
+static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices)
+{
+ const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC};
+ unsigned char* buffer = render->buffer;
+ struct nv04_context* nv04 = render->nv04;
+ int i,j;
+
+ for(i = 0; i<nr_indices; i+=14)
+ {
+ int numvert = MIN2(16, nr_indices - i);
+ int numtri = numvert - 2;
+ if (numvert<3)
+ break;
+
+ BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 );
+ for(j = 0; j<numvert; j++)
+ OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 );
+
+ BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 );
+ for(j = 0; j<numtri/2; j++ )
+ OUT_RING(striptbl[j]);
+ if (numtri%2)
+ OUT_RING(striptbl[numtri/2]&0xFFF);
+ }
+}
+
+static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices)
+{
+ const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0};
+ unsigned char* buffer = render->buffer;
+ struct nv04_context* nv04 = render->nv04;
+ int i,j;
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8);
+ OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8);
+
+ for(i = 1; i<nr_indices; i+=14)
+ {
+ int numvert=MIN2(15, nr_indices - i);
+ int numtri=numvert-2;
+ if (numvert < 3)
+ break;
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8);
+
+ for(j=0;j<numvert;j++)
+ OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 );
+
+ BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2);
+ for(j = 0; j<numtri/2; j++)
+ OUT_RING(fantbl[j]);
+ if (numtri%2)
+ OUT_RING(fantbl[numtri/2]&0xFFF);
+ }
+}
+
+static void nv04_vbuf_render_quads_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices)
+{
+ unsigned char* buffer = render->buffer;
+ struct nv04_context* nv04 = render->nv04;
+ int i;
+
+ for(i = 0; i < nr_indices; i += 4)
+ nv04_1quad(nv04,
+ buffer,
+ indices[i+0],
+ indices[i+1],
+ indices[i+2],
+ indices[i+3]
+ );
+}
+
+
+static void
+nv04_vbuf_render_draw( struct vbuf_render *render,
+ const ushort *indices,
+ uint nr_indices)
+{
+ struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+
+ // emit the indices
+ switch( nv04_render->prim )
+ {
+ case PIPE_PRIM_TRIANGLES:
+ nv04_vbuf_render_triangles_elts(nv04_render, indices, nr_indices);
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ nv04_vbuf_render_tri_strip_elts(nv04_render, indices, nr_indices);
+ break;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ case PIPE_PRIM_POLYGON:
+ nv04_vbuf_render_tri_fan_elts(nv04_render, indices, nr_indices);
+ break;
+ case PIPE_PRIM_QUADS:
+ nv04_vbuf_render_quads_elts(nv04_render, indices, nr_indices);
+ break;
+ default:
+ NOUVEAU_ERR("You have to implement primitive %d, young padawan\n", nv04_render->prim);
+ break;
+ }
+}
+
+
+static void
+nv04_vbuf_render_release_vertices( struct vbuf_render *render,
+ void *vertices,
+ unsigned vertex_size,
+ unsigned vertices_used )
+{
+ struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+
+ free(nv04_render->buffer);
+ nv04_render->buffer = NULL;
+}
+
+
+static void
+nv04_vbuf_render_destroy( struct vbuf_render *render )
+{
+ struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render);
+ FREE(nv04_render);
+}
+
+
+/**
+ * Create a new primitive render.
+ */
+static struct vbuf_render *
+nv04_vbuf_render_create( struct nv04_context *nv04 )
+{
+ struct nv04_vbuf_render *nv04_render = CALLOC_STRUCT(nv04_vbuf_render);
+
+ nv04_render->nv04 = nv04;
+
+ nv04_render->base.max_vertex_buffer_bytes = VERTEX_BUFFER_SIZE;
+ nv04_render->base.max_indices = 65536;
+ nv04_render->base.get_vertex_info = nv04_vbuf_render_get_vertex_info;
+ nv04_render->base.allocate_vertices = nv04_vbuf_render_allocate_vertices;
+ nv04_render->base.set_primitive = nv04_vbuf_render_set_primitive;
+ nv04_render->base.draw = nv04_vbuf_render_draw;
+ nv04_render->base.release_vertices = nv04_vbuf_render_release_vertices;
+ nv04_render->base.destroy = nv04_vbuf_render_destroy;
+
+ return &nv04_render->base;
+}
+
+
+/**
+ * Create a new primitive vbuf/render stage.
+ */
+struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 )
+{
+ struct vbuf_render *render;
+ struct draw_stage *stage;
+
+ render = nv04_vbuf_render_create(nv04);
+ if(!render)
+ return NULL;
+
+ stage = draw_vbuf_stage( nv04->draw, render );
+ if(!stage) {
+ render->destroy(render);
+ return NULL;
+ }
+
+ return stage;
+}
diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c
new file mode 100644
index 00000000000..3966a29ffa9
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_screen.c
@@ -0,0 +1,212 @@
+#include "pipe/p_screen.h"
+
+#include "nv04_context.h"
+#include "nv04_screen.h"
+
+static const char *
+nv04_screen_get_name(struct pipe_screen *screen)
+{
+ struct nv04_screen *nv04screen = nv04_screen(screen);
+ struct nouveau_device *dev = nv04screen->nvws->channel->device;
+ static char buffer[128];
+
+ snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+ return buffer;
+}
+
+static const char *
+nv04_screen_get_vendor(struct pipe_screen *screen)
+{
+ return "nouveau";
+}
+
+static int
+nv04_screen_get_param(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 1;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 0;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 0;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 0;
+ case PIPE_CAP_POINT_SPRITE:
+ return 0;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 0;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 10;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nv04_screen_get_paramf(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 0.0;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 0.0;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 0.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 0.0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+static boolean
+nv04_screen_is_format_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage, unsigned geom_flags)
+{
+ if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ }
+
+ return FALSE;
+}
+
+static void *
+nv04_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+ unsigned flags )
+{
+ struct pipe_winsys *ws = screen->winsys;
+ void *map;
+
+ map = ws->buffer_map(ws, surface->buffer, flags);
+ if (!map)
+ return NULL;
+
+ return map + surface->offset;
+}
+
+static void
+nv04_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+ struct pipe_winsys *ws = screen->winsys;
+
+ ws->buffer_unmap(ws, surface->buffer);
+}
+
+static void
+nv04_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv04_screen *screen = nv04_screen(pscreen);
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nvws->notifier_free(&screen->sync);
+ nvws->grobj_free(&screen->fahrenheit);
+
+ FREE(pscreen);
+}
+
+struct pipe_screen *
+nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+ struct nv04_screen *screen = CALLOC_STRUCT(nv04_screen);
+ unsigned fahrenheit_class = 0, sub3d_class = 0;
+ unsigned chipset = nvws->channel->device->chipset;
+ int ret;
+
+ if (!screen)
+ return NULL;
+ screen->nvws = nvws;
+
+ if (chipset>=0x20) {
+ fahrenheit_class = 0;
+ sub3d_class = 0;
+ } else if (chipset>=0x10) {
+ fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE;
+ sub3d_class = NV10_CONTEXT_SURFACES_3D;
+ } else {
+ fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE;
+ sub3d_class = NV04_CONTEXT_SURFACES_3D;
+ }
+
+ if (!fahrenheit_class) {
+ NOUVEAU_ERR("Unknown nv04 chipset: nv%02x\n", chipset);
+ return NULL;
+ }
+
+ /* 3D object */
+ ret = nvws->grobj_alloc(nvws, fahrenheit_class, &screen->fahrenheit);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+ return NULL;
+ }
+
+ /* 3D surface object */
+ ret = nvws->grobj_alloc(nvws, sub3d_class, &screen->context_surfaces_3d);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D surface object: %d\n", ret);
+ return NULL;
+ }
+
+ /* Notifier for sync purposes */
+ ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+ if (ret) {
+ NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+ nv04_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ screen->pipe.winsys = ws;
+ screen->pipe.destroy = nv04_screen_destroy;
+
+ screen->pipe.get_name = nv04_screen_get_name;
+ screen->pipe.get_vendor = nv04_screen_get_vendor;
+ screen->pipe.get_param = nv04_screen_get_param;
+ screen->pipe.get_paramf = nv04_screen_get_paramf;
+
+ screen->pipe.is_format_supported = nv04_screen_is_format_supported;
+
+ screen->pipe.surface_map = nv04_surface_map;
+ screen->pipe.surface_unmap = nv04_surface_unmap;
+
+ nv04_screen_init_miptree_functions(&screen->pipe);
+
+ return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_screen.h b/src/gallium/drivers/nv04/nv04_screen.h
new file mode 100644
index 00000000000..99a49cdf7a9
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_screen.h
@@ -0,0 +1,25 @@
+#ifndef __NV04_SCREEN_H__
+#define __NV04_SCREEN_H__
+
+#include "pipe/p_screen.h"
+
+struct nv04_screen {
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+ unsigned chipset;
+
+ /* HW graphics objects */
+ struct nouveau_grobj *fahrenheit;
+ struct nouveau_grobj *context_surfaces_3d;
+ struct nouveau_notifier *sync;
+
+};
+
+static INLINE struct nv04_screen *
+nv04_screen(struct pipe_screen *screen)
+{
+ return (struct nv04_screen *)screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c
new file mode 100644
index 00000000000..ff1933b5508
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_state.c
@@ -0,0 +1,495 @@
+#include "draw/draw_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv04_context.h"
+#include "nv04_state.h"
+
+static void *
+nv04_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nv04_blend_state *cb;
+
+ cb = MALLOC(sizeof(struct nv04_blend_state));
+
+ cb->b_enable = cso->blend_enable ? 1 : 0;
+ cb->b_src = ((nvgl_blend_func(cso->alpha_src_factor)<<16) |
+ (nvgl_blend_func(cso->rgb_src_factor)));
+ cb->b_dst = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) |
+ (nvgl_blend_func(cso->rgb_dst_factor)));
+
+
+ return (void *)cb;
+}
+
+static void
+nv04_blend_state_bind(struct pipe_context *pipe, void *blend)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ nv04->blend = (struct nv04_blend_state*)blend;
+
+ nv04->dirty |= NV04_NEW_BLEND;
+}
+
+static void
+nv04_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ free(hwcso);
+}
+
+
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+ unsigned ret;
+
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_CLAMP:
+ ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
+ }
+ return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT;
+}
+
+static void *
+nv04_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+
+ struct nv04_sampler_state *ss;
+ uint32_t filter = 0;
+
+ ss = MALLOC(sizeof(struct nv04_sampler_state));
+
+ ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) |
+ (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT));
+
+ if (cso->max_anisotropy > 1.0) {
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE;
+ }
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR;
+ break;
+ }
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST;
+ break;
+ }
+ break;
+ }
+
+ ss->filter = filter;
+
+ return (void *)ss;
+}
+
+static void
+nv04_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv04->sampler[unit] = sampler[unit];
+ nv04->dirty_samplers |= (1 << unit);
+ }
+}
+
+static void
+nv04_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ free(hwcso);
+}
+
+static void
+nv04_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+ struct pipe_texture **miptree)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv04->tex_miptree[unit] = (struct nv04_miptree *)miptree[unit];
+ nv04->dirty_samplers |= (1 << unit);
+ }
+}
+
+static void *
+nv04_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nv04_rasterizer_state *rs;
+
+ /*XXX: ignored:
+ * scissor
+ * points/lines (no hw support, emulated with tris in gallium)
+ */
+ rs = MALLOC(sizeof(struct nv04_rasterizer_state));
+
+ rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD;
+
+ return (void *)rs;
+}
+
+static void
+nv04_rasterizer_state_bind(struct pipe_context *pipe, void *rast)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ nv04->rast = (struct nv04_rasterizer_state*)rast;
+
+ draw_set_rasterizer_state(nv04->draw, (nv04->rast ? nv04->rast->templ : NULL));
+
+ nv04->dirty |= NV04_NEW_RAST | NV04_NEW_BLEND;
+}
+
+static void
+nv04_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ free(hwcso);
+}
+
+static INLINE uint32_t nv04_compare_func(uint32_t f)
+{
+ switch ( f ) {
+ case PIPE_FUNC_NEVER: return 1;
+ case PIPE_FUNC_LESS: return 2;
+ case PIPE_FUNC_EQUAL: return 3;
+ case PIPE_FUNC_LEQUAL: return 4;
+ case PIPE_FUNC_GREATER: return 5;
+ case PIPE_FUNC_NOTEQUAL: return 6;
+ case PIPE_FUNC_GEQUAL: return 7;
+ case PIPE_FUNC_ALWAYS: return 8;
+ }
+ NOUVEAU_MSG("Unable to find the function\n");
+ return 0;
+}
+
+static void *
+nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nv04_depth_stencil_alpha_state *hw;
+
+ hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state));
+
+ hw->control = float_to_ubyte(cso->alpha.ref);
+ hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT );
+ hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0;
+ hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN;
+ hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0;
+ hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT );
+ hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module
+ hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE;
+ hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE;
+ hw->control |= cso->depth.writemask ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0;
+ hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format
+
+ return (void *)hw;
+}
+
+static void
+nv04_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ nv04->dsa = hwcso;
+ nv04->dirty |= NV04_NEW_CONTROL;
+}
+
+static void
+nv04_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ free(hwcso);
+}
+
+static void *
+nv04_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ return draw_create_vertex_shader(nv04->draw, templ);
+}
+
+static void
+nv04_vp_state_bind(struct pipe_context *pipe, void *shader)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ draw_bind_vertex_shader(nv04->draw, (struct draw_vertex_shader *) shader);
+
+ nv04->dirty |= NV04_NEW_VERTPROG;
+}
+
+static void
+nv04_vp_state_delete(struct pipe_context *pipe, void *shader)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ draw_delete_vertex_shader(nv04->draw, (struct draw_vertex_shader *) shader);
+}
+
+static void *
+nv04_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv04_fragment_program *fp;
+
+ fp = CALLOC(1, sizeof(struct nv04_fragment_program));
+ fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ return (void *)fp;
+}
+
+static void
+nv04_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ struct nv04_fragment_program *fp = hwcso;
+
+ nv04->fragprog.current = fp;
+ nv04->dirty |= NV04_NEW_FRAGPROG;
+}
+
+static void
+nv04_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ struct nv04_fragment_program *fp = hwcso;
+
+ nv04_fragprog_destroy(nv04, fp);
+ free((void*)fp->pipe.tokens);
+ free(fp);
+}
+
+static void
+nv04_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+}
+
+static void
+nv04_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+}
+
+static void
+nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ const struct pipe_constant_buffer *buf )
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ if (shader == PIPE_SHADER_VERTEX) {
+ nv04->vertprog.constant_buf = buf->buffer;
+ nv04->dirty |= NV04_NEW_VERTPROG;
+ } else
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ nv04->fragprog.constant_buf = buf->buffer;
+ nv04->dirty |= NV04_NEW_FRAGPROG;
+ }
+}
+
+static void
+nv04_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ struct pipe_surface *rt, *zeta;
+ uint32_t rt_format, w, h;
+ int colour_format = 0, zeta_format = 0;
+
+ w = fb->cbufs[0]->width;
+ h = fb->cbufs[0]->height;
+ colour_format = fb->cbufs[0]->format;
+ rt = fb->cbufs[0];
+
+ if (fb->zsbuf) {
+ if (colour_format) {
+ assert(w == fb->zsbuf->width);
+ assert(h == fb->zsbuf->height);
+ } else {
+ w = fb->zsbuf->width;
+ h = fb->zsbuf->height;
+ }
+
+ zeta_format = fb->zsbuf->format;
+ zeta = fb->zsbuf;
+ }
+
+ switch (colour_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case 0:
+ rt_format = 0x108;
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ rt_format = 0x103;
+ break;
+ default:
+ assert(0);
+ }
+
+ BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1);
+ OUT_RING(rt_format);
+
+ /* FIXME pitches have to be aligned ! */
+ BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
+ OUT_RING(rt->stride|(zeta->stride<<16));
+ OUT_RELOCl(rt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ if (fb->zsbuf) {
+ BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
+ OUT_RELOCl(zeta->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ }
+}
+
+static void
+nv04_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ NOUVEAU_ERR("line stipple hahaha\n");
+}
+
+static void
+nv04_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+/* struct nv04_context *nv04 = nv04_context(pipe);
+
+ // XXX
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2);
+ OUT_RING (((s->maxx - s->minx) << 16) | s->minx);
+ OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/
+}
+
+static void
+nv04_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *viewport)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ nv04->viewport = *viewport;
+
+ draw_set_viewport_state(nv04->draw, &nv04->viewport);
+}
+
+static void
+nv04_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *buffers)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ draw_flush(nv04->draw);
+
+ memcpy(nv04->vertex_buffer, buffers, count * sizeof(buffers[0]));
+ nv04->num_vertex_buffers = count;
+
+ draw_set_vertex_buffers(nv04->draw, count, buffers);
+}
+
+static void
+nv04_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_element *elements)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+
+ draw_flush(nv04->draw);
+
+ nv04->num_vertex_elements = count;
+ draw_set_vertex_elements(nv04->draw, count, elements);
+}
+
+void
+nv04_init_state_functions(struct nv04_context *nv04)
+{
+ nv04->pipe.create_blend_state = nv04_blend_state_create;
+ nv04->pipe.bind_blend_state = nv04_blend_state_bind;
+ nv04->pipe.delete_blend_state = nv04_blend_state_delete;
+
+ nv04->pipe.create_sampler_state = nv04_sampler_state_create;
+ nv04->pipe.bind_sampler_states = nv04_sampler_state_bind;
+ nv04->pipe.delete_sampler_state = nv04_sampler_state_delete;
+ nv04->pipe.set_sampler_textures = nv04_set_sampler_texture;
+
+ nv04->pipe.create_rasterizer_state = nv04_rasterizer_state_create;
+ nv04->pipe.bind_rasterizer_state = nv04_rasterizer_state_bind;
+ nv04->pipe.delete_rasterizer_state = nv04_rasterizer_state_delete;
+
+ nv04->pipe.create_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_create;
+ nv04->pipe.bind_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_bind;
+ nv04->pipe.delete_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_delete;
+
+ nv04->pipe.create_vs_state = nv04_vp_state_create;
+ nv04->pipe.bind_vs_state = nv04_vp_state_bind;
+ nv04->pipe.delete_vs_state = nv04_vp_state_delete;
+
+ nv04->pipe.create_fs_state = nv04_fp_state_create;
+ nv04->pipe.bind_fs_state = nv04_fp_state_bind;
+ nv04->pipe.delete_fs_state = nv04_fp_state_delete;
+
+ nv04->pipe.set_blend_color = nv04_set_blend_color;
+ nv04->pipe.set_clip_state = nv04_set_clip_state;
+ nv04->pipe.set_constant_buffer = nv04_set_constant_buffer;
+ nv04->pipe.set_framebuffer_state = nv04_set_framebuffer_state;
+ nv04->pipe.set_polygon_stipple = nv04_set_polygon_stipple;
+ nv04->pipe.set_scissor_state = nv04_set_scissor_state;
+ nv04->pipe.set_viewport_state = nv04_set_viewport_state;
+
+ nv04->pipe.set_vertex_buffers = nv04_set_vertex_buffers;
+ nv04->pipe.set_vertex_elements = nv04_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h
new file mode 100644
index 00000000000..399f750dbe7
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_state.h
@@ -0,0 +1,71 @@
+#ifndef __NV04_STATE_H__
+#define __NV04_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+struct nv04_blend_state {
+ uint32_t b_enable;
+ uint32_t b_src;
+ uint32_t b_dst;
+};
+
+struct nv04_fragtex_state {
+ uint32_t format;
+};
+
+struct nv04_sampler_state {
+ uint32_t filter;
+ uint32_t format;
+};
+
+struct nv04_depth_stencil_alpha_state {
+ uint32_t control;
+};
+
+struct nv04_rasterizer_state {
+ uint32_t blend;
+
+ const struct pipe_rasterizer_state *templ;
+};
+
+struct nv04_miptree {
+ struct pipe_texture base;
+
+ struct pipe_buffer *buffer;
+ uint total_size;
+
+ struct {
+ uint pitch;
+ uint *image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+struct nv04_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+struct nv04_fragment_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+
+ boolean translated;
+ boolean on_hw;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nv04_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ uint32_t fp_reg_control;
+};
+
+
+
+#endif
diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c
new file mode 100644
index 00000000000..0ad40a092ea
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_state_emit.c
@@ -0,0 +1,156 @@
+#include "nv04_context.h"
+#include "nv04_state.h"
+
+static void nv04_vertex_layout(struct pipe_context* pipe)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ struct nv04_fragment_program *fp = nv04->fragprog.current;
+ uint32_t src = 0;
+ int i;
+ struct vertex_info vinfo;
+
+ memset(&vinfo, 0, sizeof(vinfo));
+
+ for (i = 0; i < fp->info.num_inputs; i++) {
+ switch (i) {
+ case TGSI_SEMANTIC_POSITION:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++);
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++);
+ break;
+ default:
+ case TGSI_SEMANTIC_GENERIC:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++);
+ break;
+ case TGSI_SEMANTIC_FOG:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++);
+ break;
+ }
+ }
+ draw_compute_vertex_size(&vinfo);
+}
+
+static uint32_t nv04_blend_func(uint32_t f)
+{
+ switch ( f ) {
+ case PIPE_BLENDFACTOR_ZERO: return 0x1;
+ case PIPE_BLENDFACTOR_ONE: return 0x2;
+ case PIPE_BLENDFACTOR_SRC_COLOR: return 0x3;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR: return 0x4;
+ case PIPE_BLENDFACTOR_SRC_ALPHA: return 0x5;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return 0x6;
+ case PIPE_BLENDFACTOR_DST_ALPHA: return 0x7;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA: return 0x8;
+ case PIPE_BLENDFACTOR_DST_COLOR: return 0x9;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR: return 0xA;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return 0xB;
+ }
+ NOUVEAU_MSG("Unable to find the blend function 0x%x\n",f);
+ return 0;
+}
+
+static void nv04_emit_control(struct nv04_context* nv04)
+{
+ uint32_t control = nv04->dsa->control;
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
+ OUT_RING(control);
+}
+
+static void nv04_emit_blend(struct nv04_context* nv04)
+{
+ uint32_t blend;
+
+ blend=0x4; // texture MODULATE_ALPHA
+ blend|=0x20; // alpha is MSB
+ blend|=(2<<6); // flat shading
+ blend|=(1<<8); // persp correct
+ blend|=(0<<16); // no fog
+ blend|=(nv04->blend->b_enable<<20);
+ blend|=(nv04_blend_func(nv04->blend->b_src)<<24);
+ blend|=(nv04_blend_func(nv04->blend->b_dst)<<28);
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1);
+ OUT_RING(blend);
+}
+
+static void nv04_emit_sampler(struct nv04_context *nv04, int unit)
+{
+ struct nv04_miptree *nv04mt = nv04->tex_miptree[unit];
+ struct pipe_texture *pt = &nv04mt->base;
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3);
+ OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ OUT_RING(nv04->sampler[unit]->filter);
+}
+
+void
+nv04_emit_hw_state(struct nv04_context *nv04)
+{
+ int i;
+
+ if (nv04->dirty & NV04_NEW_VERTPROG) {
+ //nv04_vertprog_bind(nv04, nv04->vertprog.current);
+ nv04->dirty &= ~NV04_NEW_VERTPROG;
+ }
+
+ if (nv04->dirty & NV04_NEW_FRAGPROG) {
+ nv04_fragprog_bind(nv04, nv04->fragprog.current);
+ /*XXX: clear NV04_NEW_FRAGPROG if no new program uploaded */
+ nv04->dirty_samplers |= (1<<10);
+ nv04->dirty_samplers = 0;
+ }
+
+ if (nv04->dirty & NV04_NEW_CONTROL) {
+ nv04->dirty &= ~NV04_NEW_CONTROL;
+
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
+ OUT_RING(nv04->dsa->control);
+ }
+
+ if (nv04->dirty & NV04_NEW_BLEND) {
+ nv04->dirty &= ~NV04_NEW_BLEND;
+
+ nv04_emit_blend(nv04);
+ }
+
+ if (nv04->dirty & NV04_NEW_SAMPLER) {
+ nv04->dirty &= ~NV04_NEW_SAMPLER;
+
+ nv04_emit_sampler(nv04, 0);
+ }
+
+ if (nv04->dirty & NV04_NEW_VIEWPORT) {
+ nv04->dirty &= ~NV04_NEW_VIEWPORT;
+// nv04_state_emit_viewport(nv04);
+ }
+
+ /* Emit relocs for every referenced buffer.
+ * This is to ensure the bufmgr has an accurate idea of how
+ * the buffer is used. This isn't very efficient, but we don't
+ * seem to take a significant performance hit. Will be improved
+ * at some point. Vertex arrays are emitted by nv04_vbo.c
+ */
+
+ /* Render target */
+/* BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
+ OUT_RING(rt->stride|(zeta->stride<<16));
+ OUT_RELOCl(rt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ if (fb->zsbuf) {
+ BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
+ OUT_RELOCl(zeta->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ }*/
+
+ /* Texture images */
+ for (i = 0; i < 1; i++) {
+ if (!(nv04->fp_samplers & (1 << i)))
+ continue;
+ struct nv04_miptree *nv04mt = nv04->tex_miptree[i];
+ BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2);
+ OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ }
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_surface.c b/src/gallium/drivers/nv04/nv04_surface.c
new file mode 100644
index 00000000000..9d9943ed4e4
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_surface.c
@@ -0,0 +1,64 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv04_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+#include "util/u_tile.h"
+
+static void
+nv04_surface_copy(struct pipe_context *pipe, boolean do_flip,
+ struct pipe_surface *dest, unsigned destx, unsigned desty,
+ struct pipe_surface *src, unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ struct nouveau_winsys *nvws = nv04->nvws;
+
+ nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy,
+ width, height);
+}
+
+static void
+nv04_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nv04_context *nv04 = nv04_context(pipe);
+ struct nouveau_winsys *nvws = nv04->nvws;
+
+ nvws->surface_fill(nvws, dest, destx, desty, width, height, value);
+}
+
+void
+nv04_init_surface_functions(struct nv04_context *nv04)
+{
+ nv04->pipe.surface_copy = nv04_surface_copy;
+ nv04->pipe.surface_fill = nv04_surface_fill;
+}
diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c
new file mode 100644
index 00000000000..91f919d48ec
--- /dev/null
+++ b/src/gallium/drivers/nv04/nv04_vbo.c
@@ -0,0 +1,71 @@
+#include "draw/draw_context.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv04_context.h"
+#include "nv04_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+
+boolean nv04_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count)
+{
+ struct nv04_context *nv04 = nv04_context( pipe );
+ struct draw_context *draw = nv04->draw;
+ unsigned i;
+
+ /*
+ * Map vertex buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv04->vertex_buffer[i].buffer) {
+ void *buf
+ = pipe->winsys->buffer_map(pipe->winsys,
+ nv04->vertex_buffer[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
+ }
+ /* Map index buffer, if present */
+ if (indexBuffer) {
+ void *mapped_indexes
+ = pipe->winsys->buffer_map(pipe->winsys, indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes);
+ }
+ else {
+ /* no index/element buffer */
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ /* draw! */
+ draw_arrays(nv04->draw, prim, start, count);
+
+ /*
+ * unmap vertex/index buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv04->vertex_buffer[i].buffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, nv04->vertex_buffer[i].buffer);
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ }
+ }
+ if (indexBuffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer);
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ return TRUE;
+}
+
+boolean nv04_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
+{
+ return nv04_draw_elements(pipe, NULL, 0, prim, start, count);
+}
+
+
+
diff --git a/src/gallium/drivers/nv10/Makefile b/src/gallium/drivers/nv10/Makefile
new file mode 100644
index 00000000000..4ba7ce586d6
--- /dev/null
+++ b/src/gallium/drivers/nv10/Makefile
@@ -0,0 +1,28 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv10
+
+DRIVER_SOURCES = \
+ nv10_clear.c \
+ nv10_context.c \
+ nv10_fragprog.c \
+ nv10_fragtex.c \
+ nv10_miptree.c \
+ nv10_prim_vbuf.c \
+ nv10_screen.c \
+ nv10_state.c \
+ nv10_state_emit.c \
+ nv10_surface.c \
+ nv10_vbo.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/nv10/nv10_clear.c b/src/gallium/drivers/nv10/nv10_clear.c
new file mode 100644
index 00000000000..be7e09cf4b0
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_clear.c
@@ -0,0 +1,12 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv10_context.h"
+
+void
+nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue);
+}
diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c
new file mode 100644
index 00000000000..4eb4ed9185e
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_context.c
@@ -0,0 +1,296 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv10_context.h"
+#include "nv10_screen.h"
+
+static void
+nv10_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ draw_flush(nv10->draw);
+
+ FIRE_RING(fence);
+}
+
+static void
+nv10_destroy(struct pipe_context *pipe)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ if (nv10->draw)
+ draw_destroy(nv10->draw);
+
+ FREE(nv10);
+}
+
+static void nv10_init_hwctx(struct nv10_context *nv10)
+{
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_winsys *nvws = screen->nvws;
+ int i;
+ float projectionmatrix[16];
+
+ BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1);
+ OUT_RING (screen->sync->handle);
+ BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->gart->handle);
+ BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->vram->handle);
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+ OUT_RING ((0x7ff<<16)|0x800);
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
+ OUT_RING ((0x7ff<<16)|0x800);
+
+ for (i=1;i<8;i++) {
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1);
+ OUT_RING (0);
+ }
+
+ BEGIN_RING(celsius, 0x290, 1);
+ OUT_RING ((0x10<<16)|1);
+ BEGIN_RING(celsius, 0x3f4, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+
+ if (nv10->screen->celsius->grclass != NV10TCL) {
+ /* For nv11, nv17 */
+ BEGIN_RING(celsius, 0x120, 3);
+ OUT_RING (0);
+ OUT_RING (1);
+ OUT_RING (2);
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+ }
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+
+ /* Set state */
+ BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2);
+ OUT_RING (0x207);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2);
+ OUT_RING (0);
+ OUT_RING (0);
+
+ BEGIN_RING(celsius, NV10TCL_RC_IN_ALPHA(0), 12);
+ OUT_RING (0x30141010);
+ OUT_RING (0);
+ OUT_RING (0x20040000);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0x00000c00);
+ OUT_RING (0);
+ OUT_RING (0x00000c00);
+ OUT_RING (0x18000000);
+ OUT_RING (0x300e0300);
+ OUT_RING (0x0c091c80);
+
+ BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2);
+ OUT_RING (1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4);
+ OUT_RING (1);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0x8006);
+ BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8);
+ OUT_RING (0xff);
+ OUT_RING (0x207);
+ OUT_RING (0);
+ OUT_RING (0xff);
+ OUT_RING (0x1e00);
+ OUT_RING (0x1e00);
+ OUT_RING (0x1e00);
+ OUT_RING (0x1d01);
+ BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1);
+ OUT_RING (0x201);
+ BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1);
+ OUT_RING (8);
+ BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1);
+ OUT_RING (8);
+ BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (0x1b02);
+ OUT_RING (0x1b02);
+ BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2);
+ OUT_RING (0x405);
+ OUT_RING (0x901);
+ BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_TX_GEN_S(0), 8);
+ for (i=0;i<8;i++) {
+ OUT_RING (0);
+ }
+ BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3);
+ OUT_RING (0x3fc00000); /* -1.50 */
+ OUT_RING (0xbdb8aa0a); /* -0.09 */
+ OUT_RING (0); /* 0.00 */
+
+ BEGIN_RING(celsius, NV10TCL_NOP, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2);
+ OUT_RING (0x802);
+ OUT_RING (2);
+ /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when
+ * using texturing, except when using the texture matrix
+ */
+ BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1);
+ OUT_RING (6);
+ BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1);
+ OUT_RING (0x01010101);
+
+ /* Set vertex component */
+ BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4);
+ OUT_RINGf (1.0);
+ OUT_RINGf (1.0);
+ OUT_RINGf (1.0);
+ OUT_RINGf (1.0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RINGf (1.0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (1.0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (1.0);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1);
+ OUT_RINGf (0.0);
+ BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1);
+ OUT_RING (1);
+
+ memset(projectionmatrix, 0, sizeof(projectionmatrix));
+ BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16);
+ projectionmatrix[0*4+0] = 1.0;
+ projectionmatrix[1*4+1] = 1.0;
+ projectionmatrix[2*4+2] = 1.0;
+ projectionmatrix[3*4+3] = 1.0;
+ for (i=0;i<16;i++) {
+ OUT_RINGf (projectionmatrix[i]);
+ }
+
+ BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2);
+ OUT_RING (0.0);
+ OUT_RINGf (16777216.0);
+
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4);
+ OUT_RINGf (-2048.0);
+ OUT_RINGf (-2048.0);
+ OUT_RINGf (16777215.0 * 0.5);
+ OUT_RING (0);
+
+ FIRE_RING (NULL);
+}
+
+static void
+nv10_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+}
+
+struct pipe_context *
+nv10_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+ struct nv10_screen *screen = nv10_screen(pscreen);
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv10_context *nv10;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nv10 = CALLOC(1, sizeof(struct nv10_context));
+ if (!nv10)
+ return NULL;
+ nv10->screen = screen;
+ nv10->pctx_id = pctx_id;
+
+ nv10->nvws = nvws;
+
+ nv10->pipe.winsys = ws;
+ nv10->pipe.screen = pscreen;
+ nv10->pipe.destroy = nv10_destroy;
+ nv10->pipe.set_edgeflags = nv10_set_edgeflags;
+ nv10->pipe.draw_arrays = nv10_draw_arrays;
+ nv10->pipe.draw_elements = nv10_draw_elements;
+ nv10->pipe.clear = nv10_clear;
+ nv10->pipe.flush = nv10_flush;
+
+ nv10_init_surface_functions(nv10);
+ nv10_init_state_functions(nv10);
+
+ nv10->draw = draw_create();
+ assert(nv10->draw);
+ draw_set_rasterize_stage(nv10->draw, nv10_draw_vbuf_stage(nv10));
+
+ nv10_init_hwctx(nv10);
+
+ return &nv10->pipe;
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h
new file mode 100644
index 00000000000..f3b56de25a7
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_context.h
@@ -0,0 +1,153 @@
+#ifndef __NV10_CONTEXT_H__
+#define __NV10_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv10_screen *ctx = nv10->screen
+#include "nouveau/nouveau_push.h"
+
+#include "nv10_state.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+#define NV10_NEW_VERTPROG (1 << 0)
+#define NV10_NEW_FRAGPROG (1 << 1)
+#define NV10_NEW_VTXARRAYS (1 << 2)
+#define NV10_NEW_BLEND (1 << 3)
+#define NV10_NEW_BLENDCOL (1 << 4)
+#define NV10_NEW_RAST (1 << 5)
+#define NV10_NEW_DSA (1 << 6)
+#define NV10_NEW_VIEWPORT (1 << 7)
+#define NV10_NEW_SCISSOR (1 << 8)
+#define NV10_NEW_FRAMEBUFFER (1 << 9)
+
+#include "nv10_screen.h"
+
+struct nv10_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv10_screen *screen;
+ unsigned pctx_id;
+
+ struct draw_context *draw;
+
+ uint32_t dirty;
+
+ struct nv10_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+ struct nv10_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned dirty_samplers;
+ unsigned fp_samplers;
+ unsigned vp_samplers;
+
+ uint32_t rt_enable;
+ struct pipe_buffer *rt[4];
+ struct pipe_buffer *zeta;
+ uint32_t lma_offset;
+
+ struct nv10_blend_state *blend;
+ struct pipe_blend_color *blend_color;
+ struct nv10_rasterizer_state *rast;
+ struct nv10_depth_stencil_alpha_state *dsa;
+ struct pipe_viewport_state *viewport;
+ struct pipe_scissor_state *scissor;
+ struct pipe_framebuffer_state *framebuffer;
+
+ //struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ float *constbuf[PIPE_SHADER_TYPES][32][4];
+ unsigned constbuf_nr[PIPE_SHADER_TYPES];
+
+ struct vertex_info vertex_info;
+
+ struct {
+ struct pipe_buffer *buffer;
+ uint32_t format;
+ } tex[2];
+
+ unsigned vb_enable;
+ struct {
+ struct pipe_buffer *buffer;
+ unsigned delta;
+ } vb[16];
+
+/* struct {
+
+ struct nouveau_resource *exec_heap;
+ struct nouveau_resource *data_heap;
+
+ struct nv10_vertex_program *active;
+
+ struct nv10_vertex_program *current;
+ } vertprog;
+*/
+ struct {
+ struct nv10_fragment_program *active;
+
+ struct nv10_fragment_program *current;
+ struct pipe_buffer *constant_buf;
+ } fragprog;
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+};
+
+static INLINE struct nv10_context *
+nv10_context(struct pipe_context *pipe)
+{
+ return (struct nv10_context *)pipe;
+}
+
+extern void nv10_init_state_functions(struct nv10_context *nv10);
+extern void nv10_init_surface_functions(struct nv10_context *nv10);
+
+extern void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+/* nv10_clear.c */
+extern void nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+/* nv10_draw.c */
+extern struct draw_stage *nv10_draw_render_stage(struct nv10_context *nv10);
+
+/* nv10_fragprog.c */
+extern void nv10_fragprog_bind(struct nv10_context *,
+ struct nv10_fragment_program *);
+extern void nv10_fragprog_destroy(struct nv10_context *,
+ struct nv10_fragment_program *);
+
+/* nv10_fragtex.c */
+extern void nv10_fragtex_bind(struct nv10_context *);
+
+/* nv10_prim_vbuf.c */
+struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 );
+extern void nv10_vtxbuf_bind(struct nv10_context* nv10);
+
+/* nv10_state.c and friends */
+extern void nv10_emit_hw_state(struct nv10_context *nv10);
+extern void nv10_state_tex_update(struct nv10_context *nv10);
+
+/* nv10_vbo.c */
+extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv10_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count);
+
+
+#endif
diff --git a/src/gallium/drivers/nv10/nv10_fragprog.c b/src/gallium/drivers/nv10/nv10_fragprog.c
new file mode 100644
index 00000000000..698db5a16a9
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_fragprog.c
@@ -0,0 +1,21 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv10_context.h"
+
+void
+nv10_fragprog_bind(struct nv10_context *nv10, struct nv10_fragment_program *fp)
+{
+}
+
+void
+nv10_fragprog_destroy(struct nv10_context *nv10,
+ struct nv10_fragment_program *fp)
+{
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c
new file mode 100644
index 00000000000..27f2f875847
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_fragtex.c
@@ -0,0 +1,124 @@
+#include "nv10_context.h"
+#include "nouveau/nouveau_util.h"
+
+#define _(m,tf) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV10TCL_TX_FORMAT_FORMAT_##tf, \
+}
+
+struct nv10_texture_format {
+ boolean defined;
+ uint pipe;
+ int format;
+};
+
+static struct nv10_texture_format
+nv10_texture_formats[] = {
+ _(A8R8G8B8_UNORM, A8R8G8B8),
+ _(A1R5G5B5_UNORM, A1R5G5B5),
+ _(A4R4G4B4_UNORM, A4R4G4B4),
+ _(L8_UNORM , L8 ),
+ _(A8_UNORM , A8 ),
+ _(A8L8_UNORM , A8L8 ),
+// _(RGB_DXT1 , DXT1, ),
+// _(RGBA_DXT1 , DXT1, ),
+// _(RGBA_DXT3 , DXT3, ),
+// _(RGBA_DXT5 , DXT5, ),
+ {},
+};
+
+static struct nv10_texture_format *
+nv10_fragtex_format(uint pipe_format)
+{
+ struct nv10_texture_format *tf = nv10_texture_formats;
+
+ while (tf->defined) {
+ if (tf->pipe == pipe_format)
+ return tf;
+ tf++;
+ }
+
+ return NULL;
+}
+
+
+static void
+nv10_fragtex_build(struct nv10_context *nv10, int unit)
+{
+#if 0
+ struct nv10_sampler_state *ps = nv10->tex_sampler[unit];
+ struct nv10_miptree *nv10mt = nv10->tex_miptree[unit];
+ struct pipe_texture *pt = &nv10mt->base;
+ struct nv10_texture_format *tf;
+ uint32_t txf, txs, txp;
+
+ tf = nv10_fragtex_format(pt->format);
+ if (!tf || !tf->defined) {
+ NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format);
+ return;
+ }
+
+ txf = tf->format << 8;
+ txf |= (pt->last_level + 1) << 16;
+ txf |= log2i(pt->width[0]) << 20;
+ txf |= log2i(pt->height[0]) << 24;
+ txf |= log2i(pt->depth[0]) << 28;
+ txf |= 8;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV10TCL_TX_FORMAT_CUBE_MAP;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ txf |= (2<<4);
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= (1<<4);
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return;
+ }
+
+ BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8);
+ OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ OUT_RING (ps->wrap);
+ OUT_RING (0x40000000); /* enable */
+ OUT_RING (txs);
+ OUT_RING (ps->filt | 0x2000 /* magic */);
+ OUT_RING ((pt->width[0] << 16) | pt->height[0]);
+ OUT_RING (ps->bcol);
+#endif
+}
+
+void
+nv10_fragtex_bind(struct nv10_context *nv10)
+{
+#if 0
+ struct nv10_fragment_program *fp = nv10->fragprog.active;
+ unsigned samplers, unit;
+
+ samplers = nv10->fp_samplers & ~fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1);
+ OUT_RING (0);
+ }
+
+ samplers = nv10->dirty_samplers & fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ nv10_fragtex_build(nv10, unit);
+ }
+
+ nv10->fp_samplers = fp->samplers;
+#endif
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c
new file mode 100644
index 00000000000..943f9e21e98
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_miptree.c
@@ -0,0 +1,149 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv10_context.h"
+#include "nv10_screen.h"
+
+static void
+nv10_miptree_layout(struct nv10_miptree *nv10mt)
+{
+ struct pipe_texture *pt = &nv10mt->base;
+ boolean swizzled = FALSE;
+ uint width = pt->width[0], height = pt->height[0];
+ uint offset = 0;
+ int nr_faces, l, f;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ nr_faces = 6;
+ } else {
+ nr_faces = 1;
+ }
+
+ for (l = 0; l <= pt->last_level; l++) {
+ pt->width[l] = width;
+ pt->height[l] = height;
+ pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+ if (swizzled)
+ nv10mt->level[l].pitch = pt->nblocksx[l] * pt->block.size;
+ else
+ nv10mt->level[l].pitch = pt->nblocksx[0] * pt->block.size;
+ nv10mt->level[l].pitch = (nv10mt->level[l].pitch + 63) & ~63;
+
+ nv10mt->level[l].image_offset =
+ CALLOC(nr_faces, sizeof(unsigned));
+
+ width = MAX2(1, width >> 1);
+ height = MAX2(1, height >> 1);
+
+ }
+
+ for (f = 0; f < nr_faces; f++) {
+ for (l = 0; l <= pt->last_level; l++) {
+ nv10mt->level[l].image_offset[f] = offset;
+ offset += nv10mt->level[l].pitch * pt->height[l];
+ }
+ }
+
+ nv10mt->total_size = offset;
+}
+
+static struct pipe_texture *
+nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct nv10_miptree *mt;
+
+ mt = MALLOC(sizeof(struct nv10_miptree));
+ if (!mt)
+ return NULL;
+ mt->base = *pt;
+ mt->base.refcount = 1;
+ mt->base.screen = screen;
+
+ nv10_miptree_layout(mt);
+
+ mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL,
+ mt->total_size);
+ if (!mt->buffer) {
+ FREE(mt);
+ return NULL;
+ }
+
+ return &mt->base;
+}
+
+static void
+nv10_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt)
+{
+ struct pipe_texture *mt = *pt;
+
+ *pt = NULL;
+ if (--mt->refcount <= 0) {
+ struct nv10_miptree *nv10mt = (struct nv10_miptree *)mt;
+ int l;
+
+ pipe_buffer_reference(screen, &nv10mt->buffer, NULL);
+ for (l = 0; l <= mt->last_level; l++) {
+ if (nv10mt->level[l].image_offset)
+ FREE(nv10mt->level[l].image_offset);
+ }
+ FREE(nv10mt);
+ }
+}
+
+static void
+nv10_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt,
+ uint face, uint levels)
+{
+}
+
+
+static struct pipe_surface *
+nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct nv10_miptree *nv10mt = (struct nv10_miptree *)pt;
+ struct pipe_surface *ps;
+
+ ps = ws->surface_alloc(ws);
+ if (!ps)
+ return NULL;
+ pipe_buffer_reference(screen, &ps->buffer, nv10mt->buffer);
+ ps->format = pt->format;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->block = pt->block;
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = nv10mt->level[level].pitch;
+ ps->refcount = 1;
+ ps->winsys = screen->winsys;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ ps->offset = nv10mt->level[level].image_offset[face];
+ } else {
+ ps->offset = nv10mt->level[level].image_offset[0];
+ }
+
+ return ps;
+}
+
+static void
+nv10_miptree_surface_release(struct pipe_screen *screen,
+ struct pipe_surface **surface)
+{
+}
+
+void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv10_miptree_create;
+ pscreen->texture_release = nv10_miptree_release;
+ pscreen->get_tex_surface = nv10_miptree_surface_get;
+ pscreen->tex_surface_release = nv10_miptree_surface_release;
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c
new file mode 100644
index 00000000000..e7e81d3dff8
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c
@@ -0,0 +1,245 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Build post-transformation, post-clipping vertex buffers and element
+ * lists by hooking into the end of the primitive pipeline and
+ * manipulating the vertex_id field in the vertex headers.
+ *
+ * XXX: work in progress
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_debug.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv10_context.h"
+#include "nv10_state.h"
+
+#include "draw/draw_vbuf.h"
+
+/**
+ * Primitive renderer for nv10.
+ */
+struct nv10_vbuf_render {
+ struct vbuf_render base;
+
+ struct nv10_context *nv10;
+
+ /** Vertex buffer */
+ struct pipe_buffer* buffer;
+
+ /** Vertex size in bytes */
+ unsigned vertex_size;
+
+ /** Hardware primitive */
+ unsigned hwprim;
+};
+
+
+void nv10_vtxbuf_bind( struct nv10_context* nv10 )
+{
+ int i;
+ for(i = 0; i < 8; i++) {
+ BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(i), 1);
+ OUT_RING(0/*nv10->vtxbuf*/);
+ BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(i) ,1);
+ OUT_RING(0/*XXX*/);
+ }
+}
+
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct nv10_vbuf_render *
+nv10_vbuf_render( struct vbuf_render *render )
+{
+ assert(render);
+ return (struct nv10_vbuf_render *)render;
+}
+
+
+static const struct vertex_info *
+nv10_vbuf_render_get_vertex_info( struct vbuf_render *render )
+{
+ struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+ struct nv10_context *nv10 = nv10_render->nv10;
+
+ nv10_emit_hw_state(nv10);
+
+ return &nv10->vertex_info;
+}
+
+
+static void *
+nv10_vbuf_render_allocate_vertices( struct vbuf_render *render,
+ ushort vertex_size,
+ ushort nr_vertices )
+{
+ struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+ struct nv10_context *nv10 = nv10_render->nv10;
+ struct pipe_winsys *winsys = nv10->pipe.winsys;
+ size_t size = (size_t)vertex_size * (size_t)nr_vertices;
+
+ assert(!nv10_render->buffer);
+ nv10_render->buffer = winsys->buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size);
+
+ nv10->dirty |= NV10_NEW_VTXARRAYS;
+
+ return winsys->buffer_map(winsys,
+ nv10_render->buffer,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+}
+
+
+static boolean
+nv10_vbuf_render_set_primitive( struct vbuf_render *render,
+ unsigned prim )
+{
+ struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+ unsigned hwp = nvgl_primitive(prim);
+ if (hwp == 0)
+ return FALSE;
+
+ nv10_render->hwprim = hwp;
+ return TRUE;
+}
+
+
+static void
+nv10_vbuf_render_draw( struct vbuf_render *render,
+ const ushort *indices,
+ uint nr_indices)
+{
+ struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+ struct nv10_context *nv10 = nv10_render->nv10;
+ int push, i;
+
+ nv10_emit_hw_state(nv10);
+
+ BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
+ OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+
+ BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING(nv10_render->hwprim);
+
+ if (nr_indices & 1) {
+ BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (indices[0]);
+ indices++; nr_indices--;
+ }
+
+ while (nr_indices) {
+ // XXX too big/small ? check the size
+ push = MIN2(nr_indices, 1200 * 2);
+
+ BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((indices[i+1] << 16) | indices[i]);
+
+ nr_indices -= push;
+ indices += push;
+ }
+
+ BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING (0);
+}
+
+
+static void
+nv10_vbuf_render_release_vertices( struct vbuf_render *render,
+ void *vertices,
+ unsigned vertex_size,
+ unsigned vertices_used )
+{
+ struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+ struct nv10_context *nv10 = nv10_render->nv10;
+ struct pipe_winsys *winsys = nv10->pipe.winsys;
+ struct pipe_screen *pscreen = &nv10->screen->pipe;
+
+ assert(nv10_render->buffer);
+ winsys->buffer_unmap(winsys, nv10_render->buffer);
+ pipe_buffer_reference(pscreen, &nv10_render->buffer, NULL);
+}
+
+
+static void
+nv10_vbuf_render_destroy( struct vbuf_render *render )
+{
+ struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
+ FREE(nv10_render);
+}
+
+
+/**
+ * Create a new primitive render.
+ */
+static struct vbuf_render *
+nv10_vbuf_render_create( struct nv10_context *nv10 )
+{
+ struct nv10_vbuf_render *nv10_render = CALLOC_STRUCT(nv10_vbuf_render);
+
+ nv10_render->nv10 = nv10;
+
+ nv10_render->base.max_vertex_buffer_bytes = 16*1024;
+ nv10_render->base.max_indices = 1024;
+ nv10_render->base.get_vertex_info = nv10_vbuf_render_get_vertex_info;
+ nv10_render->base.allocate_vertices = nv10_vbuf_render_allocate_vertices;
+ nv10_render->base.set_primitive = nv10_vbuf_render_set_primitive;
+ nv10_render->base.draw = nv10_vbuf_render_draw;
+ nv10_render->base.release_vertices = nv10_vbuf_render_release_vertices;
+ nv10_render->base.destroy = nv10_vbuf_render_destroy;
+
+ return &nv10_render->base;
+}
+
+
+/**
+ * Create a new primitive vbuf/render stage.
+ */
+struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 )
+{
+ struct vbuf_render *render;
+ struct draw_stage *stage;
+
+ render = nv10_vbuf_render_create(nv10);
+ if(!render)
+ return NULL;
+
+ stage = draw_vbuf_stage( nv10->draw, render );
+ if(!stage) {
+ render->destroy(render);
+ return NULL;
+ }
+
+ return stage;
+}
diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c
new file mode 100644
index 00000000000..27a9edf9bba
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_screen.c
@@ -0,0 +1,208 @@
+#include "pipe/p_screen.h"
+
+#include "nv10_context.h"
+#include "nv10_screen.h"
+
+static const char *
+nv10_screen_get_name(struct pipe_screen *screen)
+{
+ struct nv10_screen *nv10screen = nv10_screen(screen);
+ struct nouveau_device *dev = nv10screen->nvws->channel->device;
+ static char buffer[128];
+
+ snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+ return buffer;
+}
+
+static const char *
+nv10_screen_get_vendor(struct pipe_screen *screen)
+{
+ return "nouveau";
+}
+
+static int
+nv10_screen_get_param(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 2;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 0;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 0;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_POINT_SPRITE:
+ return 0;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 0;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 12;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 12;
+ case NOUVEAU_CAP_HW_VTXBUF:
+ case NOUVEAU_CAP_HW_IDXBUF:
+ return 0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nv10_screen_get_paramf(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 2.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 4.0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+static boolean
+nv10_screen_is_format_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage, unsigned geom_flags)
+{
+ if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ }
+
+ return FALSE;
+}
+
+static void *
+nv10_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+ unsigned flags )
+{
+ struct pipe_winsys *ws = screen->winsys;
+ void *map;
+
+ map = ws->buffer_map(ws, surface->buffer, flags);
+ if (!map)
+ return NULL;
+
+ return map + surface->offset;
+}
+
+static void
+nv10_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+ struct pipe_winsys *ws = screen->winsys;
+
+ ws->buffer_unmap(ws, surface->buffer);
+}
+
+static void
+nv10_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv10_screen *screen = nv10_screen(pscreen);
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nvws->notifier_free(&screen->sync);
+ nvws->grobj_free(&screen->celsius);
+
+ FREE(pscreen);
+}
+
+struct pipe_screen *
+nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+ struct nv10_screen *screen = CALLOC_STRUCT(nv10_screen);
+ unsigned celsius_class;
+ unsigned chipset = nvws->channel->device->chipset;
+ int ret;
+
+ if (!screen)
+ return NULL;
+ screen->nvws = nvws;
+
+ /* 3D object */
+ if (chipset>=0x20)
+ celsius_class=NV11TCL;
+ else if (chipset>=0x17)
+ celsius_class=NV17TCL;
+ else if (chipset>=0x11)
+ celsius_class=NV11TCL;
+ else
+ celsius_class=NV10TCL;
+
+ if (!celsius_class) {
+ NOUVEAU_ERR("Unknown nv1x chipset: nv%02x\n", chipset);
+ return NULL;
+ }
+
+ ret = nvws->grobj_alloc(nvws, celsius_class, &screen->celsius);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+ return FALSE;
+ }
+
+ /* Notifier for sync purposes */
+ ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+ if (ret) {
+ NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+ nv10_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ screen->pipe.winsys = ws;
+ screen->pipe.destroy = nv10_screen_destroy;
+
+ screen->pipe.get_name = nv10_screen_get_name;
+ screen->pipe.get_vendor = nv10_screen_get_vendor;
+ screen->pipe.get_param = nv10_screen_get_param;
+ screen->pipe.get_paramf = nv10_screen_get_paramf;
+
+ screen->pipe.is_format_supported = nv10_screen_is_format_supported;
+
+ screen->pipe.surface_map = nv10_surface_map;
+ screen->pipe.surface_unmap = nv10_surface_unmap;
+
+ nv10_screen_init_miptree_functions(&screen->pipe);
+
+ return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_screen.h b/src/gallium/drivers/nv10/nv10_screen.h
new file mode 100644
index 00000000000..3f8750a13f7
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_screen.h
@@ -0,0 +1,22 @@
+#ifndef __NV10_SCREEN_H__
+#define __NV10_SCREEN_H__
+
+#include "pipe/p_screen.h"
+
+struct nv10_screen {
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+
+ /* HW graphics objects */
+ struct nouveau_grobj *celsius;
+ struct nouveau_notifier *sync;
+};
+
+static INLINE struct nv10_screen *
+nv10_screen(struct pipe_screen *screen)
+{
+ return (struct nv10_screen *)screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c
new file mode 100644
index 00000000000..d2375aa2f64
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_state.c
@@ -0,0 +1,588 @@
+#include "draw/draw_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv10_context.h"
+#include "nv10_state.h"
+
+static void *
+nv10_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nv10_blend_state *cb;
+
+ cb = MALLOC(sizeof(struct nv10_blend_state));
+
+ cb->b_enable = cso->blend_enable ? 1 : 0;
+ cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) |
+ (nvgl_blend_func(cso->rgb_src_factor)));
+ cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) |
+ (nvgl_blend_func(cso->rgb_dst_factor)));
+
+ cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) |
+ ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) |
+ ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) |
+ ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0));
+
+ cb->d_enable = cso->dither ? 1 : 0;
+
+ return (void *)cb;
+}
+
+static void
+nv10_blend_state_bind(struct pipe_context *pipe, void *blend)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->blend = (struct nv10_blend_state*)blend;
+
+ nv10->dirty |= NV10_NEW_BLEND;
+}
+
+static void
+nv10_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+ unsigned ret;
+
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_MIRRORED_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_CLAMP:
+ ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ ret = NV10TCL_TX_FORMAT_WRAP_S_REPEAT;
+ break;
+ }
+
+ return ret >> NV10TCL_TX_FORMAT_WRAP_S_SHIFT;
+}
+
+static void *
+nv10_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ struct nv10_sampler_state *ps;
+ uint32_t filter = 0;
+
+ ps = MALLOC(sizeof(struct nv10_sampler_state));
+
+ ps->wrap = ((wrap_mode(cso->wrap_s) << NV10TCL_TX_FORMAT_WRAP_S_SHIFT) |
+ (wrap_mode(cso->wrap_t) << NV10TCL_TX_FORMAT_WRAP_T_SHIFT));
+
+ ps->en = 0;
+ if (cso->max_anisotropy > 1.0) {
+ /* no idea, binary driver sets it, works without it.. meh.. */
+ ps->wrap |= (1 << 5);
+
+/* if (cso->max_anisotropy >= 16.0) {
+ ps->en |= NV10TCL_TX_ENABLE_ANISO_16X;
+ } else
+ if (cso->max_anisotropy >= 12.0) {
+ ps->en |= NV10TCL_TX_ENABLE_ANISO_12X;
+ } else
+ if (cso->max_anisotropy >= 10.0) {
+ ps->en |= NV10TCL_TX_ENABLE_ANISO_10X;
+ } else
+ if (cso->max_anisotropy >= 8.0) {
+ ps->en |= NV10TCL_TX_ENABLE_ANISO_8X;
+ } else
+ if (cso->max_anisotropy >= 6.0) {
+ ps->en |= NV10TCL_TX_ENABLE_ANISO_6X;
+ } else
+ if (cso->max_anisotropy >= 4.0) {
+ ps->en |= NV10TCL_TX_ENABLE_ANISO_4X;
+ } else {
+ ps->en |= NV10TCL_TX_ENABLE_ANISO_2X;
+ }*/
+ }
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ filter |= NV10TCL_TX_FILTER_MAGNIFY_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ filter |= NV10TCL_TX_FILTER_MAGNIFY_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR;
+ break;
+ }
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST;
+ break;
+ }
+ break;
+ }
+
+ ps->filt = filter;
+
+/* if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ switch (cso->compare_func) {
+ case PIPE_FUNC_NEVER:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NEVER;
+ break;
+ case PIPE_FUNC_GREATER:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GREATER;
+ break;
+ case PIPE_FUNC_EQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_EQUAL;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GEQUAL;
+ break;
+ case PIPE_FUNC_LESS:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LESS;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NOTEQUAL;
+ break;
+ case PIPE_FUNC_LEQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LEQUAL;
+ break;
+ case PIPE_FUNC_ALWAYS:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_ALWAYS;
+ break;
+ default:
+ break;
+ }
+ }*/
+
+ ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
+ (float_to_ubyte(cso->border_color[0]) << 16) |
+ (float_to_ubyte(cso->border_color[1]) << 8) |
+ (float_to_ubyte(cso->border_color[2]) << 0));
+
+ return (void *)ps;
+}
+
+static void
+nv10_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv10->tex_sampler[unit] = sampler[unit];
+ nv10->dirty_samplers |= (1 << unit);
+ }
+}
+
+static void
+nv10_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nv10_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+ struct pipe_texture **miptree)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv10->tex_miptree[unit] = (struct nv10_miptree *)miptree[unit];
+ nv10->dirty_samplers |= (1 << unit);
+ }
+}
+
+static void *
+nv10_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nv10_rasterizer_state *rs;
+ int i;
+
+ /*XXX: ignored:
+ * light_twoside
+ * offset_cw/ccw -nohw
+ * scissor
+ * point_smooth -nohw
+ * multisample
+ * offset_units / offset_scale
+ */
+ rs = MALLOC(sizeof(struct nv10_rasterizer_state));
+
+ rs->templ = cso;
+
+ rs->shade_model = cso->flatshade ? 0x1d00 : 0x1d01;
+
+ rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff;
+ rs->line_smooth_en = cso->line_smooth ? 1 : 0;
+
+ rs->point_size = *(uint32_t*)&cso->point_size;
+
+ rs->poly_smooth_en = cso->poly_smooth ? 1 : 0;
+
+ if (cso->front_winding == PIPE_WINDING_CCW) {
+ rs->front_face = NV10TCL_FRONT_FACE_CCW;
+ rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw);
+ rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw);
+ } else {
+ rs->front_face = NV10TCL_FRONT_FACE_CW;
+ rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw);
+ rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw);
+ }
+
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ rs->cull_face_en = 1;
+ if (cso->front_winding == PIPE_WINDING_CCW)
+ rs->cull_face = NV10TCL_CULL_FACE_FRONT;
+ else
+ rs->cull_face = NV10TCL_CULL_FACE_BACK;
+ break;
+ case PIPE_WINDING_CW:
+ rs->cull_face_en = 1;
+ if (cso->front_winding == PIPE_WINDING_CW)
+ rs->cull_face = NV10TCL_CULL_FACE_FRONT;
+ else
+ rs->cull_face = NV10TCL_CULL_FACE_BACK;
+ break;
+ case PIPE_WINDING_BOTH:
+ rs->cull_face_en = 1;
+ rs->cull_face = NV10TCL_CULL_FACE_FRONT_AND_BACK;
+ break;
+ case PIPE_WINDING_NONE:
+ default:
+ rs->cull_face_en = 0;
+ rs->cull_face = 0;
+ break;
+ }
+
+ if (cso->point_sprite) {
+ rs->point_sprite = (1 << 0);
+ for (i = 0; i < 8; i++) {
+ if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+ rs->point_sprite |= (1 << (8 + i));
+ }
+ } else {
+ rs->point_sprite = 0;
+ }
+
+ return (void *)rs;
+}
+
+static void
+nv10_rasterizer_state_bind(struct pipe_context *pipe, void *rast)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->rast = (struct nv10_rasterizer_state*)rast;
+
+ draw_set_rasterizer_state(nv10->draw, (nv10->rast ? nv10->rast->templ : NULL));
+
+ nv10->dirty |= NV10_NEW_RAST;
+}
+
+static void
+nv10_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nv10_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nv10_depth_stencil_alpha_state *hw;
+
+ hw = MALLOC(sizeof(struct nv10_depth_stencil_alpha_state));
+
+ hw->depth.func = nvgl_comparison_op(cso->depth.func);
+ hw->depth.write_enable = cso->depth.writemask ? 1 : 0;
+ hw->depth.test_enable = cso->depth.enabled ? 1 : 0;
+
+ hw->stencil.enable = cso->stencil[0].enabled ? 1 : 0;
+ hw->stencil.wmask = cso->stencil[0].write_mask;
+ hw->stencil.func = nvgl_comparison_op(cso->stencil[0].func);
+ hw->stencil.ref = cso->stencil[0].ref_value;
+ hw->stencil.vmask = cso->stencil[0].value_mask;
+ hw->stencil.fail = nvgl_stencil_op(cso->stencil[0].fail_op);
+ hw->stencil.zfail = nvgl_stencil_op(cso->stencil[0].zfail_op);
+ hw->stencil.zpass = nvgl_stencil_op(cso->stencil[0].zpass_op);
+
+ hw->alpha.enabled = cso->alpha.enabled ? 1 : 0;
+ hw->alpha.func = nvgl_comparison_op(cso->alpha.func);
+ hw->alpha.ref = float_to_ubyte(cso->alpha.ref);
+
+ return (void *)hw;
+}
+
+static void
+nv10_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *dsa)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->dsa = (struct nv10_depth_stencil_alpha_state*)dsa;
+
+ nv10->dirty |= NV10_NEW_DSA;
+}
+
+static void
+nv10_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nv10_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ return draw_create_vertex_shader(nv10->draw, templ);
+}
+
+static void
+nv10_vp_state_bind(struct pipe_context *pipe, void *shader)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ draw_bind_vertex_shader(nv10->draw, (struct draw_vertex_shader *) shader);
+
+ nv10->dirty |= NV10_NEW_VERTPROG;
+}
+
+static void
+nv10_vp_state_delete(struct pipe_context *pipe, void *shader)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ draw_delete_vertex_shader(nv10->draw, (struct draw_vertex_shader *) shader);
+}
+
+static void *
+nv10_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv10_fragment_program *fp;
+
+ fp = CALLOC(1, sizeof(struct nv10_fragment_program));
+ fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ tgsi_scan_shader(cso->tokens, &fp->info);
+
+ return (void *)fp;
+}
+
+static void
+nv10_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ struct nv10_fragment_program *fp = hwcso;
+
+ nv10->fragprog.current = fp;
+ nv10->dirty |= NV10_NEW_FRAGPROG;
+}
+
+static void
+nv10_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ struct nv10_fragment_program *fp = hwcso;
+
+ nv10_fragprog_destroy(nv10, fp);
+ FREE((void*)fp->pipe.tokens);
+ FREE(fp);
+}
+
+static void
+nv10_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->blend_color = (struct pipe_blend_color*)bcol;
+
+ nv10->dirty |= NV10_NEW_BLENDCOL;
+}
+
+static void
+nv10_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ draw_set_clip_state(nv10->draw, clip);
+}
+
+static void
+nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ const struct pipe_constant_buffer *buf )
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+
+ assert(shader < PIPE_SHADER_TYPES);
+ assert(index == 0);
+
+ if (buf) {
+ void *mapped;
+ if (buf->size && (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)))
+ {
+ memcpy(nv10->constbuf[shader], mapped, buf->size);
+ nv10->constbuf_nr[shader] =
+ buf->size / (4 * sizeof(float));
+ ws->buffer_unmap(ws, buf->buffer);
+ }
+ }
+}
+
+static void
+nv10_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->framebuffer = (struct pipe_framebuffer_state*)fb;
+
+ nv10->dirty |= NV10_NEW_FRAMEBUFFER;
+}
+
+static void
+nv10_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ NOUVEAU_ERR("line stipple hahaha\n");
+}
+
+static void
+nv10_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->scissor = (struct pipe_scissor_state*)s;
+
+ nv10->dirty |= NV10_NEW_SCISSOR;
+}
+
+static void
+nv10_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ nv10->viewport = (struct pipe_viewport_state*)vpt;
+
+ draw_set_viewport_state(nv10->draw, nv10->viewport);
+
+ nv10->dirty |= NV10_NEW_VIEWPORT;
+}
+
+static void
+nv10_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ memcpy(nv10->vtxbuf, vb, sizeof(*vb) * count);
+ nv10->dirty |= NV10_NEW_VTXARRAYS;
+
+ draw_set_vertex_buffers(nv10->draw, count, vb);
+}
+
+static void
+nv10_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_element *ve)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+
+ memcpy(nv10->vtxelt, ve, sizeof(*ve) * count);
+ nv10->dirty |= NV10_NEW_VTXARRAYS;
+
+ draw_set_vertex_elements(nv10->draw, count, ve);
+}
+
+void
+nv10_init_state_functions(struct nv10_context *nv10)
+{
+ nv10->pipe.create_blend_state = nv10_blend_state_create;
+ nv10->pipe.bind_blend_state = nv10_blend_state_bind;
+ nv10->pipe.delete_blend_state = nv10_blend_state_delete;
+
+ nv10->pipe.create_sampler_state = nv10_sampler_state_create;
+ nv10->pipe.bind_sampler_states = nv10_sampler_state_bind;
+ nv10->pipe.delete_sampler_state = nv10_sampler_state_delete;
+ nv10->pipe.set_sampler_textures = nv10_set_sampler_texture;
+
+ nv10->pipe.create_rasterizer_state = nv10_rasterizer_state_create;
+ nv10->pipe.bind_rasterizer_state = nv10_rasterizer_state_bind;
+ nv10->pipe.delete_rasterizer_state = nv10_rasterizer_state_delete;
+
+ nv10->pipe.create_depth_stencil_alpha_state =
+ nv10_depth_stencil_alpha_state_create;
+ nv10->pipe.bind_depth_stencil_alpha_state =
+ nv10_depth_stencil_alpha_state_bind;
+ nv10->pipe.delete_depth_stencil_alpha_state =
+ nv10_depth_stencil_alpha_state_delete;
+
+ nv10->pipe.create_vs_state = nv10_vp_state_create;
+ nv10->pipe.bind_vs_state = nv10_vp_state_bind;
+ nv10->pipe.delete_vs_state = nv10_vp_state_delete;
+
+ nv10->pipe.create_fs_state = nv10_fp_state_create;
+ nv10->pipe.bind_fs_state = nv10_fp_state_bind;
+ nv10->pipe.delete_fs_state = nv10_fp_state_delete;
+
+ nv10->pipe.set_blend_color = nv10_set_blend_color;
+ nv10->pipe.set_clip_state = nv10_set_clip_state;
+ nv10->pipe.set_constant_buffer = nv10_set_constant_buffer;
+ nv10->pipe.set_framebuffer_state = nv10_set_framebuffer_state;
+ nv10->pipe.set_polygon_stipple = nv10_set_polygon_stipple;
+ nv10->pipe.set_scissor_state = nv10_set_scissor_state;
+ nv10->pipe.set_viewport_state = nv10_set_viewport_state;
+
+ nv10->pipe.set_vertex_buffers = nv10_set_vertex_buffers;
+ nv10->pipe.set_vertex_elements = nv10_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h
new file mode 100644
index 00000000000..3a3fd0d4f4f
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_state.h
@@ -0,0 +1,139 @@
+#ifndef __NV10_STATE_H__
+#define __NV10_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+struct nv10_blend_state {
+ uint32_t b_enable;
+ uint32_t b_srcfunc;
+ uint32_t b_dstfunc;
+
+ uint32_t c_mask;
+
+ uint32_t d_enable;
+};
+
+struct nv10_sampler_state {
+ uint32_t wrap;
+ uint32_t en;
+ uint32_t filt;
+ uint32_t bcol;
+};
+
+struct nv10_rasterizer_state {
+ uint32_t shade_model;
+
+ uint32_t line_width;
+ uint32_t line_smooth_en;
+
+ uint32_t point_size;
+
+ uint32_t poly_smooth_en;
+
+ uint32_t poly_mode_front;
+ uint32_t poly_mode_back;
+
+ uint32_t front_face;
+ uint32_t cull_face;
+ uint32_t cull_face_en;
+
+ uint32_t point_sprite;
+
+ const struct pipe_rasterizer_state *templ;
+};
+
+struct nv10_vertex_program_exec {
+ uint32_t data[4];
+ boolean has_branch_offset;
+ int const_index;
+};
+
+struct nv10_vertex_program_data {
+ int index; /* immediates == -1 */
+ float value[4];
+};
+
+struct nv10_vertex_program {
+ const struct pipe_shader_state *pipe;
+
+ boolean translated;
+ struct nv10_vertex_program_exec *insns;
+ unsigned nr_insns;
+ struct nv10_vertex_program_data *consts;
+ unsigned nr_consts;
+
+ struct nouveau_resource *exec;
+ unsigned exec_start;
+ struct nouveau_resource *data;
+ unsigned data_start;
+ unsigned data_start_min;
+
+ uint32_t ir;
+ uint32_t or;
+};
+
+struct nv10_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+struct nv10_fragment_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+
+ boolean translated;
+ boolean on_hw;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nv10_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ uint32_t fp_reg_control;
+};
+
+
+struct nv10_depth_stencil_alpha_state {
+ struct {
+ uint32_t func;
+ uint32_t write_enable;
+ uint32_t test_enable;
+ } depth;
+
+ struct {
+ uint32_t enable;
+ uint32_t wmask;
+ uint32_t func;
+ uint32_t ref;
+ uint32_t vmask;
+ uint32_t fail;
+ uint32_t zfail;
+ uint32_t zpass;
+ } stencil;
+
+ struct {
+ uint32_t enabled;
+ uint32_t func;
+ uint32_t ref;
+ } alpha;
+};
+
+struct nv10_miptree {
+ struct pipe_texture base;
+
+ struct pipe_buffer *buffer;
+ uint total_size;
+
+ struct {
+ uint pitch;
+ uint *image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+#endif
diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c
new file mode 100644
index 00000000000..46c7e1d7536
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_state_emit.c
@@ -0,0 +1,303 @@
+#include "nv10_context.h"
+#include "nv10_state.h"
+
+static void nv10_state_emit_blend(struct nv10_context* nv10)
+{
+ struct nv10_blend_state *b = nv10->blend;
+
+ BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1);
+ OUT_RING (b->d_enable);
+
+ BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3);
+ OUT_RING (b->b_enable);
+ OUT_RING (b->b_srcfunc);
+ OUT_RING (b->b_dstfunc);
+
+ BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1);
+ OUT_RING (b->c_mask);
+}
+
+static void nv10_state_emit_blend_color(struct nv10_context* nv10)
+{
+ struct pipe_blend_color *c = nv10->blend_color;
+
+ BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1);
+ OUT_RING ((float_to_ubyte(c->color[3]) << 24)|
+ (float_to_ubyte(c->color[0]) << 16)|
+ (float_to_ubyte(c->color[1]) << 8) |
+ (float_to_ubyte(c->color[2]) << 0));
+}
+
+static void nv10_state_emit_rast(struct nv10_context* nv10)
+{
+ struct nv10_rasterizer_state *r = nv10->rast;
+
+ BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2);
+ OUT_RING (r->shade_model);
+ OUT_RING (r->line_width);
+
+
+ BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1);
+ OUT_RING (r->point_size);
+
+ BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (r->poly_mode_front);
+ OUT_RING (r->poly_mode_back);
+
+
+ BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2);
+ OUT_RING (r->cull_face);
+ OUT_RING (r->front_face);
+
+ BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2);
+ OUT_RING (r->line_smooth_en);
+ OUT_RING (r->poly_smooth_en);
+
+ BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (r->cull_face_en);
+}
+
+static void nv10_state_emit_dsa(struct nv10_context* nv10)
+{
+ struct nv10_depth_stencil_alpha_state *d = nv10->dsa;
+
+ BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1);
+ OUT_RING (d->depth.func);
+
+ BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (d->depth.write_enable);
+
+ BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (d->depth.test_enable);
+
+#if 0
+ BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1);
+ OUT_RING (d->stencil.enable);
+ BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7);
+ OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7);
+#endif
+
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (d->alpha.enabled);
+
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1);
+ OUT_RING (d->alpha.func);
+
+ BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1);
+ OUT_RING (d->alpha.ref);
+}
+
+static void nv10_state_emit_viewport(struct nv10_context* nv10)
+{
+}
+
+static void nv10_state_emit_scissor(struct nv10_context* nv10)
+{
+ // XXX this is so not working
+/* struct pipe_scissor_state *s = nv10->scissor;
+ BEGIN_RING(celsius, NV10TCL_SCISSOR_HORIZ, 2);
+ OUT_RING (((s->maxx - s->minx) << 16) | s->minx);
+ OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/
+}
+
+static void nv10_state_emit_framebuffer(struct nv10_context* nv10)
+{
+ struct pipe_framebuffer_state* fb = nv10->framebuffer;
+ struct pipe_surface *rt, *zeta = NULL;
+ uint32_t rt_format, w, h;
+ int colour_format = 0, zeta_format = 0;
+
+ w = fb->cbufs[0]->width;
+ h = fb->cbufs[0]->height;
+ colour_format = fb->cbufs[0]->format;
+ rt = fb->cbufs[0];
+
+ if (fb->zsbuf) {
+ if (colour_format) {
+ assert(w == fb->zsbuf->width);
+ assert(h == fb->zsbuf->height);
+ } else {
+ w = fb->zsbuf->width;
+ h = fb->zsbuf->height;
+ }
+
+ zeta_format = fb->zsbuf->format;
+ zeta = fb->zsbuf;
+ }
+
+ rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR;
+
+ switch (colour_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case 0:
+ rt_format |= NV10TCL_RT_FORMAT_COLOR_A8R8G8B8;
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ rt_format |= NV10TCL_RT_FORMAT_COLOR_R5G6B5;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (zeta) {
+ BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1);
+ OUT_RING (rt->stride | (zeta->stride << 16));
+ } else {
+ BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1);
+ OUT_RING (rt->stride | (rt->stride << 16));
+ }
+
+ nv10->rt[0] = rt->buffer;
+
+ if (zeta_format)
+ {
+ nv10->zeta = zeta->buffer;
+ }
+
+ BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3);
+ OUT_RING ((w << 16) | 0);
+ OUT_RING ((h << 16) | 0);
+ OUT_RING (rt_format);
+ BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+ OUT_RING (((w - 1) << 16) | 0 | 0x08000800);
+ OUT_RING (((h - 1) << 16) | 0 | 0x08000800);
+}
+
+static void nv10_vertex_layout(struct nv10_context *nv10)
+{
+ struct nv10_fragment_program *fp = nv10->fragprog.current;
+ uint32_t src = 0;
+ int i;
+ struct vertex_info vinfo;
+
+ memset(&vinfo, 0, sizeof(vinfo));
+
+ for (i = 0; i < fp->info.num_inputs; i++) {
+ switch (fp->info.input_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++);
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++);
+ break;
+ default:
+ case TGSI_SEMANTIC_GENERIC:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++);
+ break;
+ case TGSI_SEMANTIC_FOG:
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++);
+ break;
+ }
+ }
+ draw_compute_vertex_size(&vinfo);
+}
+
+void
+nv10_emit_hw_state(struct nv10_context *nv10)
+{
+ int i;
+
+ if (nv10->dirty & NV10_NEW_VERTPROG) {
+ //nv10_vertprog_bind(nv10, nv10->vertprog.current);
+ nv10->dirty &= ~NV10_NEW_VERTPROG;
+ }
+
+ if (nv10->dirty & NV10_NEW_FRAGPROG) {
+ nv10_fragprog_bind(nv10, nv10->fragprog.current);
+ /*XXX: clear NV10_NEW_FRAGPROG if no new program uploaded */
+ nv10->dirty_samplers |= (1<<10);
+ nv10->dirty_samplers = 0;
+ }
+
+ if (nv10->dirty_samplers || (nv10->dirty & NV10_NEW_FRAGPROG)) {
+ nv10_fragtex_bind(nv10);
+ nv10->dirty &= ~NV10_NEW_FRAGPROG;
+ }
+
+ if (nv10->dirty & NV10_NEW_VTXARRAYS) {
+ nv10->dirty &= ~NV10_NEW_VTXARRAYS;
+ nv10_vertex_layout(nv10);
+ nv10_vtxbuf_bind(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_BLEND) {
+ nv10->dirty &= ~NV10_NEW_BLEND;
+ nv10_state_emit_blend(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_BLENDCOL) {
+ nv10->dirty &= ~NV10_NEW_BLENDCOL;
+ nv10_state_emit_blend_color(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_RAST) {
+ nv10->dirty &= ~NV10_NEW_RAST;
+ nv10_state_emit_rast(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_DSA) {
+ nv10->dirty &= ~NV10_NEW_DSA;
+ nv10_state_emit_dsa(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_VIEWPORT) {
+ nv10->dirty &= ~NV10_NEW_VIEWPORT;
+ nv10_state_emit_viewport(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_SCISSOR) {
+ nv10->dirty &= ~NV10_NEW_SCISSOR;
+ nv10_state_emit_scissor(nv10);
+ }
+
+ if (nv10->dirty & NV10_NEW_FRAMEBUFFER) {
+ nv10->dirty &= ~NV10_NEW_FRAMEBUFFER;
+ nv10_state_emit_framebuffer(nv10);
+ }
+
+ /* Emit relocs for every referenced buffer.
+ * This is to ensure the bufmgr has an accurate idea of how
+ * the buffer is used. This isn't very efficient, but we don't
+ * seem to take a significant performance hit. Will be improved
+ * at some point. Vertex arrays are emitted by nv10_vbo.c
+ */
+
+ /* Render target */
+// XXX figre out who's who for NV10TCL_DMA_* and fill accordingly
+// BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1);
+// OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ if (nv10->zeta) {
+// XXX
+// BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 1);
+// OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1);
+ OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ /* XXX for when we allocate LMA on nv17 */
+/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
+ OUT_RELOCl(nv10->zeta + lma_offset);*/
+ }
+
+ /* Vertex buffer */
+ BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1);
+ OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ /* Texture images */
+ for (i = 0; i < 2; i++) {
+ if (!(nv10->fp_samplers & (1 << i)))
+ continue;
+ BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1);
+ OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1);
+ OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
+ NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0,
+ NV10TCL_TX_FORMAT_DMA1);
+ }
+}
+
diff --git a/src/gallium/drivers/nv10/nv10_surface.c b/src/gallium/drivers/nv10/nv10_surface.c
new file mode 100644
index 00000000000..be44c7bed50
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_surface.c
@@ -0,0 +1,64 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv10_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+#include "util/u_tile.h"
+
+static void
+nv10_surface_copy(struct pipe_context *pipe, boolean do_flip,
+ struct pipe_surface *dest, unsigned destx, unsigned desty,
+ struct pipe_surface *src, unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ struct nouveau_winsys *nvws = nv10->nvws;
+
+ nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy,
+ width, height);
+}
+
+static void
+nv10_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nv10_context *nv10 = nv10_context(pipe);
+ struct nouveau_winsys *nvws = nv10->nvws;
+
+ nvws->surface_fill(nvws, dest, destx, desty, width, height, value);
+}
+
+void
+nv10_init_surface_functions(struct nv10_context *nv10)
+{
+ nv10->pipe.surface_copy = nv10_surface_copy;
+ nv10->pipe.surface_fill = nv10_surface_fill;
+}
diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c
new file mode 100644
index 00000000000..d0e788ac036
--- /dev/null
+++ b/src/gallium/drivers/nv10/nv10_vbo.c
@@ -0,0 +1,77 @@
+#include "draw/draw_context.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv10_context.h"
+#include "nv10_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+
+boolean nv10_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count)
+{
+ struct nv10_context *nv10 = nv10_context( pipe );
+ struct draw_context *draw = nv10->draw;
+ unsigned i;
+
+ nv10_emit_hw_state(nv10);
+
+ /*
+ * Map vertex buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv10->vtxbuf[i].buffer) {
+ void *buf
+ = pipe->winsys->buffer_map(pipe->winsys,
+ nv10->vtxbuf[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
+ }
+ /* Map index buffer, if present */
+ if (indexBuffer) {
+ void *mapped_indexes
+ = pipe->winsys->buffer_map(pipe->winsys, indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes);
+ }
+ else {
+ /* no index/element buffer */
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ draw_set_mapped_constant_buffer(draw,
+ nv10->constbuf[PIPE_SHADER_VERTEX],
+ nv10->constbuf_nr[PIPE_SHADER_VERTEX]);
+
+ /* draw! */
+ draw_arrays(nv10->draw, prim, start, count);
+
+ /*
+ * unmap vertex/index buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv10->vtxbuf[i].buffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, nv10->vtxbuf[i].buffer);
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ }
+ }
+ if (indexBuffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer);
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ return TRUE;
+}
+
+boolean nv10_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
+{
+ return nv10_draw_elements(pipe, NULL, 0, prim, start, count);
+}
+
+
+
diff --git a/src/gallium/drivers/nv20/Makefile b/src/gallium/drivers/nv20/Makefile
new file mode 100644
index 00000000000..d777fd3d8b4
--- /dev/null
+++ b/src/gallium/drivers/nv20/Makefile
@@ -0,0 +1,29 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv20
+
+DRIVER_SOURCES = \
+ nv20_clear.c \
+ nv20_context.c \
+ nv20_fragprog.c \
+ nv20_fragtex.c \
+ nv20_miptree.c \
+ nv20_prim_vbuf.c \
+ nv20_screen.c \
+ nv20_state.c \
+ nv20_state_emit.c \
+ nv20_surface.c \
+ nv20_vbo.c
+# nv20_vertprog.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/nv20/nv20_clear.c b/src/gallium/drivers/nv20/nv20_clear.c
new file mode 100644
index 00000000000..81b6f3e78ac
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_clear.c
@@ -0,0 +1,12 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv20_context.h"
+
+void
+nv20_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue);
+}
diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c
new file mode 100644
index 00000000000..9a17f4af571
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_context.c
@@ -0,0 +1,419 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv20_context.h"
+#include "nv20_screen.h"
+
+static void
+nv20_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ draw_flush(nv20->draw);
+
+ FIRE_RING(fence);
+}
+
+static void
+nv20_destroy(struct pipe_context *pipe)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ if (nv20->draw)
+ draw_destroy(nv20->draw);
+
+ FREE(nv20);
+}
+
+static void nv20_init_hwctx(struct nv20_context *nv20)
+{
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_winsys *nvws = screen->nvws;
+ int i;
+ float projectionmatrix[16];
+ const boolean is_nv25tcl = (nv20->screen->kelvin->grclass == NV25TCL);
+
+ BEGIN_RING(kelvin, NV20TCL_DMA_NOTIFY, 1);
+ OUT_RING (screen->sync->handle);
+ BEGIN_RING(kelvin, NV20TCL_DMA_TEXTURE0, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->gart->handle); /* TEXTURE1 */
+ BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->vram->handle); /* ZETA */
+
+ BEGIN_RING(kelvin, NV20TCL_DMA_QUERY, 1);
+ OUT_RING (0); /* renouveau: beef0351, unique */
+
+ BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+ OUT_RING ((0xfff << 16) | 0x0);
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1);
+ OUT_RING ((0xfff << 16) | 0x0);
+
+ for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) {
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1);
+ OUT_RING (0);
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(kelvin, 0x17e0, 3);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (1.0);
+
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1);
+ OUT_RING (NV20TCL_TX_RCOMP_LEQUAL | 0xdb0);
+ } else {
+ BEGIN_RING(kelvin, 0x1e68, 1);
+ OUT_RING (0x4b800000); /* 16777216.000000 */
+ BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1);
+ OUT_RING (NV20TCL_TX_RCOMP_LEQUAL);
+ }
+
+ BEGIN_RING(kelvin, 0x290, 1);
+ OUT_RING ((0x10 << 16) | 1);
+ BEGIN_RING(kelvin, 0x9fc, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, 0x1d80, 1);
+ OUT_RING (1);
+ BEGIN_RING(kelvin, 0x9f8, 1);
+ OUT_RING (4);
+ BEGIN_RING(kelvin, 0x17ec, 3);
+ OUT_RINGf (0.0);
+ OUT_RINGf (1.0);
+ OUT_RINGf (0.0);
+
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x1d88, 1);
+ OUT_RING (3);
+
+ BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY9, 1);
+ OUT_RING (nvws->channel->vram->handle);
+ BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY8, 1);
+ OUT_RING (nvws->channel->vram->handle);
+ }
+ BEGIN_RING(kelvin, NV20TCL_DMA_FENCE, 1);
+ OUT_RING (0); /* renouveau: beef1e10 */
+
+ BEGIN_RING(kelvin, 0x1e98, 1);
+ OUT_RING (0);
+#if 0
+ if (is_nv25tcl) {
+ BEGIN_RING(NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2);
+ OUT_RING (NvDmaTT); /* renouveau: beef0202 */
+ OUT_RING (NvDmaFB); /* renouveau: beef0201 */
+
+ BEGIN_RING(NvSub3D, NV20TCL_DMA_TEXTURE1, 1);
+ OUT_RING (NvDmaTT); /* renouveau: beef0202 */
+ }
+#endif
+ BEGIN_RING(kelvin, NV20TCL_NOTIFY, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(kelvin, 0x120, 3);
+ OUT_RING (0);
+ OUT_RING (1);
+ OUT_RING (2);
+
+/* error: ILLEGAL_MTHD, PROTECTION_FAULT
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
+ OUT_RINGf (0.0);
+ OUT_RINGf (512.0);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0);
+*/
+
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x022c, 2);
+ OUT_RING (0x280);
+ OUT_RING (0x07d28000);
+ }
+
+/* * illegal method, protection fault
+ BEGIN_RING(NvSub3D, 0x1c2c, 1);
+ OUT_RING (0); */
+
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x1da4, 1);
+ OUT_RING (0);
+ }
+
+/* * crashes with illegal method, protection fault
+ BEGIN_RING(NvSub3D, 0x1c18, 1);
+ OUT_RING (0x200); */
+
+ BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2);
+ OUT_RING ((0 << 16) | 0);
+ OUT_RING ((0 << 16) | 0);
+
+ /* *** Set state *** */
+
+ BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2);
+ OUT_RING (NV20TCL_ALPHA_FUNC_FUNC_ALWAYS);
+ OUT_RING (0); /* NV20TCL_ALPHA_FUNC_REF */
+
+ for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) {
+ BEGIN_RING(kelvin, NV20TCL_TX_ENABLE(i), 1);
+ OUT_RING (0);
+ }
+ BEGIN_RING(kelvin, NV20TCL_TX_SHADER_OP, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_IN_ALPHA(0), 4);
+ OUT_RING (0x30d410d0);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_OUT_RGB(0), 4);
+ OUT_RING (0x00000c00);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_ENABLE, 1);
+ OUT_RING (0x00011101);
+ BEGIN_RING(kelvin, NV20TCL_RC_FINAL0, 2);
+ OUT_RING (0x130e0300);
+ OUT_RING (0x0c091c80);
+ BEGIN_RING(kelvin, NV20TCL_RC_OUT_ALPHA(0), 4);
+ OUT_RING (0x00000c00);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_IN_RGB(0), 4);
+ OUT_RING (0x20c400c0);
+ OUT_RING (0);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_COLOR0, 2);
+ OUT_RING (0);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4);
+ OUT_RING (0x035125a0);
+ OUT_RING (0);
+ OUT_RING (0x40002000);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1);
+ OUT_RING (0xffff0000);
+
+ BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 4);
+ OUT_RING (NV20TCL_BLEND_FUNC_SRC_ONE);
+ OUT_RING (NV20TCL_BLEND_FUNC_DST_ZERO);
+ OUT_RING (0); /* NV20TCL_BLEND_COLOR */
+ OUT_RING (NV20TCL_BLEND_EQUATION_FUNC_ADD);
+ BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7);
+ OUT_RING (0xff);
+ OUT_RING (NV20TCL_STENCIL_FUNC_FUNC_ALWAYS);
+ OUT_RING (0); /* NV20TCL_STENCIL_FUNC_REF */
+ OUT_RING (0xff); /* NV20TCL_STENCIL_FUNC_MASK */
+ OUT_RING (NV20TCL_STENCIL_OP_FAIL_KEEP);
+ OUT_RING (NV20TCL_STENCIL_OP_ZFAIL_KEEP);
+ OUT_RING (NV20TCL_STENCIL_OP_ZPASS_KEEP);
+
+ BEGIN_RING(kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (NV20TCL_COLOR_LOGIC_OP_OP_COPY);
+ BEGIN_RING(kelvin, 0x17cc, 1);
+ OUT_RING (0);
+ if (is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x1d84, 1);
+ OUT_RING (1);
+ }
+ BEGIN_RING(kelvin, NV20TCL_LIGHTING_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_LIGHT_CONTROL, 1);
+ OUT_RING (0x00020000);
+ BEGIN_RING(kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_ENABLED_LIGHTS, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_NORMALIZE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0),
+ NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE);
+ for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) {
+ OUT_RING(0xffffffff);
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ OUT_RING (0);
+ OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */
+ OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1);
+ OUT_RING (NV20TCL_DEPTH_FUNC_LESS);
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (1);
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2);
+ OUT_RINGf (0.0);
+ OUT_RINGf (0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1);
+ OUT_RING (1);
+ if (!is_nv25tcl) {
+ BEGIN_RING(kelvin, 0x1d84, 1);
+ OUT_RING (3);
+ }
+ BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1);
+ if (!is_nv25tcl) {
+ OUT_RING (8);
+ } else {
+ OUT_RINGf (1.0);
+ }
+ if (!is_nv25tcl) {
+ BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0); /* NV20TCL.POINT_SMOOTH_ENABLE */
+ } else {
+ BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, 0x0a1c, 1);
+ OUT_RING (0x800);
+ }
+ BEGIN_RING(kelvin, NV20TCL_LINE_WIDTH, 1);
+ OUT_RING (8);
+ BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (NV20TCL_POLYGON_MODE_FRONT_FILL);
+ OUT_RING (NV20TCL_POLYGON_MODE_BACK_FILL);
+ BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2);
+ OUT_RING (NV20TCL_CULL_FACE_BACK);
+ OUT_RING (NV20TCL_FRONT_FACE_CCW);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 1);
+ OUT_RING (NV20TCL_SHADE_MODEL_SMOOTH);
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1);
+ OUT_RING (0);
+ BEGIN_RING(kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE);
+ for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) {
+ OUT_RING(0);
+ }
+ BEGIN_RING(kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3);
+ OUT_RINGf (1.5);
+ OUT_RINGf (-0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */
+ OUT_RINGf (0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */
+ BEGIN_RING(kelvin, NV20TCL_FOG_MODE, 2);
+ OUT_RING (NV20TCL_FOG_MODE_EXP_2);
+ OUT_RING (NV20TCL_FOG_COORD_DIST_COORD_FOG);
+ BEGIN_RING(kelvin, NV20TCL_FOG_ENABLE, 2);
+ OUT_RING (0);
+ OUT_RING (0); /* NV20TCL.FOG_COLOR */
+ BEGIN_RING(kelvin, NV20TCL_ENGINE, 1);
+ OUT_RING (NV20TCL_ENGINE_FIXED);
+
+ for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) {
+ BEGIN_RING(kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1);
+ OUT_RING (0);
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15);
+ OUT_RINGf(1.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0);
+ OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); OUT_RINGf(1.0);
+ OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0);
+ for (i = 4; i < 16; ++i) {
+ OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0);
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_EDGEFLAG_ENABLE, 1);
+ OUT_RING (1);
+ BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1);
+ OUT_RING (0x00010101);
+ BEGIN_RING(kelvin, NV20TCL_CLEAR_VALUE, 1);
+ OUT_RING (0);
+
+ memset(projectionmatrix, 0, sizeof(projectionmatrix));
+ projectionmatrix[0*4+0] = 1.0;
+ projectionmatrix[1*4+1] = 1.0;
+ projectionmatrix[2*4+2] = 1.0;
+ projectionmatrix[3*4+3] = 1.0;
+ BEGIN_RING(kelvin, NV20TCL_PROJECTION_MATRIX(0), 16);
+ for (i = 0; i < 16; i++) {
+ OUT_RINGf (projectionmatrix[i]);
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2);
+ OUT_RINGf (0.0);
+ OUT_RINGf (16777216.0); /* bpp dependant? */
+
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE0_X, 4);
+ OUT_RINGf (-2048.0);
+ OUT_RINGf (-2048.0);
+ OUT_RINGf (16777215.0 * 0.5);
+ OUT_RING (0);
+
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE1_X, 4);
+ OUT_RINGf (-2048.0);
+ OUT_RINGf (-2048.0);
+ OUT_RINGf (16777215.0 * 0.5);
+ OUT_RING (0);
+
+ FIRE_RING (NULL);
+}
+
+static void
+nv20_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+}
+
+struct pipe_context *
+nv20_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+ struct nv20_screen *screen = nv20_screen(pscreen);
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv20_context *nv20;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nv20 = CALLOC(1, sizeof(struct nv20_context));
+ if (!nv20)
+ return NULL;
+ nv20->screen = screen;
+ nv20->pctx_id = pctx_id;
+
+ nv20->nvws = nvws;
+
+ nv20->pipe.winsys = ws;
+ nv20->pipe.screen = pscreen;
+ nv20->pipe.destroy = nv20_destroy;
+ nv20->pipe.set_edgeflags = nv20_set_edgeflags;
+ nv20->pipe.draw_arrays = nv20_draw_arrays;
+ nv20->pipe.draw_elements = nv20_draw_elements;
+ nv20->pipe.clear = nv20_clear;
+ nv20->pipe.flush = nv20_flush;
+
+ nv20_init_surface_functions(nv20);
+ nv20_init_state_functions(nv20);
+
+ nv20->draw = draw_create();
+ assert(nv20->draw);
+ draw_set_rasterize_stage(nv20->draw, nv20_draw_vbuf_stage(nv20));
+
+ nv20_init_hwctx(nv20);
+
+ return &nv20->pipe;
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h
new file mode 100644
index 00000000000..8ad926db20a
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_context.h
@@ -0,0 +1,153 @@
+#ifndef __NV20_CONTEXT_H__
+#define __NV20_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv20_screen *ctx = nv20->screen
+#include "nouveau/nouveau_push.h"
+
+#include "nv20_state.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+#define NV20_NEW_VERTPROG (1 << 0)
+#define NV20_NEW_FRAGPROG (1 << 1)
+#define NV20_NEW_VTXARRAYS (1 << 2)
+#define NV20_NEW_BLEND (1 << 3)
+#define NV20_NEW_BLENDCOL (1 << 4)
+#define NV20_NEW_RAST (1 << 5)
+#define NV20_NEW_DSA (1 << 6)
+#define NV20_NEW_VIEWPORT (1 << 7)
+#define NV20_NEW_SCISSOR (1 << 8)
+#define NV20_NEW_FRAMEBUFFER (1 << 9)
+
+#include "nv20_screen.h"
+
+struct nv20_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv20_screen *screen;
+ unsigned pctx_id;
+
+ struct draw_context *draw;
+
+ uint32_t dirty;
+
+ struct nv20_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+ struct nv20_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned dirty_samplers;
+ unsigned fp_samplers;
+ unsigned vp_samplers;
+
+ uint32_t rt_enable;
+ struct pipe_buffer *rt[4];
+ struct pipe_buffer *zeta;
+ uint32_t lma_offset;
+
+ struct nv20_blend_state *blend;
+ struct pipe_blend_color *blend_color;
+ struct nv20_rasterizer_state *rast;
+ struct nv20_depth_stencil_alpha_state *dsa;
+ struct pipe_viewport_state *viewport;
+ struct pipe_scissor_state *scissor;
+ struct pipe_framebuffer_state *framebuffer;
+
+ //struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ float *constbuf[PIPE_SHADER_TYPES][32][4];
+ unsigned constbuf_nr[PIPE_SHADER_TYPES];
+
+ struct vertex_info vertex_info;
+
+ struct {
+ struct pipe_buffer *buffer;
+ uint32_t format;
+ } tex[2];
+
+ unsigned vb_enable;
+ struct {
+ struct pipe_buffer *buffer;
+ unsigned delta;
+ } vb[16];
+
+/* struct {
+
+ struct nouveau_resource *exec_heap;
+ struct nouveau_resource *data_heap;
+
+ struct nv20_vertex_program *active;
+
+ struct nv20_vertex_program *current;
+ } vertprog;
+*/
+ struct {
+ struct nv20_fragment_program *active;
+
+ struct nv20_fragment_program *current;
+ struct pipe_buffer *constant_buf;
+ } fragprog;
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+};
+
+static INLINE struct nv20_context *
+nv20_context(struct pipe_context *pipe)
+{
+ return (struct nv20_context *)pipe;
+}
+
+extern void nv20_init_state_functions(struct nv20_context *nv20);
+extern void nv20_init_surface_functions(struct nv20_context *nv20);
+
+extern void nv20_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+/* nv20_clear.c */
+extern void nv20_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+/* nv20_draw.c */
+extern struct draw_stage *nv20_draw_render_stage(struct nv20_context *nv20);
+
+/* nv20_fragprog.c */
+extern void nv20_fragprog_bind(struct nv20_context *,
+ struct nv20_fragment_program *);
+extern void nv20_fragprog_destroy(struct nv20_context *,
+ struct nv20_fragment_program *);
+
+/* nv20_fragtex.c */
+extern void nv20_fragtex_bind(struct nv20_context *);
+
+/* nv20_prim_vbuf.c */
+struct draw_stage *nv20_draw_vbuf_stage( struct nv20_context *nv20 );
+extern void nv20_vtxbuf_bind(struct nv20_context* nv20);
+
+/* nv20_state.c and friends */
+extern void nv20_emit_hw_state(struct nv20_context *nv20);
+extern void nv20_state_tex_update(struct nv20_context *nv20);
+
+/* nv20_vbo.c */
+extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv20_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count);
+
+
+#endif
diff --git a/src/gallium/drivers/nv20/nv20_fragprog.c b/src/gallium/drivers/nv20/nv20_fragprog.c
new file mode 100644
index 00000000000..4f496369dd3
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_fragprog.c
@@ -0,0 +1,21 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv20_context.h"
+
+void
+nv20_fragprog_bind(struct nv20_context *nv20, struct nv20_fragment_program *fp)
+{
+}
+
+void
+nv20_fragprog_destroy(struct nv20_context *nv20,
+ struct nv20_fragment_program *fp)
+{
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c
new file mode 100644
index 00000000000..495a7be9127
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_fragtex.c
@@ -0,0 +1,124 @@
+#include "nv20_context.h"
+#include "nouveau/nouveau_util.h"
+
+#define _(m,tf) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV20TCL_TX_FORMAT_FORMAT_##tf, \
+}
+
+struct nv20_texture_format {
+ boolean defined;
+ uint pipe;
+ int format;
+};
+
+static struct nv20_texture_format
+nv20_texture_formats[] = {
+ _(A8R8G8B8_UNORM, A8R8G8B8),
+ _(A1R5G5B5_UNORM, A1R5G5B5),
+ _(A4R4G4B4_UNORM, A4R4G4B4),
+ _(L8_UNORM , L8 ),
+ _(A8_UNORM , A8 ),
+ _(A8L8_UNORM , A8L8 ),
+/* _(RGB_DXT1 , DXT1, ), */
+/* _(RGBA_DXT1 , DXT1, ), */
+/* _(RGBA_DXT3 , DXT3, ), */
+/* _(RGBA_DXT5 , DXT5, ), */
+ {},
+};
+
+static struct nv20_texture_format *
+nv20_fragtex_format(uint pipe_format)
+{
+ struct nv20_texture_format *tf = nv20_texture_formats;
+
+ while (tf->defined) {
+ if (tf->pipe == pipe_format)
+ return tf;
+ tf++;
+ }
+
+ return NULL;
+}
+
+
+static void
+nv20_fragtex_build(struct nv20_context *nv20, int unit)
+{
+#if 0
+ struct nv20_sampler_state *ps = nv20->tex_sampler[unit];
+ struct nv20_miptree *nv20mt = nv20->tex_miptree[unit];
+ struct pipe_texture *pt = &nv20mt->base;
+ struct nv20_texture_format *tf;
+ uint32_t txf, txs, txp;
+
+ tf = nv20_fragtex_format(pt->format);
+ if (!tf || !tf->defined) {
+ NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format);
+ return;
+ }
+
+ txf = tf->format << 8;
+ txf |= (pt->last_level + 1) << 16;
+ txf |= log2i(pt->width[0]) << 20;
+ txf |= log2i(pt->height[0]) << 24;
+ txf |= log2i(pt->depth[0]) << 28;
+ txf |= 8;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV10TCL_TX_FORMAT_CUBE_MAP;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ txf |= (2<<4);
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= (1<<4);
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return;
+ }
+
+ BEGIN_RING(kelvin, NV10TCL_TX_OFFSET(unit), 8);
+ OUT_RELOCl(nv20mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(nv20mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ OUT_RING (ps->wrap);
+ OUT_RING (0x40000000); /* enable */
+ OUT_RING (txs);
+ OUT_RING (ps->filt | 0x2000 /* magic */);
+ OUT_RING ((pt->width[0] << 16) | pt->height[0]);
+ OUT_RING (ps->bcol);
+#endif
+}
+
+void
+nv20_fragtex_bind(struct nv20_context *nv20)
+{
+#if 0
+ struct nv20_fragment_program *fp = nv20->fragprog.active;
+ unsigned samplers, unit;
+
+ samplers = nv20->fp_samplers & ~fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ BEGIN_RING(kelvin, NV10TCL_TX_ENABLE(unit), 1);
+ OUT_RING (0);
+ }
+
+ samplers = nv20->dirty_samplers & fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ nv20_fragtex_build(nv20, unit);
+ }
+
+ nv20->fp_samplers = fp->samplers;
+#endif
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c
new file mode 100644
index 00000000000..c6106d58c43
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_miptree.c
@@ -0,0 +1,156 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv20_context.h"
+#include "nv20_screen.h"
+
+static void
+nv20_miptree_layout(struct nv20_miptree *nv20mt)
+{
+ struct pipe_texture *pt = &nv20mt->base;
+ boolean swizzled = FALSE;
+ uint width = pt->width[0], height = pt->height[0];
+ uint offset = 0;
+ int nr_faces, l, f;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ nr_faces = 6;
+ } else {
+ nr_faces = 1;
+ }
+
+ for (l = 0; l <= pt->last_level; l++) {
+ pt->width[l] = width;
+ pt->height[l] = height;
+ pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+ if (swizzled)
+ nv20mt->level[l].pitch = pt->nblocksx[l] * pt->block.size;
+ else
+ nv20mt->level[l].pitch = pt->nblocksx[0] * pt->block.size;
+ nv20mt->level[l].pitch = (nv20mt->level[l].pitch + 63) & ~63;
+
+ nv20mt->level[l].image_offset =
+ CALLOC(nr_faces, sizeof(unsigned));
+
+ width = MAX2(1, width >> 1);
+ height = MAX2(1, height >> 1);
+
+ }
+
+ for (f = 0; f < nr_faces; f++) {
+ for (l = 0; l <= pt->last_level; l++) {
+ nv20mt->level[l].image_offset[f] = offset;
+ offset += nv20mt->level[l].pitch * pt->height[l];
+ }
+ }
+
+ nv20mt->total_size = offset;
+}
+
+static struct pipe_texture *
+nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct nv20_miptree *mt;
+
+ mt = MALLOC(sizeof(struct nv20_miptree));
+ if (!mt)
+ return NULL;
+ mt->base = *pt;
+ mt->base.refcount = 1;
+ mt->base.screen = screen;
+
+ nv20_miptree_layout(mt);
+
+ mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL,
+ mt->total_size);
+ if (!mt->buffer) {
+ FREE(mt);
+ return NULL;
+ }
+
+ return &mt->base;
+}
+
+static void
+nv20_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt)
+{
+ struct pipe_texture *mt = *pt;
+
+ *pt = NULL;
+ if (--mt->refcount <= 0) {
+ struct nv20_miptree *nv20mt = (struct nv20_miptree *)mt;
+ int l;
+
+ pipe_buffer_reference(screen, &nv20mt->buffer, NULL);
+ for (l = 0; l <= mt->last_level; l++) {
+ if (nv20mt->level[l].image_offset)
+ FREE(nv20mt->level[l].image_offset);
+ }
+ FREE(nv20mt);
+ }
+}
+
+static struct pipe_surface *
+nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct nv20_miptree *nv20mt = (struct nv20_miptree *)pt;
+ struct pipe_surface *ps;
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ if (!ps)
+ return NULL;
+ pipe_texture_reference(&ps->texture, pt);
+ pipe_buffer_reference(screen, &ps->buffer, nv20mt->buffer);
+ ps->format = pt->format;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->block = pt->block;
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = nv20mt->level[level].pitch;
+ ps->usage = flags;
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
+ ps->refcount = 1;
+ ps->winsys = screen->winsys;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ ps->offset = nv20mt->level[level].image_offset[face];
+ } else
+ if (pt->target == PIPE_TEXTURE_3D) {
+ ps->offset = nv20mt->level[level].image_offset[zslice];
+ } else {
+ ps->offset = nv20mt->level[level].image_offset[0];
+ }
+
+ return ps;
+}
+
+static void
+nv20_miptree_surface_release(struct pipe_screen *pscreen,
+ struct pipe_surface **psurface)
+{
+ struct pipe_surface *ps = *psurface;
+
+ *psurface = NULL;
+ if (--ps->refcount > 0)
+ return;
+
+ pipe_texture_reference(&ps->texture, NULL);
+ pipe_buffer_reference(pscreen, &ps->buffer, NULL);
+ FREE(ps);
+}
+
+void nv20_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv20_miptree_create;
+ pscreen->texture_release = nv20_miptree_release;
+ pscreen->get_tex_surface = nv20_miptree_surface_get;
+ pscreen->tex_surface_release = nv20_miptree_surface_release;
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c
new file mode 100644
index 00000000000..74540845a87
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c
@@ -0,0 +1,402 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Build post-transformation, post-clipping vertex buffers and element
+ * lists by hooking into the end of the primitive pipeline and
+ * manipulating the vertex_id field in the vertex headers.
+ *
+ * XXX: work in progress
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_debug.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv20_context.h"
+#include "nv20_state.h"
+
+#include "draw/draw_vbuf.h"
+
+/**
+ * Primitive renderer for nv20.
+ */
+struct nv20_vbuf_render {
+ struct vbuf_render base;
+
+ struct nv20_context *nv20;
+
+ /** Vertex buffer in VRAM */
+ struct pipe_buffer *pbuffer;
+
+ /** Vertex buffer in normal memory */
+ void *mbuffer;
+
+ /** Vertex size in bytes */
+ /*unsigned vertex_size;*/
+
+ /** Hardware primitive */
+ unsigned hwprim;
+};
+
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct nv20_vbuf_render *
+nv20_vbuf_render(struct vbuf_render *render)
+{
+ assert(render);
+ return (struct nv20_vbuf_render *)render;
+}
+
+void nv20_vtxbuf_bind( struct nv20_context* nv20 )
+{
+#if 0
+ int i;
+ for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) {
+ BEGIN_RING(kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1);
+ OUT_RING(0/*nv20->vtxbuf*/);
+ BEGIN_RING(kelvin, NV20TCL_VTXFMT(i) ,1);
+ OUT_RING(0/*XXX*/);
+ }
+#endif
+}
+
+static const struct vertex_info *
+nv20_vbuf_render_get_vertex_info( struct vbuf_render *render )
+{
+ struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+ struct nv20_context *nv20 = nv20_render->nv20;
+
+ nv20_emit_hw_state(nv20);
+
+ return &nv20->vertex_info;
+}
+
+static void *
+nv20__allocate_mbuffer(struct nv20_vbuf_render *nv20_render, size_t size)
+{
+ nv20_render->mbuffer = MALLOC(size);
+ return nv20_render->mbuffer;
+}
+
+static void *
+nv20__allocate_pbuffer(struct nv20_vbuf_render *nv20_render, size_t size)
+{
+ struct pipe_winsys *winsys = nv20_render->nv20->pipe.winsys;
+ nv20_render->pbuffer = winsys->buffer_create(winsys, 64,
+ PIPE_BUFFER_USAGE_VERTEX, size);
+ return winsys->buffer_map(winsys,
+ nv20_render->pbuffer,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+}
+
+static void *
+nv20_vbuf_render_allocate_vertices( struct vbuf_render *render,
+ ushort vertex_size,
+ ushort nr_vertices )
+{
+ struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+ size_t size = (size_t)vertex_size * (size_t)nr_vertices;
+ void *buf;
+
+ assert(!nv20_render->pbuffer);
+ assert(!nv20_render->mbuffer);
+
+ /*
+ * For small amount of vertices, don't bother with pipe vertex
+ * buffer, the data will be passed directly via the fifo.
+ */
+ /* XXX: Pipe vertex buffers don't work. */
+ if (0 && size > 16 * 1024)
+ buf = nv20__allocate_pbuffer(nv20_render, size);
+ else
+ buf = nv20__allocate_mbuffer(nv20_render, size);
+
+ if (buf)
+ nv20_render->nv20->dirty |= NV20_NEW_VTXARRAYS;
+
+ return buf;
+}
+
+static boolean
+nv20_vbuf_render_set_primitive( struct vbuf_render *render,
+ unsigned prim )
+{
+ struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+ unsigned hwp = nvgl_primitive(prim);
+ if (hwp == 0)
+ return FALSE;
+
+ nv20_render->hwprim = hwp;
+ return TRUE;
+}
+
+static uint32_t
+nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type)
+{
+ return (stride << NV20TCL_VTXFMT_STRIDE_SHIFT) |
+ (fields << NV20TCL_VTXFMT_SIZE_SHIFT) |
+ (type << NV20TCL_VTXFMT_TYPE_SHIFT);
+}
+
+static unsigned
+nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr)
+{
+ uint32_t hwfmt = 0;
+ unsigned fields;
+
+ switch (type) {
+ case EMIT_OMIT:
+ hwfmt = nv20__vtxhwformat(0, 0, 2);
+ fields = 0;
+ break;
+ case EMIT_1F:
+ hwfmt = nv20__vtxhwformat(4, 1, 2);
+ fields = 1;
+ break;
+ case EMIT_2F:
+ hwfmt = nv20__vtxhwformat(8, 2, 2);
+ fields = 2;
+ break;
+ case EMIT_3F:
+ hwfmt = nv20__vtxhwformat(12, 3, 2);
+ fields = 3;
+ break;
+ case EMIT_4F:
+ hwfmt = nv20__vtxhwformat(16, 4, 2);
+ fields = 4;
+ break;
+ default:
+ NOUVEAU_ERR("unhandled attrib_emit %d\n", type);
+ return 0;
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_VTXFMT(hwattr), 1);
+ OUT_RING(hwfmt);
+ return fields;
+}
+
+static unsigned
+nv20__emit_vertex_array_format(struct nv20_context *nv20)
+{
+ struct vertex_info *vinfo = &nv20->vertex_info;
+ int hwattr = NV20TCL_VTXFMT__SIZE;
+ int attr = 0;
+ unsigned nr_fields = 0;
+
+ while (hwattr-- > 0) {
+ if (vinfo->hwfmt[0] & (1 << hwattr)) {
+ nr_fields += nv20__emit_format(nv20,
+ vinfo->attrib[attr].emit, hwattr);
+ attr++;
+ } else
+ nv20__emit_format(nv20, EMIT_OMIT, hwattr);
+ }
+
+ return nr_fields;
+}
+
+static void
+nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render,
+ const ushort *indices,
+ uint nr_indices)
+{
+ struct nv20_context *nv20 = nv20_render->nv20;
+ struct vertex_info *vinfo = &nv20->vertex_info;
+ unsigned nr_fields;
+ int max_push;
+ ubyte *data = nv20_render->mbuffer;
+ int vsz = 4 * vinfo->size;
+
+ nr_fields = nv20__emit_vertex_array_format(nv20);
+
+ BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING(nv20_render->hwprim);
+
+ max_push = 1200 / nr_fields;
+ while (nr_indices) {
+ int i;
+ int push = MIN2(nr_indices, max_push);
+
+ BEGIN_RING_NI(kelvin, NV20TCL_VERTEX_DATA, push * nr_fields);
+ for (i = 0; i < push; i++) {
+ /* XXX: fixme to handle other than floats? */
+ int f = nr_fields;
+ float *attrv = (float*)&data[indices[i] * vsz];
+ while (f-- > 0)
+ OUT_RINGf(*attrv++);
+ }
+
+ nr_indices -= push;
+ indices += push;
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING(NV20TCL_VERTEX_BEGIN_END_STOP);
+}
+
+static void
+nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render,
+ const ushort *indices,
+ uint nr_indices)
+{
+ struct nv20_context *nv20 = nv20_render->nv20;
+ int push, i;
+
+ NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n");
+
+ BEGIN_RING(kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
+ OUT_RELOCl(nv20_render->pbuffer, 0,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+
+ BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING(nv20_render->hwprim);
+
+ if (nr_indices & 1) {
+ BEGIN_RING(kelvin, NV10TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (indices[0]);
+ indices++; nr_indices--;
+ }
+
+ while (nr_indices) {
+ // XXX too big/small ? check the size
+ push = MIN2(nr_indices, 1200 * 2);
+
+ BEGIN_RING_NI(kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((indices[i+1] << 16) | indices[i]);
+
+ nr_indices -= push;
+ indices += push;
+ }
+
+ BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING (0);
+}
+
+static void
+nv20_vbuf_render_draw( struct vbuf_render *render,
+ const ushort *indices,
+ uint nr_indices)
+{
+ struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+
+ nv20_emit_hw_state(nv20_render->nv20);
+
+ if (nv20_render->pbuffer)
+ nv20__draw_pbuffer(nv20_render, indices, nr_indices);
+ else if (nv20_render->mbuffer)
+ nv20__draw_mbuffer(nv20_render, indices, nr_indices);
+ else
+ assert(0);
+}
+
+
+static void
+nv20_vbuf_render_release_vertices( struct vbuf_render *render,
+ void *vertices,
+ unsigned vertex_size,
+ unsigned vertices_used )
+{
+ struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+ struct nv20_context *nv20 = nv20_render->nv20;
+ struct pipe_winsys *winsys = nv20->pipe.winsys;
+ struct pipe_screen *pscreen = &nv20->screen->pipe;
+
+ if (nv20_render->pbuffer) {
+ winsys->buffer_unmap(winsys, nv20_render->pbuffer);
+ pipe_buffer_reference(pscreen, &nv20_render->pbuffer, NULL);
+ } else if (nv20_render->mbuffer) {
+ FREE(nv20_render->mbuffer);
+ nv20_render->mbuffer = NULL;
+ } else
+ assert(0);
+}
+
+
+static void
+nv20_vbuf_render_destroy( struct vbuf_render *render )
+{
+ struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render);
+
+ assert(!nv20_render->pbuffer);
+ assert(!nv20_render->mbuffer);
+
+ FREE(nv20_render);
+}
+
+
+/**
+ * Create a new primitive render.
+ */
+static struct vbuf_render *
+nv20_vbuf_render_create( struct nv20_context *nv20 )
+{
+ struct nv20_vbuf_render *nv20_render = CALLOC_STRUCT(nv20_vbuf_render);
+
+ nv20_render->nv20 = nv20;
+
+ nv20_render->base.max_vertex_buffer_bytes = 16*1024;
+ nv20_render->base.max_indices = 1024;
+ nv20_render->base.get_vertex_info = nv20_vbuf_render_get_vertex_info;
+ nv20_render->base.allocate_vertices =
+ nv20_vbuf_render_allocate_vertices;
+ nv20_render->base.set_primitive = nv20_vbuf_render_set_primitive;
+ nv20_render->base.draw = nv20_vbuf_render_draw;
+ nv20_render->base.release_vertices = nv20_vbuf_render_release_vertices;
+ nv20_render->base.destroy = nv20_vbuf_render_destroy;
+
+ return &nv20_render->base;
+}
+
+
+/**
+ * Create a new primitive vbuf/render stage.
+ */
+struct draw_stage *nv20_draw_vbuf_stage( struct nv20_context *nv20 )
+{
+ struct vbuf_render *render;
+ struct draw_stage *stage;
+
+ render = nv20_vbuf_render_create(nv20);
+ if(!render)
+ return NULL;
+
+ stage = draw_vbuf_stage( nv20->draw, render );
+ if(!stage) {
+ render->destroy(render);
+ return NULL;
+ }
+
+ return stage;
+}
diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c
new file mode 100644
index 00000000000..c0a90f6c584
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_screen.c
@@ -0,0 +1,204 @@
+#include "pipe/p_screen.h"
+
+#include "nv20_context.h"
+#include "nv20_screen.h"
+
+static const char *
+nv20_screen_get_name(struct pipe_screen *screen)
+{
+ struct nv20_screen *nv20screen = nv20_screen(screen);
+ struct nouveau_device *dev = nv20screen->nvws->channel->device;
+ static char buffer[128];
+
+ snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+ return buffer;
+}
+
+static const char *
+nv20_screen_get_vendor(struct pipe_screen *screen)
+{
+ return "nouveau";
+}
+
+static int
+nv20_screen_get_param(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 2;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 0;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 0;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_POINT_SPRITE:
+ return 0;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 0;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 12;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 12;
+ case NOUVEAU_CAP_HW_VTXBUF:
+ case NOUVEAU_CAP_HW_IDXBUF:
+ return 0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nv20_screen_get_paramf(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 2.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 4.0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+static boolean
+nv20_screen_is_format_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage, unsigned geom_flags)
+{
+ if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ }
+
+ return FALSE;
+}
+
+static void *
+nv20_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+ unsigned flags )
+{
+ struct pipe_winsys *ws = screen->winsys;
+ void *map;
+
+ map = ws->buffer_map(ws, surface->buffer, flags);
+ if (!map)
+ return NULL;
+
+ return map + surface->offset;
+}
+
+static void
+nv20_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+ struct pipe_winsys *ws = screen->winsys;
+
+ ws->buffer_unmap(ws, surface->buffer);
+}
+
+static void
+nv20_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv20_screen *screen = nv20_screen(pscreen);
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nvws->notifier_free(&screen->sync);
+ nvws->grobj_free(&screen->kelvin);
+
+ FREE(pscreen);
+}
+
+struct pipe_screen *
+nv20_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+ struct nv20_screen *screen = CALLOC_STRUCT(nv20_screen);
+ unsigned kelvin_class = 0;
+ unsigned chipset = nvws->channel->device->chipset;
+ int ret;
+
+ if (!screen)
+ return NULL;
+ screen->nvws = nvws;
+
+ /* 3D object */
+ if (chipset >= 0x25)
+ kelvin_class = NV25TCL;
+ else if (chipset >= 0x20)
+ kelvin_class = NV20TCL;
+
+ if (!kelvin_class || chipset >= 0x30) {
+ NOUVEAU_ERR("Unknown nv2x chipset: nv%02x\n", chipset);
+ return NULL;
+ }
+
+ ret = nvws->grobj_alloc(nvws, kelvin_class, &screen->kelvin);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+ return FALSE;
+ }
+
+ /* Notifier for sync purposes */
+ ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+ if (ret) {
+ NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+ nv20_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ screen->pipe.winsys = ws;
+ screen->pipe.destroy = nv20_screen_destroy;
+
+ screen->pipe.get_name = nv20_screen_get_name;
+ screen->pipe.get_vendor = nv20_screen_get_vendor;
+ screen->pipe.get_param = nv20_screen_get_param;
+ screen->pipe.get_paramf = nv20_screen_get_paramf;
+
+ screen->pipe.is_format_supported = nv20_screen_is_format_supported;
+
+ screen->pipe.surface_map = nv20_surface_map;
+ screen->pipe.surface_unmap = nv20_surface_unmap;
+
+ nv20_screen_init_miptree_functions(&screen->pipe);
+
+ return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_screen.h b/src/gallium/drivers/nv20/nv20_screen.h
new file mode 100644
index 00000000000..8f2f2e341db
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_screen.h
@@ -0,0 +1,22 @@
+#ifndef __NV20_SCREEN_H__
+#define __NV20_SCREEN_H__
+
+#include "pipe/p_screen.h"
+
+struct nv20_screen {
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+
+ /* HW graphics objects */
+ struct nouveau_grobj *kelvin;
+ struct nouveau_notifier *sync;
+};
+
+static INLINE struct nv20_screen *
+nv20_screen(struct pipe_screen *screen)
+{
+ return (struct nv20_screen *)screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c
new file mode 100644
index 00000000000..21bde5b81f9
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_state.c
@@ -0,0 +1,581 @@
+#include "draw/draw_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv20_context.h"
+#include "nv20_state.h"
+
+static void *
+nv20_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nv20_blend_state *cb;
+
+ cb = MALLOC(sizeof(struct nv20_blend_state));
+
+ cb->b_enable = cso->blend_enable ? 1 : 0;
+ cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) |
+ (nvgl_blend_func(cso->rgb_src_factor)));
+ cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) |
+ (nvgl_blend_func(cso->rgb_dst_factor)));
+
+ cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) |
+ ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) |
+ ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) |
+ ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0));
+
+ cb->d_enable = cso->dither ? 1 : 0;
+
+ return (void *)cb;
+}
+
+static void
+nv20_blend_state_bind(struct pipe_context *pipe, void *blend)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ nv20->blend = (struct nv20_blend_state*)blend;
+
+ nv20->dirty |= NV20_NEW_BLEND;
+}
+
+static void
+nv20_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+ unsigned ret;
+
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ ret = NV20TCL_TX_WRAP_S_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ ret = NV20TCL_TX_WRAP_S_MIRRORED_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ ret = NV20TCL_TX_WRAP_S_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ ret = NV20TCL_TX_WRAP_S_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_CLAMP:
+ ret = NV20TCL_TX_WRAP_S_CLAMP;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ ret = NV20TCL_TX_WRAP_S_REPEAT;
+ break;
+ }
+
+ return (ret >> NV20TCL_TX_WRAP_S_SHIFT);
+}
+
+static void *
+nv20_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ struct nv20_sampler_state *ps;
+ uint32_t filter = 0;
+
+ ps = MALLOC(sizeof(struct nv20_sampler_state));
+
+ ps->wrap = ((wrap_mode(cso->wrap_s) << NV20TCL_TX_WRAP_S_SHIFT) |
+ (wrap_mode(cso->wrap_t) << NV20TCL_TX_WRAP_T_SHIFT));
+
+ ps->en = 0;
+ if (cso->max_anisotropy > 1.0) {
+ /* no idea, binary driver sets it, works without it.. meh.. */
+ ps->wrap |= (1 << 5);
+
+/* if (cso->max_anisotropy >= 8.0) {
+ ps->en |= NV20TCL_TX_ENABLE_ANISO_8X;
+ } else
+ if (cso->max_anisotropy >= 4.0) {
+ ps->en |= NV20TCL_TX_ENABLE_ANISO_4X;
+ } else {
+ ps->en |= NV20TCL_TX_ENABLE_ANISO_2X;
+ }*/
+ }
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ filter |= NV20TCL_TX_FILTER_MAGNIFY_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ filter |= NV20TCL_TX_FILTER_MAGNIFY_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |=
+ NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV20TCL_TX_FILTER_MINIFY_LINEAR;
+ break;
+ }
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |=
+ NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |=
+ NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV20TCL_TX_FILTER_MINIFY_NEAREST;
+ break;
+ }
+ break;
+ }
+
+ ps->filt = filter;
+
+/* if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ switch (cso->compare_func) {
+ case PIPE_FUNC_NEVER:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NEVER;
+ break;
+ case PIPE_FUNC_GREATER:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GREATER;
+ break;
+ case PIPE_FUNC_EQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_EQUAL;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GEQUAL;
+ break;
+ case PIPE_FUNC_LESS:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LESS;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NOTEQUAL;
+ break;
+ case PIPE_FUNC_LEQUAL:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LEQUAL;
+ break;
+ case PIPE_FUNC_ALWAYS:
+ ps->wrap |= NV10TCL_TX_WRAP_RCOMP_ALWAYS;
+ break;
+ default:
+ break;
+ }
+ }*/
+
+ ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
+ (float_to_ubyte(cso->border_color[0]) << 16) |
+ (float_to_ubyte(cso->border_color[1]) << 8) |
+ (float_to_ubyte(cso->border_color[2]) << 0));
+
+ return (void *)ps;
+}
+
+static void
+nv20_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv20->tex_sampler[unit] = sampler[unit];
+ nv20->dirty_samplers |= (1 << unit);
+ }
+}
+
+static void
+nv20_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nv20_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+ struct pipe_texture **miptree)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv20->tex_miptree[unit] = (struct nv20_miptree *)miptree[unit];
+ nv20->dirty_samplers |= (1 << unit);
+ }
+}
+
+static void *
+nv20_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nv20_rasterizer_state *rs;
+ int i;
+
+ /*XXX: ignored:
+ * light_twoside
+ * offset_cw/ccw -nohw
+ * scissor
+ * point_smooth -nohw
+ * multisample
+ * offset_units / offset_scale
+ */
+ rs = MALLOC(sizeof(struct nv20_rasterizer_state));
+
+ rs->templ = cso;
+
+ rs->shade_model = cso->flatshade ? NV20TCL_SHADE_MODEL_FLAT :
+ NV20TCL_SHADE_MODEL_SMOOTH;
+
+ rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff;
+ rs->line_smooth_en = cso->line_smooth ? 1 : 0;
+
+ /* XXX: nv20 and nv25 different! */
+ rs->point_size = *(uint32_t*)&cso->point_size;
+
+ rs->poly_smooth_en = cso->poly_smooth ? 1 : 0;
+
+ if (cso->front_winding == PIPE_WINDING_CCW) {
+ rs->front_face = NV20TCL_FRONT_FACE_CCW;
+ rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw);
+ rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw);
+ } else {
+ rs->front_face = NV20TCL_FRONT_FACE_CW;
+ rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw);
+ rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw);
+ }
+
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ rs->cull_face_en = 1;
+ if (cso->front_winding == PIPE_WINDING_CCW)
+ rs->cull_face = NV20TCL_CULL_FACE_FRONT;
+ else
+ rs->cull_face = NV20TCL_CULL_FACE_BACK;
+ break;
+ case PIPE_WINDING_CW:
+ rs->cull_face_en = 1;
+ if (cso->front_winding == PIPE_WINDING_CW)
+ rs->cull_face = NV20TCL_CULL_FACE_FRONT;
+ else
+ rs->cull_face = NV20TCL_CULL_FACE_BACK;
+ break;
+ case PIPE_WINDING_BOTH:
+ rs->cull_face_en = 1;
+ rs->cull_face = NV20TCL_CULL_FACE_FRONT_AND_BACK;
+ break;
+ case PIPE_WINDING_NONE:
+ default:
+ rs->cull_face_en = 0;
+ rs->cull_face = 0;
+ break;
+ }
+
+ if (cso->point_sprite) {
+ rs->point_sprite = (1 << 0);
+ for (i = 0; i < 8; i++) {
+ if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+ rs->point_sprite |= (1 << (8 + i));
+ }
+ } else {
+ rs->point_sprite = 0;
+ }
+
+ return (void *)rs;
+}
+
+static void
+nv20_rasterizer_state_bind(struct pipe_context *pipe, void *rast)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ nv20->rast = (struct nv20_rasterizer_state*)rast;
+
+ draw_set_rasterizer_state(nv20->draw, (nv20->rast ? nv20->rast->templ : NULL));
+
+ nv20->dirty |= NV20_NEW_RAST;
+}
+
+static void
+nv20_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nv20_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nv20_depth_stencil_alpha_state *hw;
+
+ hw = MALLOC(sizeof(struct nv20_depth_stencil_alpha_state));
+
+ hw->depth.func = nvgl_comparison_op(cso->depth.func);
+ hw->depth.write_enable = cso->depth.writemask ? 1 : 0;
+ hw->depth.test_enable = cso->depth.enabled ? 1 : 0;
+
+ hw->stencil.enable = cso->stencil[0].enabled ? 1 : 0;
+ hw->stencil.wmask = cso->stencil[0].write_mask;
+ hw->stencil.func = nvgl_comparison_op(cso->stencil[0].func);
+ hw->stencil.ref = cso->stencil[0].ref_value;
+ hw->stencil.vmask = cso->stencil[0].value_mask;
+ hw->stencil.fail = nvgl_stencil_op(cso->stencil[0].fail_op);
+ hw->stencil.zfail = nvgl_stencil_op(cso->stencil[0].zfail_op);
+ hw->stencil.zpass = nvgl_stencil_op(cso->stencil[0].zpass_op);
+
+ hw->alpha.enabled = cso->alpha.enabled ? 1 : 0;
+ hw->alpha.func = nvgl_comparison_op(cso->alpha.func);
+ hw->alpha.ref = float_to_ubyte(cso->alpha.ref);
+
+ return (void *)hw;
+}
+
+static void
+nv20_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *dsa)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ nv20->dsa = (struct nv20_depth_stencil_alpha_state*)dsa;
+
+ nv20->dirty |= NV20_NEW_DSA;
+}
+
+static void
+nv20_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nv20_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ return draw_create_vertex_shader(nv20->draw, templ);
+}
+
+static void
+nv20_vp_state_bind(struct pipe_context *pipe, void *shader)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ draw_bind_vertex_shader(nv20->draw, (struct draw_vertex_shader *) shader);
+
+ nv20->dirty |= NV20_NEW_VERTPROG;
+}
+
+static void
+nv20_vp_state_delete(struct pipe_context *pipe, void *shader)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ draw_delete_vertex_shader(nv20->draw, (struct draw_vertex_shader *) shader);
+}
+
+static void *
+nv20_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv20_fragment_program *fp;
+
+ fp = CALLOC(1, sizeof(struct nv20_fragment_program));
+ fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ tgsi_scan_shader(cso->tokens, &fp->info);
+
+ return (void *)fp;
+}
+
+static void
+nv20_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+ struct nv20_fragment_program *fp = hwcso;
+
+ nv20->fragprog.current = fp;
+ nv20->dirty |= NV20_NEW_FRAGPROG;
+}
+
+static void
+nv20_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+ struct nv20_fragment_program *fp = hwcso;
+
+ nv20_fragprog_destroy(nv20, fp);
+ FREE((void*)fp->pipe.tokens);
+ FREE(fp);
+}
+
+static void
+nv20_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ nv20->blend_color = (struct pipe_blend_color*)bcol;
+
+ nv20->dirty |= NV20_NEW_BLENDCOL;
+}
+
+static void
+nv20_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ draw_set_clip_state(nv20->draw, clip);
+}
+
+static void
+nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ const struct pipe_constant_buffer *buf )
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+
+ assert(shader < PIPE_SHADER_TYPES);
+ assert(index == 0);
+
+ if (buf) {
+ void *mapped;
+ if (buf->size && (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)))
+ {
+ memcpy(nv20->constbuf[shader], mapped, buf->size);
+ nv20->constbuf_nr[shader] =
+ buf->size / (4 * sizeof(float));
+ ws->buffer_unmap(ws, buf->buffer);
+ }
+ }
+}
+
+static void
+nv20_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ nv20->framebuffer = (struct pipe_framebuffer_state*)fb;
+
+ nv20->dirty |= NV20_NEW_FRAMEBUFFER;
+}
+
+static void
+nv20_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ NOUVEAU_ERR("line stipple hahaha\n");
+}
+
+static void
+nv20_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ nv20->scissor = (struct pipe_scissor_state*)s;
+
+ nv20->dirty |= NV20_NEW_SCISSOR;
+}
+
+static void
+nv20_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ nv20->viewport = (struct pipe_viewport_state*)vpt;
+
+ draw_set_viewport_state(nv20->draw, nv20->viewport);
+
+ nv20->dirty |= NV20_NEW_VIEWPORT;
+}
+
+static void
+nv20_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ memcpy(nv20->vtxbuf, vb, sizeof(*vb) * count);
+ nv20->dirty |= NV20_NEW_VTXARRAYS;
+
+ draw_set_vertex_buffers(nv20->draw, count, vb);
+}
+
+static void
+nv20_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_element *ve)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+
+ memcpy(nv20->vtxelt, ve, sizeof(*ve) * count);
+ nv20->dirty |= NV20_NEW_VTXARRAYS;
+
+ draw_set_vertex_elements(nv20->draw, count, ve);
+}
+
+void
+nv20_init_state_functions(struct nv20_context *nv20)
+{
+ nv20->pipe.create_blend_state = nv20_blend_state_create;
+ nv20->pipe.bind_blend_state = nv20_blend_state_bind;
+ nv20->pipe.delete_blend_state = nv20_blend_state_delete;
+
+ nv20->pipe.create_sampler_state = nv20_sampler_state_create;
+ nv20->pipe.bind_sampler_states = nv20_sampler_state_bind;
+ nv20->pipe.delete_sampler_state = nv20_sampler_state_delete;
+ nv20->pipe.set_sampler_textures = nv20_set_sampler_texture;
+
+ nv20->pipe.create_rasterizer_state = nv20_rasterizer_state_create;
+ nv20->pipe.bind_rasterizer_state = nv20_rasterizer_state_bind;
+ nv20->pipe.delete_rasterizer_state = nv20_rasterizer_state_delete;
+
+ nv20->pipe.create_depth_stencil_alpha_state =
+ nv20_depth_stencil_alpha_state_create;
+ nv20->pipe.bind_depth_stencil_alpha_state =
+ nv20_depth_stencil_alpha_state_bind;
+ nv20->pipe.delete_depth_stencil_alpha_state =
+ nv20_depth_stencil_alpha_state_delete;
+
+ nv20->pipe.create_vs_state = nv20_vp_state_create;
+ nv20->pipe.bind_vs_state = nv20_vp_state_bind;
+ nv20->pipe.delete_vs_state = nv20_vp_state_delete;
+
+ nv20->pipe.create_fs_state = nv20_fp_state_create;
+ nv20->pipe.bind_fs_state = nv20_fp_state_bind;
+ nv20->pipe.delete_fs_state = nv20_fp_state_delete;
+
+ nv20->pipe.set_blend_color = nv20_set_blend_color;
+ nv20->pipe.set_clip_state = nv20_set_clip_state;
+ nv20->pipe.set_constant_buffer = nv20_set_constant_buffer;
+ nv20->pipe.set_framebuffer_state = nv20_set_framebuffer_state;
+ nv20->pipe.set_polygon_stipple = nv20_set_polygon_stipple;
+ nv20->pipe.set_scissor_state = nv20_set_scissor_state;
+ nv20->pipe.set_viewport_state = nv20_set_viewport_state;
+
+ nv20->pipe.set_vertex_buffers = nv20_set_vertex_buffers;
+ nv20->pipe.set_vertex_elements = nv20_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_state.h b/src/gallium/drivers/nv20/nv20_state.h
new file mode 100644
index 00000000000..34f402fdcbf
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_state.h
@@ -0,0 +1,139 @@
+#ifndef __NV20_STATE_H__
+#define __NV20_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+struct nv20_blend_state {
+ uint32_t b_enable;
+ uint32_t b_srcfunc;
+ uint32_t b_dstfunc;
+
+ uint32_t c_mask;
+
+ uint32_t d_enable;
+};
+
+struct nv20_sampler_state {
+ uint32_t wrap;
+ uint32_t en;
+ uint32_t filt;
+ uint32_t bcol;
+};
+
+struct nv20_rasterizer_state {
+ uint32_t shade_model;
+
+ uint32_t line_width;
+ uint32_t line_smooth_en;
+
+ uint32_t point_size;
+
+ uint32_t poly_smooth_en;
+
+ uint32_t poly_mode_front;
+ uint32_t poly_mode_back;
+
+ uint32_t front_face;
+ uint32_t cull_face;
+ uint32_t cull_face_en;
+
+ uint32_t point_sprite;
+
+ const struct pipe_rasterizer_state *templ;
+};
+
+struct nv20_vertex_program_exec {
+ uint32_t data[4];
+ boolean has_branch_offset;
+ int const_index;
+};
+
+struct nv20_vertex_program_data {
+ int index; /* immediates == -1 */
+ float value[4];
+};
+
+struct nv20_vertex_program {
+ const struct pipe_shader_state *pipe;
+
+ boolean translated;
+ struct nv20_vertex_program_exec *insns;
+ unsigned nr_insns;
+ struct nv20_vertex_program_data *consts;
+ unsigned nr_consts;
+
+ struct nouveau_resource *exec;
+ unsigned exec_start;
+ struct nouveau_resource *data;
+ unsigned data_start;
+ unsigned data_start_min;
+
+ uint32_t ir;
+ uint32_t or;
+};
+
+struct nv20_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+struct nv20_fragment_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+
+ boolean translated;
+ boolean on_hw;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nv20_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ uint32_t fp_reg_control;
+};
+
+
+struct nv20_depth_stencil_alpha_state {
+ struct {
+ uint32_t func;
+ uint32_t write_enable;
+ uint32_t test_enable;
+ } depth;
+
+ struct {
+ uint32_t enable;
+ uint32_t wmask;
+ uint32_t func;
+ uint32_t ref;
+ uint32_t vmask;
+ uint32_t fail;
+ uint32_t zfail;
+ uint32_t zpass;
+ } stencil;
+
+ struct {
+ uint32_t enabled;
+ uint32_t func;
+ uint32_t ref;
+ } alpha;
+};
+
+struct nv20_miptree {
+ struct pipe_texture base;
+
+ struct pipe_buffer *buffer;
+ uint total_size;
+
+ struct {
+ uint pitch;
+ uint *image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+#endif
diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c
new file mode 100644
index 00000000000..5265bf3a314
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_state_emit.c
@@ -0,0 +1,359 @@
+#include "nv20_context.h"
+#include "nv20_state.h"
+#include "draw/draw_context.h"
+
+static void nv20_state_emit_blend(struct nv20_context* nv20)
+{
+ struct nv20_blend_state *b = nv20->blend;
+
+ BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1);
+ OUT_RING (b->d_enable);
+
+ BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
+ OUT_RING (b->b_enable);
+
+ BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 2);
+ OUT_RING (b->b_srcfunc);
+ OUT_RING (b->b_dstfunc);
+
+ BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1);
+ OUT_RING (b->c_mask);
+}
+
+static void nv20_state_emit_blend_color(struct nv20_context* nv20)
+{
+ struct pipe_blend_color *c = nv20->blend_color;
+
+ BEGIN_RING(kelvin, NV20TCL_BLEND_COLOR, 1);
+ OUT_RING ((float_to_ubyte(c->color[3]) << 24)|
+ (float_to_ubyte(c->color[0]) << 16)|
+ (float_to_ubyte(c->color[1]) << 8) |
+ (float_to_ubyte(c->color[2]) << 0));
+}
+
+static void nv20_state_emit_rast(struct nv20_context* nv20)
+{
+ struct nv20_rasterizer_state *r = nv20->rast;
+
+ BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 2);
+ OUT_RING (r->shade_model);
+ OUT_RING (r->line_width);
+
+
+ BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1);
+ OUT_RING (r->point_size);
+
+ BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (r->poly_mode_front);
+ OUT_RING (r->poly_mode_back);
+
+
+ BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2);
+ OUT_RING (r->cull_face);
+ OUT_RING (r->front_face);
+
+ BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2);
+ OUT_RING (r->line_smooth_en);
+ OUT_RING (r->poly_smooth_en);
+
+ BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (r->cull_face_en);
+}
+
+static void nv20_state_emit_dsa(struct nv20_context* nv20)
+{
+ struct nv20_depth_stencil_alpha_state *d = nv20->dsa;
+
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1);
+ OUT_RING (d->depth.func);
+
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (d->depth.write_enable);
+
+ BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (d->depth.test_enable);
+
+#if 0
+ BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1);
+ OUT_RING (d->stencil.enable);
+ BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7);
+ OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7);
+#endif
+
+ BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (d->alpha.enabled);
+
+ BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1);
+ OUT_RING (d->alpha.func);
+
+ BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_REF, 1);
+ OUT_RING (d->alpha.ref);
+}
+
+static void nv20_state_emit_viewport(struct nv20_context* nv20)
+{
+}
+
+static void nv20_state_emit_scissor(struct nv20_context* nv20)
+{
+ /* NV20TCL_SCISSOR_* is probably a software method */
+/* struct pipe_scissor_state *s = nv20->scissor;
+ BEGIN_RING(kelvin, NV20TCL_SCISSOR_HORIZ, 2);
+ OUT_RING (((s->maxx - s->minx) << 16) | s->minx);
+ OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/
+}
+
+static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
+{
+ struct pipe_framebuffer_state* fb = nv20->framebuffer;
+ struct pipe_surface *rt, *zeta = NULL;
+ uint32_t rt_format, w, h;
+ int colour_format = 0, zeta_format = 0;
+
+ w = fb->cbufs[0]->width;
+ h = fb->cbufs[0]->height;
+ colour_format = fb->cbufs[0]->format;
+ rt = fb->cbufs[0];
+
+ if (fb->zsbuf) {
+ if (colour_format) {
+ assert(w == fb->zsbuf->width);
+ assert(h == fb->zsbuf->height);
+ } else {
+ w = fb->zsbuf->width;
+ h = fb->zsbuf->height;
+ }
+
+ zeta_format = fb->zsbuf->format;
+ zeta = fb->zsbuf;
+ }
+
+ rt_format = NV20TCL_RT_FORMAT_TYPE_LINEAR | 0x20;
+
+ switch (colour_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case 0:
+ rt_format |= NV20TCL_RT_FORMAT_COLOR_A8R8G8B8;
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ rt_format |= NV20TCL_RT_FORMAT_COLOR_R5G6B5;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (zeta) {
+ BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1);
+ OUT_RING (rt->stride | (zeta->stride << 16));
+ } else {
+ BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1);
+ OUT_RING (rt->stride | (rt->stride << 16));
+ }
+
+ nv20->rt[0] = rt->buffer;
+
+ if (zeta_format)
+ {
+ nv20->zeta = zeta->buffer;
+ }
+
+ BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3);
+ OUT_RING ((w << 16) | 0);
+ OUT_RING ((h << 16) | 0);
+ OUT_RING (rt_format);
+ BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+ OUT_RING (((w - 1) << 16) | 0);
+ OUT_RING (((h - 1) << 16) | 0);
+}
+
+static void nv20_vertex_layout(struct nv20_context *nv20)
+{
+ struct nv20_fragment_program *fp = nv20->fragprog.current;
+ struct draw_context *dc = nv20->draw;
+ uint32_t src;
+ int i;
+ struct vertex_info *vinfo = &nv20->vertex_info;
+ const enum interp_mode colorInterp = INTERP_LINEAR;
+ boolean colors[2] = { FALSE };
+ boolean generics[4] = { FALSE };
+ boolean fog = FALSE;
+
+ memset(vinfo, 0, sizeof(*vinfo));
+
+ /*
+ * NV10 hardware vertex attribute order:
+ * fog, weight, normal, tex1, tex0, 2nd color, color, position
+ * vinfo->hwfmt[0] has a used-bit corresponding to each of these.
+ * relation to TGSI_SEMANTIC_*:
+ * - POSITION: position (always used)
+ * - COLOR: 2nd color, color
+ * - GENERIC: weight, normal, tex1, tex0
+ * - FOG: fog
+ */
+
+ for (i = 0; i < fp->info.num_inputs; i++) {
+ int isn = fp->info.input_semantic_name[i];
+ int isi = fp->info.input_semantic_index[i];
+ switch (isn) {
+ case TGSI_SEMANTIC_POSITION:
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ assert(isi < 2);
+ colors[isi] = TRUE;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ assert(isi < 4);
+ generics[isi] = TRUE;
+ break;
+ case TGSI_SEMANTIC_FOG:
+ fog = TRUE;
+ break;
+ default:
+ assert(0 && "unknown input_semantic_name");
+ }
+ }
+
+ if (fog) {
+ int src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0);
+ draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
+ vinfo->hwfmt[0] |= (1 << 7);
+ }
+
+ for (i = 3; i >= 0; i--) {
+ int src;
+ if (!generics[i])
+ continue;
+ src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ vinfo->hwfmt[0] |= (1 << (i + 3));
+ }
+
+ if (colors[1]) {
+ int src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+ vinfo->hwfmt[0] |= (1 << 2);
+ }
+
+ if (colors[0]) {
+ int src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+ vinfo->hwfmt[0] |= (1 << 1);
+ }
+
+ /* always do position */
+ src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
+ vinfo->hwfmt[0] |= (1 << 0);
+
+ draw_compute_vertex_size(vinfo);
+}
+
+void
+nv20_emit_hw_state(struct nv20_context *nv20)
+{
+ int i;
+
+ if (nv20->dirty & NV20_NEW_VERTPROG) {
+ //nv20_vertprog_bind(nv20, nv20->vertprog.current);
+ nv20->dirty &= ~NV20_NEW_VERTPROG;
+ }
+
+ if (nv20->dirty & NV20_NEW_FRAGPROG) {
+ nv20_fragprog_bind(nv20, nv20->fragprog.current);
+ /*XXX: clear NV20_NEW_FRAGPROG if no new program uploaded */
+ nv20->dirty_samplers |= (1<<10);
+ nv20->dirty_samplers = 0;
+ }
+
+ if (nv20->dirty_samplers || (nv20->dirty & NV20_NEW_FRAGPROG)) {
+ nv20_fragtex_bind(nv20);
+ nv20->dirty &= ~NV20_NEW_FRAGPROG;
+ }
+
+ if (nv20->dirty & NV20_NEW_VTXARRAYS) {
+ nv20->dirty &= ~NV20_NEW_VTXARRAYS;
+ nv20_vertex_layout(nv20);
+ nv20_vtxbuf_bind(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_BLEND) {
+ nv20->dirty &= ~NV20_NEW_BLEND;
+ nv20_state_emit_blend(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_BLENDCOL) {
+ nv20->dirty &= ~NV20_NEW_BLENDCOL;
+ nv20_state_emit_blend_color(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_RAST) {
+ nv20->dirty &= ~NV20_NEW_RAST;
+ nv20_state_emit_rast(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_DSA) {
+ nv20->dirty &= ~NV20_NEW_DSA;
+ nv20_state_emit_dsa(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_VIEWPORT) {
+ nv20->dirty &= ~NV20_NEW_VIEWPORT;
+ nv20_state_emit_viewport(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_SCISSOR) {
+ nv20->dirty &= ~NV20_NEW_SCISSOR;
+ nv20_state_emit_scissor(nv20);
+ }
+
+ if (nv20->dirty & NV20_NEW_FRAMEBUFFER) {
+ nv20->dirty &= ~NV20_NEW_FRAMEBUFFER;
+ nv20_state_emit_framebuffer(nv20);
+ }
+
+ /* Emit relocs for every referenced buffer.
+ * This is to ensure the bufmgr has an accurate idea of how
+ * the buffer is used. This isn't very efficient, but we don't
+ * seem to take a significant performance hit. Will be improved
+ * at some point. Vertex arrays are emitted by nv20_vbo.c
+ */
+
+ /* Render target */
+/* XXX figre out who's who for NV10TCL_DMA_* and fill accordingly
+ * BEGIN_RING(kelvin, NV20TCL_DMA_COLOR0, 1);
+ * OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); */
+ BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ if (nv20->zeta) {
+/* XXX
+ * BEGIN_RING(kelvin, NV20TCL_DMA_ZETA, 1);
+ * OUT_RELOCo(nv20->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); */
+ BEGIN_RING(kelvin, NV20TCL_ZETA_OFFSET, 1);
+ OUT_RELOCl(nv20->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ /* XXX for when we allocate LMA on nv17 */
+/* BEGIN_RING(kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
+ OUT_RELOCl(nv20->zeta + lma_offset);*/
+ }
+
+ /* Vertex buffer */
+ BEGIN_RING(kelvin, NV20TCL_DMA_VTXBUF0, 1);
+ OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ /* Texture images */
+ for (i = 0; i < 2; i++) {
+ if (!(nv20->fp_samplers & (1 << i)))
+ continue;
+ BEGIN_RING(kelvin, NV20TCL_TX_OFFSET(i), 1);
+ OUT_RELOCl(nv20->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_RING(kelvin, NV20TCL_TX_FORMAT(i), 1);
+ OUT_RELOCd(nv20->tex[i].buffer, nv20->tex[i].format,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
+ NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0,
+ NV20TCL_TX_FORMAT_DMA1);
+ }
+}
+
diff --git a/src/gallium/drivers/nv20/nv20_surface.c b/src/gallium/drivers/nv20/nv20_surface.c
new file mode 100644
index 00000000000..7bc68d0ca26
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_surface.c
@@ -0,0 +1,64 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv20_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+#include "util/u_tile.h"
+
+static void
+nv20_surface_copy(struct pipe_context *pipe, boolean do_flip,
+ struct pipe_surface *dest, unsigned destx, unsigned desty,
+ struct pipe_surface *src, unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+ struct nouveau_winsys *nvws = nv20->nvws;
+
+ nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy,
+ width, height);
+}
+
+static void
+nv20_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nv20_context *nv20 = nv20_context(pipe);
+ struct nouveau_winsys *nvws = nv20->nvws;
+
+ nvws->surface_fill(nvws, dest, destx, desty, width, height, value);
+}
+
+void
+nv20_init_surface_functions(struct nv20_context *nv20)
+{
+ nv20->pipe.surface_copy = nv20_surface_copy;
+ nv20->pipe.surface_fill = nv20_surface_fill;
+}
diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c
new file mode 100644
index 00000000000..4edc4efebd8
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_vbo.c
@@ -0,0 +1,77 @@
+#include "draw/draw_context.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv20_context.h"
+#include "nv20_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+
+boolean nv20_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim, unsigned start, unsigned count)
+{
+ struct nv20_context *nv20 = nv20_context( pipe );
+ struct draw_context *draw = nv20->draw;
+ unsigned i;
+
+ nv20_emit_hw_state(nv20);
+
+ /*
+ * Map vertex buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv20->vtxbuf[i].buffer) {
+ void *buf
+ = pipe->winsys->buffer_map(pipe->winsys,
+ nv20->vtxbuf[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
+ }
+ /* Map index buffer, if present */
+ if (indexBuffer) {
+ void *mapped_indexes
+ = pipe->winsys->buffer_map(pipe->winsys, indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes);
+ }
+ else {
+ /* no index/element buffer */
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ draw_set_mapped_constant_buffer(draw,
+ nv20->constbuf[PIPE_SHADER_VERTEX],
+ nv20->constbuf_nr[PIPE_SHADER_VERTEX]);
+
+ /* draw! */
+ draw_arrays(nv20->draw, prim, start, count);
+
+ /*
+ * unmap vertex/index buffers
+ */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (nv20->vtxbuf[i].buffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, nv20->vtxbuf[i].buffer);
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ }
+ }
+ if (indexBuffer) {
+ pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer);
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ return TRUE;
+}
+
+boolean nv20_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
+{
+ return nv20_draw_elements(pipe, NULL, 0, prim, start, count);
+}
+
+
+
diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c
new file mode 100644
index 00000000000..a885fcd7a56
--- /dev/null
+++ b/src/gallium/drivers/nv20/nv20_vertprog.c
@@ -0,0 +1,838 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "nv20_context.h"
+#include "nv20_state.h"
+
+/* TODO (at least...):
+ * 1. Indexed consts + ARL
+ * 2. Arb. swz/negation
+ * 3. NV_vp11, NV_vp2, NV_vp3 features
+ * - extra arith opcodes
+ * - branching
+ * - texture sampling
+ * - indexed attribs
+ * - indexed results
+ * 4. bugs
+ */
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 8
+#define MASK_Y 4
+#define MASK_Z 2
+#define MASK_W 1
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE 0
+#define DEF_CTEST 0
+#include "nv20_shader.h"
+
+#define swz(s,x,y,z,w) nv20_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv20_sr_neg((s))
+#define abs(s) nv20_sr_abs((s))
+
+struct nv20_vpc {
+ struct nv20_vertex_program *vp;
+
+ struct nv20_vertex_program_exec *vpi;
+
+ unsigned output_map[PIPE_MAX_SHADER_OUTPUTS];
+
+ int high_temp;
+ int temp_temp_count;
+
+ struct nv20_sreg *imm;
+ unsigned nr_imm;
+};
+
+static struct nv20_sreg
+temp(struct nv20_vpc *vpc)
+{
+ int idx;
+
+ idx = vpc->temp_temp_count++;
+ idx += vpc->high_temp + 1;
+ return nv20_sr(NV30SR_TEMP, idx);
+}
+
+static struct nv20_sreg
+constant(struct nv20_vpc *vpc, int pipe, float x, float y, float z, float w)
+{
+ struct nv20_vertex_program *vp = vpc->vp;
+ struct nv20_vertex_program_data *vpd;
+ int idx;
+
+ if (pipe >= 0) {
+ for (idx = 0; idx < vp->nr_consts; idx++) {
+ if (vp->consts[idx].index == pipe)
+ return nv20_sr(NV30SR_CONST, idx);
+ }
+ }
+
+ idx = vp->nr_consts++;
+ vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
+ vpd = &vp->consts[idx];
+
+ vpd->index = pipe;
+ vpd->value[0] = x;
+ vpd->value[1] = y;
+ vpd->value[2] = z;
+ vpd->value[3] = w;
+ return nv20_sr(NV30SR_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv20_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2))
+
+static void
+emit_src(struct nv20_vpc *vpc, uint32_t *hw, int pos, struct nv20_sreg src)
+{
+ struct nv20_vertex_program *vp = vpc->vp;
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NV30SR_TEMP:
+ sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT);
+ sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT);
+ break;
+ case NV30SR_INPUT:
+ sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ vp->ir |= (1 << src.index);
+ hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT);
+ break;
+ case NV30SR_CONST:
+ sr |= (NV30_VP_SRC_REG_TYPE_CONST <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ assert(vpc->vpi->const_index == -1 ||
+ vpc->vpi->const_index == src.index);
+ vpc->vpi->const_index = src.index;
+ break;
+ case NV30SR_NONE:
+ sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NV30_VP_SRC_NEGATE;
+
+ if (src.abs)
+ hw[0] |= (1 << (21 + pos));
+
+ sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) |
+ (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) |
+ (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) |
+ (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT));
+
+/*
+ * |VVV|
+ * d�.�b
+ * \u/
+ *
+ */
+
+ switch (pos) {
+ case 0:
+ hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >>
+ NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT;
+ hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) <<
+ NV30_VP_INST_SRC0L_SHIFT;
+ break;
+ case 1:
+ hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT;
+ break;
+ case 2:
+ hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >>
+ NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT;
+ hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) <<
+ NV30_VP_INST_SRC2L_SHIFT;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
+emit_dst(struct nv20_vpc *vpc, uint32_t *hw, int slot, struct nv20_sreg dst)
+{
+ struct nv20_vertex_program *vp = vpc->vp;
+
+ switch (dst.type) {
+ case NV30SR_TEMP:
+ hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
+ break;
+ case NV30SR_OUTPUT:
+ switch (dst.index) {
+ case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+ case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+ case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+ case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+ case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break;
+ case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
+ case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
+ case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
+ case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
+ case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
+ case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
+ case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
+ case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
+ case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+ default:
+ break;
+ }
+
+ hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
+ hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+
+ /*XXX: no way this is entirely correct, someone needs to
+ * figure out what exactly it is.
+ */
+ hw[3] |= 0x800;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
+nv20_vp_arith(struct nv20_vpc *vpc, int slot, int op,
+ struct nv20_sreg dst, int mask,
+ struct nv20_sreg s0, struct nv20_sreg s1,
+ struct nv20_sreg s2)
+{
+ struct nv20_vertex_program *vp = vpc->vp;
+ uint32_t *hw;
+
+ vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
+ vpc->vpi = &vp->insns[vp->nr_insns - 1];
+ memset(vpc->vpi, 0, sizeof(*vpc->vpi));
+ vpc->vpi->const_index = -1;
+
+ hw = vpc->vpi->data;
+
+ hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT);
+ hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) |
+ (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) |
+ (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) |
+ (3 << NV30_VP_INST_COND_SWZ_W_SHIFT));
+
+ hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
+// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK;
+// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT);
+
+ if (dst.type == NV30SR_OUTPUT) {
+ if (slot)
+ hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
+ else
+ hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
+ } else {
+ if (slot)
+ hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
+ else
+ hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
+ }
+
+ emit_dst(vpc, hw, slot, dst);
+ emit_src(vpc, hw, 0, s0);
+ emit_src(vpc, hw, 1, s1);
+ emit_src(vpc, hw, 2, s2);
+}
+
+static INLINE struct nv20_sreg
+tgsi_src(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
+ struct nv20_sreg src;
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ src = nv20_sr(NV30SR_INPUT, fsrc->SrcRegister.Index);
+ break;
+ case TGSI_FILE_CONSTANT:
+ src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ src = vpc->imm[fsrc->SrcRegister.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ if (vpc->high_temp < fsrc->SrcRegister.Index)
+ vpc->high_temp = fsrc->SrcRegister.Index;
+ src = nv20_sr(NV30SR_TEMP, fsrc->SrcRegister.Index);
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ break;
+ }
+
+ src.abs = fsrc->SrcRegisterExtMod.Absolute;
+ src.negate = fsrc->SrcRegister.Negate;
+ src.swz[0] = fsrc->SrcRegister.SwizzleX;
+ src.swz[1] = fsrc->SrcRegister.SwizzleY;
+ src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+ src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ return src;
+}
+
+static INLINE struct nv20_sreg
+tgsi_dst(struct nv20_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
+ struct nv20_sreg dst;
+
+ switch (fdst->DstRegister.File) {
+ case TGSI_FILE_OUTPUT:
+ dst = nv20_sr(NV30SR_OUTPUT,
+ vpc->output_map[fdst->DstRegister.Index]);
+
+ break;
+ case TGSI_FILE_TEMPORARY:
+ dst = nv20_sr(NV30SR_TEMP, fdst->DstRegister.Index);
+ if (vpc->high_temp < dst.index)
+ vpc->high_temp = dst.index;
+ break;
+ default:
+ NOUVEAU_ERR("bad dst file\n");
+ break;
+ }
+
+ return dst;
+}
+
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+ int mask = 0;
+
+ if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
+ if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
+ if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
+ if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
+ return mask;
+}
+
+static boolean
+nv20_vertprog_parse_instruction(struct nv20_vpc *vpc,
+ const struct tgsi_full_instruction *finst)
+{
+ struct nv20_sreg src[3], dst, tmp;
+ struct nv20_sreg none = nv20_sr(NV30SR_NONE, 0);
+ int mask;
+ int ai = -1, ci = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ vpc->temp_temp_count = 0;
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(vpc, fsrc);
+ }
+ }
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+ ai = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ /*XXX: index comparison is broken now that consts come from
+ * two different register files.
+ */
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_IMMEDIATE:
+ if (ci == -1 || ci == fsrc->SrcRegister.Index) {
+ ci = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
+ mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
+ break;
+ case TGSI_OPCODE_ARL:
+ arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_EXP:
+ arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LIT:
+ arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_POW:
+ tmp = temp(vpc);
+ arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
+ swz(src[0], X, X, X, X));
+ arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+ swz(src[1], X, X, X, X), none);
+ arith(vpc, 1, OP_EX2, dst, mask, none, none,
+ swz(tmp, X, X, X, X));
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_RET:
+ break;
+ case TGSI_OPCODE_RSQ:
+ arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGT:
+ arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
+ break;
+ case TGSI_OPCODE_XPD:
+ tmp = temp(vpc);
+ arith(vpc, 0, OP_MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static boolean
+nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ int hw;
+
+ switch (fdec->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = NV30_VP_INST_DEST_POS;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV30_VP_INST_DEST_COL0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV30_VP_INST_DEST_COL1;
+ } else {
+ NOUVEAU_ERR("bad colour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_BCOLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV30_VP_INST_DEST_BFC0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV30_VP_INST_DEST_BFC1;
+ } else {
+ NOUVEAU_ERR("bad bcolour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ hw = NV30_VP_INST_DEST_FOGC;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ hw = NV30_VP_INST_DEST_PSZ;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ if (fdec->Semantic.SemanticIndex <= 7) {
+ hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+ } else {
+ NOUVEAU_ERR("bad generic semantic index\n");
+ return FALSE;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("bad output semantic\n");
+ return FALSE;
+ }
+
+ vpc->output_map[fdec->DeclarationRange.First] = hw;
+ return TRUE;
+}
+
+static boolean
+nv20_vertprog_prepare(struct nv20_vpc *vpc)
+{
+ struct tgsi_parse_context p;
+ int nr_imm = 0;
+
+ tgsi_parse_init(&p, vpc->vp->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch(tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ nr_imm++;
+ break;
+ default:
+ break;
+ }
+ }
+ tgsi_parse_free(&p);
+
+ if (nr_imm) {
+ vpc->imm = CALLOC(nr_imm, sizeof(struct nv20_sreg));
+ assert(vpc->imm);
+ }
+
+ return TRUE;
+}
+
+static void
+nv20_vertprog_translate(struct nv20_context *nv20,
+ struct nv20_vertex_program *vp)
+{
+ struct tgsi_parse_context parse;
+ struct nv20_vpc *vpc = NULL;
+
+ tgsi_dump(vp->pipe.tokens,0);
+
+ vpc = CALLOC(1, sizeof(struct nv20_vpc));
+ if (!vpc)
+ return;
+ vpc->vp = vp;
+ vpc->high_temp = -1;
+
+ if (!nv20_vertprog_prepare(vpc)) {
+ FREE(vpc);
+ return;
+ }
+
+ tgsi_parse_init(&parse, vp->pipe.tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *fdec;
+ fdec = &parse.FullToken.FullDeclaration;
+ switch (fdec->Declaration.File) {
+ case TGSI_FILE_OUTPUT:
+ if (!nv20_vertprog_parse_decl_output(vpc, fdec))
+ goto out_err;
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ const struct tgsi_full_immediate *imm;
+
+ imm = &parse.FullToken.FullImmediate;
+ assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+// assert(imm->Immediate.Size == 4);
+ vpc->imm[vpc->nr_imm++] =
+ constant(vpc, -1,
+ imm->u.ImmediateFloat32[0].Float,
+ imm->u.ImmediateFloat32[1].Float,
+ imm->u.ImmediateFloat32[2].Float,
+ imm->u.ImmediateFloat32[3].Float);
+ }
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+ finst = &parse.FullToken.FullInstruction;
+ if (!nv20_vertprog_parse_instruction(vpc, finst))
+ goto out_err;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST;
+ vp->translated = TRUE;
+out_err:
+ tgsi_parse_free(&parse);
+ FREE(vpc);
+}
+
+static boolean
+nv20_vertprog_validate(struct nv20_context *nv20)
+{
+ struct nouveau_winsys *nvws = nv20->nvws;
+ struct pipe_winsys *ws = nv20->pipe.winsys;
+ struct nouveau_grobj *rankine = nv20->screen->rankine;
+ struct nv20_vertex_program *vp;
+ struct pipe_buffer *constbuf;
+ boolean upload_code = FALSE, upload_data = FALSE;
+ int i;
+
+ vp = nv20->vertprog;
+ constbuf = nv20->constbuf[PIPE_SHADER_VERTEX];
+
+ /* Translate TGSI shader into hw bytecode */
+ if (!vp->translated) {
+ nv20_vertprog_translate(nv20, vp);
+ if (!vp->translated)
+ return FALSE;
+ }
+
+ /* Allocate hw vtxprog exec slots */
+ if (!vp->exec) {
+ struct nouveau_resource *heap = nv20->screen->vp_exec_heap;
+ struct nouveau_stateobj *so;
+ uint vplen = vp->nr_insns;
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
+ while (heap->next && heap->size < vplen) {
+ struct nv20_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->exec);
+ }
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
+ assert(0);
+ }
+
+ so = so_new(2, 0);
+ so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1);
+ so_data (so, vp->exec->start);
+ so_ref(so, &vp->so);
+
+ upload_code = TRUE;
+ }
+
+ /* Allocate hw vtxprog const slots */
+ if (vp->nr_consts && !vp->data) {
+ struct nouveau_resource *heap = nv20->screen->vp_data_heap;
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+ while (heap->next && heap->size < vp->nr_consts) {
+ struct nv20_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->data);
+ }
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
+ assert(0);
+ }
+
+ /*XXX: handle this some day */
+ assert(vp->data->start >= vp->data_start_min);
+
+ upload_data = TRUE;
+ if (vp->data_start != vp->data->start)
+ upload_code = TRUE;
+ }
+
+ /* If exec or data segments moved we need to patch the program to
+ * fixup offsets and register IDs.
+ */
+ if (vp->exec_start != vp->exec->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv20_vertex_program_exec *vpi = &vp->insns[i];
+
+ if (vpi->has_branch_offset) {
+ assert(0);
+ }
+ }
+
+ vp->exec_start = vp->exec->start;
+ }
+
+ if (vp->nr_consts && vp->data_start != vp->data->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv20_vertex_program_exec *vpi = &vp->insns[i];
+
+ if (vpi->const_index >= 0) {
+ vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK;
+ vpi->data[1] |=
+ (vpi->const_index + vp->data->start) <<
+ NV30_VP_INST_CONST_SRC_SHIFT;
+
+ }
+ }
+
+ vp->data_start = vp->data->start;
+ }
+
+ /* Update + Upload constant values */
+ if (vp->nr_consts) {
+ float *map = NULL;
+
+ if (constbuf) {
+ map = ws->buffer_map(ws, constbuf,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ }
+
+ for (i = 0; i < vp->nr_consts; i++) {
+ struct nv20_vertex_program_data *vpd = &vp->consts[i];
+
+ if (vpd->index >= 0) {
+ if (!upload_data &&
+ !memcmp(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float)))
+ continue;
+ memcpy(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float));
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
+ OUT_RING (i + vp->data->start);
+ OUT_RINGp ((uint32_t *)vpd->value, 4);
+ }
+
+ if (constbuf) {
+ ws->buffer_unmap(ws, constbuf);
+ }
+ }
+
+ /* Upload vtxprog */
+ if (upload_code) {
+#if 0
+ for (i = 0; i < vp->nr_insns; i++) {
+ NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+ i, vp->insns[i].data[0], vp->insns[i].data[1],
+ vp->insns[i].data[2], vp->insns[i].data[3]);
+ }
+#endif
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
+ OUT_RING (vp->exec->start);
+ for (i = 0; i < vp->nr_insns; i++) {
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
+ OUT_RINGp (vp->insns[i].data, 4);
+ }
+ }
+
+ if (vp->so != nv20->state.hw[NV30_STATE_VERTPROG]) {
+ so_ref(vp->so, &nv20->state.hw[NV30_STATE_VERTPROG]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+void
+nv20_vertprog_destroy(struct nv20_context *nv20, struct nv20_vertex_program *vp)
+{
+ struct nouveau_winsys *nvws = nv20->screen->nvws;
+
+ vp->translated = FALSE;
+
+ if (vp->nr_insns) {
+ FREE(vp->insns);
+ vp->insns = NULL;
+ vp->nr_insns = 0;
+ }
+
+ if (vp->nr_consts) {
+ FREE(vp->consts);
+ vp->consts = NULL;
+ vp->nr_consts = 0;
+ }
+
+ nvws->res_free(&vp->exec);
+ vp->exec_start = 0;
+ nvws->res_free(&vp->data);
+ vp->data_start = 0;
+ vp->data_start_min = 0;
+
+ vp->ir = vp->or = 0;
+ so_ref(NULL, &vp->so);
+}
+
+struct nv20_state_entry nv20_state_vertprog = {
+ .validate = nv20_vertprog_validate,
+ .dirty = {
+ .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/,
+ .hw = NV30_STATE_VERTPROG,
+ }
+};
diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile
new file mode 100644
index 00000000000..69f2790dfe2
--- /dev/null
+++ b/src/gallium/drivers/nv30/Makefile
@@ -0,0 +1,37 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv30
+
+DRIVER_SOURCES = \
+ nv30_clear.c \
+ nv30_context.c \
+ nv30_draw.c \
+ nv30_fragprog.c \
+ nv30_fragtex.c \
+ nv30_miptree.c \
+ nv30_query.c \
+ nv30_screen.c \
+ nv30_state.c \
+ nv30_state_blend.c \
+ nv30_state_emit.c \
+ nv30_state_fb.c \
+ nv30_state_rasterizer.c \
+ nv30_state_scissor.c \
+ nv30_state_stipple.c \
+ nv30_state_viewport.c \
+ nv30_state_zsa.c \
+ nv30_surface.c \
+ nv30_vbo.c \
+ nv30_vertprog.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/nv30/nv30_clear.c b/src/gallium/drivers/nv30/nv30_clear.c
new file mode 100644
index 00000000000..8c3ca204d58
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_clear.c
@@ -0,0 +1,13 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv30_context.h"
+
+void
+nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue);
+ ps->status = PIPE_SURFACE_STATUS_CLEAR;
+}
diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
new file mode 100644
index 00000000000..2bff28aca9c
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -0,0 +1,72 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv30_context.h"
+#include "nv30_screen.h"
+
+static void
+nv30_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
+ BEGIN_RING(rankine, 0x1fd8, 1);
+ OUT_RING (2);
+ BEGIN_RING(rankine, 0x1fd8, 1);
+ OUT_RING (1);
+ }
+
+ FIRE_RING(fence);
+}
+
+static void
+nv30_destroy(struct pipe_context *pipe)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ if (nv30->draw)
+ draw_destroy(nv30->draw);
+ FREE(nv30);
+}
+
+struct pipe_context *
+nv30_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+ struct nv30_screen *screen = nv30_screen(pscreen);
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv30_context *nv30;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nv30 = CALLOC(1, sizeof(struct nv30_context));
+ if (!nv30)
+ return NULL;
+ nv30->screen = screen;
+ nv30->pctx_id = pctx_id;
+
+ nv30->nvws = nvws;
+
+ nv30->pipe.winsys = ws;
+ nv30->pipe.screen = pscreen;
+ nv30->pipe.destroy = nv30_destroy;
+ nv30->pipe.draw_arrays = nv30_draw_arrays;
+ nv30->pipe.draw_elements = nv30_draw_elements;
+ nv30->pipe.clear = nv30_clear;
+ nv30->pipe.flush = nv30_flush;
+
+ nv30_init_query_functions(nv30);
+ nv30_init_surface_functions(nv30);
+ nv30_init_state_functions(nv30);
+
+ /* Create, configure, and install fallback swtnl path */
+ nv30->draw = draw_create();
+ draw_wide_point_threshold(nv30->draw, 9999999.0);
+ draw_wide_line_threshold(nv30->draw, 9999999.0);
+ draw_enable_line_stipple(nv30->draw, FALSE);
+ draw_enable_point_sprites(nv30->draw, FALSE);
+ draw_set_rasterize_stage(nv30->draw, nv30_draw_render_stage(nv30));
+
+ return &nv30->pipe;
+}
+
diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
new file mode 100644
index 00000000000..b9337697008
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -0,0 +1,212 @@
+#ifndef __NV30_CONTEXT_H__
+#define __NV30_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv30_screen *ctx = nv30->screen
+#include "nouveau/nouveau_push.h"
+#include "nouveau/nouveau_stateobj.h"
+
+#include "nv30_state.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+enum nv30_state_index {
+ NV30_STATE_FB = 0,
+ NV30_STATE_VIEWPORT = 1,
+ NV30_STATE_BLEND = 2,
+ NV30_STATE_RAST = 3,
+ NV30_STATE_ZSA = 4,
+ NV30_STATE_BCOL = 5,
+ NV30_STATE_CLIP = 6,
+ NV30_STATE_SCISSOR = 7,
+ NV30_STATE_STIPPLE = 8,
+ NV30_STATE_FRAGPROG = 9,
+ NV30_STATE_VERTPROG = 10,
+ NV30_STATE_FRAGTEX0 = 11,
+ NV30_STATE_FRAGTEX1 = 12,
+ NV30_STATE_FRAGTEX2 = 13,
+ NV30_STATE_FRAGTEX3 = 14,
+ NV30_STATE_FRAGTEX4 = 15,
+ NV30_STATE_FRAGTEX5 = 16,
+ NV30_STATE_FRAGTEX6 = 17,
+ NV30_STATE_FRAGTEX7 = 18,
+ NV30_STATE_FRAGTEX8 = 19,
+ NV30_STATE_FRAGTEX9 = 20,
+ NV30_STATE_FRAGTEX10 = 21,
+ NV30_STATE_FRAGTEX11 = 22,
+ NV30_STATE_FRAGTEX12 = 23,
+ NV30_STATE_FRAGTEX13 = 24,
+ NV30_STATE_FRAGTEX14 = 25,
+ NV30_STATE_FRAGTEX15 = 26,
+ NV30_STATE_VERTTEX0 = 27,
+ NV30_STATE_VERTTEX1 = 28,
+ NV30_STATE_VERTTEX2 = 29,
+ NV30_STATE_VERTTEX3 = 30,
+ NV30_STATE_VTXBUF = 31,
+ NV30_STATE_VTXFMT = 32,
+ NV30_STATE_VTXATTR = 33,
+ NV30_STATE_MAX = 34
+};
+
+#include "nv30_screen.h"
+
+#define NV30_NEW_BLEND (1 << 0)
+#define NV30_NEW_RAST (1 << 1)
+#define NV30_NEW_ZSA (1 << 2)
+#define NV30_NEW_SAMPLER (1 << 3)
+#define NV30_NEW_FB (1 << 4)
+#define NV30_NEW_STIPPLE (1 << 5)
+#define NV30_NEW_SCISSOR (1 << 6)
+#define NV30_NEW_VIEWPORT (1 << 7)
+#define NV30_NEW_BCOL (1 << 8)
+#define NV30_NEW_VERTPROG (1 << 9)
+#define NV30_NEW_FRAGPROG (1 << 10)
+#define NV30_NEW_ARRAYS (1 << 11)
+#define NV30_NEW_UCP (1 << 12)
+
+struct nv30_rasterizer_state {
+ struct pipe_rasterizer_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv30_zsa_state {
+ struct pipe_depth_stencil_alpha_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv30_blend_state {
+ struct pipe_blend_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+
+struct nv30_state {
+ unsigned scissor_enabled;
+ unsigned stipple_enabled;
+ unsigned viewport_bypass;
+ unsigned fp_samplers;
+
+ uint64_t dirty;
+ struct nouveau_stateobj *hw[NV30_STATE_MAX];
+};
+
+struct nv30_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv30_screen *screen;
+ unsigned pctx_id;
+
+ struct draw_context *draw;
+
+ /* HW state derived from pipe states */
+ struct nv30_state state;
+
+ /* Context state */
+ unsigned dirty;
+ struct pipe_scissor_state scissor;
+ unsigned stipple[32];
+ struct nv30_vertex_program *vertprog;
+ struct nv30_fragment_program *fragprog;
+ struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ unsigned constbuf_nr[PIPE_SHADER_TYPES];
+ struct nv30_rasterizer_state *rasterizer;
+ struct nv30_zsa_state *zsa;
+ struct nv30_blend_state *blend;
+ struct pipe_blend_color blend_colour;
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_buffer *idxbuf;
+ unsigned idxbuf_format;
+ struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+ struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned nr_samplers;
+ unsigned nr_textures;
+ unsigned dirty_samplers;
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned vtxbuf_nr;
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+ unsigned vtxelt_nr;
+ const unsigned *edgeflags;
+};
+
+static INLINE struct nv30_context *
+nv30_context(struct pipe_context *pipe)
+{
+ return (struct nv30_context *)pipe;
+}
+
+struct nv30_state_entry {
+ boolean (*validate)(struct nv30_context *nv30);
+ struct {
+ unsigned pipe;
+ unsigned hw;
+ } dirty;
+};
+
+extern void nv30_init_state_functions(struct nv30_context *nv30);
+extern void nv30_init_surface_functions(struct nv30_context *nv30);
+extern void nv30_init_query_functions(struct nv30_context *nv30);
+
+extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+/* nv30_draw.c */
+extern struct draw_stage *nv30_draw_render_stage(struct nv30_context *nv30);
+
+/* nv30_vertprog.c */
+extern void nv30_vertprog_destroy(struct nv30_context *,
+ struct nv30_vertex_program *);
+
+/* nv30_fragprog.c */
+extern void nv30_fragprog_destroy(struct nv30_context *,
+ struct nv30_fragment_program *);
+
+/* nv30_fragtex.c */
+extern void nv30_fragtex_bind(struct nv30_context *);
+
+/* nv30_state.c and friends */
+extern boolean nv30_state_validate(struct nv30_context *nv30);
+extern void nv30_state_emit(struct nv30_context *nv30);
+extern struct nv30_state_entry nv30_state_rasterizer;
+extern struct nv30_state_entry nv30_state_scissor;
+extern struct nv30_state_entry nv30_state_stipple;
+extern struct nv30_state_entry nv30_state_fragprog;
+extern struct nv30_state_entry nv30_state_vertprog;
+extern struct nv30_state_entry nv30_state_blend;
+extern struct nv30_state_entry nv30_state_blend_colour;
+extern struct nv30_state_entry nv30_state_zsa;
+extern struct nv30_state_entry nv30_state_viewport;
+extern struct nv30_state_entry nv30_state_framebuffer;
+extern struct nv30_state_entry nv30_state_fragtex;
+extern struct nv30_state_entry nv30_state_vbo;
+
+/* nv30_vbo.c */
+extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv30_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start,
+ unsigned count);
+
+/* nv30_clear.c */
+extern void nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+#endif
diff --git a/src/gallium/drivers/nv30/nv30_draw.c b/src/gallium/drivers/nv30/nv30_draw.c
new file mode 100644
index 00000000000..74fc138c051
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_draw.c
@@ -0,0 +1,61 @@
+#include "draw/draw_pipe.h"
+
+#include "nv30_context.h"
+
+struct nv30_draw_stage {
+ struct draw_stage draw;
+ struct nv30_context *nv30;
+};
+
+static void
+nv30_draw_point(struct draw_stage *draw, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv30_draw_line(struct draw_stage *draw, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv30_draw_tri(struct draw_stage *draw, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv30_draw_flush(struct draw_stage *draw, unsigned flags)
+{
+}
+
+static void
+nv30_draw_reset_stipple_counter(struct draw_stage *draw)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv30_draw_destroy(struct draw_stage *draw)
+{
+ FREE(draw);
+}
+
+struct draw_stage *
+nv30_draw_render_stage(struct nv30_context *nv30)
+{
+ struct nv30_draw_stage *nv30draw = CALLOC_STRUCT(nv30_draw_stage);
+
+ nv30draw->nv30 = nv30;
+ nv30draw->draw.draw = nv30->draw;
+ nv30draw->draw.point = nv30_draw_point;
+ nv30draw->draw.line = nv30_draw_line;
+ nv30draw->draw.tri = nv30_draw_tri;
+ nv30draw->draw.flush = nv30_draw_flush;
+ nv30draw->draw.reset_stipple_counter = nv30_draw_reset_stipple_counter;
+ nv30draw->draw.destroy = nv30_draw_destroy;
+
+ return &nv30draw->draw;
+}
+
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
new file mode 100644
index 00000000000..320ba3f4bf4
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -0,0 +1,911 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv30_context.h"
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 1
+#define MASK_Y 2
+#define MASK_Z 4
+#define MASK_W 8
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
+#define DEF_CTEST NV30_FP_OP_COND_TR
+#include "nv30_shader.h"
+
+#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv30_sr_neg((s))
+#define abs(s) nv30_sr_abs((s))
+#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v)
+
+#define MAX_CONSTS 128
+#define MAX_IMM 32
+struct nv30_fpc {
+ struct nv30_fragment_program *fp;
+
+ uint attrib_map[PIPE_MAX_SHADER_INPUTS];
+
+ int high_temp;
+ int temp_temp_count;
+ int num_regs;
+
+ uint depth_id;
+ uint colour_id;
+
+ unsigned inst_offset;
+
+ struct {
+ int pipe;
+ float vals[4];
+ } consts[MAX_CONSTS];
+ int nr_consts;
+
+ struct nv30_sreg imm[MAX_IMM];
+ unsigned nr_imm;
+};
+
+static INLINE struct nv30_sreg
+temp(struct nv30_fpc *fpc)
+{
+ int idx;
+
+ idx = fpc->temp_temp_count++;
+ idx += fpc->high_temp + 1;
+ return nv30_sr(NV30SR_TEMP, idx);
+}
+
+static INLINE struct nv30_sreg
+constant(struct nv30_fpc *fpc, int pipe, float vals[4])
+{
+ int idx;
+
+ if (fpc->nr_consts == MAX_CONSTS)
+ assert(0);
+ idx = fpc->nr_consts++;
+
+ fpc->consts[idx].pipe = pipe;
+ if (pipe == -1)
+ memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
+ return nv30_sr(NV30SR_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \
+ (d), (m), (s0), (s1), (s2))
+#define tex(cc,s,o,u,d,m,s0,s1,s2) \
+ nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \
+ (d), (m), (s0), none, none)
+
+static void
+grow_insns(struct nv30_fpc *fpc, int size)
+{
+ struct nv30_fragment_program *fp = fpc->fp;
+
+ fp->insn_len += size;
+ fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
+}
+
+static void
+emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
+{
+ struct nv30_fragment_program *fp = fpc->fp;
+ uint32_t *hw = &fp->insn[fpc->inst_offset];
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NV30SR_INPUT:
+ sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
+ hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT);
+ break;
+ case NV30SR_OUTPUT:
+ sr |= NV30_FP_REG_SRC_HALF;
+ /* fall-through */
+ case NV30SR_TEMP:
+ sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT);
+ sr |= (src.index << NV30_FP_REG_SRC_SHIFT);
+ break;
+ case NV30SR_CONST:
+ grow_insns(fpc, 4);
+ hw = &fp->insn[fpc->inst_offset];
+ if (fpc->consts[src.index].pipe >= 0) {
+ struct nv30_fragment_program_data *fpd;
+
+ fp->consts = realloc(fp->consts, ++fp->nr_consts *
+ sizeof(*fpd));
+ fpd = &fp->consts[fp->nr_consts - 1];
+ fpd->offset = fpc->inst_offset + 4;
+ fpd->index = fpc->consts[src.index].pipe;
+ memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
+ } else {
+ memcpy(&fp->insn[fpc->inst_offset + 4],
+ fpc->consts[src.index].vals,
+ sizeof(uint32_t) * 4);
+ }
+
+ sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
+ break;
+ case NV30SR_NONE:
+ sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NV30_FP_REG_NEGATE;
+
+ if (src.abs)
+ hw[1] |= (1 << (29 + pos));
+
+ sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) |
+ (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) |
+ (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) |
+ (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT));
+
+ hw[pos + 1] |= sr;
+}
+
+static void
+emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst)
+{
+ struct nv30_fragment_program *fp = fpc->fp;
+ uint32_t *hw = &fp->insn[fpc->inst_offset];
+
+ switch (dst.type) {
+ case NV30SR_TEMP:
+ if (fpc->num_regs < (dst.index + 1))
+ fpc->num_regs = dst.index + 1;
+ break;
+ case NV30SR_OUTPUT:
+ if (dst.index == 1) {
+ fp->fp_control |= 0xe;
+ } else {
+ hw[0] |= NV30_FP_OP_OUT_REG_HALF;
+ }
+ break;
+ case NV30SR_NONE:
+ hw[0] |= (1 << 30);
+ break;
+ default:
+ assert(0);
+ }
+
+ hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT);
+}
+
+static void
+nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,
+ struct nv30_sreg dst, int mask,
+ struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
+{
+ struct nv30_fragment_program *fp = fpc->fp;
+ uint32_t *hw;
+
+ fpc->inst_offset = fp->insn_len;
+ grow_insns(fpc, 4);
+ hw = &fp->insn[fpc->inst_offset];
+ memset(hw, 0, sizeof(uint32_t) * 4);
+
+ if (op == NV30_FP_OP_OPCODE_KIL)
+ fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
+ hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT);
+ hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT);
+ hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT);
+
+ if (sat)
+ hw[0] |= NV30_FP_OP_OUT_SAT;
+
+ if (dst.cc_update)
+ hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE;
+ hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT);
+ hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) |
+ (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) |
+ (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) |
+ (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT));
+
+ emit_dst(fpc, dst);
+ emit_src(fpc, 0, s0);
+ emit_src(fpc, 1, s1);
+ emit_src(fpc, 2, s2);
+}
+
+static void
+nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit,
+ struct nv30_sreg dst, int mask,
+ struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
+{
+ struct nv30_fragment_program *fp = fpc->fp;
+
+ nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
+
+ fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT);
+ fp->samplers |= (1 << unit);
+}
+
+static INLINE struct nv30_sreg
+tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
+{
+ struct nv30_sreg src;
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ src = nv30_sr(NV30SR_INPUT,
+ fpc->attrib_map[fsrc->SrcRegister.Index]);
+ break;
+ case TGSI_FILE_CONSTANT:
+ src = constant(fpc, fsrc->SrcRegister.Index, NULL);
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ assert(fsrc->SrcRegister.Index < fpc->nr_imm);
+ src = fpc->imm[fsrc->SrcRegister.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1);
+ if (fpc->high_temp < src.index)
+ fpc->high_temp = src.index;
+ break;
+ /* This is clearly insane, but gallium hands us shaders like this.
+ * Luckily fragprog results are just temp regs..
+ */
+ case TGSI_FILE_OUTPUT:
+ if (fsrc->SrcRegister.Index == fpc->colour_id)
+ return nv30_sr(NV30SR_OUTPUT, 0);
+ else
+ return nv30_sr(NV30SR_OUTPUT, 1);
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ break;
+ }
+
+ src.abs = fsrc->SrcRegisterExtMod.Absolute;
+ src.negate = fsrc->SrcRegister.Negate;
+ src.swz[0] = fsrc->SrcRegister.SwizzleX;
+ src.swz[1] = fsrc->SrcRegister.SwizzleY;
+ src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+ src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ return src;
+}
+
+static INLINE struct nv30_sreg
+tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
+ int idx;
+
+ switch (fdst->DstRegister.File) {
+ case TGSI_FILE_OUTPUT:
+ if (fdst->DstRegister.Index == fpc->colour_id)
+ return nv30_sr(NV30SR_OUTPUT, 0);
+ else
+ return nv30_sr(NV30SR_OUTPUT, 1);
+ break;
+ case TGSI_FILE_TEMPORARY:
+ idx = fdst->DstRegister.Index + 1;
+ if (fpc->high_temp < idx)
+ fpc->high_temp = idx;
+ return nv30_sr(NV30SR_TEMP, idx);
+ case TGSI_FILE_NULL:
+ return nv30_sr(NV30SR_NONE, 0);
+ default:
+ NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
+ return nv30_sr(NV30SR_NONE, 0);
+ }
+}
+
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+ int mask = 0;
+
+ if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
+ if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
+ if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
+ if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
+ return mask;
+}
+
+static boolean
+src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
+ struct nv30_sreg *src)
+{
+ const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
+ struct nv30_sreg tgsi = tgsi_src(fpc, fsrc);
+ uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
+ uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
+ fsrc->SrcRegisterExtSwz.NegateY,
+ fsrc->SrcRegisterExtSwz.NegateZ,
+ fsrc->SrcRegisterExtSwz.NegateW };
+ uint c;
+
+ for (c = 0; c < 4; c++) {
+ switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ mask |= (1 << c);
+ break;
+ case TGSI_EXTSWIZZLE_ZERO:
+ zero_mask |= (1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ case TGSI_EXTSWIZZLE_ONE:
+ one_mask |= (1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (!tgsi.negate && neg[c])
+ neg_mask |= (1 << c);
+ }
+
+ if (mask == MASK_ALL && !neg_mask)
+ return TRUE;
+
+ *src = temp(fpc);
+
+ if (mask)
+ arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
+
+ if (zero_mask)
+ arith(fpc, 0, SFL, *src, zero_mask, *src, none, none);
+
+ if (one_mask)
+ arith(fpc, 0, STR, *src, one_mask, *src, none, none);
+
+ if (neg_mask) {
+ struct nv30_sreg one = temp(fpc);
+ arith(fpc, 0, STR, one, neg_mask, one, none, none);
+ arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none);
+ }
+
+ return FALSE;
+}
+
+static boolean
+nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
+ const struct tgsi_full_instruction *finst)
+{
+ const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
+ struct nv30_sreg src[3], dst, tmp;
+ int mask, sat, unit = 0;
+ int ai = -1, ci = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ fpc->temp_temp_count = 0;
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(fpc, fsrc);
+ }
+ }
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_TEMPORARY:
+ if (!src_native_swz(fpc, fsrc, &src[i]))
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+ ai = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(fpc, fsrc);
+ } else {
+ NOUVEAU_MSG("extra src attr %d\n",
+ fsrc->SrcRegister.Index);
+ src[i] = temp(fpc);
+ arith(fpc, 0, MOV, src[i], MASK_ALL,
+ tgsi_src(fpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_IMMEDIATE:
+ if (ci == -1 || ci == fsrc->SrcRegister.Index) {
+ ci = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(fpc, fsrc);
+ } else {
+ src[i] = temp(fpc);
+ arith(fpc, 0, MOV, src[i], MASK_ALL,
+ tgsi_src(fpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ case TGSI_FILE_SAMPLER:
+ unit = fsrc->SrcRegister.Index;
+ break;
+ case TGSI_FILE_OUTPUT:
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
+ mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+ sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
+
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_CMP:
+ tmp = temp(fpc);
+ arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+ tmp.cc_update = 1;
+ arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+ dst.cc_test = NV30_VP_INST_COND_LT;
+ arith(fpc, sat, MOV, dst, mask, src[1], none, none);
+ break;
+ case TGSI_OPCODE_COS:
+ arith(fpc, sat, COS, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ tmp = temp(fpc);
+ arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none);
+ arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
+ swz(src[1], W, W, W, W), none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(fpc, sat, EX2, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(fpc, sat, FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(fpc, sat, FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_KILP:
+ arith(fpc, 0, KIL, none, 0, none, none, none);
+ break;
+ case TGSI_OPCODE_KIL:
+ dst = nv30_sr(NV30SR_NONE, 0);
+ dst.cc_update = 1;
+ arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
+ dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT;
+ arith(fpc, 0, KIL, dst, 0, none, none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(fpc, sat, LG2, dst, mask, src[0], none, none);
+ break;
+// case TGSI_OPCODE_LIT:
+ case TGSI_OPCODE_LRP:
+ arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(fpc, sat, MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_NOISE1:
+ case TGSI_OPCODE_NOISE2:
+ case TGSI_OPCODE_NOISE3:
+ case TGSI_OPCODE_NOISE4:
+ arith(fpc, sat, SFL, dst, mask, none, none, none);
+ break;
+ case TGSI_OPCODE_POW:
+ arith(fpc, sat, POW, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(fpc, sat, RCP, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_RET:
+ assert(0);
+ break;
+ case TGSI_OPCODE_RFL:
+ arith(fpc, 0, RFL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_RSQ:
+ arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
+ break;
+ case TGSI_OPCODE_SCS:
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ break;
+ case TGSI_OPCODE_SIN:
+ arith(fpc, sat, SIN, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGT:
+ arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
+ break;
+ case TGSI_OPCODE_TEX:
+ tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_TXB:
+ tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_TXP:
+ tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_XPD:
+ tmp = temp(fpc);
+ arith(fpc, 0, MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(fpc, sat, MAD, dst, (mask & ~MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static boolean
+nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ int hw;
+
+ switch (fdec->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = NV30_FP_OP_INPUT_SRC_POSITION;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV30_FP_OP_INPUT_SRC_COL0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV30_FP_OP_INPUT_SRC_COL1;
+ } else {
+ NOUVEAU_ERR("bad colour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ hw = NV30_FP_OP_INPUT_SRC_FOGC;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ if (fdec->Semantic.SemanticIndex <= 7) {
+ hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic.
+ SemanticIndex);
+ } else {
+ NOUVEAU_ERR("bad generic semantic index\n");
+ return FALSE;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("bad input semantic\n");
+ return FALSE;
+ }
+
+ fpc->attrib_map[fdec->DeclarationRange.First] = hw;
+ return TRUE;
+}
+
+static boolean
+nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ switch (fdec->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ fpc->depth_id = fdec->DeclarationRange.First;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ fpc->colour_id = fdec->DeclarationRange.First;
+ break;
+ default:
+ NOUVEAU_ERR("bad output semantic\n");
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static boolean
+nv30_fragprog_prepare(struct nv30_fpc *fpc)
+{
+ struct tgsi_parse_context p;
+ /*int high_temp = -1, i;*/
+
+ tgsi_parse_init(&p, fpc->fp->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch(tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *fdec;
+ fdec = &p.FullToken.FullDeclaration;
+ switch (fdec->Declaration.File) {
+ case TGSI_FILE_INPUT:
+ if (!nv30_fragprog_parse_decl_attrib(fpc, fdec))
+ goto out_err;
+ break;
+ case TGSI_FILE_OUTPUT:
+ if (!nv30_fragprog_parse_decl_output(fpc, fdec))
+ goto out_err;
+ break;
+ /*case TGSI_FILE_TEMPORARY:
+ if (fdec->DeclarationRange.Last > high_temp) {
+ high_temp =
+ fdec->DeclarationRange.Last;
+ }
+ break;*/
+ default:
+ break;
+ }
+ }
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ struct tgsi_full_immediate *imm;
+ float vals[4];
+
+ imm = &p.FullToken.FullImmediate;
+ assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+ assert(fpc->nr_imm < MAX_IMM);
+
+ vals[0] = imm->u.ImmediateFloat32[0].Float;
+ vals[1] = imm->u.ImmediateFloat32[1].Float;
+ vals[2] = imm->u.ImmediateFloat32[2].Float;
+ vals[3] = imm->u.ImmediateFloat32[3].Float;
+ fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ tgsi_parse_free(&p);
+
+ /*if (++high_temp) {
+ fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg));
+ for (i = 0; i < high_temp; i++)
+ fpc->r_temp[i] = temp(fpc);
+ fpc->r_temps_discard = 0;
+ }*/
+
+ return TRUE;
+
+out_err:
+ /*if (fpc->r_temp)
+ FREE(fpc->r_temp);*/
+ tgsi_parse_free(&p);
+ return FALSE;
+}
+
+static void
+nv30_fragprog_translate(struct nv30_context *nv30,
+ struct nv30_fragment_program *fp)
+{
+ struct tgsi_parse_context parse;
+ struct nv30_fpc *fpc = NULL;
+
+ tgsi_dump(fp->pipe.tokens,0);
+
+ fpc = CALLOC(1, sizeof(struct nv30_fpc));
+ if (!fpc)
+ return;
+ fpc->fp = fp;
+ fpc->high_temp = -1;
+ fpc->num_regs = 2;
+
+ if (!nv30_fragprog_prepare(fpc)) {
+ FREE(fpc);
+ return;
+ }
+
+ tgsi_parse_init(&parse, fp->pipe.tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+
+ finst = &parse.FullToken.FullInstruction;
+ if (!nv30_fragprog_parse_instruction(fpc, finst))
+ goto out_err;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ fp->fp_control |= (fpc->num_regs-1)/2;
+ fp->fp_reg_control = (1<<16)|0x4;
+
+ /* Terminate final instruction */
+ fp->insn[fpc->inst_offset] |= 0x00000001;
+
+ /* Append NOP + END instruction, may or may not be necessary. */
+ fpc->inst_offset = fp->insn_len;
+ grow_insns(fpc, 4);
+ fp->insn[fpc->inst_offset + 0] = 0x00000001;
+ fp->insn[fpc->inst_offset + 1] = 0x00000000;
+ fp->insn[fpc->inst_offset + 2] = 0x00000000;
+ fp->insn[fpc->inst_offset + 3] = 0x00000000;
+
+ fp->translated = TRUE;
+ fp->on_hw = FALSE;
+out_err:
+ tgsi_parse_free(&parse);
+ FREE(fpc);
+}
+
+static void
+nv30_fragprog_upload(struct nv30_context *nv30,
+ struct nv30_fragment_program *fp)
+{
+ struct pipe_winsys *ws = nv30->pipe.winsys;
+ const uint32_t le = 1;
+ uint32_t *map;
+ int i;
+
+ map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+
+#if 0
+ for (i = 0; i < fp->insn_len; i++) {
+ fflush(stdout); fflush(stderr);
+ NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]);
+ fflush(stdout); fflush(stderr);
+ }
+#endif
+
+ if ((*(const uint8_t *)&le)) {
+ for (i = 0; i < fp->insn_len; i++) {
+ map[i] = fp->insn[i];
+ }
+ } else {
+ /* Weird swapping for big-endian chips */
+ for (i = 0; i < fp->insn_len; i++) {
+ map[i] = ((fp->insn[i] & 0xffff) << 16) |
+ ((fp->insn[i] >> 16) & 0xffff);
+ }
+ }
+
+ ws->buffer_unmap(ws, fp->buffer);
+}
+
+static boolean
+nv30_fragprog_validate(struct nv30_context *nv30)
+{
+ struct nv30_fragment_program *fp = nv30->fragprog;
+ struct pipe_buffer *constbuf =
+ nv30->constbuf[PIPE_SHADER_FRAGMENT];
+ struct pipe_winsys *ws = nv30->pipe.winsys;
+ struct nouveau_stateobj *so;
+ boolean new_consts = FALSE;
+ int i;
+
+ if (fp->translated)
+ goto update_constants;
+
+ /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/
+ nv30_fragprog_translate(nv30, fp);
+ if (!fp->translated) {
+ /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/
+ return FALSE;
+ }
+
+ fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4);
+ nv30_fragprog_upload(nv30, fp);
+
+ so = so_new(8, 1);
+ so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
+ so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ NV34TCL_FP_ACTIVE_PROGRAM_DMA0, NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
+ so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1);
+ so_data (so, fp->fp_control);
+ so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1);
+ so_data (so, fp->fp_reg_control);
+ so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1);
+ so_data (so, fp->samplers);
+ so_ref(so, &fp->so);
+
+update_constants:
+ if (fp->nr_consts) {
+ float *map;
+
+ map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ);
+ for (i = 0; i < fp->nr_consts; i++) {
+ struct nv30_fragment_program_data *fpd = &fp->consts[i];
+ uint32_t *p = &fp->insn[fpd->offset];
+ uint32_t *cb = (uint32_t *)&map[fpd->index * 4];
+
+ if (!memcmp(p, cb, 4 * sizeof(float)))
+ continue;
+ memcpy(p, cb, 4 * sizeof(float));
+ new_consts = TRUE;
+ }
+ ws->buffer_unmap(ws, constbuf);
+
+ if (new_consts)
+ nv30_fragprog_upload(nv30, fp);
+ }
+
+ if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) {
+ so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+void
+nv30_fragprog_destroy(struct nv30_context *nv30,
+ struct nv30_fragment_program *fp)
+{
+ if (fp->insn_len)
+ FREE(fp->insn);
+}
+
+struct nv30_state_entry nv30_state_fragprog = {
+ .validate = nv30_fragprog_validate,
+ .dirty = {
+ .pipe = NV30_NEW_FRAGPROG,
+ .hw = NV30_STATE_FRAGPROG
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c
new file mode 100644
index 00000000000..b1d2663af38
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_fragtex.c
@@ -0,0 +1,163 @@
+#include "nv30_context.h"
+#include "nouveau/nouveau_util.h"
+
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV34TCL_TX_FORMAT_FORMAT_##tf, \
+ (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \
+ NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \
+ NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \
+ NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w) \
+}
+
+struct nv30_texture_format {
+ boolean defined;
+ uint pipe;
+ int format;
+ int swizzle;
+};
+
+static struct nv30_texture_format
+nv30_texture_formats[] = {
+ _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W),
+ _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W),
+ _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W),
+ _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W),
+ _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X),
+ _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X),
+ _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X),
+ _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y),
+// _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X),
+// _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X),
+ _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W),
+ _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W),
+ _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W),
+ _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W),
+ {},
+};
+
+static struct nv30_texture_format *
+nv30_fragtex_format(uint pipe_format)
+{
+ struct nv30_texture_format *tf = nv30_texture_formats;
+ char fs[128];
+
+ while (tf->defined) {
+ if (tf->pipe == pipe_format)
+ return tf;
+ tf++;
+ }
+
+ NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format));
+ return NULL;
+}
+
+
+static struct nouveau_stateobj *
+nv30_fragtex_build(struct nv30_context *nv30, int unit)
+{
+ struct nv30_sampler_state *ps = nv30->tex_sampler[unit];
+ struct nv30_miptree *nv30mt = nv30->tex_miptree[unit];
+ struct pipe_texture *pt = &nv30mt->base;
+ struct nv30_texture_format *tf;
+ struct nouveau_stateobj *so;
+ uint32_t txf, txs , txp;
+ unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+ tf = nv30_fragtex_format(pt->format);
+ if (!tf)
+ assert(0);
+
+ txf = tf->format;
+ txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0);
+ txf |= log2i(pt->width[0]) << 20;
+ txf |= log2i(pt->height[0]) << 24;
+ txf |= log2i(pt->depth[0]) << 28;
+ txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV34TCL_TX_FORMAT_CUBIC;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_2D;
+ break;
+ case PIPE_TEXTURE_3D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_3D;
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_1D;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return NULL;
+ }
+
+ if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ txp = 0;
+ } else {
+ txp = nv30mt->level[0].pitch;
+ txf |= (1<<13) /*FIXME: NV34TCL_TX_FORMAT_LINEAR ? */;
+ }
+
+ txs = tf->swizzle;
+
+ so = so_new(16, 2);
+ so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8);
+ so_reloc (so, nv30mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (so, nv30mt->buffer, txf, tex_flags | NOUVEAU_BO_OR,
+ NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
+ so_data (so, ps->wrap);
+ so_data (so, NV34TCL_TX_ENABLE_ENABLE | ps->en);
+ so_data (so, txs);
+ so_data (so, ps->filt | 0x2000 /*voodoo*/);
+ so_data (so, (pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
+ pt->height[0]);
+ so_data (so, ps->bcol);
+
+ return so;
+}
+
+static boolean
+nv30_fragtex_validate(struct nv30_context *nv30)
+{
+ struct nv30_fragment_program *fp = nv30->fragprog;
+ struct nv30_state *state = &nv30->state;
+ struct nouveau_stateobj *so;
+ unsigned samplers, unit;
+
+ samplers = state->fp_samplers & ~fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ so = so_new(2, 0);
+ so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1);
+ so_data (so, 0);
+ so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]);
+ state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit));
+ }
+
+ samplers = nv30->dirty_samplers & fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ so = nv30_fragtex_build(nv30, unit);
+ so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]);
+ state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit));
+ }
+
+ nv30->state.fp_samplers = fp->samplers;
+ return FALSE;
+}
+
+struct nv30_state_entry nv30_state_fragtex = {
+ .validate = nv30_fragtex_validate,
+ .dirty = {
+ .pipe = NV30_NEW_SAMPLER | NV30_NEW_FRAGPROG,
+ .hw = 0
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
new file mode 100644
index 00000000000..aa670b9a45b
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -0,0 +1,195 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv30_context.h"
+
+static void
+nv30_miptree_layout(struct nv30_miptree *nv30mt)
+{
+ struct pipe_texture *pt = &nv30mt->base;
+ boolean swizzled = FALSE;
+ uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+ uint offset = 0;
+ int nr_faces, l, f, pitch;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ nr_faces = 6;
+ } else
+ if (pt->target == PIPE_TEXTURE_3D) {
+ nr_faces = pt->depth[0];
+ } else {
+ nr_faces = 1;
+ }
+
+ pitch = pt->width[0];
+ for (l = 0; l <= pt->last_level; l++) {
+ pt->width[l] = width;
+ pt->height[l] = height;
+ pt->depth[l] = depth;
+ pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+ if (swizzled)
+ pitch = pt->nblocksx[l];
+ pitch = align(pitch, 64);
+
+ nv30mt->level[l].pitch = pitch * pt->block.size;
+ nv30mt->level[l].image_offset =
+ CALLOC(nr_faces, sizeof(unsigned));
+
+ width = MAX2(1, width >> 1);
+ height = MAX2(1, height >> 1);
+ depth = MAX2(1, depth >> 1);
+ }
+
+ for (f = 0; f < nr_faces; f++) {
+ for (l = 0; l <= pt->last_level; l++) {
+ nv30mt->level[l].image_offset[f] = offset;
+ offset += nv30mt->level[l].pitch * pt->height[l];
+ }
+ }
+
+ nv30mt->total_size = offset;
+}
+
+static struct pipe_texture *
+nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
+{
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv30_miptree *mt;
+
+ mt = MALLOC(sizeof(struct nv30_miptree));
+ if (!mt)
+ return NULL;
+ mt->base = *pt;
+ mt->base.refcount = 1;
+ mt->base.screen = pscreen;
+ mt->shadow_tex = NULL;
+ mt->shadow_surface = NULL;
+
+ /* Swizzled textures must be POT */
+ if (pt->width[0] & (pt->width[0] - 1) ||
+ pt->height[0] & (pt->height[0] - 1))
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+ else
+ if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
+ PIPE_TEXTURE_USAGE_DISPLAY_TARGET))
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+ else {
+ switch (pt->format) {
+ /* TODO: Figure out which formats can be swizzled */
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ /* XXX: Re-enable when SIFM size limits are fixed */
+ /*case PIPE_FORMAT_R16_SNORM:*/
+ break;
+ default:
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+ }
+ }
+
+ nv30_miptree_layout(mt);
+
+ mt->buffer = ws->buffer_create(ws, 256,
+ PIPE_BUFFER_USAGE_PIXEL |
+ NOUVEAU_BUFFER_USAGE_TEXTURE,
+ mt->total_size);
+ if (!mt->buffer) {
+ FREE(mt);
+ return NULL;
+ }
+
+ return &mt->base;
+}
+
+static void
+nv30_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt)
+{
+ struct pipe_texture *pt = *ppt;
+ struct nv30_miptree *mt = (struct nv30_miptree *)pt;
+ int l;
+
+ *ppt = NULL;
+ if (--pt->refcount)
+ return;
+
+ pipe_buffer_reference(pscreen, &mt->buffer, NULL);
+ for (l = 0; l <= pt->last_level; l++) {
+ if (mt->level[l].image_offset)
+ FREE(mt->level[l].image_offset);
+ }
+
+ if (mt->shadow_tex) {
+ assert(mt->shadow_surface);
+ pscreen->tex_surface_release(pscreen, &mt->shadow_surface);
+ nv30_miptree_release(pscreen, &mt->shadow_tex);
+ }
+
+ FREE(mt);
+}
+
+static struct pipe_surface *
+nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt;
+ struct pipe_surface *ps;
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ if (!ps)
+ return NULL;
+ pipe_texture_reference(&ps->texture, pt);
+ pipe_buffer_reference(pscreen, &ps->buffer, nv30mt->buffer);
+ ps->format = pt->format;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->block = pt->block;
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = nv30mt->level[level].pitch;
+ ps->usage = flags;
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
+ ps->refcount = 1;
+ ps->winsys = pscreen->winsys;
+ ps->face = face;
+ ps->level = level;
+ ps->zslice = zslice;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ ps->offset = nv30mt->level[level].image_offset[face];
+ } else
+ if (pt->target == PIPE_TEXTURE_3D) {
+ ps->offset = nv30mt->level[level].image_offset[zslice];
+ } else {
+ ps->offset = nv30mt->level[level].image_offset[0];
+ }
+
+ return ps;
+}
+
+static void
+nv30_miptree_surface_del(struct pipe_screen *pscreen,
+ struct pipe_surface **psurface)
+{
+ struct pipe_surface *ps = *psurface;
+
+ *psurface = NULL;
+ if (--ps->refcount > 0)
+ return;
+
+ pipe_texture_reference(&ps->texture, NULL);
+ pipe_buffer_reference(pscreen->winsys, &ps->buffer, NULL);
+ FREE(ps);
+}
+
+void
+nv30_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv30_miptree_create;
+ pscreen->texture_release = nv30_miptree_release;
+ pscreen->get_tex_surface = nv30_miptree_surface_new;
+ pscreen->tex_surface_release = nv30_miptree_surface_del;
+}
+
diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c
new file mode 100644
index 00000000000..d40d75f2640
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_query.c
@@ -0,0 +1,122 @@
+#include "pipe/p_context.h"
+
+#include "nv30_context.h"
+
+struct nv30_query {
+ struct nouveau_resource *object;
+ unsigned type;
+ boolean ready;
+ uint64_t result;
+};
+
+static INLINE struct nv30_query *
+nv30_query(struct pipe_query *pipe)
+{
+ return (struct nv30_query *)pipe;
+}
+
+static struct pipe_query *
+nv30_query_create(struct pipe_context *pipe, unsigned query_type)
+{
+ struct nv30_query *q;
+
+ q = CALLOC(1, sizeof(struct nv30_query));
+ q->type = query_type;
+
+ return (struct pipe_query *)q;
+}
+
+static void
+nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_query *q = nv30_query(pq);
+
+ if (q->object)
+ nv30->nvws->res_free(&q->object);
+ FREE(q);
+}
+
+static void
+nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_query *q = nv30_query(pq);
+
+ assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+ /* Happens when end_query() is called, then another begin_query()
+ * without querying the result in-between. For now we'll wait for
+ * the existing query to notify completion, but it could be better.
+ */
+ if (q->object) {
+ uint64 tmp;
+ pipe->get_query_result(pipe, pq, 1, &tmp);
+ }
+
+ if (nv30->nvws->res_alloc(nv30->screen->query_heap, 1, NULL, &q->object))
+ assert(0);
+ nv30->nvws->notifier_reset(nv30->screen->query, q->object->start);
+
+ BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1);
+ OUT_RING (1);
+ BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1);
+ OUT_RING (1);
+
+ q->ready = FALSE;
+}
+
+static void
+nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_query *q = nv30_query(pq);
+
+ BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1);
+ OUT_RING ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) |
+ ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT));
+ FIRE_RING(NULL);
+}
+
+static boolean
+nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, uint64 *result)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_query *q = nv30_query(pq);
+ struct nouveau_winsys *nvws = nv30->nvws;
+
+ assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+ if (!q->ready) {
+ unsigned status;
+
+ status = nvws->notifier_status(nv30->screen->query,
+ q->object->start);
+ if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) {
+ if (wait == FALSE)
+ return FALSE;
+ nvws->notifier_wait(nv30->screen->query, q->object->start,
+ NV_NOTIFY_STATE_STATUS_COMPLETED,
+ 0);
+ }
+
+ q->result = nvws->notifier_retval(nv30->screen->query,
+ q->object->start);
+ q->ready = TRUE;
+ nvws->res_free(&q->object);
+ }
+
+ *result = q->result;
+ return TRUE;
+}
+
+void
+nv30_init_query_functions(struct nv30_context *nv30)
+{
+ nv30->pipe.create_query = nv30_query_create;
+ nv30->pipe.destroy_query = nv30_query_destroy;
+ nv30->pipe.begin_query = nv30_query_begin;
+ nv30->pipe.end_query = nv30_query_end;
+ nv30->pipe.get_query_result = nv30_query_result;
+}
diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
new file mode 100644
index 00000000000..910a3c456dd
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -0,0 +1,383 @@
+#include "pipe/p_screen.h"
+
+#include "nv30_context.h"
+#include "nv30_screen.h"
+
+#define NV30TCL_CHIPSET_3X_MASK 0x00000003
+#define NV34TCL_CHIPSET_3X_MASK 0x00000010
+#define NV35TCL_CHIPSET_3X_MASK 0x000001e0
+
+static const char *
+nv30_screen_get_name(struct pipe_screen *pscreen)
+{
+ struct nv30_screen *screen = nv30_screen(pscreen);
+ struct nouveau_device *dev = screen->nvws->channel->device;
+ static char buffer[128];
+
+ snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+ return buffer;
+}
+
+static const char *
+nv30_screen_get_vendor(struct pipe_screen *pscreen)
+{
+ return "nouveau";
+}
+
+static int
+nv30_screen_get_param(struct pipe_screen *pscreen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 16;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 0;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 2;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 13;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 10;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 13;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ return 0;
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ return 1;
+ case NOUVEAU_CAP_HW_VTXBUF:
+ case NOUVEAU_CAP_HW_IDXBUF:
+ return 1;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nv30_screen_get_paramf(struct pipe_screen *pscreen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 8.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 4.0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+static boolean
+nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage, unsigned geom_flags)
+{
+ if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ case PIPE_FORMAT_A8L8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ }
+
+ return FALSE;
+}
+
+static void *
+nv30_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+ unsigned flags )
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct pipe_surface *surface_to_map;
+ void *map;
+
+ if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ struct nv30_miptree *mt = (struct nv30_miptree *)surface->texture;
+
+ if (!mt->shadow_tex) {
+ unsigned old_tex_usage = surface->texture->tex_usage;
+ surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR;
+ mt->shadow_tex = screen->texture_create(screen, surface->texture);
+ surface->texture->tex_usage = old_tex_usage;
+
+ assert(mt->shadow_tex->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR);
+ mt->shadow_surface = screen->get_tex_surface
+ (
+ screen, mt->shadow_tex,
+ surface->face, surface->level, surface->zslice,
+ surface->usage
+ );
+ }
+
+ surface_to_map = mt->shadow_surface;
+ }
+ else
+ surface_to_map = surface;
+
+ assert(surface_to_map);
+
+ map = ws->buffer_map(ws, surface_to_map->buffer, flags);
+ if (!map)
+ return NULL;
+
+ return map + surface_to_map->offset;
+}
+
+static void
+nv30_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct pipe_surface *surface_to_unmap;
+
+ /* TODO: Copy from shadow just before push buffer is flushed instead.
+ There are probably some programs that map/unmap excessively
+ before rendering. */
+ if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ struct nv30_miptree *mt = (struct nv30_miptree *)surface->texture;
+
+ assert(mt->shadow_tex);
+
+ surface_to_unmap = mt->shadow_surface;
+ }
+ else
+ surface_to_unmap = surface;
+
+ assert(surface_to_unmap);
+
+ ws->buffer_unmap(ws, surface_to_unmap->buffer);
+
+ if (surface_to_unmap != surface) {
+ struct nv30_screen *nvscreen = nv30_screen(screen);
+
+ nvscreen->nvws->surface_copy(nvscreen->nvws,
+ surface, 0, 0,
+ surface_to_unmap, 0, 0,
+ surface->width, surface->height);
+ }
+}
+
+static void
+nv30_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv30_screen *screen = nv30_screen(pscreen);
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nvws->res_free(&screen->vp_exec_heap);
+ nvws->res_free(&screen->vp_data_heap);
+ nvws->res_free(&screen->query_heap);
+ nvws->notifier_free(&screen->query);
+ nvws->notifier_free(&screen->sync);
+ nvws->grobj_free(&screen->rankine);
+
+ FREE(pscreen);
+}
+
+struct pipe_screen *
+nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+ struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen);
+ struct nouveau_stateobj *so;
+ unsigned rankine_class = 0;
+ unsigned chipset = nvws->channel->device->chipset;
+ int ret, i;
+
+ if (!screen)
+ return NULL;
+ screen->nvws = nvws;
+
+ /* 3D object */
+ switch (chipset & 0xf0) {
+ case 0x30:
+ if (NV30TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f)))
+ rankine_class = 0x0397;
+ else
+ if (NV34TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f)))
+ rankine_class = 0x0697;
+ else
+ if (NV35TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f)))
+ rankine_class = 0x0497;
+ break;
+ default:
+ break;
+ }
+
+ if (!rankine_class) {
+ NOUVEAU_ERR("Unknown nv3x chipset: nv%02x\n", chipset);
+ return NULL;
+ }
+
+ ret = nvws->grobj_alloc(nvws, rankine_class, &screen->rankine);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+ return FALSE;
+ }
+
+ /* Notifier for sync purposes */
+ ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+ if (ret) {
+ NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+ nv30_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Query objects */
+ ret = nvws->notifier_alloc(nvws, 32, &screen->query);
+ if (ret) {
+ NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
+ nv30_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ ret = nvws->res_init(&screen->query_heap, 0, 32);
+ if (ret) {
+ NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
+ nv30_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Vtxprog resources */
+ if (nvws->res_init(&screen->vp_exec_heap, 0, 256) ||
+ nvws->res_init(&screen->vp_data_heap, 0, 256)) {
+ nv30_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Static rankine initialisation */
+ so = so_new(128, 0);
+ so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1);
+ so_data (so, screen->sync->handle);
+ so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->gart->handle);
+ so_method(so, screen->rankine, NV34TCL_DMA_COLOR1, 1);
+ so_data (so, nvws->channel->vram->handle);
+ so_method(so, screen->rankine, NV34TCL_DMA_COLOR0, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->vram->handle);
+ so_method(so, screen->rankine, NV34TCL_DMA_VTXBUF0, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->gart->handle);
+/* so_method(so, screen->rankine, NV34TCL_DMA_FENCE, 2);
+ so_data (so, 0);
+ so_data (so, screen->query->handle);*/
+ so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY7, 1);
+ so_data (so, nvws->channel->vram->handle);
+ so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY8, 1);
+ so_data (so, nvws->channel->vram->handle);
+
+ for (i=1; i<8; i++) {
+ so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+ so_data (so, 0);
+ so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1);
+ so_data (so, 0);
+ }
+
+ so_method(so, screen->rankine, 0x220, 1);
+ so_data (so, 1);
+
+ so_method(so, screen->rankine, 0x03b0, 1);
+ so_data (so, 0x00100000);
+ so_method(so, screen->rankine, 0x1454, 1);
+ so_data (so, 0);
+ so_method(so, screen->rankine, 0x1d80, 1);
+ so_data (so, 3);
+ so_method(so, screen->rankine, 0x1450, 1);
+ so_data (so, 0x00030004);
+
+ /* NEW */
+ so_method(so, screen->rankine, 0x1e98, 1);
+ so_data (so, 0);
+ so_method(so, screen->rankine, 0x17e0, 3);
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(1.0));
+ so_method(so, screen->rankine, 0x1f80, 16);
+ for (i=0; i<16; i++) {
+ so_data (so, (i==8) ? 0x0000ffff : 0);
+ }
+
+ so_method(so, screen->rankine, 0x120, 3);
+ so_data (so, 0);
+ so_data (so, 1);
+ so_data (so, 2);
+
+ so_method(so, screen->rankine, 0x1d88, 1);
+ so_data (so, 0x00001200);
+
+ so_method(so, screen->rankine, NV34TCL_RC_ENABLE, 1);
+ so_data (so, 0);
+
+ so_method(so, screen->rankine, NV34TCL_DEPTH_RANGE_NEAR, 2);
+ so_data (so, fui(0.0));
+ so_data (so, fui(1.0));
+
+ so_method(so, screen->rankine, NV34TCL_MULTISAMPLE_CONTROL, 1);
+ so_data (so, 0xffff0000);
+
+ /* enables use of vp rather than fixed-function somehow */
+ so_method(so, screen->rankine, 0x1e94, 1);
+ so_data (so, 0x13);
+
+ so_emit(nvws, so);
+ so_ref(NULL, &so);
+ nvws->push_flush(nvws, 0, NULL);
+
+ screen->pipe.winsys = ws;
+ screen->pipe.destroy = nv30_screen_destroy;
+
+ screen->pipe.get_name = nv30_screen_get_name;
+ screen->pipe.get_vendor = nv30_screen_get_vendor;
+ screen->pipe.get_param = nv30_screen_get_param;
+ screen->pipe.get_paramf = nv30_screen_get_paramf;
+
+ screen->pipe.is_format_supported = nv30_screen_surface_format_supported;
+
+ screen->pipe.surface_map = nv30_surface_map;
+ screen->pipe.surface_unmap = nv30_surface_unmap;
+
+ nv30_screen_init_miptree_functions(&screen->pipe);
+
+ return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h
new file mode 100644
index 00000000000..b7ddc2a9594
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_screen.h
@@ -0,0 +1,35 @@
+#ifndef __NV30_SCREEN_H__
+#define __NV30_SCREEN_H__
+
+#include "pipe/p_screen.h"
+
+struct nv30_screen {
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+
+ unsigned cur_pctx;
+
+ /* HW graphics objects */
+ struct nouveau_grobj *rankine;
+ struct nouveau_notifier *sync;
+
+ /* Query object resources */
+ struct nouveau_notifier *query;
+ struct nouveau_resource *query_heap;
+
+ /* Vtxprog resources */
+ struct nouveau_resource *vp_exec_heap;
+ struct nouveau_resource *vp_data_heap;
+
+ /* Current 3D state of channel */
+ struct nouveau_stateobj *state[NV30_STATE_MAX];
+};
+
+static INLINE struct nv30_screen *
+nv30_screen(struct pipe_screen *screen)
+{
+ return (struct nv30_screen *)screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv30/nv30_shader.h b/src/gallium/drivers/nv30/nv30_shader.h
new file mode 100644
index 00000000000..dd3a36f78f3
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_shader.h
@@ -0,0 +1,490 @@
+#ifndef __NV30_SHADER_H__
+#define __NV30_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * 128bit opcodes, split into 4 32-bit ones for ease of use.
+ *
+ * Non-native instructions
+ * ABS - MOV + NV40_VP_INST0_DEST_ABS
+ * POW - EX2 + MUL + LG2
+ * SUB - ADD, second source negated
+ * SWZ - MOV
+ * XPD -
+ *
+ * Register access
+ * - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
+ * - Only one CONST can be accessed per-instruction (move extras into TEMPs)
+ *
+ * Relative Addressing
+ * According to the value returned for
+ * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB
+ *
+ * there are only two address registers available. The destination in the
+ * ARL instruction is set to TEMP <n> (The temp isn't actually written).
+ *
+ * When using vanilla ARB_v_p, the proprietary driver will squish both the
+ * available ADDRESS regs into the first hardware reg in the X and Y
+ * components.
+ *
+ * To use an address reg as an index into consts, the CONST_SRC is set to
+ * (const_base + offset) and INDEX_CONST is set.
+ *
+ * To access the second address reg use ADDR_REG_SELECT_1. A particular
+ * component of the address regs is selected with ADDR_SWZ.
+ *
+ * Only one address register can be accessed per instruction.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details) Conditional
+ * execution of an instruction is enabled by setting COND_TEST_ENABLE, and
+ * selecting the condition which will allow the test to pass with
+ * COND_{FL,LT,...}. It is possible to swizzle the values in the condition
+ * register, which allows for testing against an individual component.
+ *
+ * Branching:
+ *
+ * The BRA/CAL instructions seem to follow a slightly different opcode
+ * layout. The destination instruction ID (IADDR) overlaps a source field.
+ * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO
+ * command, and is incremented automatically on each UPLOAD_INST FIFO
+ * command.
+ *
+ * Conditional branching is achieved by using the condition tests described
+ * above. There doesn't appear to be dedicated looping instructions, but
+ * this can be done using a temp reg + conditional branching.
+ *
+ * Subroutines may be uploaded before the main program itself, but the first
+ * executed instruction is determined by the PROGRAM_START_ID FIFO command.
+ *
+ */
+
+/* DWORD 0 */
+
+#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
+#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */
+#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */
+#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */
+#define NV30_VP_INST_VEC_RESULT (1 << 20)
+#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16
+#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16)
+#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15)
+#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16)
+#define NV30_VP_INST_COND_TEST_ENABLE (1<<14)
+#define NV30_VP_INST_COND_SHIFT 11
+#define NV30_VP_INST_COND_MASK (0x07 << 11)
+# define NV30_VP_INST_COND_FL 0 /* guess */
+# define NV30_VP_INST_COND_LT 1
+# define NV30_VP_INST_COND_EQ 2
+# define NV30_VP_INST_COND_LE 3
+# define NV30_VP_INST_COND_GT 4
+# define NV30_VP_INST_COND_NE 5
+# define NV30_VP_INST_COND_GE 6
+# define NV30_VP_INST_COND_TR 7 /* guess */
+#define NV30_VP_INST_COND_SWZ_X_SHIFT 9
+#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9)
+#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7
+#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7)
+#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5
+#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5)
+#define NV30_VP_INST_COND_SWZ_W_SHIFT 3
+#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3)
+#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3
+#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3)
+#define NV30_VP_INST_ADDR_SWZ_SHIFT 1
+#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1)
+#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0
+#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0)
+
+/* DWORD 1 */
+#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28
+#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28)
+# define NV30_VP_INST_OP_NOP 0x00
+# define NV30_VP_INST_OP_RCP 0x02
+# define NV30_VP_INST_OP_RCC 0x03
+# define NV30_VP_INST_OP_RSQ 0x04
+# define NV30_VP_INST_OP_EXP 0x05
+# define NV30_VP_INST_OP_LOG 0x06
+# define NV30_VP_INST_OP_LIT 0x07
+# define NV30_VP_INST_OP_BRA 0x09
+# define NV30_VP_INST_OP_CAL 0x0B
+# define NV30_VP_INST_OP_RET 0x0C
+# define NV30_VP_INST_OP_LG2 0x0D
+# define NV30_VP_INST_OP_EX2 0x0E
+# define NV30_VP_INST_OP_SIN 0x0F
+# define NV30_VP_INST_OP_COS 0x10
+#define NV30_VP_INST_VEC_OPCODE_SHIFT 23
+#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23)
+# define NV30_VP_INST_OP_NOPV 0x00
+# define NV30_VP_INST_OP_MOV 0x01
+# define NV30_VP_INST_OP_MUL 0x02
+# define NV30_VP_INST_OP_ADD 0x03
+# define NV30_VP_INST_OP_MAD 0x04
+# define NV30_VP_INST_OP_DP3 0x05
+# define NV30_VP_INST_OP_DP4 0x07
+# define NV30_VP_INST_OP_DPH 0x06
+# define NV30_VP_INST_OP_DST 0x08
+# define NV30_VP_INST_OP_MIN 0x09
+# define NV30_VP_INST_OP_MAX 0x0A
+# define NV30_VP_INST_OP_SLT 0x0B
+# define NV30_VP_INST_OP_SGE 0x0C
+# define NV30_VP_INST_OP_ARL 0x0D
+# define NV30_VP_INST_OP_FRC 0x0E
+# define NV30_VP_INST_OP_FLR 0x0F
+# define NV30_VP_INST_OP_SEQ 0x10
+# define NV30_VP_INST_OP_SFL 0x11
+# define NV30_VP_INST_OP_SGT 0x12
+# define NV30_VP_INST_OP_SLE 0x13
+# define NV30_VP_INST_OP_SNE 0x14
+# define NV30_VP_INST_OP_STR 0x15
+# define NV30_VP_INST_OP_SSG 0x16
+# define NV30_VP_INST_OP_ARR 0x17
+# define NV30_VP_INST_OP_ARA 0x18
+#define NV30_VP_INST_CONST_SRC_SHIFT 14
+#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14)
+#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/
+#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/
+# define NV30_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */
+# define NV30_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */
+# define NV30_VP_INST_IN_NORMAL 2
+# define NV30_VP_INST_IN_COL0 3 /* Should probably confirm them all though */
+# define NV30_VP_INST_IN_COL1 4
+# define NV30_VP_INST_IN_FOGC 5
+# define NV30_VP_INST_IN_TC0 8
+# define NV30_VP_INST_IN_TC(n) (8+n)
+#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/
+#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/
+
+/* Please note: the IADDR fields overlap other fields because they are used
+ * only for branch instructions. See Branching: label above
+ *
+ * DWORD 2
+ */
+#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/
+#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */
+#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/
+#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/
+#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/
+#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/
+#define NV30_VP_INST_IADDR_SHIFT 2
+#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */
+
+/* DWORD 3 */
+#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/
+#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/
+#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24
+#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24)
+#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20
+#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20)
+#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16
+#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16)
+#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/
+#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/
+#define NV30_VP_INST_DEST_SHIFT 2
+#define NV30_VP_INST_DEST_MASK (0x0F << 2)
+# define NV30_VP_INST_DEST_POS 0
+# define NV30_VP_INST_DEST_BFC0 1
+# define NV30_VP_INST_DEST_BFC1 2
+# define NV30_VP_INST_DEST_COL0 3
+# define NV30_VP_INST_DEST_COL1 4
+# define NV30_VP_INST_DEST_FOGC 5
+# define NV30_VP_INST_DEST_PSZ 6
+# define NV30_VP_INST_DEST_TC(n) (8+n)
+
+#define NV30_VP_INST_LAST (1 << 0)
+
+/* Useful to split the source selection regs into their pieces */
+#define NV30_VP_SRC0_HIGH_SHIFT 6
+#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0
+#define NV30_VP_SRC0_LOW_MASK 0x0000003F
+#define NV30_VP_SRC2_HIGH_SHIFT 4
+#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0
+#define NV30_VP_SRC2_LOW_MASK 0x0000000F
+
+
+/* Source-register definition - matches NV20 exactly */
+#define NV30_VP_SRC_NEGATE (1<<14)
+#define NV30_VP_SRC_SWZ_X_SHIFT 12
+#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12)
+#define NV30_VP_SRC_SWZ_Y_SHIFT 10
+#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10)
+#define NV30_VP_SRC_SWZ_Z_SHIFT 8
+#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8)
+#define NV30_VP_SRC_SWZ_W_SHIFT 6
+#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6)
+#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6
+#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6)
+#define NV30_VP_SRC_TEMP_SRC_SHIFT 2
+#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0)
+#define NV30_VP_SRC_REG_TYPE_SHIFT 0
+#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0)
+#define NV30_VP_SRC_REG_TYPE_TEMP 1
+#define NV30_VP_SRC_REG_TYPE_INPUT 2
+#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */
+
+/*
+ * Each fragment program opcode appears to be comprised of 4 32-bit values.
+ *
+ * 0 - Opcode, output reg/mask, ATTRIB source
+ * 1 - Source 0
+ * 2 - Source 1
+ * 3 - Source 2
+ *
+ * There appears to be no special difference between result regs and temp regs.
+ * result.color == R0.xyzw
+ * result.depth == R1.z
+ * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0
+ * otherwise it is set to 1.
+ *
+ * Constants are inserted directly after the instruction that uses them.
+ *
+ * It appears that it's not possible to use two input registers in one
+ * instruction as the input sourcing is done in the instruction dword
+ * and not the source selection dwords. As such instructions such as:
+ *
+ * ADD result.color, fragment.color, fragment.texcoord[0];
+ *
+ * must be split into two MOV's and then an ADD (nvidia does this) but
+ * I'm not sure why it's not just one MOV and then source the second input
+ * in the ADD instruction..
+ *
+ * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
+ * negation requires multiplication with a const.
+ *
+ * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE
+ * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO
+ * is implemented simply by not writing to the relevant components of the destination.
+ *
+ * Conditional execution
+ * TODO
+ *
+ * Non-native instructions:
+ * LIT
+ * LRP - MAD+MAD
+ * SUB - ADD, negate second source
+ * RSQ - LG2 + EX2
+ * POW - LG2 + MUL + EX2
+ * SCS - COS + SIN
+ * XPD
+ */
+
+//== Opcode / Destination selection ==
+#define NV30_FP_OP_PROGRAM_END (1 << 0)
+#define NV30_FP_OP_OUT_REG_SHIFT 1
+#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */
+/* Needs to be set when writing outputs to get expected result.. */
+#define NV30_FP_OP_OUT_REG_HALF (1 << 7)
+#define NV30_FP_OP_COND_WRITE_ENABLE (1 << 8)
+#define NV30_FP_OP_OUTMASK_SHIFT 9
+#define NV30_FP_OP_OUTMASK_MASK (0xF << 9)
+# define NV30_FP_OP_OUT_X (1<<9)
+# define NV30_FP_OP_OUT_Y (1<<10)
+# define NV30_FP_OP_OUT_Z (1<<11)
+# define NV30_FP_OP_OUT_W (1<<12)
+/* Uncertain about these, especially the input_src values.. it's possible that
+ * they can be dynamically changed.
+ */
+#define NV30_FP_OP_INPUT_SRC_SHIFT 13
+#define NV30_FP_OP_INPUT_SRC_MASK (15 << 13)
+# define NV30_FP_OP_INPUT_SRC_POSITION 0x0
+# define NV30_FP_OP_INPUT_SRC_COL0 0x1
+# define NV30_FP_OP_INPUT_SRC_COL1 0x2
+# define NV30_FP_OP_INPUT_SRC_FOGC 0x3
+# define NV30_FP_OP_INPUT_SRC_TC0 0x4
+# define NV30_FP_OP_INPUT_SRC_TC(n) (0x4 + n)
+#define NV30_FP_OP_TEX_UNIT_SHIFT 17
+#define NV30_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */
+#define NV30_FP_OP_PRECISION_SHIFT 22
+#define NV30_FP_OP_PRECISION_MASK (3 << 22)
+# define NV30_FP_PRECISION_FP32 0
+# define NV30_FP_PRECISION_FP16 1
+# define NV30_FP_PRECISION_FX12 2
+#define NV30_FP_OP_OPCODE_SHIFT 24
+#define NV30_FP_OP_OPCODE_MASK (0x3F << 24)
+# define NV30_FP_OP_OPCODE_NOP 0x00
+# define NV30_FP_OP_OPCODE_MOV 0x01
+# define NV30_FP_OP_OPCODE_MUL 0x02
+# define NV30_FP_OP_OPCODE_ADD 0x03
+# define NV30_FP_OP_OPCODE_MAD 0x04
+# define NV30_FP_OP_OPCODE_DP3 0x05
+# define NV30_FP_OP_OPCODE_DP4 0x06
+# define NV30_FP_OP_OPCODE_DST 0x07
+# define NV30_FP_OP_OPCODE_MIN 0x08
+# define NV30_FP_OP_OPCODE_MAX 0x09
+# define NV30_FP_OP_OPCODE_SLT 0x0A
+# define NV30_FP_OP_OPCODE_SGE 0x0B
+# define NV30_FP_OP_OPCODE_SLE 0x0C
+# define NV30_FP_OP_OPCODE_SGT 0x0D
+# define NV30_FP_OP_OPCODE_SNE 0x0E
+# define NV30_FP_OP_OPCODE_SEQ 0x0F
+# define NV30_FP_OP_OPCODE_FRC 0x10
+# define NV30_FP_OP_OPCODE_FLR 0x11
+# define NV30_FP_OP_OPCODE_KIL 0x12
+# define NV30_FP_OP_OPCODE_PK4B 0x13
+# define NV30_FP_OP_OPCODE_UP4B 0x14
+# define NV30_FP_OP_OPCODE_DDX 0x15 /* can only write XY */
+# define NV30_FP_OP_OPCODE_DDY 0x16 /* can only write XY */
+# define NV30_FP_OP_OPCODE_TEX 0x17
+# define NV30_FP_OP_OPCODE_TXP 0x18
+# define NV30_FP_OP_OPCODE_TXD 0x19
+# define NV30_FP_OP_OPCODE_RCP 0x1A
+# define NV30_FP_OP_OPCODE_RSQ 0x1B
+# define NV30_FP_OP_OPCODE_EX2 0x1C
+# define NV30_FP_OP_OPCODE_LG2 0x1D
+# define NV30_FP_OP_OPCODE_LIT 0x1E
+# define NV30_FP_OP_OPCODE_LRP 0x1F
+# define NV30_FP_OP_OPCODE_STR 0x20
+# define NV30_FP_OP_OPCODE_SFL 0x21
+# define NV30_FP_OP_OPCODE_COS 0x22
+# define NV30_FP_OP_OPCODE_SIN 0x23
+# define NV30_FP_OP_OPCODE_PK2H 0x24
+# define NV30_FP_OP_OPCODE_UP2H 0x25
+# define NV30_FP_OP_OPCODE_POW 0x26
+# define NV30_FP_OP_OPCODE_PK4UB 0x27
+# define NV30_FP_OP_OPCODE_UP4UB 0x28
+# define NV30_FP_OP_OPCODE_PK2US 0x29
+# define NV30_FP_OP_OPCODE_UP2US 0x2A
+# define NV30_FP_OP_OPCODE_DP2A 0x2E
+# define NV30_FP_OP_OPCODE_TXB 0x31
+# define NV30_FP_OP_OPCODE_RFL 0x36
+# define NV30_FP_OP_OPCODE_DIV 0x3A
+#define NV30_FP_OP_OUT_SAT (1 << 31)
+
+/* high order bits of SRC0 */
+#define NV30_FP_OP_OUT_ABS (1 << 29)
+#define NV30_FP_OP_COND_SWZ_W_SHIFT 27
+#define NV30_FP_OP_COND_SWZ_W_MASK (3 << 27)
+#define NV30_FP_OP_COND_SWZ_Z_SHIFT 25
+#define NV30_FP_OP_COND_SWZ_Z_MASK (3 << 25)
+#define NV30_FP_OP_COND_SWZ_Y_SHIFT 23
+#define NV30_FP_OP_COND_SWZ_Y_MASK (3 << 23)
+#define NV30_FP_OP_COND_SWZ_X_SHIFT 21
+#define NV30_FP_OP_COND_SWZ_X_MASK (3 << 21)
+#define NV30_FP_OP_COND_SWZ_ALL_SHIFT 21
+#define NV30_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21)
+#define NV30_FP_OP_COND_SHIFT 18
+#define NV30_FP_OP_COND_MASK (0x07 << 18)
+# define NV30_FP_OP_COND_FL 0
+# define NV30_FP_OP_COND_LT 1
+# define NV30_FP_OP_COND_EQ 2
+# define NV30_FP_OP_COND_LE 3
+# define NV30_FP_OP_COND_GT 4
+# define NV30_FP_OP_COND_NE 5
+# define NV30_FP_OP_COND_GE 6
+# define NV30_FP_OP_COND_TR 7
+
+/* high order bits of SRC1 */
+#define NV30_FP_OP_DST_SCALE_SHIFT 28
+#define NV30_FP_OP_DST_SCALE_MASK (3 << 28)
+#define NV30_FP_OP_DST_SCALE_1X 0
+#define NV30_FP_OP_DST_SCALE_2X 1
+#define NV30_FP_OP_DST_SCALE_4X 2
+#define NV30_FP_OP_DST_SCALE_8X 3
+#define NV30_FP_OP_DST_SCALE_INV_2X 5
+#define NV30_FP_OP_DST_SCALE_INV_4X 6
+#define NV30_FP_OP_DST_SCALE_INV_8X 7
+
+
+/* high order bits of SRC2 */
+#define NV30_FP_OP_INDEX_INPUT (1 << 30)
+
+//== Register selection ==
+#define NV30_FP_REG_TYPE_SHIFT 0
+#define NV30_FP_REG_TYPE_MASK (3 << 0)
+# define NV30_FP_REG_TYPE_TEMP 0
+# define NV30_FP_REG_TYPE_INPUT 1
+# define NV30_FP_REG_TYPE_CONST 2
+#define NV30_FP_REG_SRC_SHIFT 2 /* uncertain */
+#define NV30_FP_REG_SRC_MASK (31 << 2)
+#define NV30_FP_REG_SRC_HALF (1 << 8)
+#define NV30_FP_REG_SWZ_ALL_SHIFT 9
+#define NV30_FP_REG_SWZ_ALL_MASK (255 << 9)
+#define NV30_FP_REG_SWZ_X_SHIFT 9
+#define NV30_FP_REG_SWZ_X_MASK (3 << 9)
+#define NV30_FP_REG_SWZ_Y_SHIFT 11
+#define NV30_FP_REG_SWZ_Y_MASK (3 << 11)
+#define NV30_FP_REG_SWZ_Z_SHIFT 13
+#define NV30_FP_REG_SWZ_Z_MASK (3 << 13)
+#define NV30_FP_REG_SWZ_W_SHIFT 15
+#define NV30_FP_REG_SWZ_W_MASK (3 << 15)
+# define NV30_FP_SWIZZLE_X 0
+# define NV30_FP_SWIZZLE_Y 1
+# define NV30_FP_SWIZZLE_Z 2
+# define NV30_FP_SWIZZLE_W 3
+#define NV30_FP_REG_NEGATE (1 << 17)
+
+#define NV30SR_NONE 0
+#define NV30SR_OUTPUT 1
+#define NV30SR_INPUT 2
+#define NV30SR_TEMP 3
+#define NV30SR_CONST 4
+
+struct nv30_sreg {
+ int type;
+ int index;
+
+ int dst_scale;
+
+ int negate;
+ int abs;
+ int swz[4];
+
+ int cc_update;
+ int cc_update_reg;
+ int cc_test;
+ int cc_test_reg;
+ int cc_swz[4];
+};
+
+static INLINE struct nv30_sreg
+nv30_sr(int type, int index)
+{
+ struct nv30_sreg temp = {
+ .type = type,
+ .index = index,
+ .dst_scale = DEF_SCALE,
+ .abs = 0,
+ .negate = 0,
+ .swz = { 0, 1, 2, 3 },
+ .cc_update = 0,
+ .cc_update_reg = 0,
+ .cc_test = DEF_CTEST,
+ .cc_test_reg = 0,
+ .cc_swz = { 0, 1, 2, 3 },
+ };
+ return temp;
+}
+
+static INLINE struct nv30_sreg
+nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w)
+{
+ struct nv30_sreg dst = src;
+
+ dst.swz[SWZ_X] = src.swz[x];
+ dst.swz[SWZ_Y] = src.swz[y];
+ dst.swz[SWZ_Z] = src.swz[z];
+ dst.swz[SWZ_W] = src.swz[w];
+ return dst;
+}
+
+static INLINE struct nv30_sreg
+nv30_sr_neg(struct nv30_sreg src)
+{
+ src.negate = !src.negate;
+ return src;
+}
+
+static INLINE struct nv30_sreg
+nv30_sr_abs(struct nv30_sreg src)
+{
+ src.abs = 1;
+ return src;
+}
+
+static INLINE struct nv30_sreg
+nv30_sr_scale(struct nv30_sreg src, int scale)
+{
+ src.dst_scale = scale;
+ return src;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c
new file mode 100644
index 00000000000..fc66075c83f
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state.c
@@ -0,0 +1,724 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+static void *
+nv30_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+ struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso));
+ struct nouveau_stateobj *so = so_new(16, 0);
+
+ if (cso->blend_enable) {
+ so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3);
+ so_data (so, 1);
+ so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) |
+ nvgl_blend_func(cso->rgb_src_factor));
+ so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 |
+ nvgl_blend_func(cso->rgb_dst_factor));
+ so_method(so, rankine, NV34TCL_BLEND_EQUATION, 1);
+ so_data (so, nvgl_blend_eqn(cso->alpha_func) << 16 |
+ nvgl_blend_eqn(cso->rgb_func));
+ } else {
+ so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_method(so, rankine, NV34TCL_COLOR_MASK, 1);
+ so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) |
+ ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) |
+ ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) |
+ ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0)));
+
+ if (cso->logicop_enable) {
+ so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2);
+ so_data (so, 1);
+ so_data (so, nvgl_logicop_func(cso->logicop_func));
+ } else {
+ so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_method(so, rankine, NV34TCL_DITHER_ENABLE, 1);
+ so_data (so, cso->dither ? 1 : 0);
+
+ so_ref(so, &bso->so);
+ bso->pipe = *cso;
+ return (void *)bso;
+}
+
+static void
+nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->blend = hwcso;
+ nv30->dirty |= NV30_NEW_BLEND;
+}
+
+static void
+nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_blend_state *bso = hwcso;
+
+ so_ref(NULL, &bso->so);
+ FREE(bso);
+}
+
+
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+ unsigned ret;
+
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ ret = NV34TCL_TX_WRAP_S_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_CLAMP:
+ ret = NV34TCL_TX_WRAP_S_CLAMP;
+ break;
+/* case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP;
+ break;*/
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ ret = NV34TCL_TX_WRAP_S_REPEAT;
+ break;
+ }
+
+ return ret >> NV34TCL_TX_WRAP_S_SHIFT;
+}
+
+static void *
+nv30_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ struct nv30_sampler_state *ps;
+ uint32_t filter = 0;
+
+ ps = MALLOC(sizeof(struct nv30_sampler_state));
+
+ ps->fmt = 0;
+ /* TODO: Not all RECTs formats have this bit set, bits 15-8 of format
+ are the tx format to use. We should store normalized coord flag
+ in sampler state structure, and set appropriate format in
+ nvxx_fragtex_build()
+ */
+ /*NV34TCL_TX_FORMAT_RECT*/
+ /*if (!cso->normalized_coords) {
+ ps->fmt |= (1<<14) ;
+ }*/
+
+ ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) |
+ (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) |
+ (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT));
+
+ ps->en = 0;
+
+ if (cso->max_anisotropy >= 8.0) {
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_8X;
+ } else
+ if (cso->max_anisotropy >= 4.0) {
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_4X;
+ } else
+ if (cso->max_anisotropy >= 2.0) {
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_2X;
+ }
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR;
+ break;
+ }
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST;
+ break;
+ }
+ break;
+ }
+
+ ps->filt = filter;
+
+ {
+ float limit;
+
+ limit = CLAMP(cso->lod_bias, -16.0, 15.0);
+ ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff;
+
+ limit = CLAMP(cso->max_lod, 0.0, 15.0);
+ ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/;
+
+ limit = CLAMP(cso->min_lod, 0.0, 15.0);
+ ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/;
+ }
+
+ if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ switch (cso->compare_func) {
+ case PIPE_FUNC_NEVER:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER;
+ break;
+ case PIPE_FUNC_GREATER:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER;
+ break;
+ case PIPE_FUNC_EQUAL:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL;
+ break;
+ case PIPE_FUNC_LESS:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL;
+ break;
+ case PIPE_FUNC_LEQUAL:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL;
+ break;
+ case PIPE_FUNC_ALWAYS:
+ ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS;
+ break;
+ default:
+ break;
+ }
+ }
+
+ ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
+ (float_to_ubyte(cso->border_color[0]) << 16) |
+ (float_to_ubyte(cso->border_color[1]) << 8) |
+ (float_to_ubyte(cso->border_color[2]) << 0));
+
+ return (void *)ps;
+}
+
+static void
+nv30_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv30->tex_sampler[unit] = sampler[unit];
+ nv30->dirty_samplers |= (1 << unit);
+ }
+
+ for (unit = nr; unit < nv30->nr_samplers; unit++) {
+ nv30->tex_sampler[unit] = NULL;
+ nv30->dirty_samplers |= (1 << unit);
+ }
+
+ nv30->nr_samplers = nr;
+ nv30->dirty |= NV30_NEW_SAMPLER;
+}
+
+static void
+nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nv30_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+ struct pipe_texture **miptree)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ pipe_texture_reference((struct pipe_texture **)
+ &nv30->tex_miptree[unit], miptree[unit]);
+ nv30->dirty_samplers |= (1 << unit);
+ }
+
+ for (unit = nr; unit < nv30->nr_textures; unit++) {
+ pipe_texture_reference((struct pipe_texture **)
+ &nv30->tex_miptree[unit], NULL);
+ nv30->dirty_samplers |= (1 << unit);
+ }
+
+ nv30->nr_textures = nr;
+ nv30->dirty |= NV30_NEW_SAMPLER;
+}
+
+static void *
+nv30_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
+ struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+
+ /*XXX: ignored:
+ * light_twoside
+ * point_smooth -nohw
+ * multisample
+ */
+
+ so_method(so, rankine, NV34TCL_SHADE_MODEL, 1);
+ so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT :
+ NV34TCL_SHADE_MODEL_SMOOTH);
+
+ so_method(so, rankine, NV34TCL_LINE_WIDTH, 2);
+ so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff);
+ so_data (so, cso->line_smooth ? 1 : 0);
+ so_method(so, rankine, NV34TCL_LINE_STIPPLE_ENABLE, 2);
+ so_data (so, cso->line_stipple_enable ? 1 : 0);
+ so_data (so, (cso->line_stipple_pattern << 16) |
+ cso->line_stipple_factor);
+
+ so_method(so, rankine, NV34TCL_POINT_SIZE, 1);
+ so_data (so, fui(cso->point_size));
+
+ so_method(so, rankine, NV34TCL_POLYGON_MODE_FRONT, 6);
+ if (cso->front_winding == PIPE_WINDING_CCW) {
+ so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+ so_data(so, nvgl_polygon_mode(cso->fill_cw));
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ so_data(so, NV34TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_CW:
+ so_data(so, NV34TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_BOTH:
+ so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ so_data(so, NV34TCL_CULL_FACE_BACK);
+ break;
+ }
+ so_data(so, NV34TCL_FRONT_FACE_CCW);
+ } else {
+ so_data(so, nvgl_polygon_mode(cso->fill_cw));
+ so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ so_data(so, NV34TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_CW:
+ so_data(so, NV34TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_BOTH:
+ so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ so_data(so, NV34TCL_CULL_FACE_BACK);
+ break;
+ }
+ so_data(so, NV34TCL_FRONT_FACE_CW);
+ }
+ so_data(so, cso->poly_smooth ? 1 : 0);
+ so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0);
+
+ so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, cso->poly_stipple_enable ? 1 : 0);
+
+ so_method(so, rankine, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if (cso->offset_cw || cso->offset_ccw) {
+ so_method(so, rankine, NV34TCL_POLYGON_OFFSET_FACTOR, 2);
+ so_data (so, fui(cso->offset_scale));
+ so_data (so, fui(cso->offset_units * 2));
+ }
+
+ so_method(so, rankine, NV34TCL_POINT_SPRITE, 1);
+ if (cso->point_sprite) {
+ unsigned psctl = (1 << 0), i;
+
+ for (i = 0; i < 8; i++) {
+ if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+ psctl |= (1 << (8 + i));
+ }
+
+ so_data(so, psctl);
+ } else {
+ so_data(so, 0);
+ }
+
+ so_ref(so, &rsso->so);
+ rsso->pipe = *cso;
+ return (void *)rsso;
+}
+
+static void
+nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->rasterizer = hwcso;
+ nv30->dirty |= NV30_NEW_RAST;
+ /*nv30->draw_dirty |= NV30_NEW_RAST;*/
+}
+
+static void
+nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_rasterizer_state *rsso = hwcso;
+
+ so_ref(NULL, &rsso->so);
+ FREE(rsso);
+}
+
+static void *
+nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
+ struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+
+ so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3);
+ so_data (so, nvgl_comparison_op(cso->depth.func));
+ so_data (so, cso->depth.writemask ? 1 : 0);
+ so_data (so, cso->depth.enabled ? 1 : 0);
+
+ so_method(so, rankine, NV34TCL_ALPHA_FUNC_ENABLE, 3);
+ so_data (so, cso->alpha.enabled ? 1 : 0);
+ so_data (so, nvgl_comparison_op(cso->alpha.func));
+ so_data (so, float_to_ubyte(cso->alpha.ref));
+
+ if (cso->stencil[0].enabled) {
+ so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 8);
+ so_data (so, cso->stencil[0].enabled ? 1 : 0);
+ so_data (so, cso->stencil[0].write_mask);
+ so_data (so, nvgl_comparison_op(cso->stencil[0].func));
+ so_data (so, cso->stencil[0].ref_value);
+ so_data (so, cso->stencil[0].value_mask);
+ so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+ } else {
+ so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ if (cso->stencil[1].enabled) {
+ so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 8);
+ so_data (so, cso->stencil[1].enabled ? 1 : 0);
+ so_data (so, cso->stencil[1].write_mask);
+ so_data (so, nvgl_comparison_op(cso->stencil[1].func));
+ so_data (so, cso->stencil[1].ref_value);
+ so_data (so, cso->stencil[1].value_mask);
+ so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+ } else {
+ so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_ref(so, &zsaso->so);
+ zsaso->pipe = *cso;
+ return (void *)zsaso;
+}
+
+static void
+nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->zsa = hwcso;
+ nv30->dirty |= NV30_NEW_ZSA;
+}
+
+static void
+nv30_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_zsa_state *zsaso = hwcso;
+
+ so_ref(NULL, &zsaso->so);
+ FREE(zsaso);
+}
+
+static void *
+nv30_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ /*struct nv30_context *nv30 = nv30_context(pipe);*/
+ struct nv30_vertex_program *vp;
+
+ vp = CALLOC(1, sizeof(struct nv30_vertex_program));
+ vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+ /*vp->draw = draw_create_vertex_shader(nv30->draw, &vp->pipe);*/
+
+ return (void *)vp;
+}
+
+static void
+nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->vertprog = hwcso;
+ nv30->dirty |= NV30_NEW_VERTPROG;
+ /*nv30->draw_dirty |= NV30_NEW_VERTPROG;*/
+}
+
+static void
+nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_vertex_program *vp = hwcso;
+
+ /*draw_delete_vertex_shader(nv30->draw, vp->draw);*/
+ nv30_vertprog_destroy(nv30, vp);
+ FREE((void*)vp->pipe.tokens);
+ FREE(vp);
+}
+
+static void *
+nv30_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv30_fragment_program *fp;
+
+ fp = CALLOC(1, sizeof(struct nv30_fragment_program));
+ fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ tgsi_scan_shader(fp->pipe.tokens, &fp->info);
+
+ return (void *)fp;
+}
+
+static void
+nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->fragprog = hwcso;
+ nv30->dirty |= NV30_NEW_FRAGPROG;
+}
+
+static void
+nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_fragment_program *fp = hwcso;
+
+ nv30_fragprog_destroy(nv30, fp);
+ FREE((void*)fp->pipe.tokens);
+ FREE(fp);
+}
+
+static void
+nv30_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->blend_colour = *bcol;
+ nv30->dirty |= NV30_NEW_BCOL;
+}
+
+static void
+nv30_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+}
+
+static void
+nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ const struct pipe_constant_buffer *buf )
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->constbuf[shader] = buf->buffer;
+ nv30->constbuf_nr[shader] = buf->size / (4 * sizeof(float));
+
+ if (shader == PIPE_SHADER_VERTEX) {
+ nv30->dirty |= NV30_NEW_VERTPROG;
+ } else
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ nv30->dirty |= NV30_NEW_FRAGPROG;
+ }
+}
+
+static void
+nv30_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->framebuffer = *fb;
+ nv30->dirty |= NV30_NEW_FB;
+}
+
+static void
+nv30_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ memcpy(nv30->stipple, stipple->stipple, 4 * 32);
+ nv30->dirty |= NV30_NEW_STIPPLE;
+}
+
+static void
+nv30_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->scissor = *s;
+ nv30->dirty |= NV30_NEW_SCISSOR;
+}
+
+static void
+nv30_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->viewport = *vpt;
+ nv30->dirty |= NV30_NEW_VIEWPORT;
+ /*nv30->draw_dirty |= NV30_NEW_VIEWPORT;*/
+}
+
+static void
+nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ memcpy(nv30->vtxbuf, vb, sizeof(*vb) * count);
+ nv30->vtxbuf_nr = count;
+
+ nv30->dirty |= NV30_NEW_ARRAYS;
+ /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
+}
+
+static void
+nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_element *ve)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ memcpy(nv30->vtxelt, ve, sizeof(*ve) * count);
+ nv30->vtxelt_nr = count;
+
+ nv30->dirty |= NV30_NEW_ARRAYS;
+ /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
+}
+
+static void
+nv30_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+
+ nv30->edgeflags = bitfield;
+ nv30->dirty |= NV30_NEW_ARRAYS;
+ /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
+}
+
+void
+nv30_init_state_functions(struct nv30_context *nv30)
+{
+ nv30->pipe.create_blend_state = nv30_blend_state_create;
+ nv30->pipe.bind_blend_state = nv30_blend_state_bind;
+ nv30->pipe.delete_blend_state = nv30_blend_state_delete;
+
+ nv30->pipe.create_sampler_state = nv30_sampler_state_create;
+ nv30->pipe.bind_sampler_states = nv30_sampler_state_bind;
+ nv30->pipe.delete_sampler_state = nv30_sampler_state_delete;
+ nv30->pipe.set_sampler_textures = nv30_set_sampler_texture;
+
+ nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create;
+ nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind;
+ nv30->pipe.delete_rasterizer_state = nv30_rasterizer_state_delete;
+
+ nv30->pipe.create_depth_stencil_alpha_state =
+ nv30_depth_stencil_alpha_state_create;
+ nv30->pipe.bind_depth_stencil_alpha_state =
+ nv30_depth_stencil_alpha_state_bind;
+ nv30->pipe.delete_depth_stencil_alpha_state =
+ nv30_depth_stencil_alpha_state_delete;
+
+ nv30->pipe.create_vs_state = nv30_vp_state_create;
+ nv30->pipe.bind_vs_state = nv30_vp_state_bind;
+ nv30->pipe.delete_vs_state = nv30_vp_state_delete;
+
+ nv30->pipe.create_fs_state = nv30_fp_state_create;
+ nv30->pipe.bind_fs_state = nv30_fp_state_bind;
+ nv30->pipe.delete_fs_state = nv30_fp_state_delete;
+
+ nv30->pipe.set_blend_color = nv30_set_blend_color;
+ nv30->pipe.set_clip_state = nv30_set_clip_state;
+ nv30->pipe.set_constant_buffer = nv30_set_constant_buffer;
+ nv30->pipe.set_framebuffer_state = nv30_set_framebuffer_state;
+ nv30->pipe.set_polygon_stipple = nv30_set_polygon_stipple;
+ nv30->pipe.set_scissor_state = nv30_set_scissor_state;
+ nv30->pipe.set_viewport_state = nv30_set_viewport_state;
+
+ nv30->pipe.set_edgeflags = nv30_set_edgeflags;
+ nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers;
+ nv30->pipe.set_vertex_elements = nv30_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h
new file mode 100644
index 00000000000..2023278e377
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state.h
@@ -0,0 +1,88 @@
+#ifndef __NV30_STATE_H__
+#define __NV30_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+struct nv30_sampler_state {
+ uint32_t fmt;
+ uint32_t wrap;
+ uint32_t en;
+ uint32_t filt;
+ uint32_t bcol;
+};
+
+struct nv30_vertex_program_exec {
+ uint32_t data[4];
+ boolean has_branch_offset;
+ int const_index;
+};
+
+struct nv30_vertex_program_data {
+ int index; /* immediates == -1 */
+ float value[4];
+};
+
+struct nv30_vertex_program {
+ struct pipe_shader_state pipe;
+
+ boolean translated;
+
+ struct nv30_vertex_program_exec *insns;
+ unsigned nr_insns;
+ struct nv30_vertex_program_data *consts;
+ unsigned nr_consts;
+
+ struct nouveau_resource *exec;
+ unsigned exec_start;
+ struct nouveau_resource *data;
+ unsigned data_start;
+ unsigned data_start_min;
+
+ uint32_t ir;
+ uint32_t or;
+ struct nouveau_stateobj *so;
+};
+
+struct nv30_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+struct nv30_fragment_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+
+ boolean translated;
+ boolean on_hw;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nv30_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ uint32_t fp_reg_control;
+ struct nouveau_stateobj *so;
+};
+
+struct nv30_miptree {
+ struct pipe_texture base;
+
+ struct pipe_buffer *buffer;
+ uint total_size;
+
+ struct pipe_texture *shadow_tex;
+ struct pipe_surface *shadow_surface;
+
+ struct {
+ uint pitch;
+ uint *image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+#endif
diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c
new file mode 100644
index 00000000000..44d43e132a5
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_blend.c
@@ -0,0 +1,40 @@
+#include "nv30_context.h"
+
+static boolean
+nv30_state_blend_validate(struct nv30_context *nv30)
+{
+ so_ref(nv30->blend->so, &nv30->state.hw[NV30_STATE_BLEND]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_blend = {
+ .validate = nv30_state_blend_validate,
+ .dirty = {
+ .pipe = NV30_NEW_BLEND,
+ .hw = NV30_STATE_BLEND
+ }
+};
+
+static boolean
+nv30_state_blend_colour_validate(struct nv30_context *nv30)
+{
+ struct nouveau_stateobj *so = so_new(2, 0);
+ struct pipe_blend_color *bcol = &nv30->blend_colour;
+
+ so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1);
+ so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) |
+ (float_to_ubyte(bcol->color[0]) << 16) |
+ (float_to_ubyte(bcol->color[1]) << 8) |
+ (float_to_ubyte(bcol->color[2]) << 0)));
+
+ so_ref(so, &nv30->state.hw[NV30_STATE_BCOL]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_blend_colour = {
+ .validate = nv30_state_blend_colour_validate,
+ .dirty = {
+ .pipe = NV30_NEW_BCOL,
+ .hw = NV30_STATE_BCOL
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c
new file mode 100644
index 00000000000..40fed621b24
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_emit.c
@@ -0,0 +1,118 @@
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+static struct nv30_state_entry *render_states[] = {
+ &nv30_state_framebuffer,
+ &nv30_state_rasterizer,
+ &nv30_state_scissor,
+ &nv30_state_stipple,
+ &nv30_state_fragprog,
+ &nv30_state_fragtex,
+ &nv30_state_vertprog,
+ &nv30_state_blend,
+ &nv30_state_blend_colour,
+ &nv30_state_zsa,
+ &nv30_state_viewport,
+ &nv30_state_vbo,
+ NULL
+};
+
+static void
+nv30_state_do_validate(struct nv30_context *nv30,
+ struct nv30_state_entry **states)
+{
+ const struct pipe_framebuffer_state *fb = &nv30->framebuffer;
+ unsigned i;
+
+ for (i = 0; i < fb->num_cbufs; i++)
+ fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED;
+ if (fb->zsbuf)
+ fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED;
+
+ while (*states) {
+ struct nv30_state_entry *e = *states;
+
+ if (nv30->dirty & e->dirty.pipe) {
+ if (e->validate(nv30)) {
+ nv30->state.dirty |= (1ULL << e->dirty.hw);
+ }
+ }
+
+ states++;
+ }
+ nv30->dirty = 0;
+}
+
+void
+nv30_state_emit(struct nv30_context *nv30)
+{
+ struct nv30_state *state = &nv30->state;
+ struct nv30_screen *screen = nv30->screen;
+ unsigned i, samplers;
+ uint64 states;
+
+ if (nv30->pctx_id != screen->cur_pctx) {
+ for (i = 0; i < NV30_STATE_MAX; i++) {
+ if (state->hw[i] && screen->state[i] != state->hw[i])
+ state->dirty |= (1ULL << i);
+ }
+
+ screen->cur_pctx = nv30->pctx_id;
+ }
+
+ for (i = 0, states = state->dirty; states; i++) {
+ if (!(states & (1ULL << i)))
+ continue;
+ so_ref (state->hw[i], &nv30->screen->state[i]);
+ if (state->hw[i])
+ so_emit(nv30->nvws, nv30->screen->state[i]);
+ states &= ~(1ULL << i);
+ }
+
+ state->dirty = 0;
+
+ so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FB]);
+ for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
+ if (!(samplers & (1 << i)))
+ continue;
+ so_emit_reloc_markers(nv30->nvws,
+ state->hw[NV30_STATE_FRAGTEX0+i]);
+ samplers &= ~(1ULL << i);
+ }
+ so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FRAGPROG]);
+ if (state->hw[NV30_STATE_VTXBUF] /*&& nv30->render_mode == HW*/)
+ so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_VTXBUF]);
+}
+
+boolean
+nv30_state_validate(struct nv30_context *nv30)
+{
+#if 0
+ boolean was_sw = nv30->fallback_swtnl ? TRUE : FALSE;
+
+ if (nv30->render_mode != HW) {
+ /* Don't even bother trying to go back to hw if none
+ * of the states that caused swtnl previously have changed.
+ */
+ if ((nv30->fallback_swtnl & nv30->dirty)
+ != nv30->fallback_swtnl)
+ return FALSE;
+
+ /* Attempt to go to hwtnl again */
+ nv30->pipe.flush(&nv30->pipe, 0, NULL);
+ nv30->dirty |= (NV30_NEW_VIEWPORT |
+ NV30_NEW_VERTPROG |
+ NV30_NEW_ARRAYS);
+ nv30->render_mode = HW;
+ }
+#endif
+ nv30_state_do_validate(nv30, render_states);
+#if 0
+ if (nv30->fallback_swtnl || nv30->fallback_swrast)
+ return FALSE;
+
+ if (was_sw)
+ NOUVEAU_ERR("swtnl->hw\n");
+#endif
+ return TRUE;
+}
diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c
new file mode 100644
index 00000000000..73bdf7e56c5
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_fb.c
@@ -0,0 +1,140 @@
+#include "nv30_context.h"
+#include "nouveau/nouveau_util.h"
+
+static boolean
+nv30_state_framebuffer_validate(struct nv30_context *nv30)
+{
+ struct pipe_framebuffer_state *fb = &nv30->framebuffer;
+ struct pipe_surface *rt[2], *zeta = NULL;
+ uint32_t rt_enable, rt_format;
+ int i, colour_format = 0, zeta_format = 0;
+ struct nouveau_stateobj *so = so_new(64, 10);
+ unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
+ unsigned w = fb->width;
+ unsigned h = fb->height;
+
+ rt_enable = 0;
+ for (i = 0; i < fb->num_cbufs; i++) {
+ if (colour_format) {
+ assert(colour_format == fb->cbufs[i]->format);
+ } else {
+ colour_format = fb->cbufs[i]->format;
+ rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i);
+ rt[i] = fb->cbufs[i];
+ }
+ }
+
+ if (rt_enable & NV34TCL_RT_ENABLE_COLOR1)
+ rt_enable |= NV34TCL_RT_ENABLE_MRT;
+
+ if (fb->zsbuf) {
+ zeta_format = fb->zsbuf->format;
+ zeta = fb->zsbuf;
+ }
+
+ if (!(rt[0]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+ for (i = 1; i < fb->num_cbufs; i++)
+ assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));
+
+ /* FIXME: NV34TCL_RT_FORMAT_LOG2_[WIDTH/HEIGHT] */
+ rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
+ log2i(fb->width) << 16 /*NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT*/ |
+ log2i(fb->height) << 24 /*NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT*/;
+ }
+ else
+ rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
+
+ switch (colour_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case 0:
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
+ break;
+ default:
+ assert(0);
+ }
+
+ switch (zeta_format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case 0:
+ rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) {
+ uint32_t pitch = rt[0]->stride;
+ if (zeta) {
+ pitch |= (zeta->stride << 16);
+ } else {
+ pitch |= (pitch << 16);
+ }
+
+ so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR0, 1);
+ so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv30->nvws->channel->vram->handle,
+ nv30->nvws->channel->gart->handle);
+ so_method(so, nv30->screen->rankine, NV34TCL_COLOR0_PITCH, 2);
+ so_data (so, pitch);
+ so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ }
+
+ if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) {
+ so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR1, 1);
+ so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv30->nvws->channel->vram->handle,
+ nv30->nvws->channel->gart->handle);
+ so_method(so, nv30->screen->rankine, NV34TCL_COLOR1_OFFSET, 2);
+ so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, rt[1]->stride);
+ }
+
+ if (zeta_format) {
+ so_method(so, nv30->screen->rankine, NV34TCL_DMA_ZETA, 1);
+ so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv30->nvws->channel->vram->handle,
+ nv30->nvws->channel->gart->handle);
+ so_method(so, nv30->screen->rankine, NV34TCL_ZETA_OFFSET, 1);
+ so_reloc (so, zeta->buffer, zeta->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ /* TODO: allocate LMA depth buffer */
+ }
+
+ so_method(so, nv30->screen->rankine, NV34TCL_RT_ENABLE, 1);
+ so_data (so, rt_enable);
+ so_method(so, nv30->screen->rankine, NV34TCL_RT_HORIZ, 3);
+ so_data (so, (w << 16) | 0);
+ so_data (so, (h << 16) | 0);
+ so_data (so, rt_format);
+ so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_HORIZ, 2);
+ so_data (so, (w << 16) | 0);
+ so_data (so, (h << 16) | 0);
+ so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+ so_data (so, ((w - 1) << 16) | 0);
+ so_data (so, ((h - 1) << 16) | 0);
+ so_method(so, nv30->screen->rankine, 0x1d88, 1);
+ so_data (so, (1 << 12) | h);
+ /* Wonder why this is needed, context should all be set to zero on init */
+ so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_TX_ORIGIN, 1);
+ so_data (so, 0);
+
+ so_ref(so, &nv30->state.hw[NV30_STATE_FB]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_framebuffer = {
+ .validate = nv30_state_framebuffer_validate,
+ .dirty = {
+ .pipe = NV30_NEW_FB,
+ .hw = NV30_STATE_FB
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_rasterizer.c b/src/gallium/drivers/nv30/nv30_state_rasterizer.c
new file mode 100644
index 00000000000..6d1b60e043d
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_rasterizer.c
@@ -0,0 +1,17 @@
+#include "nv30_context.h"
+
+static boolean
+nv30_state_rasterizer_validate(struct nv30_context *nv30)
+{
+ so_ref(nv30->rasterizer->so,
+ &nv30->state.hw[NV30_STATE_RAST]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_rasterizer = {
+ .validate = nv30_state_rasterizer_validate,
+ .dirty = {
+ .pipe = NV30_NEW_RAST,
+ .hw = NV30_STATE_RAST
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c
new file mode 100644
index 00000000000..1db9bc17955
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_scissor.c
@@ -0,0 +1,35 @@
+#include "nv30_context.h"
+
+static boolean
+nv30_state_scissor_validate(struct nv30_context *nv30)
+{
+ struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe;
+ struct pipe_scissor_state *s = &nv30->scissor;
+ struct nouveau_stateobj *so;
+
+ if (nv30->state.hw[NV30_STATE_SCISSOR] &&
+ (rast->scissor == 0 && nv30->state.scissor_enabled == 0))
+ return FALSE;
+ nv30->state.scissor_enabled = rast->scissor;
+
+ so = so_new(3, 0);
+ so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2);
+ if (nv30->state.scissor_enabled) {
+ so_data (so, ((s->maxx - s->minx) << 16) | s->minx);
+ so_data (so, ((s->maxy - s->miny) << 16) | s->miny);
+ } else {
+ so_data (so, 4096 << 16);
+ so_data (so, 4096 << 16);
+ }
+
+ so_ref(so, &nv30->state.hw[NV30_STATE_SCISSOR]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_scissor = {
+ .validate = nv30_state_scissor_validate,
+ .dirty = {
+ .pipe = NV30_NEW_SCISSOR | NV30_NEW_RAST,
+ .hw = NV30_STATE_SCISSOR
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c
new file mode 100644
index 00000000000..41b42813b49
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_stipple.c
@@ -0,0 +1,39 @@
+#include "nv30_context.h"
+
+static boolean
+nv30_state_stipple_validate(struct nv30_context *nv30)
+{
+ struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe;
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+ struct nouveau_stateobj *so;
+
+ if (nv30->state.hw[NV30_STATE_STIPPLE] &&
+ (rast->poly_stipple_enable == 0 && nv30->state.stipple_enabled == 0))
+ return FALSE;
+
+ if (rast->poly_stipple_enable) {
+ unsigned i;
+
+ so = so_new(35, 0);
+ so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, 1);
+ so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+ for (i = 0; i < 32; i++)
+ so_data(so, nv30->stipple[i]);
+ } else {
+ so = so_new(2, 0);
+ so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_ref(so, &nv30->state.hw[NV30_STATE_STIPPLE]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_stipple = {
+ .validate = nv30_state_stipple_validate,
+ .dirty = {
+ .pipe = NV30_NEW_STIPPLE | NV30_NEW_RAST,
+ .hw = NV30_STATE_STIPPLE,
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c
new file mode 100644
index 00000000000..951d40ebfdd
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_viewport.c
@@ -0,0 +1,70 @@
+#include "nv30_context.h"
+
+static boolean
+nv30_state_viewport_validate(struct nv30_context *nv30)
+{
+ struct pipe_viewport_state *vpt = &nv30->viewport;
+ struct nouveau_stateobj *so;
+ unsigned bypass;
+
+ if (/*nv30->render_mode == HW &&*/ !nv30->rasterizer->pipe.bypass_clipping)
+ bypass = 0;
+ else
+ bypass = 1;
+
+ if (nv30->state.hw[NV30_STATE_VIEWPORT] &&
+ (bypass || !(nv30->dirty & NV30_NEW_VIEWPORT)) &&
+ nv30->state.viewport_bypass == bypass)
+ return FALSE;
+ nv30->state.viewport_bypass = bypass;
+
+ so = so_new(11, 0);
+ if (!bypass) {
+ so_method(so, nv30->screen->rankine,
+ NV34TCL_VIEWPORT_TRANSLATE_X, 8);
+ so_data (so, fui(vpt->translate[0]));
+ so_data (so, fui(vpt->translate[1]));
+ so_data (so, fui(vpt->translate[2]));
+ so_data (so, fui(vpt->translate[3]));
+ so_data (so, fui(vpt->scale[0]));
+ so_data (so, fui(vpt->scale[1]));
+ so_data (so, fui(vpt->scale[2]));
+ so_data (so, fui(vpt->scale[3]));
+/* so_method(so, nv30->screen->rankine, 0x1d78, 1);
+ so_data (so, 1);
+*/ } else {
+ so_method(so, nv30->screen->rankine,
+ NV34TCL_VIEWPORT_TRANSLATE_X, 8);
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(0.0));
+ /* Not entirely certain what this is yet. The DDX uses this
+ * value also as it fixes rendering when you pass
+ * pre-transformed vertices to the GPU. My best gusss is that
+ * this bypasses some culling/clipping stage. Might be worth
+ * noting that points/lines are uneffected by whatever this
+ * value fixes, only filled polygons are effected.
+ */
+/* so_method(so, nv30->screen->rankine, 0x1d78, 1);
+ so_data (so, 0x110);
+*/ }
+ /* TODO/FIXME: never saw value 0x0110 in renouveau dumps, only 0x0001 */
+ so_method(so, nv30->screen->rankine, 0x1d78, 1);
+ so_data (so, 1);
+
+ so_ref(so, &nv30->state.hw[NV30_STATE_VIEWPORT]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_viewport = {
+ .validate = nv30_state_viewport_validate,
+ .dirty = {
+ .pipe = NV30_NEW_VIEWPORT | NV30_NEW_RAST,
+ .hw = NV30_STATE_VIEWPORT
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_state_zsa.c b/src/gallium/drivers/nv30/nv30_state_zsa.c
new file mode 100644
index 00000000000..0940b7269b2
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_state_zsa.c
@@ -0,0 +1,17 @@
+#include "nv30_context.h"
+
+static boolean
+nv30_state_zsa_validate(struct nv30_context *nv30)
+{
+ so_ref(nv30->zsa->so,
+ &nv30->state.hw[NV30_STATE_ZSA]);
+ return TRUE;
+}
+
+struct nv30_state_entry nv30_state_zsa = {
+ .validate = nv30_state_zsa_validate,
+ .dirty = {
+ .pipe = NV30_NEW_ZSA,
+ .hw = NV30_STATE_ZSA
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nv30/nv30_surface.c
new file mode 100644
index 00000000000..d3376a73bf5
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_surface.c
@@ -0,0 +1,77 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv30_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+
+#include "util/u_tile.h"
+
+static void
+nv30_surface_copy(struct pipe_context *pipe, boolean do_flip,
+ struct pipe_surface *dest, unsigned destx, unsigned desty,
+ struct pipe_surface *src, unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nouveau_winsys *nvws = nv30->nvws;
+
+ if (do_flip) {
+ /*XXX: This dodgyness will do for now for correctness. But,
+ * need to investigate whether the 2D engine is able to
+ * manage a flip (perhaps SIFM?), if not, use the 3D engine
+ */
+ desty += height;
+ while (height--) {
+ nvws->surface_copy(nvws, dest, destx, desty--, src,
+ srcx, srcy++, width, 1);
+ }
+ } else {
+ nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy,
+ width, height);
+ }
+}
+
+static void
+nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nouveau_winsys *nvws = nv30->nvws;
+
+ nvws->surface_fill(nvws, dest, destx, desty, width, height, value);
+}
+
+void
+nv30_init_surface_functions(struct nv30_context *nv30)
+{
+ nv30->pipe.surface_copy = nv30_surface_copy;
+ nv30->pipe.surface_fill = nv30_surface_fill;
+}
diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c
new file mode 100644
index 00000000000..556f981d4a5
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_vbo.c
@@ -0,0 +1,556 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+#include "nouveau/nouveau_util.h"
+
+#define FORCE_SWTNL 0
+
+static INLINE int
+nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
+{
+ switch (pipe) {
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_R32G32_FLOAT:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ *fmt = NV34TCL_VTXFMT_TYPE_FLOAT;
+ break;
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ *fmt = NV34TCL_VTXFMT_TYPE_UBYTE;
+ break;
+ case PIPE_FORMAT_R16_SSCALED:
+ case PIPE_FORMAT_R16G16_SSCALED:
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ *fmt = NV34TCL_VTXFMT_TYPE_USHORT;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe));
+ return 1;
+ }
+
+ switch (pipe) {
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_R16_SSCALED:
+ *ncomp = 1;
+ break;
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R32G32_FLOAT:
+ case PIPE_FORMAT_R16G16_SSCALED:
+ *ncomp = 2;
+ break;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ *ncomp = 3;
+ break;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ *ncomp = 4;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe));
+ return 1;
+ }
+
+ return 0;
+}
+
+static boolean
+nv30_vbo_set_idxbuf(struct nv30_context *nv30, struct pipe_buffer *ib,
+ unsigned ib_size)
+{
+ struct pipe_screen *pscreen = &nv30->screen->pipe;
+ unsigned type;
+
+ if (!ib) {
+ nv30->idxbuf = NULL;
+ nv30->idxbuf_format = 0xdeadbeef;
+ return FALSE;
+ }
+
+ if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1)
+ return FALSE;
+
+ switch (ib_size) {
+ case 2:
+ type = NV34TCL_IDXBUF_FORMAT_TYPE_U16;
+ break;
+ case 4:
+ type = NV34TCL_IDXBUF_FORMAT_TYPE_U32;
+ break;
+ default:
+ return FALSE;
+ }
+
+ if (ib != nv30->idxbuf ||
+ type != nv30->idxbuf_format) {
+ nv30->dirty |= NV30_NEW_ARRAYS;
+ nv30->idxbuf = ib;
+ nv30->idxbuf_format = type;
+ }
+
+ return TRUE;
+}
+
+static boolean
+nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so,
+ int attrib, struct pipe_vertex_element *ve,
+ struct pipe_vertex_buffer *vb)
+{
+ struct pipe_winsys *ws = nv30->pipe.winsys;
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+ unsigned type, ncomp;
+ void *map;
+
+ if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp))
+ return FALSE;
+
+ map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+ map += vb->buffer_offset + ve->src_offset;
+
+ switch (type) {
+ case NV34TCL_VTXFMT_TYPE_FLOAT:
+ {
+ float *v = map;
+
+ switch (ncomp) {
+ case 4:
+ so_method(so, rankine, NV34TCL_VTX_ATTR_4F_X(attrib), 4);
+ so_data (so, fui(v[0]));
+ so_data (so, fui(v[1]));
+ so_data (so, fui(v[2]));
+ so_data (so, fui(v[3]));
+ break;
+ case 3:
+ so_method(so, rankine, NV34TCL_VTX_ATTR_3F_X(attrib), 3);
+ so_data (so, fui(v[0]));
+ so_data (so, fui(v[1]));
+ so_data (so, fui(v[2]));
+ break;
+ case 2:
+ so_method(so, rankine, NV34TCL_VTX_ATTR_2F_X(attrib), 2);
+ so_data (so, fui(v[0]));
+ so_data (so, fui(v[1]));
+ break;
+ case 1:
+ so_method(so, rankine, NV34TCL_VTX_ATTR_1F(attrib), 1);
+ so_data (so, fui(v[0]));
+ break;
+ default:
+ ws->buffer_unmap(ws, vb->buffer);
+ return FALSE;
+ }
+ }
+ break;
+ default:
+ ws->buffer_unmap(ws, vb->buffer);
+ return FALSE;
+ }
+
+ ws->buffer_unmap(ws, vb->buffer);
+
+ return TRUE;
+}
+
+boolean
+nv30_draw_arrays(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nouveau_channel *chan = nv30->nvws->channel;
+ unsigned restart = 0;
+
+ nv30_vbo_set_idxbuf(nv30, NULL, 0);
+ if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
+ /*return nv30_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);*/
+ return FALSE;
+ }
+
+ while (count) {
+ unsigned vc, nr;
+
+ nv30_state_emit(nv30);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
+ mode, start, count, &restart);
+ if (!vc) {
+ FIRE_RING(NULL);
+ continue;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ nr = (vc & 0xff);
+ if (nr) {
+ BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1);
+ OUT_RING (((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push);
+ while (push--) {
+ OUT_RING(((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ count -= vc;
+ start = restart;
+ }
+
+ pipe->flush(pipe, 0, NULL);
+ return TRUE;
+}
+
+static INLINE void
+nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv30->nvws->channel;
+
+ while (count) {
+ uint8_t *elts = (uint8_t *)ib + start;
+ unsigned vc, push, restart = 0;
+
+ nv30_state_emit(nv30);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ if (vc & 1) {
+ BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+
+ push = MIN2(vc, 2047 * 2);
+
+ BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((elts[i+1] << 16) | elts[i]);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+static INLINE void
+nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv30->nvws->channel;
+
+ while (count) {
+ uint16_t *elts = (uint16_t *)ib + start;
+ unsigned vc, push, restart = 0;
+
+ nv30_state_emit(nv30);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ if (vc & 1) {
+ BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+
+ push = MIN2(vc, 2047 * 2);
+
+ BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((elts[i+1] << 16) | elts[i]);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+static INLINE void
+nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv30->nvws->channel;
+
+ while (count) {
+ uint32_t *elts = (uint32_t *)ib + start;
+ unsigned vc, push, restart = 0;
+
+ nv30_state_emit(nv30);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ while (vc) {
+ push = MIN2(vc, 2047);
+
+ BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push);
+ OUT_RINGp (elts, push);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+static boolean
+nv30_draw_elements_inline(struct pipe_context *pipe,
+ struct pipe_buffer *ib, unsigned ib_size,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+ void *map;
+
+ map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ);
+ if (!ib) {
+ NOUVEAU_ERR("failed mapping ib\n");
+ return FALSE;
+ }
+
+ switch (ib_size) {
+ case 1:
+ nv30_draw_elements_u08(nv30, map, mode, start, count);
+ break;
+ case 2:
+ nv30_draw_elements_u16(nv30, map, mode, start, count);
+ break;
+ case 4:
+ nv30_draw_elements_u32(nv30, map, mode, start, count);
+ break;
+ default:
+ NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
+ break;
+ }
+
+ ws->buffer_unmap(ws, ib);
+ return TRUE;
+}
+
+static boolean
+nv30_draw_elements_vbo(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ struct nouveau_channel *chan = nv30->nvws->channel;
+ unsigned restart = 0;
+
+ while (count) {
+ unsigned nr, vc;
+
+ nv30_state_emit(nv30);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
+ mode, start, count, &restart);
+ if (!vc) {
+ FIRE_RING(NULL);
+ continue;
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ nr = (vc & 0xff);
+ if (nr) {
+ BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1);
+ OUT_RING (((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push);
+ while (push--) {
+ OUT_RING(((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (0);
+
+ count -= vc;
+ start = restart;
+ }
+
+ return TRUE;
+}
+
+boolean
+nv30_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer, unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ boolean idxbuf;
+
+ idxbuf = nv30_vbo_set_idxbuf(nv30, indexBuffer, indexSize);
+ if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
+ /*return nv30_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);*/
+ return FALSE;
+ }
+
+ if (idxbuf) {
+ nv30_draw_elements_vbo(pipe, mode, start, count);
+ } else {
+ nv30_draw_elements_inline(pipe, indexBuffer, indexSize,
+ mode, start, count);
+ }
+
+ pipe->flush(pipe, 0, NULL);
+ return TRUE;
+}
+
+static boolean
+nv30_vbo_validate(struct nv30_context *nv30)
+{
+ struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL;
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+ struct pipe_buffer *ib = nv30->idxbuf;
+ unsigned ib_format = nv30->idxbuf_format;
+ unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+ int hw;
+
+ if (nv30->edgeflags) {
+ /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/
+ return FALSE;
+ }
+
+ vtxbuf = so_new(20, 18);
+ so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr);
+ vtxfmt = so_new(17, 0);
+ so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr);
+
+ for (hw = 0; hw < nv30->vtxelt_nr; hw++) {
+ struct pipe_vertex_element *ve;
+ struct pipe_vertex_buffer *vb;
+ unsigned type, ncomp;
+
+ ve = &nv30->vtxelt[hw];
+ vb = &nv30->vtxbuf[ve->vertex_buffer_index];
+
+ if (!vb->pitch) {
+ if (!sattr)
+ sattr = so_new(16 * 5, 0);
+
+ if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) {
+ so_data(vtxbuf, 0);
+ so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT);
+ continue;
+ }
+ }
+
+ if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
+ /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/
+ so_ref(NULL, &vtxbuf);
+ so_ref(NULL, &vtxfmt);
+ return FALSE;
+ }
+
+ so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset,
+ vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
+ so_data (vtxfmt, ((vb->pitch << NV34TCL_VTXFMT_STRIDE_SHIFT) |
+ (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type));
+ }
+
+ if (ib) {
+ so_method(vtxbuf, rankine, NV34TCL_IDXBUF_ADDRESS, 2);
+ so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR,
+ 0, NV34TCL_IDXBUF_FORMAT_DMA1);
+ }
+
+ so_method(vtxbuf, rankine, 0x1710, 1);
+ so_data (vtxbuf, 0);
+
+ so_ref(vtxbuf, &nv30->state.hw[NV30_STATE_VTXBUF]);
+ nv30->state.dirty |= (1ULL << NV30_STATE_VTXBUF);
+ so_ref(vtxfmt, &nv30->state.hw[NV30_STATE_VTXFMT]);
+ nv30->state.dirty |= (1ULL << NV30_STATE_VTXFMT);
+ so_ref(sattr, &nv30->state.hw[NV30_STATE_VTXATTR]);
+ nv30->state.dirty |= (1ULL << NV30_STATE_VTXATTR);
+ return FALSE;
+}
+
+struct nv30_state_entry nv30_state_vbo = {
+ .validate = nv30_vbo_validate,
+ .dirty = {
+ .pipe = NV30_NEW_ARRAYS,
+ .hw = 0,
+ }
+};
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
new file mode 100644
index 00000000000..72824559e8b
--- /dev/null
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -0,0 +1,838 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "nv30_context.h"
+#include "nv30_state.h"
+
+/* TODO (at least...):
+ * 1. Indexed consts + ARL
+ * 2. Arb. swz/negation
+ * 3. NV_vp11, NV_vp2, NV_vp3 features
+ * - extra arith opcodes
+ * - branching
+ * - texture sampling
+ * - indexed attribs
+ * - indexed results
+ * 4. bugs
+ */
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 8
+#define MASK_Y 4
+#define MASK_Z 2
+#define MASK_W 1
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE 0
+#define DEF_CTEST 0
+#include "nv30_shader.h"
+
+#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv30_sr_neg((s))
+#define abs(s) nv30_sr_abs((s))
+
+struct nv30_vpc {
+ struct nv30_vertex_program *vp;
+
+ struct nv30_vertex_program_exec *vpi;
+
+ unsigned output_map[PIPE_MAX_SHADER_OUTPUTS];
+
+ int high_temp;
+ int temp_temp_count;
+
+ struct nv30_sreg *imm;
+ unsigned nr_imm;
+};
+
+static struct nv30_sreg
+temp(struct nv30_vpc *vpc)
+{
+ int idx;
+
+ idx = vpc->temp_temp_count++;
+ idx += vpc->high_temp + 1;
+ return nv30_sr(NV30SR_TEMP, idx);
+}
+
+static struct nv30_sreg
+constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w)
+{
+ struct nv30_vertex_program *vp = vpc->vp;
+ struct nv30_vertex_program_data *vpd;
+ int idx;
+
+ if (pipe >= 0) {
+ for (idx = 0; idx < vp->nr_consts; idx++) {
+ if (vp->consts[idx].index == pipe)
+ return nv30_sr(NV30SR_CONST, idx);
+ }
+ }
+
+ idx = vp->nr_consts++;
+ vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
+ vpd = &vp->consts[idx];
+
+ vpd->index = pipe;
+ vpd->value[0] = x;
+ vpd->value[1] = y;
+ vpd->value[2] = z;
+ vpd->value[3] = w;
+ return nv30_sr(NV30SR_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2))
+
+static void
+emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src)
+{
+ struct nv30_vertex_program *vp = vpc->vp;
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NV30SR_TEMP:
+ sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT);
+ sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT);
+ break;
+ case NV30SR_INPUT:
+ sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ vp->ir |= (1 << src.index);
+ hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT);
+ break;
+ case NV30SR_CONST:
+ sr |= (NV30_VP_SRC_REG_TYPE_CONST <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ assert(vpc->vpi->const_index == -1 ||
+ vpc->vpi->const_index == src.index);
+ vpc->vpi->const_index = src.index;
+ break;
+ case NV30SR_NONE:
+ sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
+ NV30_VP_SRC_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NV30_VP_SRC_NEGATE;
+
+ if (src.abs)
+ hw[0] |= (1 << (21 + pos));
+
+ sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) |
+ (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) |
+ (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) |
+ (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT));
+
+/*
+ * |VVV|
+ * d�.�b
+ * \u/
+ *
+ */
+
+ switch (pos) {
+ case 0:
+ hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >>
+ NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT;
+ hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) <<
+ NV30_VP_INST_SRC0L_SHIFT;
+ break;
+ case 1:
+ hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT;
+ break;
+ case 2:
+ hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >>
+ NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT;
+ hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) <<
+ NV30_VP_INST_SRC2L_SHIFT;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
+emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst)
+{
+ struct nv30_vertex_program *vp = vpc->vp;
+
+ switch (dst.type) {
+ case NV30SR_TEMP:
+ hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
+ break;
+ case NV30SR_OUTPUT:
+ switch (dst.index) {
+ case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+ case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+ case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+ case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+ case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break;
+ case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
+ case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
+ case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
+ case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
+ case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
+ case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
+ case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
+ case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
+ case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+ default:
+ break;
+ }
+
+ hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
+ hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+
+ /*XXX: no way this is entirely correct, someone needs to
+ * figure out what exactly it is.
+ */
+ hw[3] |= 0x800;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
+nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op,
+ struct nv30_sreg dst, int mask,
+ struct nv30_sreg s0, struct nv30_sreg s1,
+ struct nv30_sreg s2)
+{
+ struct nv30_vertex_program *vp = vpc->vp;
+ uint32_t *hw;
+
+ vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
+ vpc->vpi = &vp->insns[vp->nr_insns - 1];
+ memset(vpc->vpi, 0, sizeof(*vpc->vpi));
+ vpc->vpi->const_index = -1;
+
+ hw = vpc->vpi->data;
+
+ hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT);
+ hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) |
+ (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) |
+ (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) |
+ (3 << NV30_VP_INST_COND_SWZ_W_SHIFT));
+
+ hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
+// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK;
+// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT);
+
+ if (dst.type == NV30SR_OUTPUT) {
+ if (slot)
+ hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
+ else
+ hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
+ } else {
+ if (slot)
+ hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
+ else
+ hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
+ }
+
+ emit_dst(vpc, hw, slot, dst);
+ emit_src(vpc, hw, 0, s0);
+ emit_src(vpc, hw, 1, s1);
+ emit_src(vpc, hw, 2, s2);
+}
+
+static INLINE struct nv30_sreg
+tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
+ struct nv30_sreg src;
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index);
+ break;
+ case TGSI_FILE_CONSTANT:
+ src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ src = vpc->imm[fsrc->SrcRegister.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ if (vpc->high_temp < fsrc->SrcRegister.Index)
+ vpc->high_temp = fsrc->SrcRegister.Index;
+ src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index);
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ break;
+ }
+
+ src.abs = fsrc->SrcRegisterExtMod.Absolute;
+ src.negate = fsrc->SrcRegister.Negate;
+ src.swz[0] = fsrc->SrcRegister.SwizzleX;
+ src.swz[1] = fsrc->SrcRegister.SwizzleY;
+ src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+ src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ return src;
+}
+
+static INLINE struct nv30_sreg
+tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
+ struct nv30_sreg dst;
+
+ switch (fdst->DstRegister.File) {
+ case TGSI_FILE_OUTPUT:
+ dst = nv30_sr(NV30SR_OUTPUT,
+ vpc->output_map[fdst->DstRegister.Index]);
+
+ break;
+ case TGSI_FILE_TEMPORARY:
+ dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index);
+ if (vpc->high_temp < dst.index)
+ vpc->high_temp = dst.index;
+ break;
+ default:
+ NOUVEAU_ERR("bad dst file\n");
+ break;
+ }
+
+ return dst;
+}
+
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+ int mask = 0;
+
+ if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
+ if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
+ if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
+ if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
+ return mask;
+}
+
+static boolean
+nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
+ const struct tgsi_full_instruction *finst)
+{
+ struct nv30_sreg src[3], dst, tmp;
+ struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
+ int mask;
+ int ai = -1, ci = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ vpc->temp_temp_count = 0;
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(vpc, fsrc);
+ }
+ }
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+ ai = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ /*XXX: index comparison is broken now that consts come from
+ * two different register files.
+ */
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_IMMEDIATE:
+ if (ci == -1 || ci == fsrc->SrcRegister.Index) {
+ ci = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
+ mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
+ break;
+ case TGSI_OPCODE_ARL:
+ arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_EXP:
+ arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LIT:
+ arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_POW:
+ tmp = temp(vpc);
+ arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
+ swz(src[0], X, X, X, X));
+ arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+ swz(src[1], X, X, X, X), none);
+ arith(vpc, 1, OP_EX2, dst, mask, none, none,
+ swz(tmp, X, X, X, X));
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_RET:
+ break;
+ case TGSI_OPCODE_RSQ:
+ arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGT:
+ arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
+ break;
+ case TGSI_OPCODE_XPD:
+ tmp = temp(vpc);
+ arith(vpc, 0, OP_MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static boolean
+nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ int hw;
+
+ switch (fdec->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = NV30_VP_INST_DEST_POS;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV30_VP_INST_DEST_COL0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV30_VP_INST_DEST_COL1;
+ } else {
+ NOUVEAU_ERR("bad colour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_BCOLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV30_VP_INST_DEST_BFC0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV30_VP_INST_DEST_BFC1;
+ } else {
+ NOUVEAU_ERR("bad bcolour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ hw = NV30_VP_INST_DEST_FOGC;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ hw = NV30_VP_INST_DEST_PSZ;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ if (fdec->Semantic.SemanticIndex <= 7) {
+ hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+ } else {
+ NOUVEAU_ERR("bad generic semantic index\n");
+ return FALSE;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("bad output semantic\n");
+ return FALSE;
+ }
+
+ vpc->output_map[fdec->DeclarationRange.First] = hw;
+ return TRUE;
+}
+
+static boolean
+nv30_vertprog_prepare(struct nv30_vpc *vpc)
+{
+ struct tgsi_parse_context p;
+ int nr_imm = 0;
+
+ tgsi_parse_init(&p, vpc->vp->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch(tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ nr_imm++;
+ break;
+ default:
+ break;
+ }
+ }
+ tgsi_parse_free(&p);
+
+ if (nr_imm) {
+ vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg));
+ assert(vpc->imm);
+ }
+
+ return TRUE;
+}
+
+static void
+nv30_vertprog_translate(struct nv30_context *nv30,
+ struct nv30_vertex_program *vp)
+{
+ struct tgsi_parse_context parse;
+ struct nv30_vpc *vpc = NULL;
+
+ tgsi_dump(vp->pipe.tokens,0);
+
+ vpc = CALLOC(1, sizeof(struct nv30_vpc));
+ if (!vpc)
+ return;
+ vpc->vp = vp;
+ vpc->high_temp = -1;
+
+ if (!nv30_vertprog_prepare(vpc)) {
+ FREE(vpc);
+ return;
+ }
+
+ tgsi_parse_init(&parse, vp->pipe.tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *fdec;
+ fdec = &parse.FullToken.FullDeclaration;
+ switch (fdec->Declaration.File) {
+ case TGSI_FILE_OUTPUT:
+ if (!nv30_vertprog_parse_decl_output(vpc, fdec))
+ goto out_err;
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ const struct tgsi_full_immediate *imm;
+
+ imm = &parse.FullToken.FullImmediate;
+ assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+// assert(imm->Immediate.Size == 4);
+ vpc->imm[vpc->nr_imm++] =
+ constant(vpc, -1,
+ imm->u.ImmediateFloat32[0].Float,
+ imm->u.ImmediateFloat32[1].Float,
+ imm->u.ImmediateFloat32[2].Float,
+ imm->u.ImmediateFloat32[3].Float);
+ }
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+ finst = &parse.FullToken.FullInstruction;
+ if (!nv30_vertprog_parse_instruction(vpc, finst))
+ goto out_err;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST;
+ vp->translated = TRUE;
+out_err:
+ tgsi_parse_free(&parse);
+ FREE(vpc);
+}
+
+static boolean
+nv30_vertprog_validate(struct nv30_context *nv30)
+{
+ struct nouveau_winsys *nvws = nv30->nvws;
+ struct pipe_winsys *ws = nv30->pipe.winsys;
+ struct nouveau_grobj *rankine = nv30->screen->rankine;
+ struct nv30_vertex_program *vp;
+ struct pipe_buffer *constbuf;
+ boolean upload_code = FALSE, upload_data = FALSE;
+ int i;
+
+ vp = nv30->vertprog;
+ constbuf = nv30->constbuf[PIPE_SHADER_VERTEX];
+
+ /* Translate TGSI shader into hw bytecode */
+ if (!vp->translated) {
+ nv30_vertprog_translate(nv30, vp);
+ if (!vp->translated)
+ return FALSE;
+ }
+
+ /* Allocate hw vtxprog exec slots */
+ if (!vp->exec) {
+ struct nouveau_resource *heap = nv30->screen->vp_exec_heap;
+ struct nouveau_stateobj *so;
+ uint vplen = vp->nr_insns;
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
+ while (heap->next && heap->size < vplen) {
+ struct nv30_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->exec);
+ }
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
+ assert(0);
+ }
+
+ so = so_new(2, 0);
+ so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1);
+ so_data (so, vp->exec->start);
+ so_ref(so, &vp->so);
+
+ upload_code = TRUE;
+ }
+
+ /* Allocate hw vtxprog const slots */
+ if (vp->nr_consts && !vp->data) {
+ struct nouveau_resource *heap = nv30->screen->vp_data_heap;
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+ while (heap->next && heap->size < vp->nr_consts) {
+ struct nv30_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->data);
+ }
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
+ assert(0);
+ }
+
+ /*XXX: handle this some day */
+ assert(vp->data->start >= vp->data_start_min);
+
+ upload_data = TRUE;
+ if (vp->data_start != vp->data->start)
+ upload_code = TRUE;
+ }
+
+ /* If exec or data segments moved we need to patch the program to
+ * fixup offsets and register IDs.
+ */
+ if (vp->exec_start != vp->exec->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv30_vertex_program_exec *vpi = &vp->insns[i];
+
+ if (vpi->has_branch_offset) {
+ assert(0);
+ }
+ }
+
+ vp->exec_start = vp->exec->start;
+ }
+
+ if (vp->nr_consts && vp->data_start != vp->data->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv30_vertex_program_exec *vpi = &vp->insns[i];
+
+ if (vpi->const_index >= 0) {
+ vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK;
+ vpi->data[1] |=
+ (vpi->const_index + vp->data->start) <<
+ NV30_VP_INST_CONST_SRC_SHIFT;
+
+ }
+ }
+
+ vp->data_start = vp->data->start;
+ }
+
+ /* Update + Upload constant values */
+ if (vp->nr_consts) {
+ float *map = NULL;
+
+ if (constbuf) {
+ map = ws->buffer_map(ws, constbuf,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ }
+
+ for (i = 0; i < vp->nr_consts; i++) {
+ struct nv30_vertex_program_data *vpd = &vp->consts[i];
+
+ if (vpd->index >= 0) {
+ if (!upload_data &&
+ !memcmp(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float)))
+ continue;
+ memcpy(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float));
+ }
+
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
+ OUT_RING (i + vp->data->start);
+ OUT_RINGp ((uint32_t *)vpd->value, 4);
+ }
+
+ if (constbuf) {
+ ws->buffer_unmap(ws, constbuf);
+ }
+ }
+
+ /* Upload vtxprog */
+ if (upload_code) {
+#if 0
+ for (i = 0; i < vp->nr_insns; i++) {
+ NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+ i, vp->insns[i].data[0], vp->insns[i].data[1],
+ vp->insns[i].data[2], vp->insns[i].data[3]);
+ }
+#endif
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
+ OUT_RING (vp->exec->start);
+ for (i = 0; i < vp->nr_insns; i++) {
+ BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
+ OUT_RINGp (vp->insns[i].data, 4);
+ }
+ }
+
+ if (vp->so != nv30->state.hw[NV30_STATE_VERTPROG]) {
+ so_ref(vp->so, &nv30->state.hw[NV30_STATE_VERTPROG]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+void
+nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp)
+{
+ struct nouveau_winsys *nvws = nv30->screen->nvws;
+
+ vp->translated = FALSE;
+
+ if (vp->nr_insns) {
+ FREE(vp->insns);
+ vp->insns = NULL;
+ vp->nr_insns = 0;
+ }
+
+ if (vp->nr_consts) {
+ FREE(vp->consts);
+ vp->consts = NULL;
+ vp->nr_consts = 0;
+ }
+
+ nvws->res_free(&vp->exec);
+ vp->exec_start = 0;
+ nvws->res_free(&vp->data);
+ vp->data_start = 0;
+ vp->data_start_min = 0;
+
+ vp->ir = vp->or = 0;
+ so_ref(NULL, &vp->so);
+}
+
+struct nv30_state_entry nv30_state_vertprog = {
+ .validate = nv30_vertprog_validate,
+ .dirty = {
+ .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/,
+ .hw = NV30_STATE_VERTPROG,
+ }
+};
diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile
new file mode 100644
index 00000000000..9c8eadf7e44
--- /dev/null
+++ b/src/gallium/drivers/nv40/Makefile
@@ -0,0 +1,37 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv40
+
+DRIVER_SOURCES = \
+ nv40_clear.c \
+ nv40_context.c \
+ nv40_draw.c \
+ nv40_fragprog.c \
+ nv40_fragtex.c \
+ nv40_miptree.c \
+ nv40_query.c \
+ nv40_screen.c \
+ nv40_state.c \
+ nv40_state_blend.c \
+ nv40_state_emit.c \
+ nv40_state_fb.c \
+ nv40_state_rasterizer.c \
+ nv40_state_scissor.c \
+ nv40_state_stipple.c \
+ nv40_state_viewport.c \
+ nv40_state_zsa.c \
+ nv40_surface.c \
+ nv40_vbo.c \
+ nv40_vertprog.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/nv40/nv40_clear.c b/src/gallium/drivers/nv40/nv40_clear.c
new file mode 100644
index 00000000000..59efd620e32
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_clear.c
@@ -0,0 +1,13 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv40_context.h"
+
+void
+nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue);
+ ps->status = PIPE_SURFACE_STATUS_CLEAR;
+}
diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
new file mode 100644
index 00000000000..cc63dd734bc
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -0,0 +1,72 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv40_context.h"
+#include "nv40_screen.h"
+
+static void
+nv40_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
+ BEGIN_RING(curie, 0x1fd8, 1);
+ OUT_RING (2);
+ BEGIN_RING(curie, 0x1fd8, 1);
+ OUT_RING (1);
+ }
+
+ FIRE_RING(fence);
+}
+
+static void
+nv40_destroy(struct pipe_context *pipe)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ if (nv40->draw)
+ draw_destroy(nv40->draw);
+ FREE(nv40);
+}
+
+struct pipe_context *
+nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+ struct nv40_screen *screen = nv40_screen(pscreen);
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv40_context *nv40;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nv40 = CALLOC(1, sizeof(struct nv40_context));
+ if (!nv40)
+ return NULL;
+ nv40->screen = screen;
+ nv40->pctx_id = pctx_id;
+
+ nv40->nvws = nvws;
+
+ nv40->pipe.winsys = ws;
+ nv40->pipe.screen = pscreen;
+ nv40->pipe.destroy = nv40_destroy;
+ nv40->pipe.draw_arrays = nv40_draw_arrays;
+ nv40->pipe.draw_elements = nv40_draw_elements;
+ nv40->pipe.clear = nv40_clear;
+ nv40->pipe.flush = nv40_flush;
+
+ nv40_init_query_functions(nv40);
+ nv40_init_surface_functions(nv40);
+ nv40_init_state_functions(nv40);
+
+ /* Create, configure, and install fallback swtnl path */
+ nv40->draw = draw_create();
+ draw_wide_point_threshold(nv40->draw, 9999999.0);
+ draw_wide_line_threshold(nv40->draw, 9999999.0);
+ draw_enable_line_stipple(nv40->draw, FALSE);
+ draw_enable_point_sprites(nv40->draw, FALSE);
+ draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40));
+
+ return &nv40->pipe;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
new file mode 100644
index 00000000000..adcfbdd85a8
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -0,0 +1,233 @@
+#ifndef __NV40_CONTEXT_H__
+#define __NV40_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv40_screen *ctx = nv40->screen
+#include "nouveau/nouveau_push.h"
+#include "nouveau/nouveau_stateobj.h"
+
+#include "nv40_state.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+enum nv40_state_index {
+ NV40_STATE_FB = 0,
+ NV40_STATE_VIEWPORT = 1,
+ NV40_STATE_BLEND = 2,
+ NV40_STATE_RAST = 3,
+ NV40_STATE_ZSA = 4,
+ NV40_STATE_BCOL = 5,
+ NV40_STATE_CLIP = 6,
+ NV40_STATE_SCISSOR = 7,
+ NV40_STATE_STIPPLE = 8,
+ NV40_STATE_FRAGPROG = 9,
+ NV40_STATE_VERTPROG = 10,
+ NV40_STATE_FRAGTEX0 = 11,
+ NV40_STATE_FRAGTEX1 = 12,
+ NV40_STATE_FRAGTEX2 = 13,
+ NV40_STATE_FRAGTEX3 = 14,
+ NV40_STATE_FRAGTEX4 = 15,
+ NV40_STATE_FRAGTEX5 = 16,
+ NV40_STATE_FRAGTEX6 = 17,
+ NV40_STATE_FRAGTEX7 = 18,
+ NV40_STATE_FRAGTEX8 = 19,
+ NV40_STATE_FRAGTEX9 = 20,
+ NV40_STATE_FRAGTEX10 = 21,
+ NV40_STATE_FRAGTEX11 = 22,
+ NV40_STATE_FRAGTEX12 = 23,
+ NV40_STATE_FRAGTEX13 = 24,
+ NV40_STATE_FRAGTEX14 = 25,
+ NV40_STATE_FRAGTEX15 = 26,
+ NV40_STATE_VERTTEX0 = 27,
+ NV40_STATE_VERTTEX1 = 28,
+ NV40_STATE_VERTTEX2 = 29,
+ NV40_STATE_VERTTEX3 = 30,
+ NV40_STATE_VTXBUF = 31,
+ NV40_STATE_VTXFMT = 32,
+ NV40_STATE_VTXATTR = 33,
+ NV40_STATE_MAX = 34
+};
+
+#include "nv40_screen.h"
+
+#define NV40_NEW_BLEND (1 << 0)
+#define NV40_NEW_RAST (1 << 1)
+#define NV40_NEW_ZSA (1 << 2)
+#define NV40_NEW_SAMPLER (1 << 3)
+#define NV40_NEW_FB (1 << 4)
+#define NV40_NEW_STIPPLE (1 << 5)
+#define NV40_NEW_SCISSOR (1 << 6)
+#define NV40_NEW_VIEWPORT (1 << 7)
+#define NV40_NEW_BCOL (1 << 8)
+#define NV40_NEW_VERTPROG (1 << 9)
+#define NV40_NEW_FRAGPROG (1 << 10)
+#define NV40_NEW_ARRAYS (1 << 11)
+#define NV40_NEW_UCP (1 << 12)
+
+struct nv40_rasterizer_state {
+ struct pipe_rasterizer_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv40_zsa_state {
+ struct pipe_depth_stencil_alpha_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv40_blend_state {
+ struct pipe_blend_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+
+struct nv40_state {
+ unsigned scissor_enabled;
+ unsigned stipple_enabled;
+ unsigned viewport_bypass;
+ unsigned fp_samplers;
+
+ uint64_t dirty;
+ struct nouveau_stateobj *hw[NV40_STATE_MAX];
+};
+
+struct nv40_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nv40_screen *screen;
+ unsigned pctx_id;
+
+ struct draw_context *draw;
+
+ /* HW state derived from pipe states */
+ struct nv40_state state;
+ struct {
+ struct nv40_vertex_program *vertprog;
+
+ unsigned nr_attribs;
+ unsigned hw[PIPE_MAX_SHADER_INPUTS];
+ unsigned draw[PIPE_MAX_SHADER_INPUTS];
+ unsigned emit[PIPE_MAX_SHADER_INPUTS];
+ } swtnl;
+
+ enum {
+ HW, SWTNL, SWRAST
+ } render_mode;
+ unsigned fallback_swtnl;
+ unsigned fallback_swrast;
+
+ /* Context state */
+ unsigned dirty, draw_dirty;
+ struct pipe_scissor_state scissor;
+ unsigned stipple[32];
+ struct pipe_clip_state clip;
+ struct nv40_vertex_program *vertprog;
+ struct nv40_fragment_program *fragprog;
+ struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ unsigned constbuf_nr[PIPE_SHADER_TYPES];
+ struct nv40_rasterizer_state *rasterizer;
+ struct nv40_zsa_state *zsa;
+ struct nv40_blend_state *blend;
+ struct pipe_blend_color blend_colour;
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_buffer *idxbuf;
+ unsigned idxbuf_format;
+ struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+ struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS];
+ unsigned nr_samplers;
+ unsigned nr_textures;
+ unsigned dirty_samplers;
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned vtxbuf_nr;
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+ unsigned vtxelt_nr;
+ const unsigned *edgeflags;
+};
+
+static INLINE struct nv40_context *
+nv40_context(struct pipe_context *pipe)
+{
+ return (struct nv40_context *)pipe;
+}
+
+struct nv40_state_entry {
+ boolean (*validate)(struct nv40_context *nv40);
+ struct {
+ unsigned pipe;
+ unsigned hw;
+ } dirty;
+};
+
+extern void nv40_init_state_functions(struct nv40_context *nv40);
+extern void nv40_init_surface_functions(struct nv40_context *nv40);
+extern void nv40_init_query_functions(struct nv40_context *nv40);
+
+extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+/* nv40_draw.c */
+extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40);
+extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe,
+ struct pipe_buffer *idxbuf,
+ unsigned ib_size, unsigned mode,
+ unsigned start, unsigned count);
+
+/* nv40_vertprog.c */
+extern void nv40_vertprog_destroy(struct nv40_context *,
+ struct nv40_vertex_program *);
+
+/* nv40_fragprog.c */
+extern void nv40_fragprog_destroy(struct nv40_context *,
+ struct nv40_fragment_program *);
+
+/* nv40_fragtex.c */
+extern void nv40_fragtex_bind(struct nv40_context *);
+
+/* nv40_state.c and friends */
+extern boolean nv40_state_validate(struct nv40_context *nv40);
+extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40);
+extern void nv40_state_emit(struct nv40_context *nv40);
+extern struct nv40_state_entry nv40_state_rasterizer;
+extern struct nv40_state_entry nv40_state_scissor;
+extern struct nv40_state_entry nv40_state_stipple;
+extern struct nv40_state_entry nv40_state_fragprog;
+extern struct nv40_state_entry nv40_state_vertprog;
+extern struct nv40_state_entry nv40_state_blend;
+extern struct nv40_state_entry nv40_state_blend_colour;
+extern struct nv40_state_entry nv40_state_zsa;
+extern struct nv40_state_entry nv40_state_viewport;
+extern struct nv40_state_entry nv40_state_framebuffer;
+extern struct nv40_state_entry nv40_state_fragtex;
+extern struct nv40_state_entry nv40_state_vbo;
+extern struct nv40_state_entry nv40_state_vtxfmt;
+
+/* nv40_vbo.c */
+extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv40_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start,
+ unsigned count);
+
+/* nv40_clear.c */
+extern void nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+#endif
diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c
new file mode 100644
index 00000000000..8e56cdc2fe0
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_draw.c
@@ -0,0 +1,349 @@
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_pack_color.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pipe.h"
+
+#include "nv40_context.h"
+#define NV40_SHADER_NO_FUCKEDNESS
+#include "nv40_shader.h"
+
+/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very
+ * often at all. Uses "quadro style" vertex submission + a fixed vertex
+ * layout to avoid the need to generate a vertex program or vtxfmt.
+ */
+
+struct nv40_render_stage {
+ struct draw_stage stage;
+ struct nv40_context *nv40;
+ unsigned prim;
+};
+
+static INLINE struct nv40_render_stage *
+nv40_render_stage(struct draw_stage *stage)
+{
+ return (struct nv40_render_stage *)stage;
+}
+
+static INLINE void
+nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
+{
+ unsigned i;
+
+ for (i = 0; i < nv40->swtnl.nr_attribs; i++) {
+ unsigned idx = nv40->swtnl.draw[i];
+ unsigned hw = nv40->swtnl.hw[i];
+
+ switch (nv40->swtnl.emit[i]) {
+ case EMIT_OMIT:
+ break;
+ case EMIT_1F:
+ BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1);
+ OUT_RING (fui(v->data[idx][0]));
+ break;
+ case EMIT_2F:
+ BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2);
+ OUT_RING (fui(v->data[idx][0]));
+ OUT_RING (fui(v->data[idx][1]));
+ break;
+ case EMIT_3F:
+ BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3);
+ OUT_RING (fui(v->data[idx][0]));
+ OUT_RING (fui(v->data[idx][1]));
+ OUT_RING (fui(v->data[idx][2]));
+ break;
+ case EMIT_4F:
+ BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4);
+ OUT_RING (fui(v->data[idx][0]));
+ OUT_RING (fui(v->data[idx][1]));
+ OUT_RING (fui(v->data[idx][2]));
+ OUT_RING (fui(v->data[idx][3]));
+ break;
+ case EMIT_4UB:
+ BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1);
+ OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]),
+ float_to_ubyte(v->data[idx][1]),
+ float_to_ubyte(v->data[idx][2]),
+ float_to_ubyte(v->data[idx][3])));
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+}
+
+static INLINE void
+nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
+ unsigned mode, unsigned count)
+{
+ struct nv40_render_stage *rs = nv40_render_stage(stage);
+ struct nv40_context *nv40 = rs->nv40;
+ struct nouveau_pushbuf *pb = nv40->nvws->channel->pushbuf;
+ unsigned i;
+
+ /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */
+ if (pb->remaining < ((count * 20) + 6)) {
+ if (rs->prim != NV40TCL_BEGIN_END_STOP) {
+ NOUVEAU_ERR("AIII, missed flush\n");
+ assert(0);
+ }
+ FIRE_RING(NULL);
+ nv40_state_emit(nv40);
+ }
+
+ /* Switch primitive modes if necessary */
+ if (rs->prim != mode) {
+ if (rs->prim != NV40TCL_BEGIN_END_STOP) {
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (NV40TCL_BEGIN_END_STOP);
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (mode);
+ rs->prim = mode;
+ }
+
+ /* Emit vertex data */
+ for (i = 0; i < count; i++)
+ nv40_render_vertex(nv40, prim->v[i]);
+
+ /* If it's likely we'll need to empty the push buffer soon, finish
+ * off the primitive now.
+ */
+ if (pb->remaining < ((count * 20) + 6)) {
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (NV40TCL_BEGIN_END_STOP);
+ rs->prim = NV40TCL_BEGIN_END_STOP;
+ }
+}
+
+static void
+nv40_render_point(struct draw_stage *draw, struct prim_header *prim)
+{
+ nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1);
+}
+
+static void
+nv40_render_line(struct draw_stage *draw, struct prim_header *prim)
+{
+ nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2);
+}
+
+static void
+nv40_render_tri(struct draw_stage *draw, struct prim_header *prim)
+{
+ nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3);
+}
+
+static void
+nv40_render_flush(struct draw_stage *draw, unsigned flags)
+{
+ struct nv40_render_stage *rs = nv40_render_stage(draw);
+ struct nv40_context *nv40 = rs->nv40;
+
+ if (rs->prim != NV40TCL_BEGIN_END_STOP) {
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (NV40TCL_BEGIN_END_STOP);
+ rs->prim = NV40TCL_BEGIN_END_STOP;
+ }
+}
+
+static void
+nv40_render_reset_stipple_counter(struct draw_stage *draw)
+{
+}
+
+static void
+nv40_render_destroy(struct draw_stage *draw)
+{
+ FREE(draw);
+}
+
+static INLINE void
+emit_mov(struct nv40_vertex_program *vp,
+ unsigned dst, unsigned src, unsigned vor, unsigned mask)
+{
+ struct nv40_vertex_program_exec *inst;
+
+ vp->insns = realloc(vp->insns,
+ sizeof(struct nv40_vertex_program_exec) *
+ ++vp->nr_insns);
+ inst = &vp->insns[vp->nr_insns - 1];
+
+ inst->data[0] = 0x401f9c6c;
+ inst->data[1] = 0x0040000d | (src << 8);
+ inst->data[2] = 0x8106c083;
+ inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13);
+ inst->const_index = -1;
+ inst->has_branch_offset = FALSE;
+
+ vp->ir |= (1 << src);
+ if (vor != ~0)
+ vp->or |= (1 << vor);
+}
+
+static struct nv40_vertex_program *
+create_drawvp(struct nv40_context *nv40)
+{
+ struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program);
+ unsigned i;
+
+ emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8);
+ for (i = 0; i < 8; i++)
+ emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf);
+
+ vp->insns[vp->nr_insns - 1].data[3] |= 1;
+ vp->translated = TRUE;
+ return vp;
+}
+
+struct draw_stage *
+nv40_draw_render_stage(struct nv40_context *nv40)
+{
+ struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage);
+
+ if (!nv40->swtnl.vertprog)
+ nv40->swtnl.vertprog = create_drawvp(nv40);
+
+ render->nv40 = nv40;
+ render->stage.draw = nv40->draw;
+ render->stage.point = nv40_render_point;
+ render->stage.line = nv40_render_line;
+ render->stage.tri = nv40_render_tri;
+ render->stage.flush = nv40_render_flush;
+ render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter;
+ render->stage.destroy = nv40_render_destroy;
+
+ return &render->stage;
+}
+
+boolean
+nv40_draw_elements_swtnl(struct pipe_context *pipe,
+ struct pipe_buffer *idxbuf, unsigned idxbuf_size,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+ unsigned i;
+ void *map;
+
+ if (!nv40_state_validate_swtnl(nv40))
+ return FALSE;
+ nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF);
+ nv40_state_emit(nv40);
+
+ for (i = 0; i < nv40->vtxbuf_nr; i++) {
+ map = ws->buffer_map(ws, nv40->vtxbuf[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(nv40->draw, i, map);
+ }
+
+ if (idxbuf) {
+ map = ws->buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map);
+ } else {
+ draw_set_mapped_element_buffer(nv40->draw, 0, NULL);
+ }
+
+ if (nv40->constbuf[PIPE_SHADER_VERTEX]) {
+ const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX];
+
+ map = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX],
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_constant_buffer(nv40->draw, map, nr);
+ }
+
+ draw_arrays(nv40->draw, mode, start, count);
+
+ for (i = 0; i < nv40->vtxbuf_nr; i++)
+ ws->buffer_unmap(ws, nv40->vtxbuf[i].buffer);
+
+ if (idxbuf)
+ ws->buffer_unmap(ws, idxbuf);
+
+ if (nv40->constbuf[PIPE_SHADER_VERTEX])
+ ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]);
+
+ draw_flush(nv40->draw);
+ pipe->flush(pipe, 0, NULL);
+
+ return TRUE;
+}
+
+static INLINE void
+emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit,
+ unsigned semantic, unsigned index)
+{
+ unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index);
+ unsigned a = nv40->swtnl.nr_attribs++;
+
+ nv40->swtnl.hw[a] = hw;
+ nv40->swtnl.emit[a] = emit;
+ nv40->swtnl.draw[a] = draw_out;
+}
+
+static boolean
+nv40_state_vtxfmt_validate(struct nv40_context *nv40)
+{
+ struct nv40_fragment_program *fp = nv40->fragprog;
+ unsigned colour = 0, texcoords = 0, fog = 0, i;
+
+ /* Determine needed fragprog inputs */
+ for (i = 0; i < fp->info.num_inputs; i++) {
+ switch (fp->info.input_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ colour |= (1 << fp->info.input_semantic_index[i]);
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ texcoords |= (1 << fp->info.input_semantic_index[i]);
+ break;
+ case TGSI_SEMANTIC_FOG:
+ fog = 1;
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ nv40->swtnl.nr_attribs = 0;
+
+ /* Map draw vtxprog output to hw attribute IDs */
+ for (i = 0; i < 2; i++) {
+ if (!(colour & (1 << i)))
+ continue;
+ emit_attrib(nv40, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i);
+ }
+
+ for (i = 0; i < 8; i++) {
+ if (!(texcoords & (1 << i)))
+ continue;
+ emit_attrib(nv40, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i);
+ }
+
+ if (fog) {
+ emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0);
+ }
+
+ emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0);
+
+ return FALSE;
+}
+
+struct nv40_state_entry nv40_state_vtxfmt = {
+ .validate = nv40_state_vtxfmt_validate,
+ .dirty = {
+ .pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG,
+ .hw = 0
+ }
+};
+
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
new file mode 100644
index 00000000000..91dcbebda0d
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -0,0 +1,991 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv40_context.h"
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 1
+#define MASK_Y 2
+#define MASK_Z 4
+#define MASK_W 8
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X
+#define DEF_CTEST NV40_FP_OP_COND_TR
+#include "nv40_shader.h"
+
+#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv40_sr_neg((s))
+#define abs(s) nv40_sr_abs((s))
+#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v)
+
+#define MAX_CONSTS 128
+#define MAX_IMM 32
+struct nv40_fpc {
+ struct nv40_fragment_program *fp;
+
+ uint attrib_map[PIPE_MAX_SHADER_INPUTS];
+
+ unsigned r_temps;
+ unsigned r_temps_discard;
+ struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
+ struct nv40_sreg *r_temp;
+
+ int num_regs;
+
+ unsigned inst_offset;
+ unsigned have_const;
+
+ struct {
+ int pipe;
+ float vals[4];
+ } consts[MAX_CONSTS];
+ int nr_consts;
+
+ struct nv40_sreg imm[MAX_IMM];
+ unsigned nr_imm;
+};
+
+static INLINE struct nv40_sreg
+temp(struct nv40_fpc *fpc)
+{
+ int idx = ffs(~fpc->r_temps) - 1;
+
+ if (idx < 0) {
+ NOUVEAU_ERR("out of temps!!\n");
+ assert(0);
+ return nv40_sr(NV40SR_TEMP, 0);
+ }
+
+ fpc->r_temps |= (1 << idx);
+ fpc->r_temps_discard |= (1 << idx);
+ return nv40_sr(NV40SR_TEMP, idx);
+}
+
+static INLINE void
+release_temps(struct nv40_fpc *fpc)
+{
+ fpc->r_temps &= ~fpc->r_temps_discard;
+ fpc->r_temps_discard = 0;
+}
+
+static INLINE struct nv40_sreg
+constant(struct nv40_fpc *fpc, int pipe, float vals[4])
+{
+ int idx;
+
+ if (fpc->nr_consts == MAX_CONSTS)
+ assert(0);
+ idx = fpc->nr_consts++;
+
+ fpc->consts[idx].pipe = pipe;
+ if (pipe == -1)
+ memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
+ return nv40_sr(NV40SR_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \
+ (d), (m), (s0), (s1), (s2))
+#define tex(cc,s,o,u,d,m,s0,s1,s2) \
+ nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \
+ (d), (m), (s0), none, none)
+
+static void
+grow_insns(struct nv40_fpc *fpc, int size)
+{
+ struct nv40_fragment_program *fp = fpc->fp;
+
+ fp->insn_len += size;
+ fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
+}
+
+static void
+emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)
+{
+ struct nv40_fragment_program *fp = fpc->fp;
+ uint32_t *hw = &fp->insn[fpc->inst_offset];
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NV40SR_INPUT:
+ sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
+ hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT);
+ break;
+ case NV40SR_OUTPUT:
+ sr |= NV40_FP_REG_SRC_HALF;
+ /* fall-through */
+ case NV40SR_TEMP:
+ sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT);
+ sr |= (src.index << NV40_FP_REG_SRC_SHIFT);
+ break;
+ case NV40SR_CONST:
+ if (!fpc->have_const) {
+ grow_insns(fpc, 4);
+ fpc->have_const = 1;
+ }
+
+ hw = &fp->insn[fpc->inst_offset];
+ if (fpc->consts[src.index].pipe >= 0) {
+ struct nv40_fragment_program_data *fpd;
+
+ fp->consts = realloc(fp->consts, ++fp->nr_consts *
+ sizeof(*fpd));
+ fpd = &fp->consts[fp->nr_consts - 1];
+ fpd->offset = fpc->inst_offset + 4;
+ fpd->index = fpc->consts[src.index].pipe;
+ memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
+ } else {
+ memcpy(&fp->insn[fpc->inst_offset + 4],
+ fpc->consts[src.index].vals,
+ sizeof(uint32_t) * 4);
+ }
+
+ sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
+ break;
+ case NV40SR_NONE:
+ sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NV40_FP_REG_NEGATE;
+
+ if (src.abs)
+ hw[1] |= (1 << (29 + pos));
+
+ sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) |
+ (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) |
+ (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) |
+ (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT));
+
+ hw[pos + 1] |= sr;
+}
+
+static void
+emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst)
+{
+ struct nv40_fragment_program *fp = fpc->fp;
+ uint32_t *hw = &fp->insn[fpc->inst_offset];
+
+ switch (dst.type) {
+ case NV40SR_TEMP:
+ if (fpc->num_regs < (dst.index + 1))
+ fpc->num_regs = dst.index + 1;
+ break;
+ case NV40SR_OUTPUT:
+ if (dst.index == 1) {
+ fp->fp_control |= 0xe;
+ } else {
+ hw[0] |= NV40_FP_OP_OUT_REG_HALF;
+ }
+ break;
+ case NV40SR_NONE:
+ hw[0] |= (1 << 30);
+ break;
+ default:
+ assert(0);
+ }
+
+ hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT);
+}
+
+static void
+nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,
+ struct nv40_sreg dst, int mask,
+ struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
+{
+ struct nv40_fragment_program *fp = fpc->fp;
+ uint32_t *hw;
+
+ fpc->inst_offset = fp->insn_len;
+ fpc->have_const = 0;
+ grow_insns(fpc, 4);
+ hw = &fp->insn[fpc->inst_offset];
+ memset(hw, 0, sizeof(uint32_t) * 4);
+
+ if (op == NV40_FP_OP_OPCODE_KIL)
+ fp->fp_control |= NV40TCL_FP_CONTROL_KIL;
+ hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT);
+ hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT);
+ hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT);
+
+ if (sat)
+ hw[0] |= NV40_FP_OP_OUT_SAT;
+
+ if (dst.cc_update)
+ hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE;
+ hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT);
+ hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) |
+ (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) |
+ (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) |
+ (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT));
+
+ emit_dst(fpc, dst);
+ emit_src(fpc, 0, s0);
+ emit_src(fpc, 1, s1);
+ emit_src(fpc, 2, s2);
+}
+
+static void
+nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit,
+ struct nv40_sreg dst, int mask,
+ struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
+{
+ struct nv40_fragment_program *fp = fpc->fp;
+
+ nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
+
+ fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT);
+ fp->samplers |= (1 << unit);
+}
+
+static INLINE struct nv40_sreg
+tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
+{
+ struct nv40_sreg src;
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ src = nv40_sr(NV40SR_INPUT,
+ fpc->attrib_map[fsrc->SrcRegister.Index]);
+ break;
+ case TGSI_FILE_CONSTANT:
+ src = constant(fpc, fsrc->SrcRegister.Index, NULL);
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ assert(fsrc->SrcRegister.Index < fpc->nr_imm);
+ src = fpc->imm[fsrc->SrcRegister.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ src = fpc->r_temp[fsrc->SrcRegister.Index];
+ break;
+ /* NV40 fragprog result regs are just temps, so this is simple */
+ case TGSI_FILE_OUTPUT:
+ src = fpc->r_result[fsrc->SrcRegister.Index];
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ break;
+ }
+
+ src.abs = fsrc->SrcRegisterExtMod.Absolute;
+ src.negate = fsrc->SrcRegister.Negate;
+ src.swz[0] = fsrc->SrcRegister.SwizzleX;
+ src.swz[1] = fsrc->SrcRegister.SwizzleY;
+ src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+ src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ return src;
+}
+
+static INLINE struct nv40_sreg
+tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
+ switch (fdst->DstRegister.File) {
+ case TGSI_FILE_OUTPUT:
+ return fpc->r_result[fdst->DstRegister.Index];
+ case TGSI_FILE_TEMPORARY:
+ return fpc->r_temp[fdst->DstRegister.Index];
+ case TGSI_FILE_NULL:
+ return nv40_sr(NV40SR_NONE, 0);
+ default:
+ NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
+ return nv40_sr(NV40SR_NONE, 0);
+ }
+}
+
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+ int mask = 0;
+
+ if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
+ if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
+ if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
+ if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
+ return mask;
+}
+
+static boolean
+src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc,
+ struct nv40_sreg *src)
+{
+ const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+ struct nv40_sreg tgsi = tgsi_src(fpc, fsrc);
+ uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
+ uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
+ fsrc->SrcRegisterExtSwz.NegateY,
+ fsrc->SrcRegisterExtSwz.NegateZ,
+ fsrc->SrcRegisterExtSwz.NegateW };
+ uint c;
+
+ for (c = 0; c < 4; c++) {
+ switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ mask |= (1 << c);
+ break;
+ case TGSI_EXTSWIZZLE_ZERO:
+ zero_mask |= (1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ case TGSI_EXTSWIZZLE_ONE:
+ one_mask |= (1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (!tgsi.negate && neg[c])
+ neg_mask |= (1 << c);
+ }
+
+ if (mask == MASK_ALL && !neg_mask)
+ return TRUE;
+
+ *src = temp(fpc);
+
+ if (mask)
+ arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
+
+ if (zero_mask)
+ arith(fpc, 0, SFL, *src, zero_mask, *src, none, none);
+
+ if (one_mask)
+ arith(fpc, 0, STR, *src, one_mask, *src, none, none);
+
+ if (neg_mask) {
+ struct nv40_sreg one = temp(fpc);
+ arith(fpc, 0, STR, one, neg_mask, one, none, none);
+ arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none);
+ }
+
+ return FALSE;
+}
+
+static boolean
+nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
+ const struct tgsi_full_instruction *finst)
+{
+ const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+ struct nv40_sreg src[3], dst, tmp;
+ int mask, sat, unit;
+ int ai = -1, ci = -1, ii = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(fpc, fsrc);
+ }
+ }
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_TEMPORARY:
+ if (!src_native_swz(fpc, fsrc, &src[i]))
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+ ai = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(fpc, fsrc);
+ } else {
+ src[i] = temp(fpc);
+ arith(fpc, 0, MOV, src[i], MASK_ALL,
+ tgsi_src(fpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_CONSTANT:
+ if ((ci == -1 && ii == -1) ||
+ ci == fsrc->SrcRegister.Index) {
+ ci = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(fpc, fsrc);
+ } else {
+ src[i] = temp(fpc);
+ arith(fpc, 0, MOV, src[i], MASK_ALL,
+ tgsi_src(fpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ if ((ci == -1 && ii == -1) ||
+ ii == fsrc->SrcRegister.Index) {
+ ii = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(fpc, fsrc);
+ } else {
+ src[i] = temp(fpc);
+ arith(fpc, 0, MOV, src[i], MASK_ALL,
+ tgsi_src(fpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ case TGSI_FILE_SAMPLER:
+ unit = fsrc->SrcRegister.Index;
+ break;
+ case TGSI_FILE_OUTPUT:
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
+ mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+ sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
+
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_CMP:
+ tmp = temp(fpc);
+ arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+ tmp.cc_update = 1;
+ arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+ dst.cc_test = NV40_VP_INST_COND_LT;
+ arith(fpc, sat, MOV, dst, mask, src[1], none, none);
+ break;
+ case TGSI_OPCODE_COS:
+ arith(fpc, sat, COS, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DDX:
+ if (mask & (MASK_Z | MASK_W)) {
+ tmp = temp(fpc);
+ arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y,
+ swz(src[0], Z, W, Z, W), none, none);
+ arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W,
+ swz(tmp, X, Y, X, Y), none, none);
+ arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0],
+ none, none);
+ arith(fpc, 0, MOV, dst, mask, tmp, none, none);
+ } else {
+ arith(fpc, sat, DDX, dst, mask, src[0], none, none);
+ }
+ break;
+ case TGSI_OPCODE_DDY:
+ if (mask & (MASK_Z | MASK_W)) {
+ tmp = temp(fpc);
+ arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y,
+ swz(src[0], Z, W, Z, W), none, none);
+ arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W,
+ swz(tmp, X, Y, X, Y), none, none);
+ arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0],
+ none, none);
+ arith(fpc, 0, MOV, dst, mask, tmp, none, none);
+ } else {
+ arith(fpc, sat, DDY, dst, mask, src[0], none, none);
+ }
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ tmp = temp(fpc);
+ arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none);
+ arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
+ swz(src[1], W, W, W, W), none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(fpc, sat, EX2, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(fpc, sat, FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(fpc, sat, FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_KILP:
+ arith(fpc, 0, KIL, none, 0, none, none, none);
+ break;
+ case TGSI_OPCODE_KIL:
+ dst = nv40_sr(NV40SR_NONE, 0);
+ dst.cc_update = 1;
+ arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
+ dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT;
+ arith(fpc, 0, KIL, dst, 0, none, none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(fpc, sat, LG2, dst, mask, src[0], none, none);
+ break;
+// case TGSI_OPCODE_LIT:
+ case TGSI_OPCODE_LRP:
+ tmp = temp(fpc);
+ arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
+ arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(fpc, sat, MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_NOISE1:
+ case TGSI_OPCODE_NOISE2:
+ case TGSI_OPCODE_NOISE3:
+ case TGSI_OPCODE_NOISE4:
+ arith(fpc, sat, SFL, dst, mask, none, none, none);
+ break;
+ case TGSI_OPCODE_POW:
+ tmp = temp(fpc);
+ arith(fpc, 0, LG2, tmp, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+ swz(src[1], X, X, X, X), none);
+ arith(fpc, sat, EX2, dst, mask,
+ swz(tmp, X, X, X, X), none, none);
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(fpc, sat, RCP, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_RET:
+ assert(0);
+ break;
+ case TGSI_OPCODE_RFL:
+ tmp = temp(fpc);
+ arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none);
+ arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none);
+ arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z,
+ swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
+ arith(fpc, sat, MAD, dst, mask,
+ swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]));
+ break;
+ case TGSI_OPCODE_RSQ:
+ tmp = temp(fpc);
+ arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X,
+ abs(swz(src[0], X, X, X, X)), none, none);
+ arith(fpc, sat, EX2, dst, mask,
+ neg(swz(tmp, X, X, X, X)), none, none);
+ break;
+ case TGSI_OPCODE_SCS:
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ break;
+ case TGSI_OPCODE_SEQ:
+ arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SFL:
+ arith(fpc, sat, SFL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGT:
+ arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SIN:
+ arith(fpc, sat, SIN, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_SLE:
+ arith(fpc, sat, SLE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SNE:
+ arith(fpc, sat, SNE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_STR:
+ arith(fpc, sat, STR, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
+ break;
+ case TGSI_OPCODE_TEX:
+ tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_TXB:
+ tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_TXP:
+ tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_XPD:
+ tmp = temp(fpc);
+ arith(fpc, 0, MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(fpc, sat, MAD, dst, (mask & ~MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ release_temps(fpc);
+ return TRUE;
+}
+
+static boolean
+nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ int hw;
+
+ switch (fdec->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = NV40_FP_OP_INPUT_SRC_POSITION;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV40_FP_OP_INPUT_SRC_COL0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV40_FP_OP_INPUT_SRC_COL1;
+ } else {
+ NOUVEAU_ERR("bad colour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ hw = NV40_FP_OP_INPUT_SRC_FOGC;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ if (fdec->Semantic.SemanticIndex <= 7) {
+ hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic.
+ SemanticIndex);
+ } else {
+ NOUVEAU_ERR("bad generic semantic index\n");
+ return FALSE;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("bad input semantic\n");
+ return FALSE;
+ }
+
+ fpc->attrib_map[fdec->DeclarationRange.First] = hw;
+ return TRUE;
+}
+
+static boolean
+nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ unsigned idx = fdec->DeclarationRange.First;
+ unsigned hw;
+
+ switch (fdec->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = 1;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ switch (fdec->Semantic.SemanticIndex) {
+ case 0: hw = 0; break;
+ case 1: hw = 2; break;
+ case 2: hw = 3; break;
+ case 3: hw = 4; break;
+ default:
+ NOUVEAU_ERR("bad rcol index\n");
+ return FALSE;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("bad output semantic\n");
+ return FALSE;
+ }
+
+ fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw);
+ fpc->r_temps |= (1 << hw);
+ return TRUE;
+}
+
+static boolean
+nv40_fragprog_prepare(struct nv40_fpc *fpc)
+{
+ struct tgsi_parse_context p;
+ int high_temp = -1, i;
+
+ tgsi_parse_init(&p, fpc->fp->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch(tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *fdec;
+ fdec = &p.FullToken.FullDeclaration;
+ switch (fdec->Declaration.File) {
+ case TGSI_FILE_INPUT:
+ if (!nv40_fragprog_parse_decl_attrib(fpc, fdec))
+ goto out_err;
+ break;
+ case TGSI_FILE_OUTPUT:
+ if (!nv40_fragprog_parse_decl_output(fpc, fdec))
+ goto out_err;
+ break;
+ case TGSI_FILE_TEMPORARY:
+ if (fdec->DeclarationRange.Last > high_temp) {
+ high_temp =
+ fdec->DeclarationRange.Last;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ struct tgsi_full_immediate *imm;
+ float vals[4];
+
+ imm = &p.FullToken.FullImmediate;
+ assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+ assert(fpc->nr_imm < MAX_IMM);
+
+ vals[0] = imm->u.ImmediateFloat32[0].Float;
+ vals[1] = imm->u.ImmediateFloat32[1].Float;
+ vals[2] = imm->u.ImmediateFloat32[2].Float;
+ vals[3] = imm->u.ImmediateFloat32[3].Float;
+ fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ tgsi_parse_free(&p);
+
+ if (++high_temp) {
+ fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg));
+ for (i = 0; i < high_temp; i++)
+ fpc->r_temp[i] = temp(fpc);
+ fpc->r_temps_discard = 0;
+ }
+
+ return TRUE;
+
+out_err:
+ if (fpc->r_temp)
+ FREE(fpc->r_temp);
+ tgsi_parse_free(&p);
+ return FALSE;
+}
+
+static void
+nv40_fragprog_translate(struct nv40_context *nv40,
+ struct nv40_fragment_program *fp)
+{
+ struct tgsi_parse_context parse;
+ struct nv40_fpc *fpc = NULL;
+
+ fpc = CALLOC(1, sizeof(struct nv40_fpc));
+ if (!fpc)
+ return;
+ fpc->fp = fp;
+ fpc->num_regs = 2;
+
+ if (!nv40_fragprog_prepare(fpc)) {
+ FREE(fpc);
+ return;
+ }
+
+ tgsi_parse_init(&parse, fp->pipe.tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+
+ finst = &parse.FullToken.FullInstruction;
+ if (!nv40_fragprog_parse_instruction(fpc, finst))
+ goto out_err;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT;
+
+ /* Terminate final instruction */
+ fp->insn[fpc->inst_offset] |= 0x00000001;
+
+ /* Append NOP + END instruction, may or may not be necessary. */
+ fpc->inst_offset = fp->insn_len;
+ grow_insns(fpc, 4);
+ fp->insn[fpc->inst_offset + 0] = 0x00000001;
+ fp->insn[fpc->inst_offset + 1] = 0x00000000;
+ fp->insn[fpc->inst_offset + 2] = 0x00000000;
+ fp->insn[fpc->inst_offset + 3] = 0x00000000;
+
+ fp->translated = TRUE;
+out_err:
+ tgsi_parse_free(&parse);
+ if (fpc->r_temp)
+ FREE(fpc->r_temp);
+ FREE(fpc);
+}
+
+static void
+nv40_fragprog_upload(struct nv40_context *nv40,
+ struct nv40_fragment_program *fp)
+{
+ struct pipe_winsys *ws = nv40->pipe.winsys;
+ const uint32_t le = 1;
+ uint32_t *map;
+ int i;
+
+ map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+
+#if 0
+ for (i = 0; i < fp->insn_len; i++) {
+ fflush(stdout); fflush(stderr);
+ NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]);
+ fflush(stdout); fflush(stderr);
+ }
+#endif
+
+ if ((*(const uint8_t *)&le)) {
+ for (i = 0; i < fp->insn_len; i++) {
+ map[i] = fp->insn[i];
+ }
+ } else {
+ /* Weird swapping for big-endian chips */
+ for (i = 0; i < fp->insn_len; i++) {
+ map[i] = ((fp->insn[i] & 0xffff) << 16) |
+ ((fp->insn[i] >> 16) & 0xffff);
+ }
+ }
+
+ ws->buffer_unmap(ws, fp->buffer);
+}
+
+static boolean
+nv40_fragprog_validate(struct nv40_context *nv40)
+{
+ struct nv40_fragment_program *fp = nv40->fragprog;
+ struct pipe_buffer *constbuf =
+ nv40->constbuf[PIPE_SHADER_FRAGMENT];
+ struct pipe_winsys *ws = nv40->pipe.winsys;
+ struct nouveau_stateobj *so;
+ boolean new_consts = FALSE;
+ int i;
+
+ if (fp->translated)
+ goto update_constants;
+
+ nv40->fallback_swrast &= ~NV40_NEW_FRAGPROG;
+ nv40_fragprog_translate(nv40, fp);
+ if (!fp->translated) {
+ nv40->fallback_swrast |= NV40_NEW_FRAGPROG;
+ return FALSE;
+ }
+
+ fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4);
+ nv40_fragprog_upload(nv40, fp);
+
+ so = so_new(4, 1);
+ so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1);
+ so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1);
+ so_method(so, nv40->screen->curie, NV40TCL_FP_CONTROL, 1);
+ so_data (so, fp->fp_control);
+ so_ref(so, &fp->so);
+
+update_constants:
+ if (fp->nr_consts) {
+ float *map;
+
+ map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ);
+ for (i = 0; i < fp->nr_consts; i++) {
+ struct nv40_fragment_program_data *fpd = &fp->consts[i];
+ uint32_t *p = &fp->insn[fpd->offset];
+ uint32_t *cb = (uint32_t *)&map[fpd->index * 4];
+
+ if (!memcmp(p, cb, 4 * sizeof(float)))
+ continue;
+ memcpy(p, cb, 4 * sizeof(float));
+ new_consts = TRUE;
+ }
+ ws->buffer_unmap(ws, constbuf);
+
+ if (new_consts)
+ nv40_fragprog_upload(nv40, fp);
+ }
+
+ if (new_consts || fp->so != nv40->state.hw[NV40_STATE_FRAGPROG]) {
+ so_ref(fp->so, &nv40->state.hw[NV40_STATE_FRAGPROG]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+void
+nv40_fragprog_destroy(struct nv40_context *nv40,
+ struct nv40_fragment_program *fp)
+{
+ if (fp->insn_len)
+ FREE(fp->insn);
+}
+
+struct nv40_state_entry nv40_state_fragprog = {
+ .validate = nv40_fragprog_validate,
+ .dirty = {
+ .pipe = NV40_NEW_FRAGPROG,
+ .hw = NV40_STATE_FRAGPROG
+ }
+};
+
diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c
new file mode 100644
index 00000000000..0227d22620d
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_fragtex.c
@@ -0,0 +1,168 @@
+#include "nv40_context.h"
+
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV40TCL_TEX_FORMAT_FORMAT_##tf, \
+ (NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y | \
+ NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w | \
+ NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y | \
+ NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w), \
+ ((NV40TCL_TEX_FILTER_SIGNED_RED*sx) | (NV40TCL_TEX_FILTER_SIGNED_GREEN*sy) | \
+ (NV40TCL_TEX_FILTER_SIGNED_BLUE*sz) | (NV40TCL_TEX_FILTER_SIGNED_ALPHA*sw)) \
+}
+
+struct nv40_texture_format {
+ boolean defined;
+ uint pipe;
+ int format;
+ int swizzle;
+ int sign;
+};
+
+static struct nv40_texture_format
+nv40_texture_formats[] = {
+ _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
+ _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 0, 0, 0, 0),
+ _(R16_SNORM , A16 , ZERO, ZERO, S1, ONE, X, X, X, Y, 1, 1, 1, 1),
+ _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 0, 0, 0, 0),
+ _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0),
+ _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ {},
+};
+
+static struct nv40_texture_format *
+nv40_fragtex_format(uint pipe_format)
+{
+ struct nv40_texture_format *tf = nv40_texture_formats;
+
+ while (tf->defined) {
+ if (tf->pipe == pipe_format)
+ return tf;
+ tf++;
+ }
+
+ NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format));
+ return NULL;
+}
+
+
+static struct nouveau_stateobj *
+nv40_fragtex_build(struct nv40_context *nv40, int unit)
+{
+ struct nv40_sampler_state *ps = nv40->tex_sampler[unit];
+ struct nv40_miptree *nv40mt = nv40->tex_miptree[unit];
+ struct pipe_texture *pt = &nv40mt->base;
+ struct nv40_texture_format *tf;
+ struct nouveau_stateobj *so;
+ uint32_t txf, txs, txp;
+ unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+ tf = nv40_fragtex_format(pt->format);
+ if (!tf)
+ assert(0);
+
+ txf = ps->fmt;
+ txf |= tf->format | 0x8000;
+ txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT);
+
+ if (1) /* XXX */
+ txf |= NV40TCL_TEX_FORMAT_NO_BORDER;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV40TCL_TEX_FORMAT_CUBIC;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ txf |= NV40TCL_TEX_FORMAT_DIMS_2D;
+ break;
+ case PIPE_TEXTURE_3D:
+ txf |= NV40TCL_TEX_FORMAT_DIMS_3D;
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= NV40TCL_TEX_FORMAT_DIMS_1D;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return NULL;
+ }
+
+ if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ txp = 0;
+ } else {
+ txp = nv40mt->level[0].pitch;
+ txf |= NV40TCL_TEX_FORMAT_LINEAR;
+ }
+
+ txs = tf->swizzle;
+
+ so = so_new(16, 2);
+ so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8);
+ so_reloc (so, nv40mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (so, nv40mt->buffer, txf, tex_flags | NOUVEAU_BO_OR,
+ NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1);
+ so_data (so, ps->wrap);
+ so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en);
+ so_data (so, txs);
+ so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/);
+ so_data (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) |
+ pt->height[0]);
+ so_data (so, ps->bcol);
+ so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1);
+ so_data (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);
+
+ return so;
+}
+
+static boolean
+nv40_fragtex_validate(struct nv40_context *nv40)
+{
+ struct nv40_fragment_program *fp = nv40->fragprog;
+ struct nv40_state *state = &nv40->state;
+ struct nouveau_stateobj *so;
+ unsigned samplers, unit;
+
+ samplers = state->fp_samplers & ~fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ so = so_new(2, 0);
+ so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1);
+ so_data (so, 0);
+ so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]);
+ state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit));
+ }
+
+ samplers = nv40->dirty_samplers & fp->samplers;
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ so = nv40_fragtex_build(nv40, unit);
+ so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]);
+ state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit));
+ }
+
+ nv40->state.fp_samplers = fp->samplers;
+ return FALSE;
+}
+
+struct nv40_state_entry nv40_state_fragtex = {
+ .validate = nv40_fragtex_validate,
+ .dirty = {
+ .pipe = NV40_NEW_SAMPLER | NV40_NEW_FRAGPROG,
+ .hw = 0
+ }
+};
+
diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
new file mode 100644
index 00000000000..b68967c07fd
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -0,0 +1,194 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv40_context.h"
+
+static void
+nv40_miptree_layout(struct nv40_miptree *mt)
+{
+ struct pipe_texture *pt = &mt->base;
+ uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+ uint offset = 0;
+ int nr_faces, l, f, pitch;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ nr_faces = 6;
+ } else
+ if (pt->target == PIPE_TEXTURE_3D) {
+ nr_faces = pt->depth[0];
+ } else {
+ nr_faces = 1;
+ }
+
+ pitch = pt->width[0];
+ for (l = 0; l <= pt->last_level; l++) {
+ pt->width[l] = width;
+ pt->height[l] = height;
+ pt->depth[l] = depth;
+ pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+ pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+ if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
+ pitch = pt->nblocksx[l];
+ pitch = align(pitch, 64);
+
+ mt->level[l].pitch = pitch * pt->block.size;
+ mt->level[l].image_offset =
+ CALLOC(nr_faces, sizeof(unsigned));
+
+ width = MAX2(1, width >> 1);
+ height = MAX2(1, height >> 1);
+ depth = MAX2(1, depth >> 1);
+ }
+
+ for (f = 0; f < nr_faces; f++) {
+ for (l = 0; l <= pt->last_level; l++) {
+ mt->level[l].image_offset[f] = offset;
+ offset += mt->level[l].pitch * pt->height[l];
+ }
+ }
+
+ mt->total_size = offset;
+}
+
+static struct pipe_texture *
+nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
+{
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv40_miptree *mt;
+
+ mt = MALLOC(sizeof(struct nv40_miptree));
+ if (!mt)
+ return NULL;
+ mt->base = *pt;
+ mt->base.refcount = 1;
+ mt->base.screen = pscreen;
+ mt->shadow_tex = NULL;
+ mt->shadow_surface = NULL;
+
+ /* Swizzled textures must be POT */
+ if (pt->width[0] & (pt->width[0] - 1) ||
+ pt->height[0] & (pt->height[0] - 1))
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+ else
+ if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
+ PIPE_TEXTURE_USAGE_DISPLAY_TARGET))
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+ else {
+ switch (pt->format) {
+ /* TODO: Figure out which formats can be swizzled */
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ /* XXX: Re-enable when SIFM size limits are fixed */
+ /*case PIPE_FORMAT_R16_SNORM:*/
+ break;
+ default:
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+ }
+ }
+
+ nv40_miptree_layout(mt);
+
+ mt->buffer = ws->buffer_create(ws, 256,
+ PIPE_BUFFER_USAGE_PIXEL |
+ NOUVEAU_BUFFER_USAGE_TEXTURE,
+ mt->total_size);
+ if (!mt->buffer) {
+ FREE(mt);
+ return NULL;
+ }
+
+ return &mt->base;
+}
+
+static void
+nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt)
+{
+ struct pipe_texture *pt = *ppt;
+ struct nv40_miptree *mt = (struct nv40_miptree *)pt;
+ int l;
+
+ *ppt = NULL;
+ if (--pt->refcount)
+ return;
+
+ pipe_buffer_reference(pscreen, &mt->buffer, NULL);
+ for (l = 0; l <= pt->last_level; l++) {
+ if (mt->level[l].image_offset)
+ FREE(mt->level[l].image_offset);
+ }
+
+ if (mt->shadow_tex) {
+ assert(mt->shadow_surface);
+ pscreen->tex_surface_release(pscreen, &mt->shadow_surface);
+ nv40_miptree_release(pscreen, &mt->shadow_tex);
+ }
+
+ FREE(mt);
+}
+
+static struct pipe_surface *
+nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct nv40_miptree *mt = (struct nv40_miptree *)pt;
+ struct pipe_surface *ps;
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ if (!ps)
+ return NULL;
+ pipe_texture_reference(&ps->texture, pt);
+ pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer);
+ ps->format = pt->format;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->block = pt->block;
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = mt->level[level].pitch;
+ ps->usage = flags;
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
+ ps->refcount = 1;
+ ps->winsys = pscreen->winsys;
+ ps->face = face;
+ ps->level = level;
+ ps->zslice = zslice;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ ps->offset = mt->level[level].image_offset[face];
+ } else
+ if (pt->target == PIPE_TEXTURE_3D) {
+ ps->offset = mt->level[level].image_offset[zslice];
+ } else {
+ ps->offset = mt->level[level].image_offset[0];
+ }
+
+ return ps;
+}
+
+static void
+nv40_miptree_surface_del(struct pipe_screen *pscreen,
+ struct pipe_surface **psurface)
+{
+ struct pipe_surface *ps = *psurface;
+
+ *psurface = NULL;
+ if (--ps->refcount > 0)
+ return;
+
+ pipe_texture_reference(&ps->texture, NULL);
+ pipe_buffer_reference(pscreen, &ps->buffer, NULL);
+ FREE(ps);
+}
+
+void
+nv40_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv40_miptree_create;
+ pscreen->texture_release = nv40_miptree_release;
+ pscreen->get_tex_surface = nv40_miptree_surface_new;
+ pscreen->tex_surface_release = nv40_miptree_surface_del;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c
new file mode 100644
index 00000000000..57f39cfab0c
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_query.c
@@ -0,0 +1,122 @@
+#include "pipe/p_context.h"
+
+#include "nv40_context.h"
+
+struct nv40_query {
+ struct nouveau_resource *object;
+ unsigned type;
+ boolean ready;
+ uint64_t result;
+};
+
+static INLINE struct nv40_query *
+nv40_query(struct pipe_query *pipe)
+{
+ return (struct nv40_query *)pipe;
+}
+
+static struct pipe_query *
+nv40_query_create(struct pipe_context *pipe, unsigned query_type)
+{
+ struct nv40_query *q;
+
+ q = CALLOC(1, sizeof(struct nv40_query));
+ q->type = query_type;
+
+ return (struct pipe_query *)q;
+}
+
+static void
+nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_query *q = nv40_query(pq);
+
+ if (q->object)
+ nv40->nvws->res_free(&q->object);
+ FREE(q);
+}
+
+static void
+nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_query *q = nv40_query(pq);
+
+ assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+ /* Happens when end_query() is called, then another begin_query()
+ * without querying the result in-between. For now we'll wait for
+ * the existing query to notify completion, but it could be better.
+ */
+ if (q->object) {
+ uint64 tmp;
+ pipe->get_query_result(pipe, pq, 1, &tmp);
+ }
+
+ if (nv40->nvws->res_alloc(nv40->screen->query_heap, 1, NULL, &q->object))
+ assert(0);
+ nv40->nvws->notifier_reset(nv40->screen->query, q->object->start);
+
+ BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1);
+ OUT_RING (1);
+ BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1);
+ OUT_RING (1);
+
+ q->ready = FALSE;
+}
+
+static void
+nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_query *q = nv40_query(pq);
+
+ BEGIN_RING(curie, NV40TCL_QUERY_GET, 1);
+ OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) |
+ ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT));
+ FIRE_RING(NULL);
+}
+
+static boolean
+nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, uint64 *result)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_query *q = nv40_query(pq);
+ struct nouveau_winsys *nvws = nv40->nvws;
+
+ assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+ if (!q->ready) {
+ unsigned status;
+
+ status = nvws->notifier_status(nv40->screen->query,
+ q->object->start);
+ if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) {
+ if (wait == FALSE)
+ return FALSE;
+ nvws->notifier_wait(nv40->screen->query, q->object->start,
+ NV_NOTIFY_STATE_STATUS_COMPLETED,
+ 0);
+ }
+
+ q->result = nvws->notifier_retval(nv40->screen->query,
+ q->object->start);
+ q->ready = TRUE;
+ nvws->res_free(&q->object);
+ }
+
+ *result = q->result;
+ return TRUE;
+}
+
+void
+nv40_init_query_functions(struct nv40_context *nv40)
+{
+ nv40->pipe.create_query = nv40_query_create;
+ nv40->pipe.destroy_query = nv40_query_destroy;
+ nv40->pipe.begin_query = nv40_query_begin;
+ nv40->pipe.end_query = nv40_query_end;
+ nv40->pipe.get_query_result = nv40_query_result;
+}
diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c
new file mode 100644
index 00000000000..25c78682961
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_screen.c
@@ -0,0 +1,365 @@
+#include "pipe/p_screen.h"
+
+#include "nv40_context.h"
+#include "nv40_screen.h"
+
+#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf
+#define NV4X_GRCLASS4497_CHIPSETS 0x00005450
+#define NV6X_GRCLASS4497_CHIPSETS 0x00000088
+
+static const char *
+nv40_screen_get_name(struct pipe_screen *pscreen)
+{
+ struct nv40_screen *screen = nv40_screen(pscreen);
+ struct nouveau_device *dev = screen->nvws->channel->device;
+ static char buffer[128];
+
+ snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+ return buffer;
+}
+
+static const char *
+nv40_screen_get_vendor(struct pipe_screen *pscreen)
+{
+ return "nouveau";
+}
+
+static int
+nv40_screen_get_param(struct pipe_screen *pscreen, int param)
+{
+ struct nv40_screen *screen = nv40_screen(pscreen);
+
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 16;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 1;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 4;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 13;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 10;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 13;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ return 1;
+ case NOUVEAU_CAP_HW_VTXBUF:
+ return 1;
+ case NOUVEAU_CAP_HW_IDXBUF:
+ if (screen->curie->grclass == NV40TCL)
+ return 1;
+ return 0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nv40_screen_get_paramf(struct pipe_screen *pscreen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 16.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 16.0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+static boolean
+nv40_screen_surface_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage, unsigned geom_flags)
+{
+ if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_R16_SNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ case PIPE_FORMAT_A8L8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT5_RGBA:
+ return TRUE;
+ default:
+ break;
+ }
+ }
+
+ return FALSE;
+}
+
+static void *
+nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface,
+ unsigned flags )
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct pipe_surface *surface_to_map;
+ void *map;
+
+ if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture;
+
+ if (!mt->shadow_tex) {
+ unsigned old_tex_usage = surface->texture->tex_usage;
+ surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR;
+ mt->shadow_tex = screen->texture_create(screen, surface->texture);
+ surface->texture->tex_usage = old_tex_usage;
+
+ assert(mt->shadow_tex->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR);
+ mt->shadow_surface = screen->get_tex_surface
+ (
+ screen, mt->shadow_tex,
+ surface->face, surface->level, surface->zslice,
+ surface->usage
+ );
+ }
+
+ surface_to_map = mt->shadow_surface;
+ }
+ else
+ surface_to_map = surface;
+
+ assert(surface_to_map);
+
+ map = ws->buffer_map(ws, surface_to_map->buffer, flags);
+ if (!map)
+ return NULL;
+
+ return map + surface_to_map->offset;
+}
+
+static void
+nv40_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface)
+{
+ struct pipe_winsys *ws = screen->winsys;
+ struct pipe_surface *surface_to_unmap;
+
+ /* TODO: Copy from shadow just before push buffer is flushed instead.
+ There are probably some programs that map/unmap excessively
+ before rendering. */
+ if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture;
+
+ assert(mt->shadow_tex);
+
+ surface_to_unmap = mt->shadow_surface;
+ }
+ else
+ surface_to_unmap = surface;
+
+ assert(surface_to_unmap);
+
+ ws->buffer_unmap(ws, surface_to_unmap->buffer);
+
+ if (surface_to_unmap != surface) {
+ struct nv40_screen *nvscreen = nv40_screen(screen);
+
+ nvscreen->nvws->surface_copy(nvscreen->nvws,
+ surface, 0, 0,
+ surface_to_unmap, 0, 0,
+ surface->width, surface->height);
+ }
+}
+
+static void
+nv40_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv40_screen *screen = nv40_screen(pscreen);
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nvws->res_free(&screen->vp_exec_heap);
+ nvws->res_free(&screen->vp_data_heap);
+ nvws->res_free(&screen->query_heap);
+ nvws->notifier_free(&screen->query);
+ nvws->notifier_free(&screen->sync);
+ nvws->grobj_free(&screen->curie);
+
+ FREE(pscreen);
+}
+
+struct pipe_screen *
+nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+ struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen);
+ struct nouveau_stateobj *so;
+ unsigned curie_class;
+ unsigned chipset = nvws->channel->device->chipset;
+ int ret;
+
+ if (!screen)
+ return NULL;
+ screen->nvws = nvws;
+
+ /* 3D object */
+ switch (chipset & 0xf0) {
+ case 0x40:
+ if (NV4X_GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f)))
+ curie_class = NV40TCL;
+ else
+ if (NV4X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f)))
+ curie_class = NV44TCL;
+ break;
+ case 0x60:
+ if (NV6X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f)))
+ curie_class = NV44TCL;
+ break;
+ default:
+ break;
+ }
+
+ if (!curie_class) {
+ NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", chipset);
+ return NULL;
+ }
+
+ ret = nvws->grobj_alloc(nvws, curie_class, &screen->curie);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+ return FALSE;
+ }
+
+ /* Notifier for sync purposes */
+ ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+ if (ret) {
+ NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+ nv40_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Query objects */
+ ret = nvws->notifier_alloc(nvws, 32, &screen->query);
+ if (ret) {
+ NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
+ nv40_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ ret = nvws->res_init(&screen->query_heap, 0, 32);
+ if (ret) {
+ NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
+ nv40_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Vtxprog resources */
+ if (nvws->res_init(&screen->vp_exec_heap, 0, 512) ||
+ nvws->res_init(&screen->vp_data_heap, 0, 256)) {
+ nv40_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Static curie initialisation */
+ so = so_new(128, 0);
+ so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1);
+ so_data (so, screen->sync->handle);
+ so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->gart->handle);
+ so_method(so, screen->curie, NV40TCL_DMA_COLOR1, 1);
+ so_data (so, nvws->channel->vram->handle);
+ so_method(so, screen->curie, NV40TCL_DMA_COLOR0, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->vram->handle);
+ so_method(so, screen->curie, NV40TCL_DMA_VTXBUF0, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->gart->handle);
+ so_method(so, screen->curie, NV40TCL_DMA_FENCE, 2);
+ so_data (so, 0);
+ so_data (so, screen->query->handle);
+ so_method(so, screen->curie, NV40TCL_DMA_UNK01AC, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->vram->handle);
+ so_method(so, screen->curie, NV40TCL_DMA_COLOR2, 2);
+ so_data (so, nvws->channel->vram->handle);
+ so_data (so, nvws->channel->vram->handle);
+
+ so_method(so, screen->curie, 0x1ea4, 3);
+ so_data (so, 0x00000010);
+ so_data (so, 0x01000100);
+ so_data (so, 0xff800006);
+
+ /* vtxprog output routing */
+ so_method(so, screen->curie, 0x1fc4, 1);
+ so_data (so, 0x06144321);
+ so_method(so, screen->curie, 0x1fc8, 2);
+ so_data (so, 0xedcba987);
+ so_data (so, 0x00000021);
+ so_method(so, screen->curie, 0x1fd0, 1);
+ so_data (so, 0x00171615);
+ so_method(so, screen->curie, 0x1fd4, 1);
+ so_data (so, 0x001b1a19);
+
+ so_method(so, screen->curie, 0x1ef8, 1);
+ so_data (so, 0x0020ffff);
+ so_method(so, screen->curie, 0x1d64, 1);
+ so_data (so, 0x00d30000);
+ so_method(so, screen->curie, 0x1e94, 1);
+ so_data (so, 0x00000001);
+
+ so_emit(nvws, so);
+ so_ref(NULL, &so);
+ nvws->push_flush(nvws, 0, NULL);
+
+ screen->pipe.winsys = ws;
+ screen->pipe.destroy = nv40_screen_destroy;
+
+ screen->pipe.get_name = nv40_screen_get_name;
+ screen->pipe.get_vendor = nv40_screen_get_vendor;
+ screen->pipe.get_param = nv40_screen_get_param;
+ screen->pipe.get_paramf = nv40_screen_get_paramf;
+
+ screen->pipe.is_format_supported = nv40_screen_surface_format_supported;
+
+ screen->pipe.surface_map = nv40_surface_map;
+ screen->pipe.surface_unmap = nv40_surface_unmap;
+
+ nv40_screen_init_miptree_functions(&screen->pipe);
+
+ return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h
new file mode 100644
index 00000000000..c04a1275a00
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_screen.h
@@ -0,0 +1,35 @@
+#ifndef __NV40_SCREEN_H__
+#define __NV40_SCREEN_H__
+
+#include "pipe/p_screen.h"
+
+struct nv40_screen {
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+
+ unsigned cur_pctx;
+
+ /* HW graphics objects */
+ struct nouveau_grobj *curie;
+ struct nouveau_notifier *sync;
+
+ /* Query object resources */
+ struct nouveau_notifier *query;
+ struct nouveau_resource *query_heap;
+
+ /* Vtxprog resources */
+ struct nouveau_resource *vp_exec_heap;
+ struct nouveau_resource *vp_data_heap;
+
+ /* Current 3D state of channel */
+ struct nouveau_stateobj *state[NV40_STATE_MAX];
+};
+
+static INLINE struct nv40_screen *
+nv40_screen(struct pipe_screen *screen)
+{
+ return (struct nv40_screen *)screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h
new file mode 100644
index 00000000000..854dccf5486
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_shader.h
@@ -0,0 +1,556 @@
+#ifndef __NV40_SHADER_H__
+#define __NV40_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * The NV40 instruction set is very similar to NV30. Most fields are in
+ * a slightly different position in the instruction however.
+ *
+ * Merged instructions
+ * In some cases it is possible to put two instructions into one opcode
+ * slot. The rules for when this is OK is not entirely clear to me yet.
+ *
+ * There are separate writemasks and dest temp register fields for each
+ * grouping of instructions. There is however only one field with the
+ * ID of a result register. Writing to temp/result regs is selected by
+ * setting VEC_RESULT/SCA_RESULT.
+ *
+ * Temporary registers
+ * The source/dest temp register fields have been extended by 1 bit, to
+ * give a total of 32 temporary registers.
+ *
+ * Relative Addressing
+ * NV40 can use an address register to index into vertex attribute regs.
+ * This is done by putting the offset value into INPUT_SRC and setting
+ * the INDEX_INPUT flag.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details)
+ * There is a second condition code register on NV40, it's use is enabled
+ * by setting the COND_REG_SELECT_1 flag.
+ *
+ * Texture lookup
+ * TODO
+ */
+
+/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */
+#define NV40_VP_INST_VEC_RESULT (1 << 30)
+/* uncertain.. */
+#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29)
+/* use address reg as index into attribs */
+#define NV40_VP_INST_INDEX_INPUT (1 << 27)
+#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25)
+#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
+#define NV40_VP_INST_SRC2_ABS (1 << 23)
+#define NV40_VP_INST_SRC1_ABS (1 << 22)
+#define NV40_VP_INST_SRC0_ABS (1 << 21)
+#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15
+#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15)
+#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13)
+#define NV40_VP_INST_COND_SHIFT 10
+#define NV40_VP_INST_COND_MASK (0x7 << 10)
+# define NV40_VP_INST_COND_FL 0
+# define NV40_VP_INST_COND_LT 1
+# define NV40_VP_INST_COND_EQ 2
+# define NV40_VP_INST_COND_LE 3
+# define NV40_VP_INST_COND_GT 4
+# define NV40_VP_INST_COND_NE 5
+# define NV40_VP_INST_COND_GE 6
+# define NV40_VP_INST_COND_TR 7
+#define NV40_VP_INST_COND_SWZ_X_SHIFT 8
+#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8)
+#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6
+#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6)
+#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4
+#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4)
+#define NV40_VP_INST_COND_SWZ_W_SHIFT 2
+#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2)
+#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2
+#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2)
+#define NV40_VP_INST_ADDR_SWZ_SHIFT 0
+#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0)
+#define NV40_VP_INST0_KNOWN ( \
+ NV40_VP_INST_INDEX_INPUT | \
+ NV40_VP_INST_COND_REG_SELECT_1 | \
+ NV40_VP_INST_ADDR_REG_SELECT_1 | \
+ NV40_VP_INST_SRC2_ABS | \
+ NV40_VP_INST_SRC1_ABS | \
+ NV40_VP_INST_SRC0_ABS | \
+ NV40_VP_INST_VEC_DEST_TEMP_MASK | \
+ NV40_VP_INST_COND_TEST_ENABLE | \
+ NV40_VP_INST_COND_MASK | \
+ NV40_VP_INST_COND_SWZ_ALL_MASK | \
+ NV40_VP_INST_ADDR_SWZ_MASK)
+
+/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */
+#define NV40_VP_INST_VEC_OPCODE_SHIFT 22
+#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22)
+# define NV40_VP_INST_OP_NOP 0x00
+# define NV40_VP_INST_OP_MOV 0x01
+# define NV40_VP_INST_OP_MUL 0x02
+# define NV40_VP_INST_OP_ADD 0x03
+# define NV40_VP_INST_OP_MAD 0x04
+# define NV40_VP_INST_OP_DP3 0x05
+# define NV40_VP_INST_OP_DPH 0x06
+# define NV40_VP_INST_OP_DP4 0x07
+# define NV40_VP_INST_OP_DST 0x08
+# define NV40_VP_INST_OP_MIN 0x09
+# define NV40_VP_INST_OP_MAX 0x0A
+# define NV40_VP_INST_OP_SLT 0x0B
+# define NV40_VP_INST_OP_SGE 0x0C
+# define NV40_VP_INST_OP_ARL 0x0D
+# define NV40_VP_INST_OP_FRC 0x0E
+# define NV40_VP_INST_OP_FLR 0x0F
+# define NV40_VP_INST_OP_SEQ 0x10
+# define NV40_VP_INST_OP_SFL 0x11
+# define NV40_VP_INST_OP_SGT 0x12
+# define NV40_VP_INST_OP_SLE 0x13
+# define NV40_VP_INST_OP_SNE 0x14
+# define NV40_VP_INST_OP_STR 0x15
+# define NV40_VP_INST_OP_SSG 0x16
+# define NV40_VP_INST_OP_ARR 0x17
+# define NV40_VP_INST_OP_ARA 0x18
+# define NV40_VP_INST_OP_TXL 0x19
+#define NV40_VP_INST_SCA_OPCODE_SHIFT 27
+#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27)
+# define NV40_VP_INST_OP_NOP 0x00
+# define NV40_VP_INST_OP_MOV 0x01
+# define NV40_VP_INST_OP_RCP 0x02
+# define NV40_VP_INST_OP_RCC 0x03
+# define NV40_VP_INST_OP_RSQ 0x04
+# define NV40_VP_INST_OP_EXP 0x05
+# define NV40_VP_INST_OP_LOG 0x06
+# define NV40_VP_INST_OP_LIT 0x07
+# define NV40_VP_INST_OP_BRA 0x09
+# define NV40_VP_INST_OP_CAL 0x0B
+# define NV40_VP_INST_OP_RET 0x0C
+# define NV40_VP_INST_OP_LG2 0x0D
+# define NV40_VP_INST_OP_EX2 0x0E
+# define NV40_VP_INST_OP_SIN 0x0F
+# define NV40_VP_INST_OP_COS 0x10
+# define NV40_VP_INST_OP_PUSHA 0x13
+# define NV40_VP_INST_OP_POPA 0x14
+#define NV40_VP_INST_CONST_SRC_SHIFT 12
+#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12)
+#define NV40_VP_INST_INPUT_SRC_SHIFT 8
+#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8)
+# define NV40_VP_INST_IN_POS 0
+# define NV40_VP_INST_IN_WEIGHT 1
+# define NV40_VP_INST_IN_NORMAL 2
+# define NV40_VP_INST_IN_COL0 3
+# define NV40_VP_INST_IN_COL1 4
+# define NV40_VP_INST_IN_FOGC 5
+# define NV40_VP_INST_IN_TC0 8
+# define NV40_VP_INST_IN_TC(n) (8+n)
+#define NV40_VP_INST_SRC0H_SHIFT 0
+#define NV40_VP_INST_SRC0H_MASK (0xFF << 0)
+#define NV40_VP_INST1_KNOWN ( \
+ NV40_VP_INST_VEC_OPCODE_MASK | \
+ NV40_VP_INST_SCA_OPCODE_MASK | \
+ NV40_VP_INST_CONST_SRC_MASK | \
+ NV40_VP_INST_INPUT_SRC_MASK | \
+ NV40_VP_INST_SRC0H_MASK \
+ )
+
+/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */
+#define NV40_VP_INST_SRC0L_SHIFT 23
+#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23)
+#define NV40_VP_INST_SRC1_SHIFT 6
+#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6)
+#define NV40_VP_INST_SRC2H_SHIFT 0
+#define NV40_VP_INST_SRC2H_MASK (0x3F << 0)
+#define NV40_VP_INST_IADDRH_SHIFT 0
+#define NV40_VP_INST_IADDRH_MASK (0x1F << 0)
+
+/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */
+#define NV40_VP_INST_IADDRL_SHIFT 29
+#define NV40_VP_INST_IADDRL_MASK (7 << 29)
+#define NV40_VP_INST_SRC2L_SHIFT 21
+#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21)
+#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17
+#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17)
+# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20)
+# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19)
+# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18)
+# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17)
+#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13
+#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13)
+# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16)
+# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15)
+# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14)
+# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13)
+#define NV40_VP_INST_SCA_RESULT (1 << 12)
+#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7
+#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7)
+#define NV40_VP_INST_DEST_SHIFT 2
+#define NV40_VP_INST_DEST_MASK (31 << 2)
+# define NV40_VP_INST_DEST_POS 0
+# define NV40_VP_INST_DEST_COL0 1
+# define NV40_VP_INST_DEST_COL1 2
+# define NV40_VP_INST_DEST_BFC0 3
+# define NV40_VP_INST_DEST_BFC1 4
+# define NV40_VP_INST_DEST_FOGC 5
+# define NV40_VP_INST_DEST_PSZ 6
+# define NV40_VP_INST_DEST_TC0 7
+# define NV40_VP_INST_DEST_TC(n) (7+n)
+# define NV40_VP_INST_DEST_TEMP 0x1F
+#define NV40_VP_INST_INDEX_CONST (1 << 1)
+#define NV40_VP_INST_LAST (1 << 0)
+#define NV40_VP_INST3_KNOWN ( \
+ NV40_VP_INST_SRC2L_MASK |\
+ NV40_VP_INST_SCA_WRITEMASK_MASK |\
+ NV40_VP_INST_VEC_WRITEMASK_MASK |\
+ NV40_VP_INST_SCA_DEST_TEMP_MASK |\
+ NV40_VP_INST_DEST_MASK |\
+ NV40_VP_INST_INDEX_CONST)
+
+/* Useful to split the source selection regs into their pieces */
+#define NV40_VP_SRC0_HIGH_SHIFT 9
+#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00
+#define NV40_VP_SRC0_LOW_MASK 0x000001FF
+#define NV40_VP_SRC2_HIGH_SHIFT 11
+#define NV40_VP_SRC2_HIGH_MASK 0x0001F800
+#define NV40_VP_SRC2_LOW_MASK 0x000007FF
+
+/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */
+#define NV40_VP_SRC_NEGATE (1 << 16)
+#define NV40_VP_SRC_SWZ_X_SHIFT 14
+#define NV40_VP_SRC_SWZ_X_MASK (3 << 14)
+#define NV40_VP_SRC_SWZ_Y_SHIFT 12
+#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12)
+#define NV40_VP_SRC_SWZ_Z_SHIFT 10
+#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10)
+#define NV40_VP_SRC_SWZ_W_SHIFT 8
+#define NV40_VP_SRC_SWZ_W_MASK (3 << 8)
+#define NV40_VP_SRC_SWZ_ALL_SHIFT 8
+#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8)
+#define NV40_VP_SRC_TEMP_SRC_SHIFT 2
+#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2)
+#define NV40_VP_SRC_REG_TYPE_SHIFT 0
+#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0)
+# define NV40_VP_SRC_REG_TYPE_UNK0 0
+# define NV40_VP_SRC_REG_TYPE_TEMP 1
+# define NV40_VP_SRC_REG_TYPE_INPUT 2
+# define NV40_VP_SRC_REG_TYPE_CONST 3
+
+
+/*
+ * Each fragment program opcode appears to be comprised of 4 32-bit values.
+ *
+ * 0 - Opcode, output reg/mask, ATTRIB source
+ * 1 - Source 0
+ * 2 - Source 1
+ * 3 - Source 2
+ *
+ * There appears to be no special difference between result regs and temp regs.
+ * result.color == R0.xyzw
+ * result.depth == R1.z
+ * When the fragprog contains instructions to write depth,
+ * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1.
+ *
+ * Constants are inserted directly after the instruction that uses them.
+ *
+ * It appears that it's not possible to use two input registers in one
+ * instruction as the input sourcing is done in the instruction dword
+ * and not the source selection dwords. As such instructions such as:
+ *
+ * ADD result.color, fragment.color, fragment.texcoord[0];
+ *
+ * must be split into two MOV's and then an ADD (nvidia does this) but
+ * I'm not sure why it's not just one MOV and then source the second input
+ * in the ADD instruction..
+ *
+ * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
+ * negation requires multiplication with a const.
+ *
+ * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and
+ * SWIZZLE_ONE.
+ *
+ * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as
+ * SWIZZLE_ZERO is implemented simply by not writing to the relevant components
+ * of the destination.
+ *
+ * Looping
+ * Loops appear to be fairly expensive on NV40 at least, the proprietary
+ * driver goes to a lot of effort to avoid using the native looping
+ * instructions. If the total number of *executed* instructions between
+ * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop.
+ * The maximum loop count is 255.
+ *
+ * Conditional execution
+ * TODO
+ *
+ * Non-native instructions:
+ * LIT
+ * LRP - MAD+MAD
+ * SUB - ADD, negate second source
+ * RSQ - LG2 + EX2
+ * POW - LG2 + MUL + EX2
+ * SCS - COS + SIN
+ * XPD
+ * DP2 - MUL + ADD
+ * NRM
+ */
+
+//== Opcode / Destination selection ==
+#define NV40_FP_OP_PROGRAM_END (1 << 0)
+#define NV40_FP_OP_OUT_REG_SHIFT 1
+#define NV40_FP_OP_OUT_REG_MASK (63 << 1)
+/* Needs to be set when writing outputs to get expected result.. */
+#define NV40_FP_OP_OUT_REG_HALF (1 << 7)
+#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8)
+#define NV40_FP_OP_OUTMASK_SHIFT 9
+#define NV40_FP_OP_OUTMASK_MASK (0xF << 9)
+# define NV40_FP_OP_OUT_X (1 << 9)
+# define NV40_FP_OP_OUT_Y (1 <<10)
+# define NV40_FP_OP_OUT_Z (1 <<11)
+# define NV40_FP_OP_OUT_W (1 <<12)
+/* Uncertain about these, especially the input_src values.. it's possible that
+ * they can be dynamically changed.
+ */
+#define NV40_FP_OP_INPUT_SRC_SHIFT 13
+#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13)
+# define NV40_FP_OP_INPUT_SRC_POSITION 0x0
+# define NV40_FP_OP_INPUT_SRC_COL0 0x1
+# define NV40_FP_OP_INPUT_SRC_COL1 0x2
+# define NV40_FP_OP_INPUT_SRC_FOGC 0x3
+# define NV40_FP_OP_INPUT_SRC_TC0 0x4
+# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + n)
+# define NV40_FP_OP_INPUT_SRC_FACING 0xE
+#define NV40_FP_OP_TEX_UNIT_SHIFT 17
+#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17)
+#define NV40_FP_OP_PRECISION_SHIFT 22
+#define NV40_FP_OP_PRECISION_MASK (3 << 22)
+# define NV40_FP_PRECISION_FP32 0
+# define NV40_FP_PRECISION_FP16 1
+# define NV40_FP_PRECISION_FX12 2
+#define NV40_FP_OP_OPCODE_SHIFT 24
+#define NV40_FP_OP_OPCODE_MASK (0x3F << 24)
+# define NV40_FP_OP_OPCODE_NOP 0x00
+# define NV40_FP_OP_OPCODE_MOV 0x01
+# define NV40_FP_OP_OPCODE_MUL 0x02
+# define NV40_FP_OP_OPCODE_ADD 0x03
+# define NV40_FP_OP_OPCODE_MAD 0x04
+# define NV40_FP_OP_OPCODE_DP3 0x05
+# define NV40_FP_OP_OPCODE_DP4 0x06
+# define NV40_FP_OP_OPCODE_DST 0x07
+# define NV40_FP_OP_OPCODE_MIN 0x08
+# define NV40_FP_OP_OPCODE_MAX 0x09
+# define NV40_FP_OP_OPCODE_SLT 0x0A
+# define NV40_FP_OP_OPCODE_SGE 0x0B
+# define NV40_FP_OP_OPCODE_SLE 0x0C
+# define NV40_FP_OP_OPCODE_SGT 0x0D
+# define NV40_FP_OP_OPCODE_SNE 0x0E
+# define NV40_FP_OP_OPCODE_SEQ 0x0F
+# define NV40_FP_OP_OPCODE_FRC 0x10
+# define NV40_FP_OP_OPCODE_FLR 0x11
+# define NV40_FP_OP_OPCODE_KIL 0x12
+# define NV40_FP_OP_OPCODE_PK4B 0x13
+# define NV40_FP_OP_OPCODE_UP4B 0x14
+/* DDX/DDY can only write to XY */
+# define NV40_FP_OP_OPCODE_DDX 0x15
+# define NV40_FP_OP_OPCODE_DDY 0x16
+# define NV40_FP_OP_OPCODE_TEX 0x17
+# define NV40_FP_OP_OPCODE_TXP 0x18
+# define NV40_FP_OP_OPCODE_TXD 0x19
+# define NV40_FP_OP_OPCODE_RCP 0x1A
+# define NV40_FP_OP_OPCODE_EX2 0x1C
+# define NV40_FP_OP_OPCODE_LG2 0x1D
+# define NV40_FP_OP_OPCODE_STR 0x20
+# define NV40_FP_OP_OPCODE_SFL 0x21
+# define NV40_FP_OP_OPCODE_COS 0x22
+# define NV40_FP_OP_OPCODE_SIN 0x23
+# define NV40_FP_OP_OPCODE_PK2H 0x24
+# define NV40_FP_OP_OPCODE_UP2H 0x25
+# define NV40_FP_OP_OPCODE_PK4UB 0x27
+# define NV40_FP_OP_OPCODE_UP4UB 0x28
+# define NV40_FP_OP_OPCODE_PK2US 0x29
+# define NV40_FP_OP_OPCODE_UP2US 0x2A
+# define NV40_FP_OP_OPCODE_DP2A 0x2E
+# define NV40_FP_OP_OPCODE_TXL 0x2F
+# define NV40_FP_OP_OPCODE_TXB 0x31
+# define NV40_FP_OP_OPCODE_DIV 0x3A
+# define NV40_FP_OP_OPCODE_UNK_LIT 0x3C
+/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/
+# define NV40_FP_OP_BRA_OPCODE_BRK 0x0
+# define NV40_FP_OP_BRA_OPCODE_CAL 0x1
+# define NV40_FP_OP_BRA_OPCODE_IF 0x2
+# define NV40_FP_OP_BRA_OPCODE_LOOP 0x3
+# define NV40_FP_OP_BRA_OPCODE_REP 0x4
+# define NV40_FP_OP_BRA_OPCODE_RET 0x5
+#define NV40_FP_OP_OUT_SAT (1 << 31)
+
+/* high order bits of SRC0 */
+#define NV40_FP_OP_OUT_ABS (1 << 29)
+#define NV40_FP_OP_COND_SWZ_W_SHIFT 27
+#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27)
+#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25
+#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25)
+#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23
+#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23)
+#define NV40_FP_OP_COND_SWZ_X_SHIFT 21
+#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21)
+#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21
+#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21)
+#define NV40_FP_OP_COND_SHIFT 18
+#define NV40_FP_OP_COND_MASK (0x07 << 18)
+# define NV40_FP_OP_COND_FL 0
+# define NV40_FP_OP_COND_LT 1
+# define NV40_FP_OP_COND_EQ 2
+# define NV40_FP_OP_COND_LE 3
+# define NV40_FP_OP_COND_GT 4
+# define NV40_FP_OP_COND_NE 5
+# define NV40_FP_OP_COND_GE 6
+# define NV40_FP_OP_COND_TR 7
+
+/* high order bits of SRC1 */
+#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31)
+#define NV40_FP_OP_DST_SCALE_SHIFT 28
+#define NV40_FP_OP_DST_SCALE_MASK (3 << 28)
+#define NV40_FP_OP_DST_SCALE_1X 0
+#define NV40_FP_OP_DST_SCALE_2X 1
+#define NV40_FP_OP_DST_SCALE_4X 2
+#define NV40_FP_OP_DST_SCALE_8X 3
+#define NV40_FP_OP_DST_SCALE_INV_2X 5
+#define NV40_FP_OP_DST_SCALE_INV_4X 6
+#define NV40_FP_OP_DST_SCALE_INV_8X 7
+
+/* SRC1 LOOP */
+#define NV40_FP_OP_LOOP_INCR_SHIFT 19
+#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19)
+#define NV40_FP_OP_LOOP_INDEX_SHIFT 10
+#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10)
+#define NV40_FP_OP_LOOP_COUNT_SHIFT 2
+#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2)
+
+/* SRC1 IF */
+#define NV40_FP_OP_ELSE_ID_SHIFT 2
+#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2)
+
+/* SRC1 CAL */
+#define NV40_FP_OP_IADDR_SHIFT 2
+#define NV40_FP_OP_IADDR_MASK (0xFF << 2)
+
+/* SRC1 REP
+ * I have no idea why there are 3 count values here.. but they
+ * have always been filled with the same value in my tests so
+ * far..
+ */
+#define NV40_FP_OP_REP_COUNT1_SHIFT 2
+#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2)
+#define NV40_FP_OP_REP_COUNT2_SHIFT 10
+#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10)
+#define NV40_FP_OP_REP_COUNT3_SHIFT 19
+#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19)
+
+/* SRC2 REP/IF */
+#define NV40_FP_OP_END_ID_SHIFT 2
+#define NV40_FP_OP_END_ID_MASK (0xFF << 2)
+
+// SRC2 high-order
+#define NV40_FP_OP_INDEX_INPUT (1 << 30)
+#define NV40_FP_OP_ADDR_INDEX_SHIFT 19
+#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19)
+
+//== Register selection ==
+#define NV40_FP_REG_TYPE_SHIFT 0
+#define NV40_FP_REG_TYPE_MASK (3 << 0)
+# define NV40_FP_REG_TYPE_TEMP 0
+# define NV40_FP_REG_TYPE_INPUT 1
+# define NV40_FP_REG_TYPE_CONST 2
+#define NV40_FP_REG_SRC_SHIFT 2
+#define NV40_FP_REG_SRC_MASK (63 << 2)
+#define NV40_FP_REG_SRC_HALF (1 << 8)
+#define NV40_FP_REG_SWZ_ALL_SHIFT 9
+#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9)
+#define NV40_FP_REG_SWZ_X_SHIFT 9
+#define NV40_FP_REG_SWZ_X_MASK (3 << 9)
+#define NV40_FP_REG_SWZ_Y_SHIFT 11
+#define NV40_FP_REG_SWZ_Y_MASK (3 << 11)
+#define NV40_FP_REG_SWZ_Z_SHIFT 13
+#define NV40_FP_REG_SWZ_Z_MASK (3 << 13)
+#define NV40_FP_REG_SWZ_W_SHIFT 15
+#define NV40_FP_REG_SWZ_W_MASK (3 << 15)
+# define NV40_FP_SWIZZLE_X 0
+# define NV40_FP_SWIZZLE_Y 1
+# define NV40_FP_SWIZZLE_Z 2
+# define NV40_FP_SWIZZLE_W 3
+#define NV40_FP_REG_NEGATE (1 << 17)
+
+#ifndef NV40_SHADER_NO_FUCKEDNESS
+#define NV40SR_NONE 0
+#define NV40SR_OUTPUT 1
+#define NV40SR_INPUT 2
+#define NV40SR_TEMP 3
+#define NV40SR_CONST 4
+
+struct nv40_sreg {
+ int type;
+ int index;
+
+ int dst_scale;
+
+ int negate;
+ int abs;
+ int swz[4];
+
+ int cc_update;
+ int cc_update_reg;
+ int cc_test;
+ int cc_test_reg;
+ int cc_swz[4];
+};
+
+static INLINE struct nv40_sreg
+nv40_sr(int type, int index)
+{
+ struct nv40_sreg temp = {
+ .type = type,
+ .index = index,
+ .dst_scale = DEF_SCALE,
+ .abs = 0,
+ .negate = 0,
+ .swz = { 0, 1, 2, 3 },
+ .cc_update = 0,
+ .cc_update_reg = 0,
+ .cc_test = DEF_CTEST,
+ .cc_test_reg = 0,
+ .cc_swz = { 0, 1, 2, 3 },
+ };
+ return temp;
+}
+
+static INLINE struct nv40_sreg
+nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w)
+{
+ struct nv40_sreg dst = src;
+
+ dst.swz[SWZ_X] = src.swz[x];
+ dst.swz[SWZ_Y] = src.swz[y];
+ dst.swz[SWZ_Z] = src.swz[z];
+ dst.swz[SWZ_W] = src.swz[w];
+ return dst;
+}
+
+static INLINE struct nv40_sreg
+nv40_sr_neg(struct nv40_sreg src)
+{
+ src.negate = !src.negate;
+ return src;
+}
+
+static INLINE struct nv40_sreg
+nv40_sr_abs(struct nv40_sreg src)
+{
+ src.abs = 1;
+ return src;
+}
+
+static INLINE struct nv40_sreg
+nv40_sr_scale(struct nv40_sreg src, int scale)
+{
+ src.dst_scale = scale;
+ return src;
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c
new file mode 100644
index 00000000000..255c4b294d1
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state.c
@@ -0,0 +1,740 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "draw/draw_context.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv40_context.h"
+#include "nv40_state.h"
+
+static void *
+nv40_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nouveau_grobj *curie = nv40->screen->curie;
+ struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso));
+ struct nouveau_stateobj *so = so_new(16, 0);
+
+ if (cso->blend_enable) {
+ so_method(so, curie, NV40TCL_BLEND_ENABLE, 3);
+ so_data (so, 1);
+ so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) |
+ nvgl_blend_func(cso->rgb_src_factor));
+ so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 |
+ nvgl_blend_func(cso->rgb_dst_factor));
+ so_method(so, curie, NV40TCL_BLEND_EQUATION, 1);
+ so_data (so, nvgl_blend_eqn(cso->alpha_func) << 16 |
+ nvgl_blend_eqn(cso->rgb_func));
+ } else {
+ so_method(so, curie, NV40TCL_BLEND_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_method(so, curie, NV40TCL_COLOR_MASK, 1);
+ so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) |
+ ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) |
+ ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) |
+ ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0)));
+
+ if (cso->logicop_enable) {
+ so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2);
+ so_data (so, 1);
+ so_data (so, nvgl_logicop_func(cso->logicop_func));
+ } else {
+ so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_method(so, curie, NV40TCL_DITHER_ENABLE, 1);
+ so_data (so, cso->dither ? 1 : 0);
+
+ so_ref(so, &bso->so);
+ bso->pipe = *cso;
+ return (void *)bso;
+}
+
+static void
+nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->blend = hwcso;
+ nv40->dirty |= NV40_NEW_BLEND;
+}
+
+static void
+nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_blend_state *bso = hwcso;
+
+ so_ref(NULL, &bso->so);
+ FREE(bso);
+}
+
+
+static INLINE unsigned
+wrap_mode(unsigned wrap) {
+ unsigned ret;
+
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ ret = NV40TCL_TEX_WRAP_S_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ ret = NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_CLAMP:
+ ret = NV40TCL_TEX_WRAP_S_CLAMP;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP;
+ break;
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ ret = NV40TCL_TEX_WRAP_S_REPEAT;
+ break;
+ }
+
+ return ret >> NV40TCL_TEX_WRAP_S_SHIFT;
+}
+
+static void *
+nv40_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ struct nv40_sampler_state *ps;
+ uint32_t filter = 0;
+
+ ps = MALLOC(sizeof(struct nv40_sampler_state));
+
+ ps->fmt = 0;
+ if (!cso->normalized_coords)
+ ps->fmt |= NV40TCL_TEX_FORMAT_RECT;
+
+ ps->wrap = ((wrap_mode(cso->wrap_s) << NV40TCL_TEX_WRAP_S_SHIFT) |
+ (wrap_mode(cso->wrap_t) << NV40TCL_TEX_WRAP_T_SHIFT) |
+ (wrap_mode(cso->wrap_r) << NV40TCL_TEX_WRAP_R_SHIFT));
+
+ ps->en = 0;
+ if (cso->max_anisotropy >= 2.0) {
+ /* no idea, binary driver sets it, works without it.. meh.. */
+ ps->wrap |= (1 << 5);
+
+ if (cso->max_anisotropy >= 16.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X;
+ } else
+ if (cso->max_anisotropy >= 12.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X;
+ } else
+ if (cso->max_anisotropy >= 10.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X;
+ } else
+ if (cso->max_anisotropy >= 8.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X;
+ } else
+ if (cso->max_anisotropy >= 6.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X;
+ } else
+ if (cso->max_anisotropy >= 4.0) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X;
+ } else {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X;
+ }
+ }
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ filter |= NV40TCL_TEX_FILTER_MAG_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ filter |= NV40TCL_TEX_FILTER_MAG_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV40TCL_TEX_FILTER_MIN_LINEAR;
+ break;
+ }
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV40TCL_TEX_FILTER_MIN_NEAREST;
+ break;
+ }
+ break;
+ }
+
+ ps->filt = filter;
+
+ {
+ float limit;
+
+ limit = CLAMP(cso->lod_bias, -16.0, 15.0);
+ ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff;
+
+ limit = CLAMP(cso->max_lod, 0.0, 15.0);
+ ps->en |= (int)(limit * 256.0) << 7;
+
+ limit = CLAMP(cso->min_lod, 0.0, 15.0);
+ ps->en |= (int)(limit * 256.0) << 19;
+ }
+
+
+ if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ switch (cso->compare_func) {
+ case PIPE_FUNC_NEVER:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NEVER;
+ break;
+ case PIPE_FUNC_GREATER:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GREATER;
+ break;
+ case PIPE_FUNC_EQUAL:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_EQUAL;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GEQUAL;
+ break;
+ case PIPE_FUNC_LESS:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LESS;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL;
+ break;
+ case PIPE_FUNC_LEQUAL:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LEQUAL;
+ break;
+ case PIPE_FUNC_ALWAYS:
+ ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_ALWAYS;
+ break;
+ default:
+ break;
+ }
+ }
+
+ ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
+ (float_to_ubyte(cso->border_color[0]) << 16) |
+ (float_to_ubyte(cso->border_color[1]) << 8) |
+ (float_to_ubyte(cso->border_color[2]) << 0));
+
+ return (void *)ps;
+}
+
+static void
+nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nv40->tex_sampler[unit] = sampler[unit];
+ nv40->dirty_samplers |= (1 << unit);
+ }
+
+ for (unit = nr; unit < nv40->nr_samplers; unit++) {
+ nv40->tex_sampler[unit] = NULL;
+ nv40->dirty_samplers |= (1 << unit);
+ }
+
+ nv40->nr_samplers = nr;
+ nv40->dirty |= NV40_NEW_SAMPLER;
+}
+
+static void
+nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nv40_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+ struct pipe_texture **miptree)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ pipe_texture_reference((struct pipe_texture **)
+ &nv40->tex_miptree[unit], miptree[unit]);
+ nv40->dirty_samplers |= (1 << unit);
+ }
+
+ for (unit = nr; unit < nv40->nr_textures; unit++) {
+ pipe_texture_reference((struct pipe_texture **)
+ &nv40->tex_miptree[unit], NULL);
+ nv40->dirty_samplers |= (1 << unit);
+ }
+
+ nv40->nr_textures = nr;
+ nv40->dirty |= NV40_NEW_SAMPLER;
+}
+
+static void *
+nv40_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
+ struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_grobj *curie = nv40->screen->curie;
+
+ /*XXX: ignored:
+ * light_twoside
+ * point_smooth -nohw
+ * multisample
+ */
+
+ so_method(so, curie, NV40TCL_SHADE_MODEL, 1);
+ so_data (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT :
+ NV40TCL_SHADE_MODEL_SMOOTH);
+
+ so_method(so, curie, NV40TCL_LINE_WIDTH, 2);
+ so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff);
+ so_data (so, cso->line_smooth ? 1 : 0);
+ so_method(so, curie, NV40TCL_LINE_STIPPLE_ENABLE, 2);
+ so_data (so, cso->line_stipple_enable ? 1 : 0);
+ so_data (so, (cso->line_stipple_pattern << 16) |
+ cso->line_stipple_factor);
+
+ so_method(so, curie, NV40TCL_POINT_SIZE, 1);
+ so_data (so, fui(cso->point_size));
+
+ so_method(so, curie, NV40TCL_POLYGON_MODE_FRONT, 6);
+ if (cso->front_winding == PIPE_WINDING_CCW) {
+ so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+ so_data(so, nvgl_polygon_mode(cso->fill_cw));
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ so_data(so, NV40TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_CW:
+ so_data(so, NV40TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_BOTH:
+ so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ so_data(so, NV40TCL_CULL_FACE_BACK);
+ break;
+ }
+ so_data(so, NV40TCL_FRONT_FACE_CCW);
+ } else {
+ so_data(so, nvgl_polygon_mode(cso->fill_cw));
+ so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ so_data(so, NV40TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_CW:
+ so_data(so, NV40TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_BOTH:
+ so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ so_data(so, NV40TCL_CULL_FACE_BACK);
+ break;
+ }
+ so_data(so, NV40TCL_FRONT_FACE_CW);
+ }
+ so_data(so, cso->poly_smooth ? 1 : 0);
+ so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0);
+
+ so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, cso->poly_stipple_enable ? 1 : 0);
+
+ so_method(so, curie, NV40TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if (cso->offset_cw || cso->offset_ccw) {
+ so_method(so, curie, NV40TCL_POLYGON_OFFSET_FACTOR, 2);
+ so_data (so, fui(cso->offset_scale));
+ so_data (so, fui(cso->offset_units * 2));
+ }
+
+ so_method(so, curie, NV40TCL_POINT_SPRITE, 1);
+ if (cso->point_sprite) {
+ unsigned psctl = (1 << 0), i;
+
+ for (i = 0; i < 8; i++) {
+ if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE)
+ psctl |= (1 << (8 + i));
+ }
+
+ so_data(so, psctl);
+ } else {
+ so_data(so, 0);
+ }
+
+ so_ref(so, &rsso->so);
+ rsso->pipe = *cso;
+ return (void *)rsso;
+}
+
+static void
+nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->rasterizer = hwcso;
+ nv40->dirty |= NV40_NEW_RAST;
+ nv40->draw_dirty |= NV40_NEW_RAST;
+}
+
+static void
+nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_rasterizer_state *rsso = hwcso;
+
+ so_ref(NULL, &rsso->so);
+ FREE(rsso);
+}
+
+static void *
+nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
+ struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_grobj *curie = nv40->screen->curie;
+
+ so_method(so, curie, NV40TCL_DEPTH_FUNC, 3);
+ so_data (so, nvgl_comparison_op(cso->depth.func));
+ so_data (so, cso->depth.writemask ? 1 : 0);
+ so_data (so, cso->depth.enabled ? 1 : 0);
+
+ so_method(so, curie, NV40TCL_ALPHA_TEST_ENABLE, 3);
+ so_data (so, cso->alpha.enabled ? 1 : 0);
+ so_data (so, nvgl_comparison_op(cso->alpha.func));
+ so_data (so, float_to_ubyte(cso->alpha.ref));
+
+ if (cso->stencil[0].enabled) {
+ so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 8);
+ so_data (so, cso->stencil[0].enabled ? 1 : 0);
+ so_data (so, cso->stencil[0].write_mask);
+ so_data (so, nvgl_comparison_op(cso->stencil[0].func));
+ so_data (so, cso->stencil[0].ref_value);
+ so_data (so, cso->stencil[0].value_mask);
+ so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+ } else {
+ so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ if (cso->stencil[1].enabled) {
+ so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 8);
+ so_data (so, cso->stencil[1].enabled ? 1 : 0);
+ so_data (so, cso->stencil[1].write_mask);
+ so_data (so, nvgl_comparison_op(cso->stencil[1].func));
+ so_data (so, cso->stencil[1].ref_value);
+ so_data (so, cso->stencil[1].value_mask);
+ so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+ } else {
+ so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_ref(so, &zsaso->so);
+ zsaso->pipe = *cso;
+ return (void *)zsaso;
+}
+
+static void
+nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->zsa = hwcso;
+ nv40->dirty |= NV40_NEW_ZSA;
+}
+
+static void
+nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_zsa_state *zsaso = hwcso;
+
+ so_ref(NULL, &zsaso->so);
+ FREE(zsaso);
+}
+
+static void *
+nv40_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_vertex_program *vp;
+
+ vp = CALLOC(1, sizeof(struct nv40_vertex_program));
+ vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+ vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe);
+
+ return (void *)vp;
+}
+
+static void
+nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->vertprog = hwcso;
+ nv40->dirty |= NV40_NEW_VERTPROG;
+ nv40->draw_dirty |= NV40_NEW_VERTPROG;
+}
+
+static void
+nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_vertex_program *vp = hwcso;
+
+ draw_delete_vertex_shader(nv40->draw, vp->draw);
+ nv40_vertprog_destroy(nv40, vp);
+ FREE((void*)vp->pipe.tokens);
+ FREE(vp);
+}
+
+static void *
+nv40_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv40_fragment_program *fp;
+
+ fp = CALLOC(1, sizeof(struct nv40_fragment_program));
+ fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ tgsi_scan_shader(fp->pipe.tokens, &fp->info);
+
+ return (void *)fp;
+}
+
+static void
+nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->fragprog = hwcso;
+ nv40->dirty |= NV40_NEW_FRAGPROG;
+}
+
+static void
+nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_fragment_program *fp = hwcso;
+
+ nv40_fragprog_destroy(nv40, fp);
+ FREE((void*)fp->pipe.tokens);
+ FREE(fp);
+}
+
+static void
+nv40_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->blend_colour = *bcol;
+ nv40->dirty |= NV40_NEW_BCOL;
+}
+
+static void
+nv40_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->clip = *clip;
+ nv40->dirty |= NV40_NEW_UCP;
+ nv40->draw_dirty |= NV40_NEW_UCP;
+}
+
+static void
+nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ const struct pipe_constant_buffer *buf )
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->constbuf[shader] = buf->buffer;
+ nv40->constbuf_nr[shader] = buf->size / (4 * sizeof(float));
+
+ if (shader == PIPE_SHADER_VERTEX) {
+ nv40->dirty |= NV40_NEW_VERTPROG;
+ } else
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ nv40->dirty |= NV40_NEW_FRAGPROG;
+ }
+}
+
+static void
+nv40_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->framebuffer = *fb;
+ nv40->dirty |= NV40_NEW_FB;
+}
+
+static void
+nv40_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ memcpy(nv40->stipple, stipple->stipple, 4 * 32);
+ nv40->dirty |= NV40_NEW_STIPPLE;
+}
+
+static void
+nv40_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->scissor = *s;
+ nv40->dirty |= NV40_NEW_SCISSOR;
+}
+
+static void
+nv40_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->viewport = *vpt;
+ nv40->dirty |= NV40_NEW_VIEWPORT;
+ nv40->draw_dirty |= NV40_NEW_VIEWPORT;
+}
+
+static void
+nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ memcpy(nv40->vtxbuf, vb, sizeof(*vb) * count);
+ nv40->vtxbuf_nr = count;
+
+ nv40->dirty |= NV40_NEW_ARRAYS;
+ nv40->draw_dirty |= NV40_NEW_ARRAYS;
+}
+
+static void
+nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_element *ve)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ memcpy(nv40->vtxelt, ve, sizeof(*ve) * count);
+ nv40->vtxelt_nr = count;
+
+ nv40->dirty |= NV40_NEW_ARRAYS;
+ nv40->draw_dirty |= NV40_NEW_ARRAYS;
+}
+
+static void
+nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+
+ nv40->edgeflags = bitfield;
+ nv40->dirty |= NV40_NEW_ARRAYS;
+ nv40->draw_dirty |= NV40_NEW_ARRAYS;
+}
+
+void
+nv40_init_state_functions(struct nv40_context *nv40)
+{
+ nv40->pipe.create_blend_state = nv40_blend_state_create;
+ nv40->pipe.bind_blend_state = nv40_blend_state_bind;
+ nv40->pipe.delete_blend_state = nv40_blend_state_delete;
+
+ nv40->pipe.create_sampler_state = nv40_sampler_state_create;
+ nv40->pipe.bind_sampler_states = nv40_sampler_state_bind;
+ nv40->pipe.delete_sampler_state = nv40_sampler_state_delete;
+ nv40->pipe.set_sampler_textures = nv40_set_sampler_texture;
+
+ nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create;
+ nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind;
+ nv40->pipe.delete_rasterizer_state = nv40_rasterizer_state_delete;
+
+ nv40->pipe.create_depth_stencil_alpha_state =
+ nv40_depth_stencil_alpha_state_create;
+ nv40->pipe.bind_depth_stencil_alpha_state =
+ nv40_depth_stencil_alpha_state_bind;
+ nv40->pipe.delete_depth_stencil_alpha_state =
+ nv40_depth_stencil_alpha_state_delete;
+
+ nv40->pipe.create_vs_state = nv40_vp_state_create;
+ nv40->pipe.bind_vs_state = nv40_vp_state_bind;
+ nv40->pipe.delete_vs_state = nv40_vp_state_delete;
+
+ nv40->pipe.create_fs_state = nv40_fp_state_create;
+ nv40->pipe.bind_fs_state = nv40_fp_state_bind;
+ nv40->pipe.delete_fs_state = nv40_fp_state_delete;
+
+ nv40->pipe.set_blend_color = nv40_set_blend_color;
+ nv40->pipe.set_clip_state = nv40_set_clip_state;
+ nv40->pipe.set_constant_buffer = nv40_set_constant_buffer;
+ nv40->pipe.set_framebuffer_state = nv40_set_framebuffer_state;
+ nv40->pipe.set_polygon_stipple = nv40_set_polygon_stipple;
+ nv40->pipe.set_scissor_state = nv40_set_scissor_state;
+ nv40->pipe.set_viewport_state = nv40_set_viewport_state;
+
+ nv40->pipe.set_edgeflags = nv40_set_edgeflags;
+ nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers;
+ nv40->pipe.set_vertex_elements = nv40_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h
new file mode 100644
index 00000000000..9c55903ae30
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state.h
@@ -0,0 +1,91 @@
+#ifndef __NV40_STATE_H__
+#define __NV40_STATE_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+struct nv40_sampler_state {
+ uint32_t fmt;
+ uint32_t wrap;
+ uint32_t en;
+ uint32_t filt;
+ uint32_t bcol;
+};
+
+struct nv40_vertex_program_exec {
+ uint32_t data[4];
+ boolean has_branch_offset;
+ int const_index;
+};
+
+struct nv40_vertex_program_data {
+ int index; /* immediates == -1 */
+ float value[4];
+};
+
+struct nv40_vertex_program {
+ struct pipe_shader_state pipe;
+
+ struct draw_vertex_shader *draw;
+
+ boolean translated;
+
+ struct pipe_clip_state ucp;
+
+ struct nv40_vertex_program_exec *insns;
+ unsigned nr_insns;
+ struct nv40_vertex_program_data *consts;
+ unsigned nr_consts;
+
+ struct nouveau_resource *exec;
+ unsigned exec_start;
+ struct nouveau_resource *data;
+ unsigned data_start;
+ unsigned data_start_min;
+
+ uint32_t ir;
+ uint32_t or;
+ uint32_t clip_ctrl;
+ struct nouveau_stateobj *so;
+};
+
+struct nv40_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+struct nv40_fragment_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+
+ boolean translated;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nv40_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ struct pipe_buffer *buffer;
+
+ uint32_t fp_control;
+ struct nouveau_stateobj *so;
+};
+
+struct nv40_miptree {
+ struct pipe_texture base;
+
+ struct pipe_buffer *buffer;
+ uint total_size;
+
+ struct pipe_texture *shadow_tex;
+ struct pipe_surface *shadow_surface;
+
+ struct {
+ uint pitch;
+ uint *image_offset;
+ } level[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+#endif
diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c
new file mode 100644
index 00000000000..95e6d7394f4
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_blend.c
@@ -0,0 +1,40 @@
+#include "nv40_context.h"
+
+static boolean
+nv40_state_blend_validate(struct nv40_context *nv40)
+{
+ so_ref(nv40->blend->so, &nv40->state.hw[NV40_STATE_BLEND]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_blend = {
+ .validate = nv40_state_blend_validate,
+ .dirty = {
+ .pipe = NV40_NEW_BLEND,
+ .hw = NV40_STATE_BLEND
+ }
+};
+
+static boolean
+nv40_state_blend_colour_validate(struct nv40_context *nv40)
+{
+ struct nouveau_stateobj *so = so_new(2, 0);
+ struct pipe_blend_color *bcol = &nv40->blend_colour;
+
+ so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1);
+ so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) |
+ (float_to_ubyte(bcol->color[0]) << 16) |
+ (float_to_ubyte(bcol->color[1]) << 8) |
+ (float_to_ubyte(bcol->color[2]) << 0)));
+
+ so_ref(so, &nv40->state.hw[NV40_STATE_BCOL]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_blend_colour = {
+ .validate = nv40_state_blend_colour_validate,
+ .dirty = {
+ .pipe = NV40_NEW_BCOL,
+ .hw = NV40_STATE_BCOL
+ }
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
new file mode 100644
index 00000000000..ab88dc416e5
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -0,0 +1,184 @@
+#include "nv40_context.h"
+#include "nv40_state.h"
+#include "draw/draw_context.h"
+
+static struct nv40_state_entry *render_states[] = {
+ &nv40_state_framebuffer,
+ &nv40_state_rasterizer,
+ &nv40_state_scissor,
+ &nv40_state_stipple,
+ &nv40_state_fragprog,
+ &nv40_state_fragtex,
+ &nv40_state_vertprog,
+ &nv40_state_blend,
+ &nv40_state_blend_colour,
+ &nv40_state_zsa,
+ &nv40_state_viewport,
+ &nv40_state_vbo,
+ NULL
+};
+
+static struct nv40_state_entry *swtnl_states[] = {
+ &nv40_state_framebuffer,
+ &nv40_state_rasterizer,
+ &nv40_state_scissor,
+ &nv40_state_stipple,
+ &nv40_state_fragprog,
+ &nv40_state_fragtex,
+ &nv40_state_vertprog,
+ &nv40_state_blend,
+ &nv40_state_blend_colour,
+ &nv40_state_zsa,
+ &nv40_state_viewport,
+ &nv40_state_vtxfmt,
+ NULL
+};
+
+static void
+nv40_state_do_validate(struct nv40_context *nv40,
+ struct nv40_state_entry **states)
+{
+ const struct pipe_framebuffer_state *fb = &nv40->framebuffer;
+ unsigned i;
+
+ for (i = 0; i < fb->num_cbufs; i++)
+ fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED;
+ if (fb->zsbuf)
+ fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED;
+
+ while (*states) {
+ struct nv40_state_entry *e = *states;
+
+ if (nv40->dirty & e->dirty.pipe) {
+ if (e->validate(nv40))
+ nv40->state.dirty |= (1ULL << e->dirty.hw);
+ }
+
+ states++;
+ }
+ nv40->dirty = 0;
+}
+
+void
+nv40_state_emit(struct nv40_context *nv40)
+{
+ struct nv40_state *state = &nv40->state;
+ struct nv40_screen *screen = nv40->screen;
+ unsigned i, samplers;
+ uint64 states;
+
+ if (nv40->pctx_id != screen->cur_pctx) {
+ for (i = 0; i < NV40_STATE_MAX; i++) {
+ if (state->hw[i] && screen->state[i] != state->hw[i])
+ state->dirty |= (1ULL << i);
+ }
+
+ screen->cur_pctx = nv40->pctx_id;
+ }
+
+ for (i = 0, states = state->dirty; states; i++) {
+ if (!(states & (1ULL << i)))
+ continue;
+ so_ref (state->hw[i], &nv40->screen->state[i]);
+ if (state->hw[i])
+ so_emit(nv40->nvws, nv40->screen->state[i]);
+ states &= ~(1ULL << i);
+ }
+
+ if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) |
+ (1ULL << NV40_STATE_FRAGTEX0))) {
+ BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (2);
+ BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (1);
+ }
+
+ state->dirty = 0;
+
+ so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]);
+ for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
+ if (!(samplers & (1 << i)))
+ continue;
+ so_emit_reloc_markers(nv40->nvws,
+ state->hw[NV40_STATE_FRAGTEX0+i]);
+ samplers &= ~(1ULL << i);
+ }
+ so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGPROG]);
+ if (state->hw[NV40_STATE_VTXBUF] && nv40->render_mode == HW)
+ so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]);
+}
+
+boolean
+nv40_state_validate(struct nv40_context *nv40)
+{
+ boolean was_sw = nv40->fallback_swtnl ? TRUE : FALSE;
+
+ if (nv40->render_mode != HW) {
+ /* Don't even bother trying to go back to hw if none
+ * of the states that caused swtnl previously have changed.
+ */
+ if ((nv40->fallback_swtnl & nv40->dirty)
+ != nv40->fallback_swtnl)
+ return FALSE;
+
+ /* Attempt to go to hwtnl again */
+ nv40->pipe.flush(&nv40->pipe, 0, NULL);
+ nv40->dirty |= (NV40_NEW_VIEWPORT |
+ NV40_NEW_VERTPROG |
+ NV40_NEW_ARRAYS);
+ nv40->render_mode = HW;
+ }
+
+ nv40_state_do_validate(nv40, render_states);
+ if (nv40->fallback_swtnl || nv40->fallback_swrast)
+ return FALSE;
+
+ if (was_sw)
+ NOUVEAU_ERR("swtnl->hw\n");
+
+ return TRUE;
+}
+
+boolean
+nv40_state_validate_swtnl(struct nv40_context *nv40)
+{
+ struct draw_context *draw = nv40->draw;
+
+ /* Setup for swtnl */
+ if (nv40->render_mode == HW) {
+ NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl);
+ nv40->pipe.flush(&nv40->pipe, 0, NULL);
+ nv40->dirty |= (NV40_NEW_VIEWPORT |
+ NV40_NEW_VERTPROG |
+ NV40_NEW_ARRAYS);
+ nv40->render_mode = SWTNL;
+ }
+
+ if (nv40->draw_dirty & NV40_NEW_VERTPROG)
+ draw_bind_vertex_shader(draw, nv40->vertprog->draw);
+
+ if (nv40->draw_dirty & NV40_NEW_RAST)
+ draw_set_rasterizer_state(draw, &nv40->rasterizer->pipe);
+
+ if (nv40->draw_dirty & NV40_NEW_UCP)
+ draw_set_clip_state(draw, &nv40->clip);
+
+ if (nv40->draw_dirty & NV40_NEW_VIEWPORT)
+ draw_set_viewport_state(draw, &nv40->viewport);
+
+ if (nv40->draw_dirty & NV40_NEW_ARRAYS) {
+ draw_set_edgeflags(draw, nv40->edgeflags);
+ draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf);
+ draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt);
+ }
+
+ nv40_state_do_validate(nv40, swtnl_states);
+ if (nv40->fallback_swrast) {
+ NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast);
+ return FALSE;
+ }
+
+ nv40->draw_dirty = 0;
+ return TRUE;
+}
+
diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c
new file mode 100644
index 00000000000..28592d71c37
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_fb.c
@@ -0,0 +1,155 @@
+#include "nv40_context.h"
+#include "nouveau/nouveau_util.h"
+
+static boolean
+nv40_state_framebuffer_validate(struct nv40_context *nv40)
+{
+ struct pipe_framebuffer_state *fb = &nv40->framebuffer;
+ struct pipe_surface *rt[4], *zeta;
+ uint32_t rt_enable, rt_format;
+ int i, colour_format = 0, zeta_format = 0;
+ struct nouveau_stateobj *so = so_new(64, 10);
+ unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
+ unsigned w = fb->width;
+ unsigned h = fb->height;
+
+ rt_enable = 0;
+ for (i = 0; i < fb->num_cbufs; i++) {
+ if (colour_format) {
+ assert(colour_format == fb->cbufs[i]->format);
+ } else {
+ colour_format = fb->cbufs[i]->format;
+ rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i);
+ rt[i] = fb->cbufs[i];
+ }
+ }
+
+ if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 |
+ NV40TCL_RT_ENABLE_COLOR3))
+ rt_enable |= NV40TCL_RT_ENABLE_MRT;
+
+ if (fb->zsbuf) {
+ zeta_format = fb->zsbuf->format;
+ zeta = fb->zsbuf;
+ }
+
+ if (!(rt[0]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+ for (i = 1; i < fb->num_cbufs; i++)
+ assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));
+
+ rt_format = NV40TCL_RT_FORMAT_TYPE_SWIZZLED |
+ log2i(fb->width) << NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT |
+ log2i(fb->height) << NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT;
+ }
+ else
+ rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR;
+
+ switch (colour_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case 0:
+ rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8;
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5;
+ break;
+ default:
+ assert(0);
+ }
+
+ switch (zeta_format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16;
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case 0:
+ rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
+ so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR0, 1);
+ so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv40->nvws->channel->vram->handle,
+ nv40->nvws->channel->gart->handle);
+ so_method(so, nv40->screen->curie, NV40TCL_COLOR0_PITCH, 2);
+ so_data (so, rt[0]->stride);
+ so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ }
+
+ if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
+ so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR1, 1);
+ so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv40->nvws->channel->vram->handle,
+ nv40->nvws->channel->gart->handle);
+ so_method(so, nv40->screen->curie, NV40TCL_COLOR1_OFFSET, 2);
+ so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, rt[1]->stride);
+ }
+
+ if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
+ so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR2, 1);
+ so_reloc (so, rt[2]->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv40->nvws->channel->vram->handle,
+ nv40->nvws->channel->gart->handle);
+ so_method(so, nv40->screen->curie, NV40TCL_COLOR2_OFFSET, 1);
+ so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, nv40->screen->curie, NV40TCL_COLOR2_PITCH, 1);
+ so_data (so, rt[2]->stride);
+ }
+
+ if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
+ so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR3, 1);
+ so_reloc (so, rt[3]->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv40->nvws->channel->vram->handle,
+ nv40->nvws->channel->gart->handle);
+ so_method(so, nv40->screen->curie, NV40TCL_COLOR3_OFFSET, 1);
+ so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, nv40->screen->curie, NV40TCL_COLOR3_PITCH, 1);
+ so_data (so, rt[3]->stride);
+ }
+
+ if (zeta_format) {
+ so_method(so, nv40->screen->curie, NV40TCL_DMA_ZETA, 1);
+ so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR,
+ nv40->nvws->channel->vram->handle,
+ nv40->nvws->channel->gart->handle);
+ so_method(so, nv40->screen->curie, NV40TCL_ZETA_OFFSET, 1);
+ so_reloc (so, zeta->buffer, zeta->offset, rt_flags |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, nv40->screen->curie, NV40TCL_ZETA_PITCH, 1);
+ so_data (so, zeta->stride);
+ }
+
+ so_method(so, nv40->screen->curie, NV40TCL_RT_ENABLE, 1);
+ so_data (so, rt_enable);
+ so_method(so, nv40->screen->curie, NV40TCL_RT_HORIZ, 3);
+ so_data (so, (w << 16) | 0);
+ so_data (so, (h << 16) | 0);
+ so_data (so, rt_format);
+ so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_HORIZ, 2);
+ so_data (so, (w << 16) | 0);
+ so_data (so, (h << 16) | 0);
+ so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+ so_data (so, ((w - 1) << 16) | 0);
+ so_data (so, ((h - 1) << 16) | 0);
+ so_method(so, nv40->screen->curie, 0x1d88, 1);
+ so_data (so, (1 << 12) | h);
+
+ so_ref(so, &nv40->state.hw[NV40_STATE_FB]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_framebuffer = {
+ .validate = nv40_state_framebuffer_validate,
+ .dirty = {
+ .pipe = NV40_NEW_FB,
+ .hw = NV40_STATE_FB
+ }
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_rasterizer.c b/src/gallium/drivers/nv40/nv40_state_rasterizer.c
new file mode 100644
index 00000000000..9ecda5990f0
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_rasterizer.c
@@ -0,0 +1,17 @@
+#include "nv40_context.h"
+
+static boolean
+nv40_state_rasterizer_validate(struct nv40_context *nv40)
+{
+ so_ref(nv40->rasterizer->so,
+ &nv40->state.hw[NV40_STATE_RAST]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_rasterizer = {
+ .validate = nv40_state_rasterizer_validate,
+ .dirty = {
+ .pipe = NV40_NEW_RAST,
+ .hw = NV40_STATE_RAST
+ }
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c
new file mode 100644
index 00000000000..285239ef419
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_scissor.c
@@ -0,0 +1,35 @@
+#include "nv40_context.h"
+
+static boolean
+nv40_state_scissor_validate(struct nv40_context *nv40)
+{
+ struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe;
+ struct pipe_scissor_state *s = &nv40->scissor;
+ struct nouveau_stateobj *so;
+
+ if (nv40->state.hw[NV40_STATE_SCISSOR] &&
+ (rast->scissor == 0 && nv40->state.scissor_enabled == 0))
+ return FALSE;
+ nv40->state.scissor_enabled = rast->scissor;
+
+ so = so_new(3, 0);
+ so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2);
+ if (nv40->state.scissor_enabled) {
+ so_data (so, ((s->maxx - s->minx) << 16) | s->minx);
+ so_data (so, ((s->maxy - s->miny) << 16) | s->miny);
+ } else {
+ so_data (so, 4096 << 16);
+ so_data (so, 4096 << 16);
+ }
+
+ so_ref(so, &nv40->state.hw[NV40_STATE_SCISSOR]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_scissor = {
+ .validate = nv40_state_scissor_validate,
+ .dirty = {
+ .pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST,
+ .hw = NV40_STATE_SCISSOR
+ }
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c
new file mode 100644
index 00000000000..b51024ad9b2
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_stipple.c
@@ -0,0 +1,39 @@
+#include "nv40_context.h"
+
+static boolean
+nv40_state_stipple_validate(struct nv40_context *nv40)
+{
+ struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe;
+ struct nouveau_grobj *curie = nv40->screen->curie;
+ struct nouveau_stateobj *so;
+
+ if (nv40->state.hw[NV40_STATE_STIPPLE] &&
+ (rast->poly_stipple_enable == 0 && nv40->state.stipple_enabled == 0))
+ return FALSE;
+
+ if (rast->poly_stipple_enable) {
+ unsigned i;
+
+ so = so_new(35, 0);
+ so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, 1);
+ so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+ for (i = 0; i < 32; i++)
+ so_data(so, nv40->stipple[i]);
+ } else {
+ so = so_new(2, 0);
+ so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_ref(so, &nv40->state.hw[NV40_STATE_STIPPLE]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_stipple = {
+ .validate = nv40_state_stipple_validate,
+ .dirty = {
+ .pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST,
+ .hw = NV40_STATE_STIPPLE,
+ }
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c
new file mode 100644
index 00000000000..869a55b4053
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_viewport.c
@@ -0,0 +1,67 @@
+#include "nv40_context.h"
+
+static boolean
+nv40_state_viewport_validate(struct nv40_context *nv40)
+{
+ struct pipe_viewport_state *vpt = &nv40->viewport;
+ struct nouveau_stateobj *so;
+ unsigned bypass;
+
+ if (nv40->render_mode == HW && !nv40->rasterizer->pipe.bypass_clipping)
+ bypass = 0;
+ else
+ bypass = 1;
+
+ if (nv40->state.hw[NV40_STATE_VIEWPORT] &&
+ (bypass || !(nv40->dirty & NV40_NEW_VIEWPORT)) &&
+ nv40->state.viewport_bypass == bypass)
+ return FALSE;
+ nv40->state.viewport_bypass = bypass;
+
+ so = so_new(11, 0);
+ if (!bypass) {
+ so_method(so, nv40->screen->curie,
+ NV40TCL_VIEWPORT_TRANSLATE_X, 8);
+ so_data (so, fui(vpt->translate[0]));
+ so_data (so, fui(vpt->translate[1]));
+ so_data (so, fui(vpt->translate[2]));
+ so_data (so, fui(vpt->translate[3]));
+ so_data (so, fui(vpt->scale[0]));
+ so_data (so, fui(vpt->scale[1]));
+ so_data (so, fui(vpt->scale[2]));
+ so_data (so, fui(vpt->scale[3]));
+ so_method(so, nv40->screen->curie, 0x1d78, 1);
+ so_data (so, 1);
+ } else {
+ so_method(so, nv40->screen->curie,
+ NV40TCL_VIEWPORT_TRANSLATE_X, 8);
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(0.0));
+ /* Not entirely certain what this is yet. The DDX uses this
+ * value also as it fixes rendering when you pass
+ * pre-transformed vertices to the GPU. My best gusss is that
+ * this bypasses some culling/clipping stage. Might be worth
+ * noting that points/lines are uneffected by whatever this
+ * value fixes, only filled polygons are effected.
+ */
+ so_method(so, nv40->screen->curie, 0x1d78, 1);
+ so_data (so, 0x110);
+ }
+
+ so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_viewport = {
+ .validate = nv40_state_viewport_validate,
+ .dirty = {
+ .pipe = NV40_NEW_VIEWPORT | NV40_NEW_RAST,
+ .hw = NV40_STATE_VIEWPORT
+ }
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_zsa.c b/src/gallium/drivers/nv40/nv40_state_zsa.c
new file mode 100644
index 00000000000..fb760677c88
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_zsa.c
@@ -0,0 +1,17 @@
+#include "nv40_context.h"
+
+static boolean
+nv40_state_zsa_validate(struct nv40_context *nv40)
+{
+ so_ref(nv40->zsa->so,
+ &nv40->state.hw[NV40_STATE_ZSA]);
+ return TRUE;
+}
+
+struct nv40_state_entry nv40_state_zsa = {
+ .validate = nv40_state_zsa_validate,
+ .dirty = {
+ .pipe = NV40_NEW_ZSA,
+ .hw = NV40_STATE_ZSA
+ }
+};
diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c
new file mode 100644
index 00000000000..576af7c59ee
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_surface.c
@@ -0,0 +1,77 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv40_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+
+#include "util/u_tile.h"
+
+static void
+nv40_surface_copy(struct pipe_context *pipe, boolean do_flip,
+ struct pipe_surface *dest, unsigned destx, unsigned desty,
+ struct pipe_surface *src, unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nouveau_winsys *nvws = nv40->nvws;
+
+ if (do_flip) {
+ /*XXX: This dodgyness will do for now for correctness. But,
+ * need to investigate whether the 2D engine is able to
+ * manage a flip (perhaps SIFM?), if not, use the 3D engine
+ */
+ desty += height;
+ while (height--) {
+ nvws->surface_copy(nvws, dest, destx, desty--, src,
+ srcx, srcy++, width, 1);
+ }
+ } else {
+ nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy,
+ width, height);
+ }
+}
+
+static void
+nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nouveau_winsys *nvws = nv40->nvws;
+
+ nvws->surface_fill(nvws, dest, destx, desty, width, height, value);
+}
+
+void
+nv40_init_surface_functions(struct nv40_context *nv40)
+{
+ nv40->pipe.surface_copy = nv40_surface_copy;
+ nv40->pipe.surface_fill = nv40_surface_fill;
+}
diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c
new file mode 100644
index 00000000000..09f6e79d32a
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_vbo.c
@@ -0,0 +1,555 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv40_context.h"
+#include "nv40_state.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_pushbuf.h"
+#include "nouveau/nouveau_util.h"
+
+#define FORCE_SWTNL 0
+
+static INLINE int
+nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
+{
+ switch (pipe) {
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_R32G32_FLOAT:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ *fmt = NV40TCL_VTXFMT_TYPE_FLOAT;
+ break;
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ *fmt = NV40TCL_VTXFMT_TYPE_UBYTE;
+ break;
+ case PIPE_FORMAT_R16_SSCALED:
+ case PIPE_FORMAT_R16G16_SSCALED:
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ *fmt = NV40TCL_VTXFMT_TYPE_USHORT;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe));
+ return 1;
+ }
+
+ switch (pipe) {
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_R16_SSCALED:
+ *ncomp = 1;
+ break;
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R32G32_FLOAT:
+ case PIPE_FORMAT_R16G16_SSCALED:
+ *ncomp = 2;
+ break;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ *ncomp = 3;
+ break;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ *ncomp = 4;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe));
+ return 1;
+ }
+
+ return 0;
+}
+
+static boolean
+nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib,
+ unsigned ib_size)
+{
+ struct pipe_screen *pscreen = &nv40->screen->pipe;
+ unsigned type;
+
+ if (!ib) {
+ nv40->idxbuf = NULL;
+ nv40->idxbuf_format = 0xdeadbeef;
+ return FALSE;
+ }
+
+ if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1)
+ return FALSE;
+
+ switch (ib_size) {
+ case 2:
+ type = NV40TCL_IDXBUF_FORMAT_TYPE_U16;
+ break;
+ case 4:
+ type = NV40TCL_IDXBUF_FORMAT_TYPE_U32;
+ break;
+ default:
+ return FALSE;
+ }
+
+ if (ib != nv40->idxbuf ||
+ type != nv40->idxbuf_format) {
+ nv40->dirty |= NV40_NEW_ARRAYS;
+ nv40->idxbuf = ib;
+ nv40->idxbuf_format = type;
+ }
+
+ return TRUE;
+}
+
+static boolean
+nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so,
+ int attrib, struct pipe_vertex_element *ve,
+ struct pipe_vertex_buffer *vb)
+{
+ struct pipe_winsys *ws = nv40->pipe.winsys;
+ struct nouveau_grobj *curie = nv40->screen->curie;
+ unsigned type, ncomp;
+ void *map;
+
+ if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp))
+ return FALSE;
+
+ map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+ map += vb->buffer_offset + ve->src_offset;
+
+ switch (type) {
+ case NV40TCL_VTXFMT_TYPE_FLOAT:
+ {
+ float *v = map;
+
+ switch (ncomp) {
+ case 4:
+ so_method(so, curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4);
+ so_data (so, fui(v[0]));
+ so_data (so, fui(v[1]));
+ so_data (so, fui(v[2]));
+ so_data (so, fui(v[3]));
+ break;
+ case 3:
+ so_method(so, curie, NV40TCL_VTX_ATTR_3F_X(attrib), 3);
+ so_data (so, fui(v[0]));
+ so_data (so, fui(v[1]));
+ so_data (so, fui(v[2]));
+ break;
+ case 2:
+ so_method(so, curie, NV40TCL_VTX_ATTR_2F_X(attrib), 2);
+ so_data (so, fui(v[0]));
+ so_data (so, fui(v[1]));
+ break;
+ case 1:
+ so_method(so, curie, NV40TCL_VTX_ATTR_1F(attrib), 1);
+ so_data (so, fui(v[0]));
+ break;
+ default:
+ ws->buffer_unmap(ws, vb->buffer);
+ return FALSE;
+ }
+ }
+ break;
+ default:
+ ws->buffer_unmap(ws, vb->buffer);
+ return FALSE;
+ }
+
+ ws->buffer_unmap(ws, vb->buffer);
+
+ return TRUE;
+}
+
+boolean
+nv40_draw_arrays(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nouveau_channel *chan = nv40->nvws->channel;
+ unsigned restart;
+
+ nv40_vbo_set_idxbuf(nv40, NULL, 0);
+ if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
+ return nv40_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);
+ }
+
+ while (count) {
+ unsigned vc, nr;
+
+ nv40_state_emit(nv40);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
+ mode, start, count, &restart);
+ if (!vc) {
+ FIRE_RING(NULL);
+ continue;
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ nr = (vc & 0xff);
+ if (nr) {
+ BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1);
+ OUT_RING (((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push);
+ while (push--) {
+ OUT_RING(((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ count -= vc;
+ start = restart;
+ }
+
+ pipe->flush(pipe, 0, NULL);
+ return TRUE;
+}
+
+static INLINE void
+nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv40->nvws->channel;
+
+ while (count) {
+ uint8_t *elts = (uint8_t *)ib + start;
+ unsigned vc, push, restart;
+
+ nv40_state_emit(nv40);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ if (vc & 1) {
+ BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+
+ push = MIN2(vc, 2047 * 2);
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((elts[i+1] << 16) | elts[i]);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+static INLINE void
+nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv40->nvws->channel;
+
+ while (count) {
+ uint16_t *elts = (uint16_t *)ib + start;
+ unsigned vc, push, restart;
+
+ nv40_state_emit(nv40);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ if (vc & 1) {
+ BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+
+ push = MIN2(vc, 2047 * 2);
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
+ for (i = 0; i < push; i+=2)
+ OUT_RING((elts[i+1] << 16) | elts[i]);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+static INLINE void
+nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv40->nvws->channel;
+
+ while (count) {
+ uint32_t *elts = (uint32_t *)ib + start;
+ unsigned vc, push, restart;
+
+ nv40_state_emit(nv40);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(NULL);
+ continue;
+ }
+ count -= vc;
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ while (vc) {
+ push = MIN2(vc, 2047);
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push);
+ OUT_RINGp (elts, push);
+
+ vc -= push;
+ elts += push;
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ start = restart;
+ }
+}
+
+static boolean
+nv40_draw_elements_inline(struct pipe_context *pipe,
+ struct pipe_buffer *ib, unsigned ib_size,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+ void *map;
+
+ map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ);
+ if (!ib) {
+ NOUVEAU_ERR("failed mapping ib\n");
+ return FALSE;
+ }
+
+ switch (ib_size) {
+ case 1:
+ nv40_draw_elements_u08(nv40, map, mode, start, count);
+ break;
+ case 2:
+ nv40_draw_elements_u16(nv40, map, mode, start, count);
+ break;
+ case 4:
+ nv40_draw_elements_u32(nv40, map, mode, start, count);
+ break;
+ default:
+ NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
+ break;
+ }
+
+ ws->buffer_unmap(ws, ib);
+ return TRUE;
+}
+
+static boolean
+nv40_draw_elements_vbo(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct nouveau_channel *chan = nv40->nvws->channel;
+ unsigned restart;
+
+ while (count) {
+ unsigned nr, vc;
+
+ nv40_state_emit(nv40);
+
+ vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
+ mode, start, count, &restart);
+ if (!vc) {
+ FIRE_RING(NULL);
+ continue;
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (nvgl_primitive(mode));
+
+ nr = (vc & 0xff);
+ if (nr) {
+ BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1);
+ OUT_RING (((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push);
+ while (push--) {
+ OUT_RING(((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (0);
+
+ count -= vc;
+ start = restart;
+ }
+
+ return TRUE;
+}
+
+boolean
+nv40_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer, unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ boolean idxbuf;
+
+ idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize);
+ if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
+ return nv40_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);
+ }
+
+ if (idxbuf) {
+ nv40_draw_elements_vbo(pipe, mode, start, count);
+ } else {
+ nv40_draw_elements_inline(pipe, indexBuffer, indexSize,
+ mode, start, count);
+ }
+
+ pipe->flush(pipe, 0, NULL);
+ return TRUE;
+}
+
+static boolean
+nv40_vbo_validate(struct nv40_context *nv40)
+{
+ struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL;
+ struct nouveau_grobj *curie = nv40->screen->curie;
+ struct pipe_buffer *ib = nv40->idxbuf;
+ unsigned ib_format = nv40->idxbuf_format;
+ unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+ int hw;
+
+ if (nv40->edgeflags) {
+ nv40->fallback_swtnl |= NV40_NEW_ARRAYS;
+ return FALSE;
+ }
+
+ vtxbuf = so_new(20, 18);
+ so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr);
+ vtxfmt = so_new(17, 0);
+ so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr);
+
+ for (hw = 0; hw < nv40->vtxelt_nr; hw++) {
+ struct pipe_vertex_element *ve;
+ struct pipe_vertex_buffer *vb;
+ unsigned type, ncomp;
+
+ ve = &nv40->vtxelt[hw];
+ vb = &nv40->vtxbuf[ve->vertex_buffer_index];
+
+ if (!vb->pitch) {
+ if (!sattr)
+ sattr = so_new(16 * 5, 0);
+
+ if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) {
+ so_data(vtxbuf, 0);
+ so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT);
+ continue;
+ }
+ }
+
+ if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
+ nv40->fallback_swtnl |= NV40_NEW_ARRAYS;
+ so_ref(NULL, &vtxbuf);
+ so_ref(NULL, &vtxfmt);
+ return FALSE;
+ }
+
+ so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset,
+ vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ 0, NV40TCL_VTXBUF_ADDRESS_DMA1);
+ so_data (vtxfmt, ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) |
+ (ncomp << NV40TCL_VTXFMT_SIZE_SHIFT) | type));
+ }
+
+ if (ib) {
+ so_method(vtxbuf, curie, NV40TCL_IDXBUF_ADDRESS, 2);
+ so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR,
+ 0, NV40TCL_IDXBUF_FORMAT_DMA1);
+ }
+
+ so_method(vtxbuf, curie, 0x1710, 1);
+ so_data (vtxbuf, 0);
+
+ so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]);
+ nv40->state.dirty |= (1ULL << NV40_STATE_VTXBUF);
+ so_ref(vtxfmt, &nv40->state.hw[NV40_STATE_VTXFMT]);
+ nv40->state.dirty |= (1ULL << NV40_STATE_VTXFMT);
+ so_ref(sattr, &nv40->state.hw[NV40_STATE_VTXATTR]);
+ nv40->state.dirty |= (1ULL << NV40_STATE_VTXATTR);
+ return FALSE;
+}
+
+struct nv40_state_entry nv40_state_vbo = {
+ .validate = nv40_vbo_validate,
+ .dirty = {
+ .pipe = NV40_NEW_ARRAYS,
+ .hw = 0,
+ }
+};
+
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
new file mode 100644
index 00000000000..ff988e6a5f4
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -0,0 +1,1070 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv40_context.h"
+#include "nv40_state.h"
+
+/* TODO (at least...):
+ * 1. Indexed consts + ARL
+ * 3. NV_vp11, NV_vp2, NV_vp3 features
+ * - extra arith opcodes
+ * - branching
+ * - texture sampling
+ * - indexed attribs
+ * - indexed results
+ * 4. bugs
+ */
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 8
+#define MASK_Y 4
+#define MASK_Z 2
+#define MASK_W 1
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE 0
+#define DEF_CTEST 0
+#include "nv40_shader.h"
+
+#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv40_sr_neg((s))
+#define abs(s) nv40_sr_abs((s))
+
+#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n))
+
+struct nv40_vpc {
+ struct nv40_vertex_program *vp;
+
+ struct nv40_vertex_program_exec *vpi;
+
+ unsigned r_temps;
+ unsigned r_temps_discard;
+ struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
+ struct nv40_sreg *r_address;
+ struct nv40_sreg *r_temp;
+
+ struct nv40_sreg *imm;
+ unsigned nr_imm;
+
+ unsigned hpos_idx;
+};
+
+static struct nv40_sreg
+temp(struct nv40_vpc *vpc)
+{
+ int idx = ffs(~vpc->r_temps) - 1;
+
+ if (idx < 0) {
+ NOUVEAU_ERR("out of temps!!\n");
+ assert(0);
+ return nv40_sr(NV40SR_TEMP, 0);
+ }
+
+ vpc->r_temps |= (1 << idx);
+ vpc->r_temps_discard |= (1 << idx);
+ return nv40_sr(NV40SR_TEMP, idx);
+}
+
+static INLINE void
+release_temps(struct nv40_vpc *vpc)
+{
+ vpc->r_temps &= ~vpc->r_temps_discard;
+ vpc->r_temps_discard = 0;
+}
+
+static struct nv40_sreg
+constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)
+{
+ struct nv40_vertex_program *vp = vpc->vp;
+ struct nv40_vertex_program_data *vpd;
+ int idx;
+
+ if (pipe >= 0) {
+ for (idx = 0; idx < vp->nr_consts; idx++) {
+ if (vp->consts[idx].index == pipe)
+ return nv40_sr(NV40SR_CONST, idx);
+ }
+ }
+
+ idx = vp->nr_consts++;
+ vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
+ vpd = &vp->consts[idx];
+
+ vpd->index = pipe;
+ vpd->value[0] = x;
+ vpd->value[1] = y;
+ vpd->value[2] = z;
+ vpd->value[3] = w;
+ return nv40_sr(NV40SR_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2))
+
+static void
+emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src)
+{
+ struct nv40_vertex_program *vp = vpc->vp;
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NV40SR_TEMP:
+ sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT);
+ sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT);
+ break;
+ case NV40SR_INPUT:
+ sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<
+ NV40_VP_SRC_REG_TYPE_SHIFT);
+ vp->ir |= (1 << src.index);
+ hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT);
+ break;
+ case NV40SR_CONST:
+ sr |= (NV40_VP_SRC_REG_TYPE_CONST <<
+ NV40_VP_SRC_REG_TYPE_SHIFT);
+ assert(vpc->vpi->const_index == -1 ||
+ vpc->vpi->const_index == src.index);
+ vpc->vpi->const_index = src.index;
+ break;
+ case NV40SR_NONE:
+ sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<
+ NV40_VP_SRC_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NV40_VP_SRC_NEGATE;
+
+ if (src.abs)
+ hw[0] |= (1 << (21 + pos));
+
+ sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) |
+ (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) |
+ (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) |
+ (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT));
+
+ switch (pos) {
+ case 0:
+ hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >>
+ NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT;
+ hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) <<
+ NV40_VP_INST_SRC0L_SHIFT;
+ break;
+ case 1:
+ hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT;
+ break;
+ case 2:
+ hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >>
+ NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT;
+ hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) <<
+ NV40_VP_INST_SRC2L_SHIFT;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
+emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
+{
+ struct nv40_vertex_program *vp = vpc->vp;
+
+ switch (dst.type) {
+ case NV40SR_TEMP:
+ hw[3] |= NV40_VP_INST_DEST_MASK;
+ if (slot == 0) {
+ hw[0] |= (dst.index <<
+ NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
+ } else {
+ hw[3] |= (dst.index <<
+ NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
+ }
+ break;
+ case NV40SR_OUTPUT:
+ switch (dst.index) {
+ case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+ case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+ case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+ case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+ case NV40_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break;
+ case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
+ case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
+ case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
+ case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
+ case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
+ case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
+ case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
+ case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
+ case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+ case NV40_VP_INST_DEST_CLIP(0):
+ vp->or |= (1 << 6);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(1):
+ vp->or |= (1 << 7);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(2):
+ vp->or |= (1 << 8);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(3):
+ vp->or |= (1 << 9);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ case NV40_VP_INST_DEST_CLIP(4):
+ vp->or |= (1 << 10);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ case NV40_VP_INST_DEST_CLIP(5):
+ vp->or |= (1 << 11);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ default:
+ break;
+ }
+
+ hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT);
+ if (slot == 0) {
+ hw[0] |= NV40_VP_INST_VEC_RESULT;
+ hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+ } else {
+ hw[3] |= NV40_VP_INST_SCA_RESULT;
+ hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
+nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op,
+ struct nv40_sreg dst, int mask,
+ struct nv40_sreg s0, struct nv40_sreg s1,
+ struct nv40_sreg s2)
+{
+ struct nv40_vertex_program *vp = vpc->vp;
+ uint32_t *hw;
+
+ vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
+ vpc->vpi = &vp->insns[vp->nr_insns - 1];
+ memset(vpc->vpi, 0, sizeof(*vpc->vpi));
+ vpc->vpi->const_index = -1;
+
+ hw = vpc->vpi->data;
+
+ hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT);
+ hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) |
+ (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) |
+ (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) |
+ (3 << NV40_VP_INST_COND_SWZ_W_SHIFT));
+
+ if (slot == 0) {
+ hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT);
+ hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
+ hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT);
+ } else {
+ hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT);
+ hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20));
+ hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT);
+ }
+
+ emit_dst(vpc, hw, slot, dst);
+ emit_src(vpc, hw, 0, s0);
+ emit_src(vpc, hw, 1, s1);
+ emit_src(vpc, hw, 2, s2);
+}
+
+static INLINE struct nv40_sreg
+tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
+ struct nv40_sreg src;
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index);
+ break;
+ case TGSI_FILE_CONSTANT:
+ src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ src = vpc->imm[fsrc->SrcRegister.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ src = vpc->r_temp[fsrc->SrcRegister.Index];
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ break;
+ }
+
+ src.abs = fsrc->SrcRegisterExtMod.Absolute;
+ src.negate = fsrc->SrcRegister.Negate;
+ src.swz[0] = fsrc->SrcRegister.SwizzleX;
+ src.swz[1] = fsrc->SrcRegister.SwizzleY;
+ src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+ src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ return src;
+}
+
+static INLINE struct nv40_sreg
+tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
+ struct nv40_sreg dst;
+
+ switch (fdst->DstRegister.File) {
+ case TGSI_FILE_OUTPUT:
+ dst = vpc->r_result[fdst->DstRegister.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ dst = vpc->r_temp[fdst->DstRegister.Index];
+ break;
+ case TGSI_FILE_ADDRESS:
+ dst = vpc->r_address[fdst->DstRegister.Index];
+ break;
+ default:
+ NOUVEAU_ERR("bad dst file\n");
+ break;
+ }
+
+ return dst;
+}
+
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+ int mask = 0;
+
+ if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
+ if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
+ if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
+ if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
+ return mask;
+}
+
+static boolean
+src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
+ struct nv40_sreg *src)
+{
+ const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+ struct nv40_sreg tgsi = tgsi_src(vpc, fsrc);
+ uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
+ uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
+ fsrc->SrcRegisterExtSwz.NegateY,
+ fsrc->SrcRegisterExtSwz.NegateZ,
+ fsrc->SrcRegisterExtSwz.NegateW };
+ uint c;
+
+ for (c = 0; c < 4; c++) {
+ switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ mask |= tgsi_mask(1 << c);
+ break;
+ case TGSI_EXTSWIZZLE_ZERO:
+ zero_mask |= tgsi_mask(1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ case TGSI_EXTSWIZZLE_ONE:
+ one_mask |= tgsi_mask(1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (!tgsi.negate && neg[c])
+ neg_mask |= tgsi_mask(1 << c);
+ }
+
+ if (mask == MASK_ALL && !neg_mask)
+ return TRUE;
+
+ *src = temp(vpc);
+
+ if (mask)
+ arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none);
+
+ if (zero_mask)
+ arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none);
+
+ if (one_mask)
+ arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none);
+
+ if (neg_mask) {
+ struct nv40_sreg one = temp(vpc);
+ arith(vpc, 0, OP_STR, one, neg_mask, one, none, none);
+ arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none);
+ }
+
+ return FALSE;
+}
+
+static boolean
+nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
+ const struct tgsi_full_instruction *finst)
+{
+ struct nv40_sreg src[3], dst, tmp;
+ struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+ int mask;
+ int ai = -1, ci = -1, ii = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(vpc, fsrc);
+ }
+ }
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_TEMPORARY:
+ if (!src_native_swz(vpc, fsrc, &src[i]))
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+ ai = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_CONSTANT:
+ if ((ci == -1 && ii == -1) ||
+ ci == fsrc->SrcRegister.Index) {
+ ci = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ if ((ci == -1 && ii == -1) ||
+ ii == fsrc->SrcRegister.Index) {
+ ii = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
+ mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
+ break;
+ case TGSI_OPCODE_ARL:
+ arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_EXP:
+ arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LIT:
+ arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_POW:
+ tmp = temp(vpc);
+ arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
+ swz(src[0], X, X, X, X));
+ arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+ swz(src[1], X, X, X, X), none);
+ arith(vpc, 1, OP_EX2, dst, mask, none, none,
+ swz(tmp, X, X, X, X));
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_RET:
+ break;
+ case TGSI_OPCODE_RSQ:
+ arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
+ break;
+ case TGSI_OPCODE_XPD:
+ tmp = temp(vpc);
+ arith(vpc, 0, OP_MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ release_temps(vpc);
+ return TRUE;
+}
+
+static boolean
+nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ unsigned idx = fdec->DeclarationRange.First;
+ int hw;
+
+ switch (fdec->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = NV40_VP_INST_DEST_POS;
+ vpc->hpos_idx = idx;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV40_VP_INST_DEST_COL0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV40_VP_INST_DEST_COL1;
+ } else {
+ NOUVEAU_ERR("bad colour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_BCOLOR:
+ if (fdec->Semantic.SemanticIndex == 0) {
+ hw = NV40_VP_INST_DEST_BFC0;
+ } else
+ if (fdec->Semantic.SemanticIndex == 1) {
+ hw = NV40_VP_INST_DEST_BFC1;
+ } else {
+ NOUVEAU_ERR("bad bcolour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ hw = NV40_VP_INST_DEST_FOGC;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ hw = NV40_VP_INST_DEST_PSZ;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ if (fdec->Semantic.SemanticIndex <= 7) {
+ hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+ } else {
+ NOUVEAU_ERR("bad generic semantic index\n");
+ return FALSE;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("bad output semantic\n");
+ return FALSE;
+ }
+
+ vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw);
+ return TRUE;
+}
+
+static boolean
+nv40_vertprog_prepare(struct nv40_vpc *vpc)
+{
+ struct tgsi_parse_context p;
+ int high_temp = -1, high_addr = -1, nr_imm = 0, i;
+
+ tgsi_parse_init(&p, vpc->vp->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch(tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ nr_imm++;
+ break;
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *fdec;
+
+ fdec = &p.FullToken.FullDeclaration;
+ switch (fdec->Declaration.File) {
+ case TGSI_FILE_TEMPORARY:
+ if (fdec->DeclarationRange.Last > high_temp) {
+ high_temp =
+ fdec->DeclarationRange.Last;
+ }
+ break;
+#if 0 /* this would be nice.. except gallium doesn't track it */
+ case TGSI_FILE_ADDRESS:
+ if (fdec->DeclarationRange.Last > high_addr) {
+ high_addr =
+ fdec->DeclarationRange.Last;
+ }
+ break;
+#endif
+ case TGSI_FILE_OUTPUT:
+ if (!nv40_vertprog_parse_decl_output(vpc, fdec))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+#if 1 /* yay, parse instructions looking for address regs instead */
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+ const struct tgsi_full_dst_register *fdst;
+
+ finst = &p.FullToken.FullInstruction;
+ fdst = &finst->FullDstRegisters[0];
+
+ if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) {
+ if (fdst->DstRegister.Index > high_addr)
+ high_addr = fdst->DstRegister.Index;
+ }
+
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+ tgsi_parse_free(&p);
+
+ if (nr_imm) {
+ vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg));
+ assert(vpc->imm);
+ }
+
+ if (++high_temp) {
+ vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg));
+ for (i = 0; i < high_temp; i++)
+ vpc->r_temp[i] = temp(vpc);
+ }
+
+ if (++high_addr) {
+ vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg));
+ for (i = 0; i < high_addr; i++)
+ vpc->r_address[i] = temp(vpc);
+ }
+
+ vpc->r_temps_discard = 0;
+ return TRUE;
+}
+
+static void
+nv40_vertprog_translate(struct nv40_context *nv40,
+ struct nv40_vertex_program *vp)
+{
+ struct tgsi_parse_context parse;
+ struct nv40_vpc *vpc = NULL;
+ struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+ int i;
+
+ vpc = CALLOC(1, sizeof(struct nv40_vpc));
+ if (!vpc)
+ return;
+ vpc->vp = vp;
+
+ if (!nv40_vertprog_prepare(vpc)) {
+ FREE(vpc);
+ return;
+ }
+
+ /* Redirect post-transform vertex position to a temp if user clip
+ * planes are enabled. We need to append code the the vtxprog
+ * to handle clip planes later.
+ */
+ if (vp->ucp.nr) {
+ vpc->r_result[vpc->hpos_idx] = temp(vpc);
+ vpc->r_temps_discard = 0;
+ }
+
+ tgsi_parse_init(&parse, vp->pipe.tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ const struct tgsi_full_immediate *imm;
+
+ imm = &parse.FullToken.FullImmediate;
+ assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+// assert(imm->Immediate.Size == 4);
+ vpc->imm[vpc->nr_imm++] =
+ constant(vpc, -1,
+ imm->u.ImmediateFloat32[0].Float,
+ imm->u.ImmediateFloat32[1].Float,
+ imm->u.ImmediateFloat32[2].Float,
+ imm->u.ImmediateFloat32[3].Float);
+ }
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+ finst = &parse.FullToken.FullInstruction;
+ if (!nv40_vertprog_parse_instruction(vpc, finst))
+ goto out_err;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Write out HPOS if it was redirected to a temp earlier */
+ if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) {
+ struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT,
+ NV40_VP_INST_DEST_POS);
+ struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+
+ arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none);
+ }
+
+ /* Insert code to handle user clip planes */
+ for (i = 0; i < vp->ucp.nr; i++) {
+ struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT,
+ NV40_VP_INST_DEST_CLIP(i));
+ struct nv40_sreg ceqn = constant(vpc, -1,
+ nv40->clip.ucp[i][0],
+ nv40->clip.ucp[i][1],
+ nv40->clip.ucp[i][2],
+ nv40->clip.ucp[i][3]);
+ struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+ unsigned mask;
+
+ switch (i) {
+ case 0: case 3: mask = MASK_Y; break;
+ case 1: case 4: mask = MASK_Z; break;
+ case 2: case 5: mask = MASK_W; break;
+ default:
+ NOUVEAU_ERR("invalid clip dist #%d\n", i);
+ goto out_err;
+ }
+
+ arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none);
+ }
+
+ vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST;
+ vp->translated = TRUE;
+out_err:
+ tgsi_parse_free(&parse);
+ if (vpc->r_temp)
+ FREE(vpc->r_temp);
+ if (vpc->r_address)
+ FREE(vpc->r_address);
+ if (vpc->imm)
+ FREE(vpc->imm);
+ FREE(vpc);
+}
+
+static boolean
+nv40_vertprog_validate(struct nv40_context *nv40)
+{
+ struct nouveau_winsys *nvws = nv40->nvws;
+ struct pipe_winsys *ws = nv40->pipe.winsys;
+ struct nouveau_grobj *curie = nv40->screen->curie;
+ struct nv40_vertex_program *vp;
+ struct pipe_buffer *constbuf;
+ boolean upload_code = FALSE, upload_data = FALSE;
+ int i;
+
+ if (nv40->render_mode == HW) {
+ vp = nv40->vertprog;
+ constbuf = nv40->constbuf[PIPE_SHADER_VERTEX];
+
+ if ((nv40->dirty & NV40_NEW_UCP) ||
+ memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) {
+ nv40_vertprog_destroy(nv40, vp);
+ memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp));
+ }
+ } else {
+ vp = nv40->swtnl.vertprog;
+ constbuf = NULL;
+ }
+
+ /* Translate TGSI shader into hw bytecode */
+ if (vp->translated)
+ goto check_gpu_resources;
+
+ nv40->fallback_swtnl &= ~NV40_NEW_VERTPROG;
+ nv40_vertprog_translate(nv40, vp);
+ if (!vp->translated) {
+ nv40->fallback_swtnl |= NV40_NEW_VERTPROG;
+ return FALSE;
+ }
+
+check_gpu_resources:
+ /* Allocate hw vtxprog exec slots */
+ if (!vp->exec) {
+ struct nouveau_resource *heap = nv40->screen->vp_exec_heap;
+ struct nouveau_stateobj *so;
+ uint vplen = vp->nr_insns;
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
+ while (heap->next && heap->size < vplen) {
+ struct nv40_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->exec);
+ }
+
+ if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
+ assert(0);
+ }
+
+ so = so_new(7, 0);
+ so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1);
+ so_data (so, vp->exec->start);
+ so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2);
+ so_data (so, vp->ir);
+ so_data (so, vp->or);
+ so_method(so, curie, NV40TCL_CLIP_PLANE_ENABLE, 1);
+ so_data (so, vp->clip_ctrl);
+ so_ref(so, &vp->so);
+
+ upload_code = TRUE;
+ }
+
+ /* Allocate hw vtxprog const slots */
+ if (vp->nr_consts && !vp->data) {
+ struct nouveau_resource *heap = nv40->screen->vp_data_heap;
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+ while (heap->next && heap->size < vp->nr_consts) {
+ struct nv40_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nvws->res_free(&evict->data);
+ }
+
+ if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
+ assert(0);
+ }
+
+ /*XXX: handle this some day */
+ assert(vp->data->start >= vp->data_start_min);
+
+ upload_data = TRUE;
+ if (vp->data_start != vp->data->start)
+ upload_code = TRUE;
+ }
+
+ /* If exec or data segments moved we need to patch the program to
+ * fixup offsets and register IDs.
+ */
+ if (vp->exec_start != vp->exec->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv40_vertex_program_exec *vpi = &vp->insns[i];
+
+ if (vpi->has_branch_offset) {
+ assert(0);
+ }
+ }
+
+ vp->exec_start = vp->exec->start;
+ }
+
+ if (vp->nr_consts && vp->data_start != vp->data->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nv40_vertex_program_exec *vpi = &vp->insns[i];
+
+ if (vpi->const_index >= 0) {
+ vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK;
+ vpi->data[1] |=
+ (vpi->const_index + vp->data->start) <<
+ NV40_VP_INST_CONST_SRC_SHIFT;
+
+ }
+ }
+
+ vp->data_start = vp->data->start;
+ }
+
+ /* Update + Upload constant values */
+ if (vp->nr_consts) {
+ float *map = NULL;
+
+ if (constbuf) {
+ map = ws->buffer_map(ws, constbuf,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ }
+
+ for (i = 0; i < vp->nr_consts; i++) {
+ struct nv40_vertex_program_data *vpd = &vp->consts[i];
+
+ if (vpd->index >= 0) {
+ if (!upload_data &&
+ !memcmp(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float)))
+ continue;
+ memcpy(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float));
+ }
+
+ BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
+ OUT_RING (i + vp->data->start);
+ OUT_RINGp ((uint32_t *)vpd->value, 4);
+ }
+
+ if (constbuf)
+ ws->buffer_unmap(ws, constbuf);
+ }
+
+ /* Upload vtxprog */
+ if (upload_code) {
+#if 0
+ for (i = 0; i < vp->nr_insns; i++) {
+ NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]);
+ NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]);
+ NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]);
+ NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]);
+ }
+#endif
+ BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
+ OUT_RING (vp->exec->start);
+ for (i = 0; i < vp->nr_insns; i++) {
+ BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4);
+ OUT_RINGp (vp->insns[i].data, 4);
+ }
+ }
+
+ if (vp->so != nv40->state.hw[NV40_STATE_VERTPROG]) {
+ so_ref(vp->so, &nv40->state.hw[NV40_STATE_VERTPROG]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+void
+nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp)
+{
+ struct nouveau_winsys *nvws = nv40->screen->nvws;
+
+ vp->translated = FALSE;
+
+ if (vp->nr_insns) {
+ FREE(vp->insns);
+ vp->insns = NULL;
+ vp->nr_insns = 0;
+ }
+
+ if (vp->nr_consts) {
+ FREE(vp->consts);
+ vp->consts = NULL;
+ vp->nr_consts = 0;
+ }
+
+ nvws->res_free(&vp->exec);
+ vp->exec_start = 0;
+ nvws->res_free(&vp->data);
+ vp->data_start = 0;
+ vp->data_start_min = 0;
+
+ vp->ir = vp->or = vp->clip_ctrl = 0;
+ so_ref(NULL, &vp->so);
+}
+
+struct nv40_state_entry nv40_state_vertprog = {
+ .validate = nv40_vertprog_validate,
+ .dirty = {
+ .pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP,
+ .hw = NV40_STATE_VERTPROG,
+ }
+};
+
diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile
new file mode 100644
index 00000000000..be30400c039
--- /dev/null
+++ b/src/gallium/drivers/nv50/Makefile
@@ -0,0 +1,29 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv50
+
+DRIVER_SOURCES = \
+ nv50_clear.c \
+ nv50_context.c \
+ nv50_draw.c \
+ nv50_miptree.c \
+ nv50_query.c \
+ nv50_program.c \
+ nv50_screen.c \
+ nv50_state.c \
+ nv50_state_validate.c \
+ nv50_surface.c \
+ nv50_tex.c \
+ nv50_vbo.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c
new file mode 100644
index 00000000000..a31a42d6b54
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_clear.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv50_context.h"
+
+void
+nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct pipe_framebuffer_state fb, s_fb = nv50->framebuffer;
+ struct pipe_scissor_state sc, s_sc = nv50->scissor;
+ unsigned dirty = nv50->dirty;
+
+ nv50->dirty = 0;
+
+ if (ps->format == PIPE_FORMAT_Z24S8_UNORM ||
+ ps->format == PIPE_FORMAT_Z16_UNORM) {
+ fb.num_cbufs = 0;
+ fb.zsbuf = ps;
+ } else {
+ fb.num_cbufs = 1;
+ fb.cbufs[0] = ps;
+ fb.zsbuf = NULL;
+ }
+ fb.width = ps->width;
+ fb.height = ps->height;
+ pipe->set_framebuffer_state(pipe, &fb);
+
+ sc.minx = sc.miny = 0;
+ sc.maxx = fb.width;
+ sc.maxy = fb.height;
+ pipe->set_scissor_state(pipe, &sc);
+
+ nv50_state_validate(nv50);
+
+ switch (ps->format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ BEGIN_RING(tesla, 0x0d80, 4);
+ OUT_RINGf (ubyte_to_float((clearValue >> 16) & 0xff));
+ OUT_RINGf (ubyte_to_float((clearValue >> 8) & 0xff));
+ OUT_RINGf (ubyte_to_float((clearValue >> 0) & 0xff));
+ OUT_RINGf (ubyte_to_float((clearValue >> 24) & 0xff));
+ BEGIN_RING(tesla, 0x19d0, 1);
+ OUT_RING (0x3c);
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ BEGIN_RING(tesla, 0x0d90, 1);
+ OUT_RINGf ((float)(clearValue >> 8) * (1.0 / 16777215.0));
+ BEGIN_RING(tesla, 0x0da0, 1);
+ OUT_RING (clearValue & 0xff);
+ BEGIN_RING(tesla, 0x19d0, 1);
+ OUT_RING (0x03);
+ break;
+ default:
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height,
+ clearValue);
+ break;
+ }
+
+ pipe->set_framebuffer_state(pipe, &s_fb);
+ pipe->set_scissor_state(pipe, &s_sc);
+ nv50->dirty |= dirty;
+
+ ps->status = PIPE_SURFACE_STATUS_CLEAR;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
new file mode 100644
index 00000000000..b02c53f2095
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv50_context.h"
+#include "nv50_screen.h"
+
+static void
+nv50_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nv50_context *nv50 = (struct nv50_context *)pipe;
+
+ FIRE_RING(fence);
+}
+
+static void
+nv50_destroy(struct pipe_context *pipe)
+{
+ struct nv50_context *nv50 = (struct nv50_context *)pipe;
+
+ draw_destroy(nv50->draw);
+ FREE(nv50);
+}
+
+
+static void
+nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+}
+
+struct pipe_context *
+nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+ struct pipe_winsys *pipe_winsys = pscreen->winsys;
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ struct nv50_context *nv50;
+
+ nv50 = CALLOC_STRUCT(nv50_context);
+ if (!nv50)
+ return NULL;
+ nv50->screen = screen;
+ nv50->pctx_id = pctx_id;
+
+ nv50->pipe.winsys = pipe_winsys;
+ nv50->pipe.screen = pscreen;
+
+ nv50->pipe.destroy = nv50_destroy;
+
+ nv50->pipe.set_edgeflags = nv50_set_edgeflags;
+ nv50->pipe.draw_arrays = nv50_draw_arrays;
+ nv50->pipe.draw_elements = nv50_draw_elements;
+ nv50->pipe.clear = nv50_clear;
+
+ nv50->pipe.flush = nv50_flush;
+
+ nv50_init_surface_functions(nv50);
+ nv50_init_state_functions(nv50);
+ nv50_init_query_functions(nv50);
+
+ nv50->draw = draw_create();
+ assert(nv50->draw);
+ draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50));
+
+ return &nv50->pipe;
+}
+
+
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
new file mode 100644
index 00000000000..5d377f2d067
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -0,0 +1,185 @@
+#ifndef __NV50_CONTEXT_H__
+#define __NV50_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+#include "nouveau/nouveau_stateobj.h"
+
+#define NOUVEAU_PUSH_CONTEXT(ctx) \
+ struct nv50_screen *ctx = nv50->screen
+#include "nouveau/nouveau_push.h"
+
+#include "nv50_screen.h"
+#include "nv50_program.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+/* Constant buffer assignment */
+#define NV50_CB_PMISC 0
+#define NV50_CB_PVP 1
+#define NV50_CB_PFP 2
+#define NV50_CB_PGP 3
+#define NV50_CB_TIC 4
+#define NV50_CB_TSC 5
+#define NV50_CB_PUPLOAD 6
+
+#define NV50_NEW_BLEND (1 << 0)
+#define NV50_NEW_ZSA (1 << 1)
+#define NV50_NEW_BLEND_COLOUR (1 << 2)
+#define NV50_NEW_STIPPLE (1 << 3)
+#define NV50_NEW_SCISSOR (1 << 4)
+#define NV50_NEW_VIEWPORT (1 << 5)
+#define NV50_NEW_RASTERIZER (1 << 6)
+#define NV50_NEW_FRAMEBUFFER (1 << 7)
+#define NV50_NEW_VERTPROG (1 << 8)
+#define NV50_NEW_VERTPROG_CB (1 << 9)
+#define NV50_NEW_FRAGPROG (1 << 10)
+#define NV50_NEW_FRAGPROG_CB (1 << 11)
+#define NV50_NEW_ARRAYS (1 << 12)
+#define NV50_NEW_SAMPLER (1 << 13)
+#define NV50_NEW_TEXTURE (1 << 14)
+
+struct nv50_blend_stateobj {
+ struct pipe_blend_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv50_zsa_stateobj {
+ struct pipe_depth_stencil_alpha_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv50_rasterizer_stateobj {
+ struct pipe_rasterizer_state pipe;
+ struct nouveau_stateobj *so;
+};
+
+struct nv50_miptree {
+ struct pipe_texture base;
+ struct pipe_buffer *buffer;
+};
+
+static INLINE struct nv50_miptree *
+nv50_miptree(struct pipe_texture *pt)
+{
+ return (struct nv50_miptree *)pt;
+}
+
+struct nv50_surface {
+ struct pipe_surface base;
+ struct pipe_buffer *untiled;
+};
+
+static INLINE struct nv50_surface *
+nv50_surface(struct pipe_surface *pt)
+{
+ return (struct nv50_surface *)pt;
+}
+
+struct nv50_state {
+ unsigned dirty;
+
+ struct nouveau_stateobj *fb;
+ struct nouveau_stateobj *blend;
+ struct nouveau_stateobj *blend_colour;
+ struct nouveau_stateobj *zsa;
+ struct nouveau_stateobj *rast;
+ struct nouveau_stateobj *stipple;
+ struct nouveau_stateobj *scissor;
+ unsigned scissor_enabled;
+ struct nouveau_stateobj *viewport;
+ unsigned viewport_bypass;
+ struct nouveau_stateobj *tsc_upload;
+ struct nouveau_stateobj *tic_upload;
+ struct nouveau_stateobj *vertprog;
+ struct nouveau_stateobj *fragprog;
+ struct nouveau_stateobj *vtxfmt;
+ struct nouveau_stateobj *vtxbuf;
+};
+
+struct nv50_context {
+ struct pipe_context pipe;
+
+ struct nv50_screen *screen;
+ unsigned pctx_id;
+
+ struct draw_context *draw;
+
+ struct nv50_state state;
+
+ unsigned dirty;
+ struct nv50_blend_stateobj *blend;
+ struct nv50_zsa_stateobj *zsa;
+ struct nv50_rasterizer_stateobj *rasterizer;
+ struct pipe_blend_color blend_colour;
+ struct pipe_poly_stipple stipple;
+ struct pipe_scissor_state scissor;
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state framebuffer;
+ struct nv50_program *vertprog;
+ struct nv50_program *fragprog;
+ struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned vtxbuf_nr;
+ struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
+ unsigned vtxelt_nr;
+ unsigned *sampler[PIPE_MAX_SAMPLERS];
+ unsigned sampler_nr;
+ struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS];
+ unsigned miptree_nr;
+};
+
+static INLINE struct nv50_context *
+nv50_context(struct pipe_context *pipe)
+{
+ return (struct nv50_context *)pipe;
+}
+
+extern void nv50_init_surface_functions(struct nv50_context *nv50);
+extern void nv50_init_state_functions(struct nv50_context *nv50);
+extern void nv50_init_query_functions(struct nv50_context *nv50);
+
+extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen);
+
+/* nv50_draw.c */
+extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50);
+
+/* nv50_vbo.c */
+extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv50_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start,
+ unsigned count);
+extern void nv50_vbo_validate(struct nv50_context *nv50);
+
+/* nv50_clear.c */
+extern void nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+/* nv50_program.c */
+extern void nv50_vertprog_validate(struct nv50_context *nv50);
+extern void nv50_fragprog_validate(struct nv50_context *nv50);
+extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p);
+
+/* nv50_state_validate.c */
+extern boolean nv50_state_validate(struct nv50_context *nv50);
+
+/* nv50_tex.c */
+extern void nv50_tex_validate(struct nv50_context *);
+
+#endif
diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c
new file mode 100644
index 00000000000..2f6f607261e
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_draw.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "draw/draw_pipe.h"
+
+#include "nv50_context.h"
+
+struct nv50_render_stage {
+ struct draw_stage stage;
+ struct nv50_context *nv50;
+};
+
+static INLINE struct nv50_render_stage *
+nv50_render_stage(struct draw_stage *stage)
+{
+ return (struct nv50_render_stage *)stage;
+}
+
+static void
+nv50_render_point(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_line(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_tri(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_flush(struct draw_stage *stage, unsigned flags)
+{
+}
+
+static void
+nv50_render_reset_stipple_counter(struct draw_stage *stage)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_destroy(struct draw_stage *stage)
+{
+ FREE(stage);
+}
+
+struct draw_stage *
+nv50_draw_render_stage(struct nv50_context *nv50)
+{
+ struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage);
+
+ rs->nv50 = nv50;
+ rs->stage.draw = nv50->draw;
+ rs->stage.destroy = nv50_render_destroy;
+ rs->stage.point = nv50_render_point;
+ rs->stage.line = nv50_render_line;
+ rs->stage.tri = nv50_render_tri;
+ rs->stage.flush = nv50_render_flush;
+ rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter;
+
+ return &rs->stage;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
new file mode 100644
index 00000000000..28a8bdc0fab
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv50_context.h"
+
+static struct pipe_texture *
+nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
+{
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
+ unsigned usage, pitch;
+
+ mt->base = *pt;
+ mt->base.refcount = 1;
+ mt->base.screen = pscreen;
+
+ usage = PIPE_BUFFER_USAGE_PIXEL;
+ switch (pt->format) {
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ usage |= NOUVEAU_BUFFER_USAGE_ZETA;
+ break;
+ default:
+ break;
+ }
+
+ pitch = ((pt->width[0] + 63) & ~63) * pt->block.size;
+ /*XXX*/
+ pitch *= 2;
+
+ mt->buffer = ws->buffer_create(ws, 256, usage, pitch * pt->height[0]);
+ if (!mt->buffer) {
+ FREE(mt);
+ return NULL;
+ }
+
+ return &mt->base;
+}
+
+static void
+nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt)
+{
+ struct pipe_texture *pt = *ppt;
+
+ *ppt = NULL;
+
+ if (--pt->refcount <= 0) {
+ struct nv50_miptree *mt = nv50_miptree(pt);
+
+ pipe_buffer_reference(pscreen, &mt->buffer, NULL);
+ FREE(mt);
+ }
+}
+
+static struct pipe_surface *
+nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct nv50_miptree *mt = nv50_miptree(pt);
+ struct nv50_surface *s;
+ struct pipe_surface *ps;
+
+ s = CALLOC_STRUCT(nv50_surface);
+ if (!s)
+ return NULL;
+ ps = &s->base;
+
+ ps->refcount = 1;
+ ps->winsys = pscreen->winsys;
+ ps->format = pt->format;
+ ps->width = pt->width[level];
+ ps->height = pt->height[level];
+ ps->block = pt->block;
+ ps->nblocksx = pt->nblocksx[level];
+ ps->nblocksy = pt->nblocksy[level];
+ ps->stride = ps->width * ps->block.size;
+ ps->offset = 0;
+ ps->usage = flags;
+ ps->status = PIPE_SURFACE_STATUS_DEFINED;
+
+ pipe_texture_reference(&ps->texture, pt);
+ pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer);
+
+ return ps;
+}
+
+static void
+nv50_miptree_surface_del(struct pipe_screen *pscreen,
+ struct pipe_surface **psurface)
+{
+ struct pipe_surface *ps = *psurface;
+ struct nv50_surface *s = nv50_surface(ps);
+
+ *psurface = NULL;
+
+ if (--ps->refcount <= 0) {
+ pipe_texture_reference(&ps->texture, NULL);
+ pipe_buffer_reference(pscreen, &ps->buffer, NULL);
+ FREE(s);
+ }
+}
+
+void
+nv50_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+ pscreen->texture_create = nv50_miptree_create;
+ pscreen->texture_release = nv50_miptree_release;
+ pscreen->get_tex_surface = nv50_miptree_surface_new;
+ pscreen->tex_surface_release = nv50_miptree_surface_del;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
new file mode 100644
index 00000000000..d6fbdd18243
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -0,0 +1,1708 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_inlines.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nv50_context.h"
+
+#define NV50_SU_MAX_TEMP 64
+#define NV50_PROGRAM_DUMP
+
+/* ARL - gallium craps itself on progs/vp/arl.txt
+ *
+ * MSB - Like MAD, but MUL+SUB
+ * - Fuck it off, introduce a way to negate args for ops that
+ * support it.
+ *
+ * Look into inlining IMMD for ops other than MOV (make it general?)
+ * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
+ * but can emit to P_TEMP first - then MOV later. NVIDIA does this
+ *
+ * In ops such as ADD it's possible to construct a bad opcode in the !is_long()
+ * case, if the emit_src() causes the inst to suddenly become long.
+ *
+ * Verify half-insns work where expected - and force disable them where they
+ * don't work - MUL has it forcibly disabled atm as it fixes POW..
+ *
+ * FUCK! watch dst==src vectors, can overwrite components that are needed.
+ * ie. SUB R0, R0.yzxw, R0
+ *
+ * Things to check with renouveau:
+ * FP attr/result assignment - how?
+ * attrib
+ * - 0x16bc maps vp output onto fp hpos
+ * - 0x16c0 maps vp output onto fp col0
+ * result
+ * - colr always 0-3
+ * - depr always 4
+ * 0x16bc->0x16e8 --> some binding between vp/fp regs
+ * 0x16b8 --> VP output count
+ *
+ * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
+ * "MOV rcol.x, fcol.y" = 0x00000004
+ * 0x19a8 --> as above but 0x00000100 and 0x00000000
+ * - 0x00100000 used when KIL used
+ * 0x196c --> as above but 0x00000011 and 0x00000000
+ *
+ * 0x1988 --> 0xXXNNNNNN
+ * - XX == FP high something
+ */
+struct nv50_reg {
+ enum {
+ P_TEMP,
+ P_ATTR,
+ P_RESULT,
+ P_CONST,
+ P_IMMD
+ } type;
+ int index;
+
+ int hw;
+ int neg;
+};
+
+struct nv50_pc {
+ struct nv50_program *p;
+
+ /* hw resources */
+ struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
+
+ /* tgsi resources */
+ struct nv50_reg *temp;
+ int temp_nr;
+ struct nv50_reg *attr;
+ int attr_nr;
+ struct nv50_reg *result;
+ int result_nr;
+ struct nv50_reg *param;
+ int param_nr;
+ struct nv50_reg *immd;
+ float *immd_buf;
+ int immd_nr;
+
+ struct nv50_reg *temp_temp[16];
+ unsigned temp_temp_nr;
+};
+
+static void
+alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
+{
+ int i;
+
+ if (reg->type == P_RESULT) {
+ if (pc->p->cfg.high_result < (reg->hw + 1))
+ pc->p->cfg.high_result = reg->hw + 1;
+ }
+
+ if (reg->type != P_TEMP)
+ return;
+
+ if (reg->hw >= 0) {
+ /*XXX: do this here too to catch FP temp-as-attr usage..
+ * not clean, but works */
+ if (pc->p->cfg.high_temp < (reg->hw + 1))
+ pc->p->cfg.high_temp = reg->hw + 1;
+ return;
+ }
+
+ for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
+ if (!(pc->r_temp[i])) {
+ pc->r_temp[i] = reg;
+ reg->hw = i;
+ if (pc->p->cfg.high_temp < (i + 1))
+ pc->p->cfg.high_temp = i + 1;
+ return;
+ }
+ }
+
+ assert(0);
+}
+
+static struct nv50_reg *
+alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
+{
+ struct nv50_reg *r;
+ int i;
+
+ if (dst && dst->type == P_TEMP && dst->hw == -1)
+ return dst;
+
+ for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
+ if (!pc->r_temp[i]) {
+ r = CALLOC_STRUCT(nv50_reg);
+ r->type = P_TEMP;
+ r->index = -1;
+ r->hw = i;
+ pc->r_temp[i] = r;
+ return r;
+ }
+ }
+
+ assert(0);
+ return NULL;
+}
+
+static void
+free_temp(struct nv50_pc *pc, struct nv50_reg *r)
+{
+ if (r->index == -1) {
+ unsigned hw = r->hw;
+
+ FREE(pc->r_temp[hw]);
+ pc->r_temp[hw] = NULL;
+ }
+}
+
+static struct nv50_reg *
+temp_temp(struct nv50_pc *pc)
+{
+ if (pc->temp_temp_nr >= 16)
+ assert(0);
+
+ pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
+ return pc->temp_temp[pc->temp_temp_nr++];
+}
+
+static void
+kill_temp_temp(struct nv50_pc *pc)
+{
+ int i;
+
+ for (i = 0; i < pc->temp_temp_nr; i++)
+ free_temp(pc, pc->temp_temp[i]);
+ pc->temp_temp_nr = 0;
+}
+
+static int
+ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
+{
+ pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)),
+ (pc->immd_nr + 1) * 4 * sizeof(float));
+ pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
+ pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
+ pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
+ pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
+
+ return pc->immd_nr++;
+}
+
+static struct nv50_reg *
+alloc_immd(struct nv50_pc *pc, float f)
+{
+ struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
+ unsigned hw;
+
+ hw = ctor_immd(pc, f, 0, 0, 0) * 4;
+ r->type = P_IMMD;
+ r->hw = hw;
+ r->index = -1;
+ return r;
+}
+
+static struct nv50_program_exec *
+exec(struct nv50_pc *pc)
+{
+ struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec);
+
+ e->param.index = -1;
+ return e;
+}
+
+static void
+emit(struct nv50_pc *pc, struct nv50_program_exec *e)
+{
+ struct nv50_program *p = pc->p;
+
+ if (p->exec_tail)
+ p->exec_tail->next = e;
+ if (!p->exec_head)
+ p->exec_head = e;
+ p->exec_tail = e;
+ p->exec_size += (e->inst[0] & 1) ? 2 : 1;
+}
+
+static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
+
+static boolean
+is_long(struct nv50_program_exec *e)
+{
+ if (e->inst[0] & 1)
+ return TRUE;
+ return FALSE;
+}
+
+static boolean
+is_immd(struct nv50_program_exec *e)
+{
+ if (is_long(e) && (e->inst[1] & 3) == 3)
+ return TRUE;
+ return FALSE;
+}
+
+static INLINE void
+set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
+ struct nv50_program_exec *e)
+{
+ set_long(pc, e);
+ e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
+ e->inst[1] |= (pred << 7) | (idx << 12);
+}
+
+static INLINE void
+set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
+ struct nv50_program_exec *e)
+{
+ set_long(pc, e);
+ e->inst[1] &= ~((0x3 << 4) | (1 << 6));
+ e->inst[1] |= (idx << 4) | (on << 6);
+}
+
+static INLINE void
+set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
+{
+ if (is_long(e))
+ return;
+
+ e->inst[0] |= 1;
+ set_pred(pc, 0xf, 0, e);
+ set_pred_wr(pc, 0, 0, e);
+}
+
+static INLINE void
+set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
+{
+ if (dst->type == P_RESULT) {
+ set_long(pc, e);
+ e->inst[1] |= 0x00000008;
+ }
+
+ alloc_reg(pc, dst);
+ e->inst[0] |= (dst->hw << 2);
+}
+
+static INLINE void
+set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
+{
+ unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
+
+ set_long(pc, e);
+ /*XXX: can't be predicated - bits overlap.. catch cases where both
+ * are required and avoid them. */
+ set_pred(pc, 0, 0, e);
+ set_pred_wr(pc, 0, 0, e);
+
+ e->inst[1] |= 0x00000002 | 0x00000001;
+ e->inst[0] |= (val & 0x3f) << 16;
+ e->inst[1] |= (val >> 6) << 2;
+}
+
+static void
+emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src, struct nv50_reg *iv)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0x80000000;
+ set_dst(pc, dst, e);
+ alloc_reg(pc, src);
+ e->inst[0] |= (src->hw << 16);
+ if (iv) {
+ e->inst[0] |= (1 << 25);
+ alloc_reg(pc, iv);
+ e->inst[0] |= (iv->hw << 9);
+ }
+
+ emit(pc, e);
+}
+
+static void
+set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
+ struct nv50_program_exec *e)
+{
+ set_long(pc, e);
+#if 1
+ e->inst[1] |= (1 << 22);
+#else
+ if (src->type == P_IMMD) {
+ e->inst[1] |= (NV50_CB_PMISC << 22);
+ } else {
+ if (pc->p->type == PIPE_SHADER_VERTEX)
+ e->inst[1] |= (NV50_CB_PVP << 22);
+ else
+ e->inst[1] |= (NV50_CB_PFP << 22);
+ }
+#endif
+
+ e->param.index = src->hw;
+ e->param.shift = s;
+ e->param.mask = m << (s % 32);
+}
+
+static void
+emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0x10000000;
+
+ set_dst(pc, dst, e);
+
+ if (0 && dst->type != P_RESULT && src->type == P_IMMD) {
+ set_immd(pc, src, e);
+ /*XXX: 32-bit, but steals part of "half" reg space - need to
+ * catch and handle this case if/when we do half-regs
+ */
+ e->inst[0] |= 0x00008000;
+ } else
+ if (src->type == P_IMMD || src->type == P_CONST) {
+ set_long(pc, e);
+ set_data(pc, src, 0x7f, 9, e);
+ e->inst[1] |= 0x20000000; /* src0 const? */
+ } else {
+ if (src->type == P_ATTR) {
+ set_long(pc, e);
+ e->inst[1] |= 0x00200000;
+ }
+
+ alloc_reg(pc, src);
+ e->inst[0] |= (src->hw << 9);
+ }
+
+ /* We really should support "half" instructions here at some point,
+ * but I don't feel confident enough about them yet.
+ */
+ set_long(pc, e);
+ if (is_long(e) && !is_immd(e)) {
+ e->inst[1] |= 0x04000000; /* 32-bit */
+ e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
+ }
+
+ emit(pc, e);
+}
+
+static boolean
+check_swap_src_0_1(struct nv50_pc *pc,
+ struct nv50_reg **s0, struct nv50_reg **s1)
+{
+ struct nv50_reg *src0 = *s0, *src1 = *s1;
+
+ if (src0->type == P_CONST) {
+ if (src1->type != P_CONST) {
+ *s0 = src1;
+ *s1 = src0;
+ return TRUE;
+ }
+ } else
+ if (src1->type == P_ATTR) {
+ if (src0->type != P_ATTR) {
+ *s0 = src1;
+ *s1 = src0;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static void
+set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
+{
+ if (src->type == P_ATTR) {
+ set_long(pc, e);
+ e->inst[1] |= 0x00200000;
+ } else
+ if (src->type == P_CONST || src->type == P_IMMD) {
+ struct nv50_reg *temp = temp_temp(pc);
+
+ emit_mov(pc, temp, src);
+ src = temp;
+ }
+
+ alloc_reg(pc, src);
+ e->inst[0] |= (src->hw << 9);
+}
+
+static void
+set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
+{
+ if (src->type == P_ATTR) {
+ struct nv50_reg *temp = temp_temp(pc);
+
+ emit_mov(pc, temp, src);
+ src = temp;
+ } else
+ if (src->type == P_CONST || src->type == P_IMMD) {
+ assert(!(e->inst[0] & 0x00800000));
+ if (e->inst[0] & 0x01000000) {
+ struct nv50_reg *temp = temp_temp(pc);
+
+ emit_mov(pc, temp, src);
+ src = temp;
+ } else {
+ set_data(pc, src, 0x7f, 16, e);
+ e->inst[0] |= 0x00800000;
+ }
+ }
+
+ alloc_reg(pc, src);
+ e->inst[0] |= (src->hw << 16);
+}
+
+static void
+set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
+{
+ set_long(pc, e);
+
+ if (src->type == P_ATTR) {
+ struct nv50_reg *temp = temp_temp(pc);
+
+ emit_mov(pc, temp, src);
+ src = temp;
+ } else
+ if (src->type == P_CONST || src->type == P_IMMD) {
+ assert(!(e->inst[0] & 0x01000000));
+ if (e->inst[0] & 0x00800000) {
+ struct nv50_reg *temp = temp_temp(pc);
+
+ emit_mov(pc, temp, src);
+ src = temp;
+ } else {
+ set_data(pc, src, 0x7f, 32+14, e);
+ e->inst[0] |= 0x01000000;
+ }
+ }
+
+ alloc_reg(pc, src);
+ e->inst[1] |= (src->hw << 14);
+}
+
+static void
+emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
+ struct nv50_reg *src1)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0xc0000000;
+ set_long(pc, e);
+
+ check_swap_src_0_1(pc, &src0, &src1);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ set_src_1(pc, src1, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src0, struct nv50_reg *src1)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0xb0000000;
+
+ check_swap_src_0_1(pc, &src0, &src1);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ if (is_long(e))
+ set_src_2(pc, src1, e);
+ else
+ set_src_1(pc, src1, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
+ struct nv50_reg *src0, struct nv50_reg *src1)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ set_long(pc, e);
+ e->inst[0] |= 0xb0000000;
+ e->inst[1] |= (sub << 29);
+
+ check_swap_src_0_1(pc, &src0, &src1);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ set_src_1(pc, src1, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
+ struct nv50_reg *src1)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0xb0000000;
+
+ set_long(pc, e);
+ if (check_swap_src_0_1(pc, &src0, &src1))
+ e->inst[1] |= 0x04000000;
+ else
+ e->inst[1] |= 0x08000000;
+
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ set_src_2(pc, src1, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
+ struct nv50_reg *src1, struct nv50_reg *src2)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0xe0000000;
+
+ check_swap_src_0_1(pc, &src0, &src1);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ set_src_1(pc, src1, e);
+ set_src_2(pc, src2, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
+ struct nv50_reg *src1, struct nv50_reg *src2)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0xe0000000;
+ set_long(pc, e);
+ e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
+
+ check_swap_src_0_1(pc, &src0, &src1);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ set_src_1(pc, src1, e);
+ set_src_2(pc, src2, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_flop(struct nv50_pc *pc, unsigned sub,
+ struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0x90000000;
+ if (sub) {
+ set_long(pc, e);
+ e->inst[1] |= (sub << 29);
+ }
+
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0xb0000000;
+
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+ set_long(pc, e);
+ e->inst[1] |= (6 << 29) | 0x00004000;
+
+ emit(pc, e);
+}
+
+static void
+emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] |= 0xb0000000;
+
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+ set_long(pc, e);
+ e->inst[1] |= (6 << 29);
+
+ emit(pc, e);
+}
+
+static void
+emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
+ struct nv50_reg *src0, struct nv50_reg *src1)
+{
+ struct nv50_program_exec *e = exec(pc);
+ unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
+ struct nv50_reg *rdst;
+
+ assert(c_op <= 7);
+ if (check_swap_src_0_1(pc, &src0, &src1))
+ c_op = inv_cop[c_op];
+
+ rdst = dst;
+ if (dst->type != P_TEMP)
+ dst = alloc_temp(pc, NULL);
+
+ /* set.u32 */
+ set_long(pc, e);
+ e->inst[0] |= 0xb0000000;
+ e->inst[1] |= (3 << 29);
+ e->inst[1] |= (c_op << 14);
+ /*XXX: breaks things, .u32 by default?
+ * decuda will disasm as .u16 and use .lo/.hi regs, but this
+ * doesn't seem to match what the hw actually does.
+ inst[1] |= 0x04000000; << breaks things.. .u32 by default?
+ */
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ set_src_1(pc, src1, e);
+ emit(pc, e);
+
+ /* cvt.f32.u32 */
+ e = exec(pc);
+ e->inst[0] = 0xa0000001;
+ e->inst[1] = 0x64014780;
+ set_dst(pc, rdst, e);
+ set_src_0(pc, dst, e);
+ emit(pc, e);
+
+ if (dst != rdst)
+ free_temp(pc, dst);
+}
+
+static void
+emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xa0000000; /* cvt */
+ set_long(pc, e);
+ e->inst[1] |= (6 << 29); /* cvt */
+ e->inst[1] |= 0x08000000; /* integer mode */
+ e->inst[1] |= 0x04000000; /* 32 bit */
+ e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */
+ e->inst[1] |= (1 << 14); /* src .f32 */
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *v, struct nv50_reg *e)
+{
+ struct nv50_reg *temp = alloc_temp(pc, NULL);
+
+ emit_flop(pc, 3, temp, v);
+ emit_mul(pc, temp, temp, e);
+ emit_preex2(pc, temp, temp);
+ emit_flop(pc, 6, dst, temp);
+
+ free_temp(pc, temp);
+}
+
+static void
+emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xa0000000; /* cvt */
+ set_long(pc, e);
+ e->inst[1] |= (6 << 29); /* cvt */
+ e->inst[1] |= 0x04000000; /* 32 bit */
+ e->inst[1] |= (1 << 14); /* src .f32 */
+ e->inst[1] |= ((1 << 6) << 14); /* .abs */
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
+ struct nv50_reg **src)
+{
+ struct nv50_reg *one = alloc_immd(pc, 1.0);
+ struct nv50_reg *zero = alloc_immd(pc, 0.0);
+ struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
+ struct nv50_reg *pos128 = alloc_immd(pc, 127.999999);
+ struct nv50_reg *tmp[4];
+
+ if (mask & (1 << 0))
+ emit_mov(pc, dst[0], one);
+
+ if (mask & (1 << 3))
+ emit_mov(pc, dst[3], one);
+
+ if (mask & (3 << 1)) {
+ if (mask & (1 << 1))
+ tmp[0] = dst[1];
+ else
+ tmp[0] = temp_temp(pc);
+ emit_minmax(pc, 4, tmp[0], src[0], zero);
+ }
+
+ if (mask & (1 << 2)) {
+ set_pred_wr(pc, 1, 0, pc->p->exec_tail);
+
+ tmp[1] = temp_temp(pc);
+ emit_minmax(pc, 4, tmp[1], src[1], zero);
+
+ tmp[3] = temp_temp(pc);
+ emit_minmax(pc, 4, tmp[3], src[3], neg128);
+ emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
+
+ emit_pow(pc, dst[2], tmp[1], tmp[3]);
+ emit_mov(pc, dst[2], zero);
+ set_pred(pc, 3, 0, pc->p->exec_tail);
+ }
+}
+
+static void
+emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ set_long(pc, e);
+ e->inst[0] |= 0xa0000000; /* delta */
+ e->inst[1] |= (7 << 29); /* delta */
+ e->inst[1] |= 0x04000000; /* negate arg0? probably not */
+ e->inst[1] |= (1 << 14); /* src .f32 */
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+
+ emit(pc, e);
+}
+
+static struct nv50_reg *
+tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
+{
+ switch (dst->DstRegister.File) {
+ case TGSI_FILE_TEMPORARY:
+ return &pc->temp[dst->DstRegister.Index * 4 + c];
+ case TGSI_FILE_OUTPUT:
+ return &pc->result[dst->DstRegister.Index * 4 + c];
+ case TGSI_FILE_NULL:
+ return NULL;
+ default:
+ break;
+ }
+
+ return NULL;
+}
+
+static struct nv50_reg *
+tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
+{
+ struct nv50_reg *r = NULL;
+ struct nv50_reg *temp;
+ unsigned c;
+
+ c = tgsi_util_get_full_src_register_extswizzle(src, chan);
+ switch (c) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ switch (src->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ r = &pc->attr[src->SrcRegister.Index * 4 + c];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ r = &pc->temp[src->SrcRegister.Index * 4 + c];
+ break;
+ case TGSI_FILE_CONSTANT:
+ r = &pc->param[src->SrcRegister.Index * 4 + c];
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ r = &pc->immd[src->SrcRegister.Index * 4 + c];
+ break;
+ case TGSI_FILE_SAMPLER:
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case TGSI_EXTSWIZZLE_ZERO:
+ r = alloc_immd(pc, 0.0);
+ break;
+ case TGSI_EXTSWIZZLE_ONE:
+ r = alloc_immd(pc, 1.0);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+ case TGSI_UTIL_SIGN_CLEAR:
+ temp = temp_temp(pc);
+ emit_abs(pc, temp, r);
+ r = temp;
+ break;
+ case TGSI_UTIL_SIGN_TOGGLE:
+ temp = temp_temp(pc);
+ emit_neg(pc, temp, r);
+ r = temp;
+ break;
+ case TGSI_UTIL_SIGN_SET:
+ temp = temp_temp(pc);
+ emit_abs(pc, temp, r);
+ emit_neg(pc, temp, r);
+ r = temp;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ return r;
+}
+
+static boolean
+nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
+{
+ const struct tgsi_full_instruction *inst = &tok->FullInstruction;
+ struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
+ unsigned mask, sat;
+ int i, c;
+
+ mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
+
+ for (c = 0; c < 4; c++) {
+ if (mask & (1 << c))
+ dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
+ else
+ dst[c] = NULL;
+ }
+
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ for (c = 0; c < 4; c++)
+ src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
+ }
+
+ if (sat) {
+ for (c = 0; c < 4; c++) {
+ rdst[c] = dst[c];
+ dst[c] = temp_temp(pc);
+ }
+ }
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_abs(pc, dst[c], src[0][c]);
+ }
+ break;
+ case TGSI_OPCODE_ADD:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_add(pc, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_COS:
+ temp = alloc_temp(pc, NULL);
+ emit_precossin(pc, temp, src[0][0]);
+ emit_flop(pc, 5, temp, temp);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ break;
+ case TGSI_OPCODE_DP3:
+ temp = alloc_temp(pc, NULL);
+ emit_mul(pc, temp, src[0][0], src[1][0]);
+ emit_mad(pc, temp, src[0][1], src[1][1], temp);
+ emit_mad(pc, temp, src[0][2], src[1][2], temp);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ free_temp(pc, temp);
+ break;
+ case TGSI_OPCODE_DP4:
+ temp = alloc_temp(pc, NULL);
+ emit_mul(pc, temp, src[0][0], src[1][0]);
+ emit_mad(pc, temp, src[0][1], src[1][1], temp);
+ emit_mad(pc, temp, src[0][2], src[1][2], temp);
+ emit_mad(pc, temp, src[0][3], src[1][3], temp);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ free_temp(pc, temp);
+ break;
+ case TGSI_OPCODE_DPH:
+ temp = alloc_temp(pc, NULL);
+ emit_mul(pc, temp, src[0][0], src[1][0]);
+ emit_mad(pc, temp, src[0][1], src[1][1], temp);
+ emit_mad(pc, temp, src[0][2], src[1][2], temp);
+ emit_add(pc, temp, src[1][3], temp);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ free_temp(pc, temp);
+ break;
+ case TGSI_OPCODE_DST:
+ {
+ struct nv50_reg *one = alloc_immd(pc, 1.0);
+ if (mask & (1 << 0))
+ emit_mov(pc, dst[0], one);
+ if (mask & (1 << 1))
+ emit_mul(pc, dst[1], src[0][1], src[1][1]);
+ if (mask & (1 << 2))
+ emit_mov(pc, dst[2], src[0][2]);
+ if (mask & (1 << 3))
+ emit_mov(pc, dst[3], src[1][3]);
+ FREE(one);
+ }
+ break;
+ case TGSI_OPCODE_EX2:
+ temp = alloc_temp(pc, NULL);
+ emit_preex2(pc, temp, src[0][0]);
+ emit_flop(pc, 6, temp, temp);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ free_temp(pc, temp);
+ break;
+ case TGSI_OPCODE_FLR:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_flr(pc, dst[c], src[0][c]);
+ }
+ break;
+ case TGSI_OPCODE_FRC:
+ temp = alloc_temp(pc, NULL);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_flr(pc, temp, src[0][c]);
+ emit_sub(pc, dst[c], src[0][c], temp);
+ }
+ free_temp(pc, temp);
+ break;
+ case TGSI_OPCODE_LIT:
+ emit_lit(pc, &dst[0], mask, &src[0][0]);
+ break;
+ case TGSI_OPCODE_LG2:
+ temp = alloc_temp(pc, NULL);
+ emit_flop(pc, 3, temp, src[0][0]);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ break;
+ case TGSI_OPCODE_LRP:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ /*XXX: we can do better than this */
+ temp = alloc_temp(pc, NULL);
+ emit_neg(pc, temp, src[0][c]);
+ emit_mad(pc, temp, temp, src[2][c], src[2][c]);
+ emit_mad(pc, dst[c], src[0][c], src[1][c], temp);
+ free_temp(pc, temp);
+ }
+ break;
+ case TGSI_OPCODE_MAD:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
+ }
+ break;
+ case TGSI_OPCODE_MAX:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_MIN:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_MOV:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], src[0][c]);
+ }
+ break;
+ case TGSI_OPCODE_MUL:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mul(pc, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_POW:
+ temp = alloc_temp(pc, NULL);
+ emit_pow(pc, temp, src[0][0], src[1][0]);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ free_temp(pc, temp);
+ break;
+ case TGSI_OPCODE_RCP:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_flop(pc, 0, dst[c], src[0][0]);
+ }
+ break;
+ case TGSI_OPCODE_RSQ:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_flop(pc, 2, dst[c], src[0][0]);
+ }
+ break;
+ case TGSI_OPCODE_SCS:
+ temp = alloc_temp(pc, NULL);
+ emit_precossin(pc, temp, src[0][0]);
+ if (mask & (1 << 0))
+ emit_flop(pc, 5, dst[0], temp);
+ if (mask & (1 << 1))
+ emit_flop(pc, 4, dst[1], temp);
+ break;
+ case TGSI_OPCODE_SGE:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_SIN:
+ temp = alloc_temp(pc, NULL);
+ emit_precossin(pc, temp, src[0][0]);
+ emit_flop(pc, 4, temp, temp);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mov(pc, dst[c], temp);
+ }
+ break;
+ case TGSI_OPCODE_SLT:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_SUB:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_sub(pc, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_TEX:
+ {
+ struct nv50_reg *t0, *t1, *t2, *t3;
+ struct nv50_program_exec *e;
+
+ t0 = alloc_temp(pc, NULL);
+ t0 = alloc_temp(pc, NULL);
+ t1 = alloc_temp(pc, NULL);
+ t2 = alloc_temp(pc, NULL);
+ t3 = alloc_temp(pc, NULL);
+ emit_mov(pc, t0, src[0][0]);
+ emit_mov(pc, t1, src[0][1]);
+
+ e = exec(pc);
+ e->inst[0] = 0xf6400000;
+ set_long(pc, e);
+ e->inst[1] |= 0x0000c004;
+ set_dst(pc, t0, e);
+ emit(pc, e);
+
+ if (mask & (1 << 0)) emit_mov(pc, dst[0], t0);
+ if (mask & (1 << 1)) emit_mov(pc, dst[1], t1);
+ if (mask & (1 << 2)) emit_mov(pc, dst[2], t2);
+ if (mask & (1 << 3)) emit_mov(pc, dst[3], t3);
+
+ free_temp(pc, t0);
+ free_temp(pc, t1);
+ free_temp(pc, t2);
+ free_temp(pc, t3);
+ }
+ break;
+ case TGSI_OPCODE_XPD:
+ temp = alloc_temp(pc, NULL);
+ if (mask & (1 << 0)) {
+ emit_mul(pc, temp, src[0][2], src[1][1]);
+ emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
+ }
+ if (mask & (1 << 1)) {
+ emit_mul(pc, temp, src[0][0], src[1][2]);
+ emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
+ }
+ if (mask & (1 << 2)) {
+ emit_mul(pc, temp, src[0][1], src[1][0]);
+ emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
+ }
+ free_temp(pc, temp);
+ break;
+ case TGSI_OPCODE_END:
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ if (sat) {
+ for (c = 0; c < 4; c++) {
+ struct nv50_program_exec *e;
+
+ if (!(mask & (1 << c)))
+ continue;
+ e = exec(pc);
+
+ e->inst[0] = 0xa0000000; /* cvt */
+ set_long(pc, e);
+ e->inst[1] |= (6 << 29); /* cvt */
+ e->inst[1] |= 0x04000000; /* 32 bit */
+ e->inst[1] |= (1 << 14); /* src .f32 */
+ e->inst[1] |= ((1 << 5) << 14); /* .sat */
+ set_dst(pc, rdst[c], e);
+ set_src_0(pc, dst[c], e);
+ emit(pc, e);
+ }
+ }
+
+ kill_temp_temp(pc);
+ return TRUE;
+}
+
+static boolean
+nv50_program_tx_prep(struct nv50_pc *pc)
+{
+ struct tgsi_parse_context p;
+ boolean ret = FALSE;
+ unsigned i, c;
+
+ tgsi_parse_init(&p, pc->p->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch (tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ const struct tgsi_full_immediate *imm =
+ &p.FullToken.FullImmediate;
+
+ ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
+ imm->u.ImmediateFloat32[1].Float,
+ imm->u.ImmediateFloat32[2].Float,
+ imm->u.ImmediateFloat32[3].Float);
+ }
+ break;
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *d;
+ unsigned last;
+
+ d = &p.FullToken.FullDeclaration;
+ last = d->DeclarationRange.Last;
+
+ switch (d->Declaration.File) {
+ case TGSI_FILE_TEMPORARY:
+ if (pc->temp_nr < (last + 1))
+ pc->temp_nr = last + 1;
+ break;
+ case TGSI_FILE_OUTPUT:
+ if (pc->result_nr < (last + 1))
+ pc->result_nr = last + 1;
+ break;
+ case TGSI_FILE_INPUT:
+ if (pc->attr_nr < (last + 1))
+ pc->attr_nr = last + 1;
+ break;
+ case TGSI_FILE_CONSTANT:
+ if (pc->param_nr < (last + 1))
+ pc->param_nr = last + 1;
+ break;
+ case TGSI_FILE_SAMPLER:
+ break;
+ default:
+ NOUVEAU_ERR("bad decl file %d\n",
+ d->Declaration.File);
+ goto out_err;
+ }
+ }
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (pc->temp_nr) {
+ pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->temp)
+ goto out_err;
+
+ for (i = 0; i < pc->temp_nr; i++) {
+ for (c = 0; c < 4; c++) {
+ pc->temp[i*4+c].type = P_TEMP;
+ pc->temp[i*4+c].hw = -1;
+ pc->temp[i*4+c].index = i;
+ }
+ }
+ }
+
+ if (pc->attr_nr) {
+ struct nv50_reg *iv = NULL;
+ int aid = 0;
+
+ pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->attr)
+ goto out_err;
+
+ if (pc->p->type == PIPE_SHADER_FRAGMENT) {
+ iv = alloc_temp(pc, NULL);
+ emit_interp(pc, iv, iv, NULL);
+ emit_flop(pc, 0, iv, iv);
+ aid++;
+ }
+
+ for (i = 0; i < pc->attr_nr; i++) {
+ struct nv50_reg *a = &pc->attr[i*4];
+
+ for (c = 0; c < 4; c++) {
+ if (pc->p->type == PIPE_SHADER_FRAGMENT) {
+ struct nv50_reg *at =
+ alloc_temp(pc, NULL);
+ pc->attr[i*4+c].type = at->type;
+ pc->attr[i*4+c].hw = at->hw;
+ pc->attr[i*4+c].index = at->index;
+ } else {
+ pc->p->cfg.vp.attr[aid/32] |=
+ (1 << (aid % 32));
+ pc->attr[i*4+c].type = P_ATTR;
+ pc->attr[i*4+c].hw = aid++;
+ pc->attr[i*4+c].index = i;
+ }
+ }
+
+ if (pc->p->type != PIPE_SHADER_FRAGMENT)
+ continue;
+
+ emit_interp(pc, &a[0], &a[0], iv);
+ emit_interp(pc, &a[1], &a[1], iv);
+ emit_interp(pc, &a[2], &a[2], iv);
+ emit_interp(pc, &a[3], &a[3], iv);
+ }
+
+ if (iv)
+ free_temp(pc, iv);
+ }
+
+ if (pc->result_nr) {
+ int rid = 0;
+
+ pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->result)
+ goto out_err;
+
+ for (i = 0; i < pc->result_nr; i++) {
+ for (c = 0; c < 4; c++) {
+ if (pc->p->type == PIPE_SHADER_FRAGMENT) {
+ pc->result[i*4+c].type = P_TEMP;
+ pc->result[i*4+c].hw = -1;
+ } else {
+ pc->result[i*4+c].type = P_RESULT;
+ pc->result[i*4+c].hw = rid++;
+ }
+ pc->result[i*4+c].index = i;
+ }
+ }
+ }
+
+ if (pc->param_nr) {
+ int rid = 0;
+
+ pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->param)
+ goto out_err;
+
+ for (i = 0; i < pc->param_nr; i++) {
+ for (c = 0; c < 4; c++) {
+ pc->param[i*4+c].type = P_CONST;
+ pc->param[i*4+c].hw = rid++;
+ pc->param[i*4+c].index = i;
+ }
+ }
+ }
+
+ if (pc->immd_nr) {
+ int rid = pc->param_nr * 4;
+
+ pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg));
+ if (!pc->immd)
+ goto out_err;
+
+ for (i = 0; i < pc->immd_nr; i++) {
+ for (c = 0; c < 4; c++) {
+ pc->immd[i*4+c].type = P_IMMD;
+ pc->immd[i*4+c].hw = rid++;
+ pc->immd[i*4+c].index = i;
+ }
+ }
+ }
+
+ ret = TRUE;
+out_err:
+ tgsi_parse_free(&p);
+ return ret;
+}
+
+static boolean
+nv50_program_tx(struct nv50_program *p)
+{
+ struct tgsi_parse_context parse;
+ struct nv50_pc *pc;
+ boolean ret;
+
+ pc = CALLOC_STRUCT(nv50_pc);
+ if (!pc)
+ return FALSE;
+ pc->p = p;
+ pc->p->cfg.high_temp = 4;
+
+ ret = nv50_program_tx_prep(pc);
+ if (ret == FALSE)
+ goto out_cleanup;
+
+ tgsi_parse_init(&parse, pc->p->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ const union tgsi_full_token *tok = &parse.FullToken;
+
+ tgsi_parse_token(&parse);
+
+ switch (tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ ret = nv50_program_tx_insn(pc, tok);
+ if (ret == FALSE)
+ goto out_err;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (p->type == PIPE_SHADER_FRAGMENT) {
+ struct nv50_reg out;
+
+ out.type = P_TEMP;
+ for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
+ emit_mov(pc, &out, &pc->result[out.hw]);
+ }
+
+ assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head));
+ pc->p->exec_tail->inst[1] |= 0x00000001;
+
+ p->param_nr = pc->param_nr * 4;
+ p->immd_nr = pc->immd_nr * 4;
+ p->immd = pc->immd_buf;
+
+out_err:
+ tgsi_parse_free(&parse);
+
+out_cleanup:
+ return ret;
+}
+
+static void
+nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
+{
+ if (nv50_program_tx(p) == FALSE)
+ assert(0);
+ p->translated = TRUE;
+}
+
+static void
+nv50_program_upload_data(struct nv50_context *nv50, float *map,
+ unsigned start, unsigned count)
+{
+ while (count) {
+ unsigned nr = count > 2047 ? 2047 : count;
+
+ BEGIN_RING(tesla, 0x00000f00, 1);
+ OUT_RING ((NV50_CB_PMISC << 0) | (start << 8));
+ BEGIN_RING(tesla, 0x40000f04, nr);
+ OUT_RINGp (map, nr);
+
+ map += nr;
+ start += nr;
+ count -= nr;
+ }
+}
+
+static void
+nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
+{
+ struct nouveau_winsys *nvws = nv50->screen->nvws;
+ struct pipe_winsys *ws = nv50->pipe.winsys;
+ unsigned nr = p->param_nr + p->immd_nr;
+
+ if (!p->data && nr) {
+ struct nouveau_resource *heap = nv50->screen->vp_data_heap;
+
+ if (nvws->res_alloc(heap, nr, p, &p->data)) {
+ while (heap->next && heap->size < nr) {
+ struct nv50_program *evict = heap->next->priv;
+ nvws->res_free(&evict->data);
+ }
+
+ if (nvws->res_alloc(heap, nr, p, &p->data))
+ assert(0);
+ }
+ }
+
+ if (p->param_nr) {
+ float *map = ws->buffer_map(ws, nv50->constbuf[p->type],
+ PIPE_BUFFER_USAGE_CPU_READ);
+ nv50_program_upload_data(nv50, map, p->data->start,
+ p->param_nr);
+ ws->buffer_unmap(ws, nv50->constbuf[p->type]);
+ }
+
+ if (p->immd_nr) {
+ nv50_program_upload_data(nv50, p->immd,
+ p->data->start + p->param_nr,
+ p->immd_nr);
+ }
+}
+
+static void
+nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
+{
+ struct pipe_winsys *ws = nv50->pipe.winsys;
+ struct nv50_program_exec *e;
+ struct nouveau_stateobj *so;
+ const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
+ unsigned start, count, *up, *ptr;
+ boolean upload = FALSE;
+
+ if (!p->buffer) {
+ p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4);
+ upload = TRUE;
+ }
+
+ if (p->data && p->data->start != p->data_start) {
+ for (e = p->exec_head; e; e = e->next) {
+ unsigned ei, ci;
+
+ if (e->param.index < 0)
+ continue;
+ ei = e->param.shift >> 5;
+ ci = e->param.index + p->data->start;
+
+ e->inst[ei] &= ~e->param.mask;
+ e->inst[ei] |= (ci << e->param.shift);
+ }
+
+ p->data_start = p->data->start;
+ upload = TRUE;
+ }
+
+ if (!upload)
+ return;
+
+ up = ptr = MALLOC(p->exec_size * 4);
+ for (e = p->exec_head; e; e = e->next) {
+ *(ptr++) = e->inst[0];
+ if (is_long(e))
+ *(ptr++) = e->inst[1];
+ }
+
+ so = so_new(4,2);
+ so_method(so, nv50->screen->tesla, 0x1280, 3);
+ so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
+
+ start = 0; count = p->exec_size;
+ while (count) {
+ struct nouveau_winsys *nvws = nv50->screen->nvws;
+ unsigned nr;
+
+ so_emit(nvws, so);
+
+ nr = MIN2(count, 2047);
+ nr = MIN2(nvws->channel->pushbuf->remaining, nr);
+ if (nvws->channel->pushbuf->remaining < (nr + 3)) {
+ FIRE_RING(NULL);
+ continue;
+ }
+
+ BEGIN_RING(tesla, 0x0f00, 1);
+ OUT_RING ((start << 8) | NV50_CB_PUPLOAD);
+ BEGIN_RING(tesla, 0x40000f04, nr);
+ OUT_RINGp (up + start, nr);
+
+ start += nr;
+ count -= nr;
+ }
+
+ FREE(up);
+ so_ref(NULL, &so);
+}
+
+void
+nv50_vertprog_validate(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nv50_program *p = nv50->vertprog;
+ struct nouveau_stateobj *so;
+
+ if (!p->translated) {
+ nv50_program_validate(nv50, p);
+ if (!p->translated)
+ assert(0);
+ }
+
+ nv50_program_validate_data(nv50, p);
+ nv50_program_validate_code(nv50, p);
+
+ so = so_new(13, 2);
+ so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
+ so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, tesla, 0x1650, 2);
+ so_data (so, p->cfg.vp.attr[0]);
+ so_data (so, p->cfg.vp.attr[1]);
+ so_method(so, tesla, 0x16b8, 1);
+ so_data (so, p->cfg.high_result);
+ so_method(so, tesla, 0x16ac, 2);
+ so_data (so, p->cfg.high_result); //8);
+ so_data (so, p->cfg.high_temp);
+ so_method(so, tesla, 0x140c, 1);
+ so_data (so, 0); /* program start offset */
+ so_ref(so, &nv50->state.vertprog);
+}
+
+void
+nv50_fragprog_validate(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nv50_program *p = nv50->fragprog;
+ struct nouveau_stateobj *so;
+
+ if (!p->translated) {
+ nv50_program_validate(nv50, p);
+ if (!p->translated)
+ assert(0);
+ }
+
+ nv50_program_validate_data(nv50, p);
+ nv50_program_validate_code(nv50, p);
+
+ so = so_new(64, 2);
+ so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
+ so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, tesla, 0x1904, 4);
+ so_data (so, 0x01040404); /* p: 0x01000404 */
+ so_data (so, 0x00000004);
+ so_data (so, 0x00000000);
+ so_data (so, 0x00000000);
+ so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */
+ so_data (so, 0x03020100);
+ so_data (so, 0x07060504);
+ so_data (so, 0x0b0a0908);
+ so_method(so, tesla, 0x1988, 2);
+ so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */
+ so_data (so, p->cfg.high_temp);
+ so_method(so, tesla, 0x1414, 1);
+ so_data (so, 0); /* program start offset */
+ so_ref(so, &nv50->state.fragprog);
+}
+
+void
+nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
+{
+ struct pipe_winsys *ws = nv50->pipe.winsys;
+
+ while (p->exec_head) {
+ struct nv50_program_exec *e = p->exec_head;
+
+ p->exec_head = e->next;
+ FREE(e);
+ }
+ p->exec_tail = NULL;
+ p->exec_size = 0;
+
+ if (p->buffer)
+ pipe_buffer_reference(ws, &p->buffer, NULL);
+
+ nv50->screen->nvws->res_free(&p->data);
+
+ p->translated = 0;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
new file mode 100644
index 00000000000..78deed6a384
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -0,0 +1,45 @@
+#ifndef __NV50_PROGRAM_H__
+#define __NV50_PROGRAM_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+struct nv50_program_exec {
+ struct nv50_program_exec *next;
+
+ unsigned inst[2];
+ struct {
+ int index;
+ unsigned mask;
+ unsigned shift;
+ } param;
+};
+
+struct nv50_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+ boolean translated;
+
+ unsigned type;
+ struct nv50_program_exec *exec_head;
+ struct nv50_program_exec *exec_tail;
+ unsigned exec_size;
+ struct nouveau_resource *data;
+ unsigned data_start;
+
+ struct pipe_buffer *buffer;
+
+ float *immd;
+ unsigned immd_nr;
+ unsigned param_nr;
+
+ struct {
+ unsigned high_temp;
+ unsigned high_result;
+ struct {
+ unsigned attr[2];
+ } vp;
+ } cfg;
+};
+
+#endif
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
new file mode 100644
index 00000000000..26bd90ccc5e
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+
+#include "nv50_context.h"
+
+static struct pipe_query *
+nv50_query_create(struct pipe_context *pipe, unsigned type)
+{
+ NOUVEAU_ERR("unimplemented\n");
+ return NULL;
+}
+
+static void
+nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *q)
+{
+ NOUVEAU_ERR("unimplemented\n");
+}
+
+static void
+nv50_query_begin(struct pipe_context *pipe, struct pipe_query *q)
+{
+ NOUVEAU_ERR("unimplemented\n");
+}
+
+static void
+nv50_query_end(struct pipe_context *pipe, struct pipe_query *q)
+{
+ NOUVEAU_ERR("unimplemented\n");
+}
+
+static boolean
+nv50_query_result(struct pipe_context *pipe, struct pipe_query *q,
+ boolean wait, uint64 *result)
+{
+ NOUVEAU_ERR("unimplemented\n");
+ *result = 0xdeadcafe;
+ return TRUE;
+}
+
+void
+nv50_init_query_functions(struct nv50_context *nv50)
+{
+ nv50->pipe.create_query = nv50_query_create;
+ nv50->pipe.destroy_query = nv50_query_destroy;
+ nv50->pipe.begin_query = nv50_query_begin;
+ nv50->pipe.end_query = nv50_query_end;
+ nv50->pipe.get_query_result = nv50_query_result;
+}
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
new file mode 100644
index 00000000000..b5aef7dadd8
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_screen.h"
+
+#include "nv50_context.h"
+#include "nv50_screen.h"
+
+#include "nouveau/nouveau_stateobj.h"
+
+#define NV5X_GRCLASS5097_CHIPSETS 0x00000001
+#define NV8X_GRCLASS8297_CHIPSETS 0x00000050
+#define NV9X_GRCLASS8297_CHIPSETS 0x00000014
+
+static boolean
+nv50_screen_is_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage, unsigned geom_flags)
+{
+ if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ case PIPE_FORMAT_A8L8_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ }
+
+ return FALSE;
+}
+
+static const char *
+nv50_screen_get_name(struct pipe_screen *pscreen)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ struct nouveau_device *dev = screen->nvws->channel->device;
+ static char buffer[128];
+
+ snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+ return buffer;
+}
+
+static const char *
+nv50_screen_get_vendor(struct pipe_screen *pscreen)
+{
+ return "nouveau";
+}
+
+static int
+nv50_screen_get_param(struct pipe_screen *pscreen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 32;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 0;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_S3TC:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 0;
+ case PIPE_CAP_POINT_SPRITE:
+ return 0;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 8;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 0;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 13;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 10;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 13;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ return 1;
+ case NOUVEAU_CAP_HW_VTXBUF:
+ return 1;
+ case NOUVEAU_CAP_HW_IDXBUF:
+ return 0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nv50_screen_get_paramf(struct pipe_screen *pscreen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 16.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 4.0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+static void
+nv50_screen_destroy(struct pipe_screen *pscreen)
+{
+ FREE(pscreen);
+}
+
+struct pipe_screen *
+nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+ struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen);
+ struct nouveau_stateobj *so;
+ unsigned tesla_class = 0, ret;
+ unsigned chipset = nvws->channel->device->chipset;
+ int i;
+
+ if (!screen)
+ return NULL;
+ screen->nvws = nvws;
+
+ /* 3D object */
+ if ((chipset & 0xf0) != 0x50 && (chipset & 0xf0) != 0x80) {
+ NOUVEAU_ERR("Not a G8x chipset\n");
+ nv50_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ switch (chipset & 0xf0) {
+ case 0x50:
+ if (NV5X_GRCLASS5097_CHIPSETS & (1 << (chipset & 0x0f)))
+ tesla_class = 0x5097;
+ break;
+ case 0x80:
+ if (NV8X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f)))
+ tesla_class = 0x8297;
+ break;
+ case 0x90:
+ if (NV9X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f)))
+ tesla_class = 0x8297;
+ break;
+ default:
+ break;
+ }
+
+ if (tesla_class == 0) {
+ NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset);
+ nv50_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ ret = nvws->grobj_alloc(nvws, tesla_class, &screen->tesla);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+ nv50_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Sync notifier */
+ ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+ if (ret) {
+ NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+ nv50_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ /* Static tesla init */
+ so = so_new(256, 20);
+
+ so_method(so, screen->tesla, 0x1558, 1);
+ so_data (so, 1);
+ so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1);
+ so_data (so, screen->sync->handle);
+ so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0),
+ NV50TCL_DMA_UNK0__SIZE);
+ for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++)
+ so_data(so, nvws->channel->vram->handle);
+ so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0),
+ NV50TCL_DMA_UNK1__SIZE);
+ for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++)
+ so_data(so, nvws->channel->vram->handle);
+ so_method(so, screen->tesla, 0x121c, 1);
+ so_data (so, 1);
+
+ so_method(so, screen->tesla, 0x13bc, 1);
+ so_data (so, 0x54);
+ so_method(so, screen->tesla, 0x13ac, 1);
+ so_data (so, 1);
+ so_method(so, screen->tesla, 0x16b8, 1);
+ so_data (so, 8);
+
+ /* Shared constant buffer */
+ screen->constbuf = ws->buffer_create(ws, 0, 0, 128 * 4 * 4);
+ if (nvws->res_init(&screen->vp_data_heap, 0, 128)) {
+ NOUVEAU_ERR("Error initialising constant buffer\n");
+ nv50_screen_destroy(&screen->pipe);
+ return NULL;
+ }
+
+ so_method(so, screen->tesla, 0x1280, 3);
+ so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, (NV50_CB_PMISC << 16) | 0x00001000);
+
+ /* Texture sampler/image unit setup - we abuse the constant buffer
+ * upload mechanism for the moment to upload data to the tex config
+ * blocks. At some point we *may* want to go the NVIDIA way of doing
+ * things?
+ */
+ screen->tic = ws->buffer_create(ws, 0, 0, 32 * 8 * 4);
+ so_method(so, screen->tesla, 0x1280, 3);
+ so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, (NV50_CB_TIC << 16) | 0x0800);
+ so_method(so, screen->tesla, 0x1574, 3);
+ so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, 0x00000800);
+
+ screen->tsc = ws->buffer_create(ws, 0, 0, 32 * 8 * 4);
+ so_method(so, screen->tesla, 0x1280, 3);
+ so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, (NV50_CB_TSC << 16) | 0x0800);
+ so_method(so, screen->tesla, 0x155c, 3);
+ so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, 0x00000800);
+
+
+ /* Vertex array limits - max them out */
+ for (i = 0; i < 16; i++) {
+ so_method(so, screen->tesla, 0x1080 + (i * 8), 2);
+ so_data (so, 0x000000ff);
+ so_data (so, 0xffffffff);
+ }
+
+ so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2);
+ so_data (so, fui(0.0));
+ so_data (so, fui(1.0));
+
+ so_method(so, screen->tesla, 0x1234, 1);
+ so_data (so, 1);
+ so_method(so, screen->tesla, 0x1458, 1);
+ so_data (so, 1);
+
+ so_emit(nvws, so);
+ so_ref(so, &screen->static_init);
+ nvws->push_flush(nvws, 0, NULL);
+
+ screen->pipe.winsys = ws;
+
+ screen->pipe.destroy = nv50_screen_destroy;
+
+ screen->pipe.get_name = nv50_screen_get_name;
+ screen->pipe.get_vendor = nv50_screen_get_vendor;
+ screen->pipe.get_param = nv50_screen_get_param;
+ screen->pipe.get_paramf = nv50_screen_get_paramf;
+
+ screen->pipe.is_format_supported = nv50_screen_is_format_supported;
+
+ nv50_screen_init_miptree_functions(&screen->pipe);
+ nv50_surface_init_screen_functions(&screen->pipe);
+
+ return &screen->pipe;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
new file mode 100644
index 00000000000..400ddcef06d
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -0,0 +1,33 @@
+#ifndef __NV50_SCREEN_H__
+#define __NV50_SCREEN_H__
+
+#include "pipe/p_screen.h"
+
+struct nv50_screen {
+ struct pipe_screen pipe;
+
+ struct nouveau_winsys *nvws;
+
+ unsigned cur_pctx;
+
+ struct nouveau_grobj *tesla;
+ struct nouveau_notifier *sync;
+
+ struct pipe_buffer *constbuf;
+ struct nouveau_resource *vp_data_heap;
+
+ struct pipe_buffer *tic;
+ struct pipe_buffer *tsc;
+
+ struct nouveau_stateobj *static_init;
+};
+
+static INLINE struct nv50_screen *
+nv50_screen(struct pipe_screen *screen)
+{
+ return (struct nv50_screen *)screen;
+}
+
+void nv50_surface_init_screen_functions(struct pipe_screen *);
+
+#endif
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
new file mode 100644
index 00000000000..95f9d408b5e
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -0,0 +1,638 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv50_context.h"
+#include "nv50_texture.h"
+
+#include "nouveau/nouveau_stateobj.h"
+
+static void *
+nv50_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nouveau_stateobj *so = so_new(64, 0);
+ struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
+ struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj);
+ unsigned cmask = 0, i;
+
+ /*XXX ignored:
+ * - dither
+ */
+
+ if (cso->blend_enable == 0) {
+ so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8);
+ for (i = 0; i < 8; i++)
+ so_data(so, 0);
+ } else {
+ so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8);
+ for (i = 0; i < 8; i++)
+ so_data(so, 1);
+ so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5);
+ so_data (so, nvgl_blend_eqn(cso->rgb_func));
+ so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_src_factor));
+ so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_dst_factor));
+ so_data (so, nvgl_blend_eqn(cso->alpha_func));
+ so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_src_factor));
+ so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1);
+ so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_dst_factor));
+ }
+
+ if (cso->logicop_enable == 0 ) {
+ so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 1);
+ so_data (so, 0);
+ } else {
+ so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 2);
+ so_data (so, 1);
+ so_data (so, nvgl_logicop_func(cso->logicop_func));
+ }
+
+ if (cso->colormask & PIPE_MASK_R)
+ cmask |= (1 << 0);
+ if (cso->colormask & PIPE_MASK_G)
+ cmask |= (1 << 4);
+ if (cso->colormask & PIPE_MASK_B)
+ cmask |= (1 << 8);
+ if (cso->colormask & PIPE_MASK_A)
+ cmask |= (1 << 12);
+ so_method(so, tesla, NV50TCL_COLOR_MASK(0), 8);
+ for (i = 0; i < 8; i++)
+ so_data(so, cmask);
+
+ bso->pipe = *cso;
+ so_ref(so, &bso->so);
+ return (void *)bso;
+}
+
+static void
+nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->blend = hwcso;
+ nv50->dirty |= NV50_NEW_BLEND;
+}
+
+static void
+nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_blend_stateobj *bso = hwcso;
+
+ so_ref(NULL, &bso->so);
+ FREE(bso);
+}
+
+static INLINE unsigned
+wrap_mode(unsigned wrap)
+{
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return NV50TSC_1_0_WRAPS_REPEAT;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return NV50TSC_1_0_WRAPS_MIRROR_REPEAT;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER;
+ case PIPE_TEX_WRAP_CLAMP:
+ return NV50TSC_1_0_WRAPS_CLAMP;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ return NV50TSC_1_0_WRAPS_MIRROR_CLAMP;
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ return NV50TSC_1_0_WRAPS_REPEAT;
+ }
+}
+static void *
+nv50_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ unsigned *tsc = CALLOC(8, sizeof(unsigned));
+
+ tsc[0] = (0x00024000 |
+ (wrap_mode(cso->wrap_s) << 0) |
+ (wrap_mode(cso->wrap_t) << 3) |
+ (wrap_mode(cso->wrap_r) << 6));
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ tsc[1] |= NV50TSC_1_1_MAGF_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ tsc[1] |= NV50TSC_1_1_MAGF_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ tsc[1] |= NV50TSC_1_1_MINF_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ tsc[1] |= NV50TSC_1_1_MINF_NEAREST;
+ break;
+ }
+
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ tsc[1] |= NV50TSC_1_1_MIPF_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ tsc[1] |= NV50TSC_1_1_MIPF_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ tsc[1] |= NV50TSC_1_1_MIPF_NONE;
+ break;
+ }
+
+ return (void *)tsc;
+}
+
+static void
+nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ int i;
+
+ nv50->sampler_nr = nr;
+ for (i = 0; i < nv50->sampler_nr; i++)
+ nv50->sampler[i] = sampler[i];
+
+ nv50->dirty |= NV50_NEW_SAMPLER;
+}
+
+static void
+nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
+ struct pipe_texture **pt)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ int i;
+
+ for (i = 0; i < nr; i++)
+ pipe_texture_reference((void *)&nv50->miptree[i], pt[i]);
+ for (i = nr; i < nv50->miptree_nr; i++)
+ pipe_texture_reference((void *)&nv50->miptree[i], NULL);
+
+ nv50->miptree_nr = nr;
+ nv50->dirty |= NV50_NEW_TEXTURE;
+}
+
+static void *
+nv50_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nouveau_stateobj *so = so_new(64, 0);
+ struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
+ struct nv50_rasterizer_stateobj *rso =
+ CALLOC_STRUCT(nv50_rasterizer_stateobj);
+
+ /*XXX: ignored
+ * - light_twosize
+ * - point_smooth
+ * - multisample
+ * - point_sprite / sprite_coord_mode
+ */
+
+ so_method(so, tesla, NV50TCL_SHADE_MODEL, 1);
+ so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT :
+ NV50TCL_SHADE_MODEL_SMOOTH);
+
+ so_method(so, tesla, NV50TCL_LINE_WIDTH, 1);
+ so_data (so, fui(cso->line_width));
+ so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1);
+ so_data (so, cso->line_smooth ? 1 : 0);
+ if (cso->line_stipple_enable) {
+ so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1);
+ so_data (so, 1);
+ so_method(so, tesla, NV50TCL_LINE_STIPPLE_PATTERN, 1);
+ so_data (so, (cso->line_stipple_pattern << 8) |
+ cso->line_stipple_factor);
+ } else {
+ so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ so_method(so, tesla, NV50TCL_POINT_SIZE, 1);
+ so_data (so, fui(cso->point_size));
+
+ so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3);
+ if (cso->front_winding == PIPE_WINDING_CCW) {
+ so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+ so_data(so, nvgl_polygon_mode(cso->fill_cw));
+ } else {
+ so_data(so, nvgl_polygon_mode(cso->fill_cw));
+ so_data(so, nvgl_polygon_mode(cso->fill_ccw));
+ }
+ so_data(so, cso->poly_smooth ? 1 : 0);
+
+ so_method(so, tesla, NV50TCL_CULL_FACE_ENABLE, 3);
+ so_data (so, cso->cull_mode != PIPE_WINDING_NONE);
+ if (cso->front_winding == PIPE_WINDING_CCW) {
+ so_data(so, NV50TCL_FRONT_FACE_CCW);
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ so_data(so, NV50TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_CW:
+ so_data(so, NV50TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_BOTH:
+ so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ so_data(so, NV50TCL_CULL_FACE_BACK);
+ break;
+ }
+ } else {
+ so_data(so, NV50TCL_FRONT_FACE_CW);
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ so_data(so, NV50TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_CW:
+ so_data(so, NV50TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_BOTH:
+ so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ so_data(so, NV50TCL_CULL_FACE_BACK);
+ break;
+ }
+ }
+
+ so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_ENABLE, 1);
+ so_data (so, cso->poly_stipple_enable ? 1 : 0);
+
+ so_method(so, tesla, NV50TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL))
+ so_data(so, 1);
+ else
+ so_data(so, 0);
+
+ if (cso->offset_cw || cso->offset_ccw) {
+ so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1);
+ so_data (so, fui(cso->offset_scale));
+ so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1);
+ so_data (so, fui(cso->offset_units));
+ }
+
+ rso->pipe = *cso;
+ so_ref(so, &rso->so);
+ return (void *)rso;
+}
+
+static void
+nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->rasterizer = hwcso;
+ nv50->dirty |= NV50_NEW_RASTERIZER;
+}
+
+static void
+nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_rasterizer_stateobj *rso = hwcso;
+
+ so_ref(NULL, &rso->so);
+ FREE(rso);
+}
+
+static void *
+nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
+ struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj);
+ struct nouveau_stateobj *so = so_new(64, 0);
+
+ so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1);
+ so_data (so, cso->depth.writemask ? 1 : 0);
+ if (cso->depth.enabled) {
+ so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1);
+ so_data (so, 1);
+ so_method(so, tesla, NV50TCL_DEPTH_TEST_FUNC, 1);
+ so_data (so, nvgl_comparison_op(cso->depth.func));
+ } else {
+ so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ /*XXX: yes, I know they're backwards.. header needs fixing */
+ if (cso->stencil[0].enabled) {
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5);
+ so_data (so, 1);
+ so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+ so_data (so, nvgl_comparison_op(cso->stencil[0].func));
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3);
+ so_data (so, cso->stencil[0].ref_value);
+ so_data (so, cso->stencil[0].write_mask);
+ so_data (so, cso->stencil[0].value_mask);
+ } else {
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ if (cso->stencil[1].enabled) {
+ so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8);
+ so_data (so, 1);
+ so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+ so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+ so_data (so, nvgl_comparison_op(cso->stencil[1].func));
+ so_data (so, cso->stencil[1].ref_value);
+ so_data (so, cso->stencil[1].write_mask);
+ so_data (so, cso->stencil[1].value_mask);
+ } else {
+ so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ if (cso->alpha.enabled) {
+ so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1);
+ so_data (so, 1);
+ so_method(so, tesla, NV50TCL_ALPHA_TEST_REF, 2);
+ so_data (so, fui(cso->alpha.ref));
+ so_data (so, nvgl_comparison_op(cso->alpha.func));
+ } else {
+ so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1);
+ so_data (so, 0);
+ }
+
+ zsa->pipe = *cso;
+ so_ref(so, &zsa->so);
+ return (void *)zsa;
+}
+
+static void
+nv50_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->zsa = hwcso;
+ nv50->dirty |= NV50_NEW_ZSA;
+}
+
+static void
+nv50_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_zsa_stateobj *zsa = hwcso;
+
+ so_ref(NULL, &zsa->so);
+ FREE(zsa);
+}
+
+static void *
+nv50_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv50_program *p = CALLOC_STRUCT(nv50_program);
+
+ p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+ p->type = PIPE_SHADER_VERTEX;
+ tgsi_scan_shader(p->pipe.tokens, &p->info);
+ return (void *)p;
+}
+
+static void
+nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->vertprog = hwcso;
+ nv50->dirty |= NV50_NEW_VERTPROG;
+}
+
+static void
+nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_program *p = hwcso;
+
+ nv50_program_destroy(nv50, p);
+ FREE((void*)p->pipe.tokens);
+ FREE(p);
+}
+
+static void *
+nv50_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nv50_program *p = CALLOC_STRUCT(nv50_program);
+
+ p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+ p->type = PIPE_SHADER_FRAGMENT;
+ tgsi_scan_shader(p->pipe.tokens, &p->info);
+ return (void *)p;
+}
+
+static void
+nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->fragprog = hwcso;
+ nv50->dirty |= NV50_NEW_FRAGPROG;
+}
+
+static void
+nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_program *p = hwcso;
+
+ nv50_program_destroy(nv50, p);
+ FREE((void*)p->pipe.tokens);
+ FREE(p);
+}
+
+static void
+nv50_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->blend_colour = *bcol;
+ nv50->dirty |= NV50_NEW_BLEND_COLOUR;
+}
+
+static void
+nv50_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+}
+
+static void
+nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ const struct pipe_constant_buffer *buf )
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ if (shader == PIPE_SHADER_VERTEX) {
+ nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer;
+ nv50->dirty |= NV50_NEW_VERTPROG_CB;
+ } else
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer;
+ nv50->dirty |= NV50_NEW_FRAGPROG_CB;
+ }
+}
+
+static void
+nv50_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->framebuffer = *fb;
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+}
+
+static void
+nv50_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->stipple = *stipple;
+ nv50->dirty |= NV50_NEW_STIPPLE;
+}
+
+static void
+nv50_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->scissor = *s;
+ nv50->dirty |= NV50_NEW_SCISSOR;
+}
+
+static void
+nv50_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->viewport = *vpt;
+ nv50->dirty |= NV50_NEW_VIEWPORT;
+}
+
+static void
+nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count);
+ nv50->vtxbuf_nr = count;
+
+ nv50->dirty |= NV50_NEW_ARRAYS;
+}
+
+static void
+nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_element *ve)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ memcpy(nv50->vtxelt, ve, sizeof(*ve) * count);
+ nv50->vtxelt_nr = count;
+
+ nv50->dirty |= NV50_NEW_ARRAYS;
+}
+
+void
+nv50_init_state_functions(struct nv50_context *nv50)
+{
+ nv50->pipe.create_blend_state = nv50_blend_state_create;
+ nv50->pipe.bind_blend_state = nv50_blend_state_bind;
+ nv50->pipe.delete_blend_state = nv50_blend_state_delete;
+
+ nv50->pipe.create_sampler_state = nv50_sampler_state_create;
+ nv50->pipe.bind_sampler_states = nv50_sampler_state_bind;
+ nv50->pipe.delete_sampler_state = nv50_sampler_state_delete;
+ nv50->pipe.set_sampler_textures = nv50_set_sampler_texture;
+
+ nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create;
+ nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind;
+ nv50->pipe.delete_rasterizer_state = nv50_rasterizer_state_delete;
+
+ nv50->pipe.create_depth_stencil_alpha_state =
+ nv50_depth_stencil_alpha_state_create;
+ nv50->pipe.bind_depth_stencil_alpha_state =
+ nv50_depth_stencil_alpha_state_bind;
+ nv50->pipe.delete_depth_stencil_alpha_state =
+ nv50_depth_stencil_alpha_state_delete;
+
+ nv50->pipe.create_vs_state = nv50_vp_state_create;
+ nv50->pipe.bind_vs_state = nv50_vp_state_bind;
+ nv50->pipe.delete_vs_state = nv50_vp_state_delete;
+
+ nv50->pipe.create_fs_state = nv50_fp_state_create;
+ nv50->pipe.bind_fs_state = nv50_fp_state_bind;
+ nv50->pipe.delete_fs_state = nv50_fp_state_delete;
+
+ nv50->pipe.set_blend_color = nv50_set_blend_color;
+ nv50->pipe.set_clip_state = nv50_set_clip_state;
+ nv50->pipe.set_constant_buffer = nv50_set_constant_buffer;
+ nv50->pipe.set_framebuffer_state = nv50_set_framebuffer_state;
+ nv50->pipe.set_polygon_stipple = nv50_set_polygon_stipple;
+ nv50->pipe.set_scissor_state = nv50_set_scissor_state;
+ nv50->pipe.set_viewport_state = nv50_set_viewport_state;
+
+ nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers;
+ nv50->pipe.set_vertex_elements = nv50_set_vertex_elements;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
new file mode 100644
index 00000000000..198e25f4482
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_context.h"
+#include "nouveau/nouveau_stateobj.h"
+
+static void
+nv50_state_validate_fb(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so = so_new(128, 18);
+ struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+ unsigned i, w, h, gw = 0;
+
+ for (i = 0; i < fb->num_cbufs; i++) {
+ if (!gw) {
+ w = fb->cbufs[i]->width;
+ h = fb->cbufs[i]->height;
+ gw = 1;
+ } else {
+ assert(w == fb->cbufs[i]->width);
+ assert(h == fb->cbufs[i]->height);
+ }
+
+ so_method(so, tesla, NV50TCL_RT_HORIZ(i), 2);
+ so_data (so, fb->cbufs[i]->width);
+ so_data (so, fb->cbufs[i]->height);
+
+ so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5);
+ so_reloc (so, fb->cbufs[i]->buffer, fb->cbufs[i]->offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, fb->cbufs[i]->buffer, fb->cbufs[i]->offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0);
+ switch (fb->cbufs[i]->format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ so_data(so, 0xcf);
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ so_data(so, 0xe8);
+ break;
+ default:
+ NOUVEAU_ERR("AIIII unknown format %s\n",
+ pf_name(fb->cbufs[i]->format));
+ so_data(so, 0xe6);
+ break;
+ }
+ so_data(so, 0x00000000);
+ so_data(so, 0x00000000);
+
+ so_method(so, tesla, 0x1224, 1);
+ so_data (so, 1);
+ }
+
+ if (fb->zsbuf) {
+ if (!gw) {
+ w = fb->zsbuf->width;
+ h = fb->zsbuf->height;
+ gw = 1;
+ } else {
+ assert(w == fb->zsbuf->width);
+ assert(h == fb->zsbuf->height);
+ }
+
+ so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5);
+ so_reloc (so, fb->zsbuf->buffer, fb->zsbuf->offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, fb->zsbuf->buffer, fb->zsbuf->offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0);
+ switch (fb->zsbuf->format) {
+ case PIPE_FORMAT_Z24S8_UNORM:
+ so_data(so, 0x16);
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
+ so_data(so, 0x15);
+ break;
+ default:
+ NOUVEAU_ERR("AIIII unknown format %s\n",
+ pf_name(fb->zsbuf->format));
+ so_data(so, 0x16);
+ break;
+ }
+ so_data(so, 0x00000000);
+ so_data(so, 0x00000000);
+
+ so_method(so, tesla, 0x1538, 1);
+ so_data (so, 1);
+ so_method(so, tesla, 0x1228, 3);
+ so_data (so, fb->zsbuf->width);
+ so_data (so, fb->zsbuf->height);
+ so_data (so, 0x00010001);
+ }
+
+ so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2);
+ so_data (so, w << 16);
+ so_data (so, h << 16);
+ so_method(so, tesla, 0x0e04, 2);
+ so_data (so, w << 16);
+ so_data (so, h << 16);
+ so_method(so, tesla, 0xdf8, 2);
+ so_data (so, 0);
+ so_data (so, h);
+
+ so_ref(so, &nv50->state.fb);
+}
+
+static void
+nv50_state_emit(struct nv50_context *nv50)
+{
+ struct nv50_screen *screen = nv50->screen;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ if (nv50->pctx_id != screen->cur_pctx) {
+ nv50->state.dirty |= 0xffffffff;
+ screen->cur_pctx = nv50->pctx_id;
+ }
+
+ if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER)
+ so_emit(nvws, nv50->state.fb);
+ if (nv50->state.dirty & NV50_NEW_BLEND)
+ so_emit(nvws, nv50->state.blend);
+ if (nv50->state.dirty & NV50_NEW_ZSA)
+ so_emit(nvws, nv50->state.zsa);
+ if (nv50->state.dirty & NV50_NEW_VERTPROG)
+ so_emit(nvws, nv50->state.vertprog);
+ if (nv50->state.dirty & NV50_NEW_FRAGPROG)
+ so_emit(nvws, nv50->state.fragprog);
+ if (nv50->state.dirty & NV50_NEW_RASTERIZER)
+ so_emit(nvws, nv50->state.rast);
+ if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR)
+ so_emit(nvws, nv50->state.blend_colour);
+ if (nv50->state.dirty & NV50_NEW_STIPPLE)
+ so_emit(nvws, nv50->state.stipple);
+ if (nv50->state.dirty & NV50_NEW_SCISSOR)
+ so_emit(nvws, nv50->state.scissor);
+ if (nv50->state.dirty & NV50_NEW_VIEWPORT)
+ so_emit(nvws, nv50->state.viewport);
+ if (nv50->state.dirty & NV50_NEW_SAMPLER)
+ so_emit(nvws, nv50->state.tsc_upload);
+ if (nv50->state.dirty & NV50_NEW_TEXTURE)
+ so_emit(nvws, nv50->state.tic_upload);
+ if (nv50->state.dirty & NV50_NEW_ARRAYS) {
+ so_emit(nvws, nv50->state.vtxfmt);
+ so_emit(nvws, nv50->state.vtxbuf);
+ }
+ nv50->state.dirty = 0;
+
+ so_emit_reloc_markers(nvws, nv50->state.fb);
+ so_emit_reloc_markers(nvws, nv50->state.vertprog);
+ so_emit_reloc_markers(nvws, nv50->state.fragprog);
+ so_emit_reloc_markers(nvws, nv50->state.vtxbuf);
+ so_emit_reloc_markers(nvws, nv50->screen->static_init);
+}
+
+boolean
+nv50_state_validate(struct nv50_context *nv50)
+{
+ const struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so;
+ unsigned i;
+
+ for (i = 0; i < fb->num_cbufs; i++)
+ fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED;
+
+ if (fb->zsbuf)
+ fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED;
+
+ if (nv50->dirty & NV50_NEW_FRAMEBUFFER)
+ nv50_state_validate_fb(nv50);
+
+ if (nv50->dirty & NV50_NEW_BLEND)
+ so_ref(nv50->blend->so, &nv50->state.blend);
+
+ if (nv50->dirty & NV50_NEW_ZSA)
+ so_ref(nv50->zsa->so, &nv50->state.zsa);
+
+ if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB))
+ nv50_vertprog_validate(nv50);
+
+ if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
+ nv50_fragprog_validate(nv50);
+
+ if (nv50->dirty & NV50_NEW_RASTERIZER)
+ so_ref(nv50->rasterizer->so, &nv50->state.rast);
+
+ if (nv50->dirty & NV50_NEW_BLEND_COLOUR) {
+ so = so_new(5, 0);
+ so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
+ so_data (so, fui(nv50->blend_colour.color[0]));
+ so_data (so, fui(nv50->blend_colour.color[1]));
+ so_data (so, fui(nv50->blend_colour.color[2]));
+ so_data (so, fui(nv50->blend_colour.color[3]));
+ so_ref(so, &nv50->state.blend_colour);
+ }
+
+ if (nv50->dirty & NV50_NEW_STIPPLE) {
+ so = so_new(33, 0);
+ so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+ for (i = 0; i < 32; i++)
+ so_data(so, nv50->stipple.stipple[i]);
+ so_ref(so, &nv50->state.stipple);
+ }
+
+ if (nv50->dirty & (NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER)) {
+ struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe;
+ struct pipe_scissor_state *s = &nv50->scissor;
+
+ if (nv50->state.scissor &&
+ (rast->scissor == 0 && nv50->state.scissor_enabled == 0))
+ goto scissor_uptodate;
+ nv50->state.scissor_enabled = rast->scissor;
+
+ so = so_new(3, 0);
+ so_method(so, tesla, 0x0ff4, 2);
+ if (nv50->state.scissor_enabled) {
+ so_data(so, ((s->maxx - s->minx) << 16) | s->minx);
+ so_data(so, ((s->maxy - s->miny) << 16) | s->miny);
+ } else {
+ so_data(so, (8192 << 16));
+ so_data(so, (8192 << 16));
+ }
+ so_ref(so, &nv50->state.scissor);
+ nv50->state.dirty |= NV50_NEW_SCISSOR;
+ }
+scissor_uptodate:
+
+ if (nv50->dirty & NV50_NEW_VIEWPORT) {
+ unsigned bypass;
+
+ if (!nv50->rasterizer->pipe.bypass_clipping)
+ bypass = 0;
+ else
+ bypass = 1;
+
+ if (nv50->state.viewport &&
+ (bypass || !(nv50->dirty & NV50_NEW_VIEWPORT)) &&
+ nv50->state.viewport_bypass == bypass)
+ goto viewport_uptodate;
+ nv50->state.viewport_bypass = bypass;
+
+ so = so_new(12, 0);
+ if (!bypass) {
+ so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3);
+ so_data (so, fui(nv50->viewport.translate[0]));
+ so_data (so, fui(nv50->viewport.translate[1]));
+ so_data (so, fui(nv50->viewport.translate[2]));
+ so_method(so, tesla, NV50TCL_VIEWPORT_UNK0(0), 3);
+ so_data (so, fui(nv50->viewport.scale[0]));
+ so_data (so, fui(-nv50->viewport.scale[1]));
+ so_data (so, fui(nv50->viewport.scale[2]));
+ so_method(so, tesla, 0x192c, 1);
+ so_data (so, 1);
+ so_method(so, tesla, 0x0f90, 1);
+ so_data (so, 0);
+ } else {
+ so_method(so, tesla, 0x192c, 1);
+ so_data (so, 0);
+ so_method(so, tesla, 0x0f90, 1);
+ so_data (so, 1);
+ }
+
+ so_ref(so, &nv50->state.viewport);
+ }
+viewport_uptodate:
+
+ if (nv50->dirty & NV50_NEW_SAMPLER) {
+ int i;
+
+ so = so_new(nv50->sampler_nr * 8 + 3, 0);
+ so_method(so, tesla, 0x0f00, 1);
+ so_data (so, NV50_CB_TSC);
+ so_method(so, tesla, 0x40000f04, nv50->sampler_nr * 8);
+ for (i = 0; i < nv50->sampler_nr; i++)
+ so_datap (so, nv50->sampler[i], 8);
+ so_ref(so, &nv50->state.tsc_upload);
+ }
+
+ if (nv50->dirty & NV50_NEW_TEXTURE)
+ nv50_tex_validate(nv50);
+
+ if (nv50->dirty & NV50_NEW_ARRAYS)
+ nv50_vbo_validate(nv50);
+
+ nv50->state.dirty |= nv50->dirty;
+ nv50->dirty = 0;
+ nv50_state_emit(nv50);
+
+ return TRUE;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
new file mode 100644
index 00000000000..5bf97d3a6bf
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+
+#include "util/u_tile.h"
+
+static void
+nv50_surface_copy(struct pipe_context *pipe, boolean flip,
+ struct pipe_surface *dest, unsigned destx, unsigned desty,
+ struct pipe_surface *src, unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct nv50_context *nv50 = (struct nv50_context *)pipe;
+ struct nouveau_winsys *nvws = nv50->screen->nvws;
+
+ if (flip) {
+ desty += height;
+ while (height--) {
+ nvws->surface_copy(nvws, dest, destx, desty--, src,
+ srcx, srcy++, width, 1);
+ }
+ } else {
+ nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy,
+ width, height);
+ }
+}
+
+static void
+nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nv50_context *nv50 = (struct nv50_context *)pipe;
+ struct nouveau_winsys *nvws = nv50->screen->nvws;
+
+ nvws->surface_fill(nvws, dest, destx, desty, width, height, value);
+}
+
+static void *
+nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *ps,
+ unsigned flags )
+{
+ struct nouveau_winsys *nvws = nv50_screen(screen)->nvws;
+ struct pipe_winsys *ws = screen->winsys;
+ struct nv50_surface *s = nv50_surface(ps);
+ struct nv50_surface m = *s;
+ void *map;
+
+ if (!s->untiled) {
+ s->untiled = ws->buffer_create(ws, 0, 0, ps->buffer->size);
+
+ m.base.buffer = s->untiled;
+ nvws->surface_copy(nvws, &m.base, 0, 0, &s->base, 0, 0,
+ ps->width, ps->height);
+ }
+
+ /* Map original tiled surface to disallow it being validated while
+ * untiled mirror is mapped.
+ */
+ ws->buffer_map(ws, ps->buffer, flags);
+
+ map = ws->buffer_map(ws, s->untiled, flags);
+ if (!map)
+ return NULL;
+
+ return map;
+}
+
+static void
+nv50_surface_unmap(struct pipe_screen *pscreen, struct pipe_surface *ps)
+{
+ struct nouveau_winsys *nvws = nv50_screen(pscreen)->nvws;
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nv50_surface *s = nv50_surface(ps);
+ struct nv50_surface m = *s;
+
+ ws->buffer_unmap(ws, s->untiled);
+ ws->buffer_unmap(ws, ps->buffer);
+
+ m.base.buffer = s->untiled;
+ nvws->surface_copy(nvws, &s->base, 0, 0, &m.base, 0, 0,
+ ps->width, ps->height);
+
+ pipe_buffer_reference(pscreen, &s->untiled, NULL);
+}
+
+void
+nv50_init_surface_functions(struct nv50_context *nv50)
+{
+ nv50->pipe.surface_copy = nv50_surface_copy;
+ nv50->pipe.surface_fill = nv50_surface_fill;
+}
+
+void
+nv50_surface_init_screen_functions(struct pipe_screen *pscreen)
+{
+ pscreen->surface_map = nv50_surface_map;
+ pscreen->surface_unmap = nv50_surface_unmap;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
new file mode 100644
index 00000000000..fde3c97c059
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_context.h"
+#include "nv50_texture.h"
+
+#include "nouveau/nouveau_stateobj.h"
+
+static int
+nv50_tex_construct(struct nouveau_stateobj *so, struct nv50_miptree *mt)
+{
+ switch (mt->base.format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_8_8_8_8);
+ break;
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_1_5_5_5);
+ break;
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_4_4_4_4);
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_5_6_5);
+ break;
+ case PIPE_FORMAT_L8_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_8);
+ break;
+ case PIPE_FORMAT_A8_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_ZERO | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_ZERO | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_ZERO | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_8);
+ break;
+ case PIPE_FORMAT_I8_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_8);
+ break;
+ case PIPE_FORMAT_A8L8_UNORM:
+ so_data(so, NV50TIC_0_0_MAPA_C1 | NV50TIC_0_0_TYPEA_UNORM |
+ NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM |
+ NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_FMT_8_8);
+ break;
+ default:
+ return 1;
+ }
+
+ so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, 0xd0005000);
+ so_data (so, 0x00300000);
+ so_data (so, mt->base.width[0]);
+ so_data (so, (mt->base.depth[0] << 16) | mt->base.height[0]);
+ so_data (so, 0x03000000);
+ so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0);
+
+ return 0;
+}
+
+void
+nv50_tex_validate(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so;
+ int i;
+
+ so = so_new(nv50->miptree_nr * 8 + 3, nv50->miptree_nr * 2);
+ so_method(so, tesla, 0x0f00, 1);
+ so_data (so, NV50_CB_TIC);
+ so_method(so, tesla, 0x40000f04, nv50->miptree_nr * 8);
+ for (i = 0; i < nv50->miptree_nr; i++) {
+ if (nv50_tex_construct(so, nv50->miptree[i])) {
+ NOUVEAU_ERR("failed tex validate\n");
+ so_ref(NULL, &so);
+ return;
+ }
+ }
+
+ so_ref(so, &nv50->state.tic_upload);
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
new file mode 100644
index 00000000000..6861d67b4d5
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_texture.h
@@ -0,0 +1,126 @@
+#ifndef __NV50_TEXTURE_H__
+#define __NV50_TEXTURE_H__
+
+/* It'd be really nice to have these in nouveau_class.h generated by
+ * renouveau like the rest of the object header - but not sure it can
+ * handle non-object stuff nicely - need to look into it.
+ */
+
+/* Texture image control block */
+#define NV50TIC_0_0_MAPA_MASK 0x38000000
+#define NV50TIC_0_0_MAPA_ZERO 0x00000000
+#define NV50TIC_0_0_MAPA_C0 0x10000000
+#define NV50TIC_0_0_MAPA_C1 0x18000000
+#define NV50TIC_0_0_MAPA_C2 0x20000000
+#define NV50TIC_0_0_MAPA_C3 0x28000000
+#define NV50TIC_0_0_MAPA_ONE 0x38000000
+#define NV50TIC_0_0_MAPR_MASK 0x07000000
+#define NV50TIC_0_0_MAPR_ZERO 0x00000000
+#define NV50TIC_0_0_MAPR_C0 0x02000000
+#define NV50TIC_0_0_MAPR_C1 0x03000000
+#define NV50TIC_0_0_MAPR_C2 0x04000000
+#define NV50TIC_0_0_MAPR_C3 0x05000000
+#define NV50TIC_0_0_MAPR_ONE 0x07000000
+#define NV50TIC_0_0_MAPG_MASK 0x00e00000
+#define NV50TIC_0_0_MAPG_ZERO 0x00000000
+#define NV50TIC_0_0_MAPG_C0 0x00400000
+#define NV50TIC_0_0_MAPG_C1 0x00600000
+#define NV50TIC_0_0_MAPG_C2 0x00800000
+#define NV50TIC_0_0_MAPG_C3 0x00a00000
+#define NV50TIC_0_0_MAPG_ONE 0x00e00000
+#define NV50TIC_0_0_MAPB_MASK 0x001c0000
+#define NV50TIC_0_0_MAPB_ZERO 0x00000000
+#define NV50TIC_0_0_MAPB_C0 0x00080000
+#define NV50TIC_0_0_MAPB_C1 0x000c0000
+#define NV50TIC_0_0_MAPB_C2 0x00100000
+#define NV50TIC_0_0_MAPB_C3 0x00140000
+#define NV50TIC_0_0_MAPB_ONE 0x001c0000
+#define NV50TIC_0_0_TYPEA_MASK 0x00038000
+#define NV50TIC_0_0_TYPEA_UNORM 0x00010000
+#define NV50TIC_0_0_TYPER_MASK 0x00007000
+#define NV50TIC_0_0_TYPER_UNORM 0x00002000
+#define NV50TIC_0_0_TYPEG_MASK 0x00000e00
+#define NV50TIC_0_0_TYPEG_UNORM 0x00000400
+#define NV50TIC_0_0_TYPEB_MASK 0x000001c0
+#define NV50TIC_0_0_TYPEB_UNORM 0x00000080
+#define NV50TIC_0_0_FMT_MASK 0x0000003c
+#define NV50TIC_0_0_FMT_8_8_8_8 0x00000008
+#define NV50TIC_0_0_FMT_4_4_4_4 0x00000012
+#define NV50TIC_0_0_FMT_1_5_5_5 0x00000013
+#define NV50TIC_0_0_FMT_5_6_5 0x00000015
+#define NV50TIC_0_0_FMT_8_8 0x00000018
+#define NV50TIC_0_0_FMT_8 0x0000001d
+
+#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff
+#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0
+
+#define NV50TIC_0_2_UNKNOWN_MASK 0xffffffff
+
+#define NV50TIC_0_3_UNKNOWN_MASK 0xffffffff
+
+#define NV50TIC_0_4_WIDTH_MASK 0x0000ffff
+#define NV50TIC_0_4_WIDTH_SHIFT 0
+
+#define NV50TIC_0_5_DEPTH_MASK 0xffff0000
+#define NV50TIC_0_5_DEPTH_SHIFT 16
+#define NV50TIC_0_5_HEIGHT_MASK 0x0000ffff
+#define NV50TIC_0_5_HEIGHT_SHIFT 0
+
+#define NV50TIC_0_6_UNKNOWN_MASK 0xffffffff
+
+#define NV50TIC_0_7_OFFSET_HIGH_MASK 0xffffffff
+#define NV50TIC_0_7_OFFSET_HIGH_SHIFT 0
+
+/* Texture sampler control block */
+#define NV50TSC_1_0_WRAPS_MASK 0x00000007
+#define NV50TSC_1_0_WRAPS_REPEAT 0x00000000
+#define NV50TSC_1_0_WRAPS_MIRROR_REPEAT 0x00000001
+#define NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE 0x00000002
+#define NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER 0x00000003
+#define NV50TSC_1_0_WRAPS_CLAMP 0x00000004
+#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE 0x00000005
+#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER 0x00000006
+#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP 0x00000007
+#define NV50TSC_1_0_WRAPT_MASK 0x00000038
+#define NV50TSC_1_0_WRAPT_REPEAT 0x00000000
+#define NV50TSC_1_0_WRAPT_MIRROR_REPEAT 0x00000008
+#define NV50TSC_1_0_WRAPT_CLAMP_TO_EDGE 0x00000010
+#define NV50TSC_1_0_WRAPT_CLAMP_TO_BORDER 0x00000018
+#define NV50TSC_1_0_WRAPT_CLAMP 0x00000020
+#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_EDGE 0x00000028
+#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_BORDER 0x00000030
+#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP 0x00000038
+#define NV50TSC_1_0_WRAPR_MASK 0x000001c0
+#define NV50TSC_1_0_WRAPR_REPEAT 0x00000000
+#define NV50TSC_1_0_WRAPR_MIRROR_REPEAT 0x00000040
+#define NV50TSC_1_0_WRAPR_CLAMP_TO_EDGE 0x00000080
+#define NV50TSC_1_0_WRAPR_CLAMP_TO_BORDER 0x000000c0
+#define NV50TSC_1_0_WRAPR_CLAMP 0x00000100
+#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_EDGE 0x00000140
+#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_BORDER 0x00000180
+#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP 0x000001c0
+
+#define NV50TSC_1_1_MAGF_MASK 0x00000003
+#define NV50TSC_1_1_MAGF_NEAREST 0x00000001
+#define NV50TSC_1_1_MAGF_LINEAR 0x00000002
+#define NV50TSC_1_1_MINF_MASK 0x00000030
+#define NV50TSC_1_1_MINF_NEAREST 0x00000010
+#define NV50TSC_1_1_MINF_LINEAR 0x00000020
+#define NV50TSC_1_1_MIPF_MASK 0x000000c0
+#define NV50TSC_1_1_MIPF_NONE 0x00000040
+#define NV50TSC_1_1_MIPF_NEAREST 0x00000080
+#define NV50TSC_1_1_MIPF_LINEAR 0x000000c0
+
+#define NV50TSC_1_2_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_3_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_4_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_5_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_6_UNKNOWN_MASK 0xffffffff
+
+#define NV50TSC_1_7_UNKNOWN_MASK 0xffffffff
+
+#endif
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
new file mode 100644
index 00000000000..584336682e4
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "nv50_context.h"
+
+static INLINE unsigned
+nv50_prim(unsigned mode)
+{
+ switch (mode) {
+ case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
+ case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
+ case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
+ case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
+ case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
+ case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
+ case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
+ case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
+ case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
+ default:
+ break;
+ }
+
+ NOUVEAU_ERR("invalid primitive type %d\n", mode);
+ return NV50TCL_VERTEX_BEGIN_POINTS;
+}
+
+boolean
+nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
+ unsigned count)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50_state_validate(nv50);
+
+ BEGIN_RING(tesla, 0x142c, 1);
+ OUT_RING (0);
+ BEGIN_RING(tesla, 0x142c, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(tesla, NV50TCL_VERTEX_BEGIN, 1);
+ OUT_RING (nv50_prim(mode));
+ BEGIN_RING(tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
+ OUT_RING (start);
+ OUT_RING (count);
+ BEGIN_RING(tesla, NV50TCL_VERTEX_END, 1);
+ OUT_RING (0);
+
+ pipe->flush(pipe, 0, NULL);
+ return TRUE;
+}
+
+static INLINE void
+nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 1) {
+ BEGIN_RING(tesla, 0x15e8, 1);
+ OUT_RING (map[0]);
+ map++;
+ count--;
+ }
+
+ while (count) {
+ unsigned nr = count > 2046 ? 2046 : count;
+ int i;
+
+ BEGIN_RING(tesla, 0x400015f0, nr >> 1);
+ for (i = 0; i < nr; i += 2)
+ OUT_RING ((map[1] << 16) | map[0]);
+
+ count -= nr;
+ map += nr;
+ }
+}
+
+static INLINE void
+nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 1) {
+ BEGIN_RING(tesla, 0x15e8, 1);
+ OUT_RING (map[0]);
+ map++;
+ count--;
+ }
+
+ while (count) {
+ unsigned nr = count > 2046 ? 2046 : count;
+ int i;
+
+ BEGIN_RING(tesla, 0x400015f0, nr >> 1);
+ for (i = 0; i < nr; i += 2)
+ OUT_RING ((map[1] << 16) | map[0]);
+
+ count -= nr;
+ map += nr;
+ }
+}
+
+static INLINE void
+nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint8_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ while (count) {
+ unsigned nr = count > 2047 ? 2047 : count;
+
+ BEGIN_RING(tesla, 0x400015e8, nr);
+ OUT_RINGp (map, nr);
+
+ count -= nr;
+ map += nr;
+ }
+}
+
+boolean
+nv50_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer, unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+ void *map = ws->buffer_map(ws, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
+
+ nv50_state_validate(nv50);
+
+ BEGIN_RING(tesla, 0x142c, 1);
+ OUT_RING (0);
+ BEGIN_RING(tesla, 0x142c, 1);
+ OUT_RING (0);
+
+ BEGIN_RING(tesla, NV50TCL_VERTEX_BEGIN, 1);
+ OUT_RING (nv50_prim(mode));
+ switch (indexSize) {
+ case 1:
+ nv50_draw_elements_inline_u08(nv50, map, start, count);
+ break;
+ case 2:
+ nv50_draw_elements_inline_u16(nv50, map, start, count);
+ break;
+ case 4:
+ nv50_draw_elements_inline_u32(nv50, map, start, count);
+ break;
+ default:
+ assert(0);
+ }
+ BEGIN_RING(tesla, NV50TCL_VERTEX_END, 1);
+ OUT_RING (0);
+
+ pipe->flush(pipe, 0, NULL);
+ return TRUE;
+}
+
+void
+nv50_vbo_validate(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *vtxbuf, *vtxfmt;
+ int i, vpi = 0;
+
+ vtxbuf = so_new(nv50->vtxelt_nr * 4, nv50->vtxelt_nr * 2);
+ vtxfmt = so_new(nv50->vtxelt_nr + 1, 0);
+ so_method(vtxfmt, tesla, 0x1ac0, nv50->vtxelt_nr);
+
+ for (i = 0; i < nv50->vtxelt_nr; i++) {
+ struct pipe_vertex_element *ve = &nv50->vtxelt[i];
+ struct pipe_vertex_buffer *vb =
+ &nv50->vtxbuf[ve->vertex_buffer_index];
+
+ switch (ve->src_format) {
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ so_data(vtxfmt, 0x7e080000 | i);
+ break;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ so_data(vtxfmt, 0x7e100000 | i);
+ break;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ so_data(vtxfmt, 0x7e200000 | i);
+ break;
+ case PIPE_FORMAT_R32_FLOAT:
+ so_data(vtxfmt, 0x7e900000 | i);
+ break;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ so_data(vtxfmt, 0x24500000 | i);
+ break;
+ default:
+ {
+ NOUVEAU_ERR("invalid vbo format %s\n",
+ pf_name(ve->src_format));
+ assert(0);
+ return;
+ }
+ }
+
+ so_method(vtxbuf, tesla, 0x900 + (i * 16), 3);
+ so_data (vtxbuf, 0x20000000 | vb->pitch);
+ so_reloc (vtxbuf, vb->buffer, vb->buffer_offset +
+ ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (vtxbuf, vb->buffer, vb->buffer_offset +
+ ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ }
+
+ so_ref (vtxfmt, &nv50->state.vtxfmt);
+ so_ref (vtxbuf, &nv50->state.vtxbuf);
+}
+
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index cd1e6663d86..99b52748575 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -221,6 +221,14 @@ softpipe_create( struct pipe_screen *screen,
softpipe->quad[i].output = sp_quad_output_stage(softpipe);
}
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ softpipe->tgsi.samplers[i].base.get_samples = sp_get_samples;
+ softpipe->tgsi.samplers[i].unit = i;
+ softpipe->tgsi.samplers[i].sp = softpipe;
+ softpipe->tgsi.samplers[i].cache = softpipe->tex_cache[i];
+ softpipe->tgsi.samplers_list[i] = &softpipe->tgsi.samplers[i];
+ }
+
/*
* Create drawing context and plug our rendering stage into it.
*/
@@ -228,6 +236,9 @@ softpipe_create( struct pipe_screen *screen,
if (!softpipe->draw)
goto fail;
+ draw_texture_samplers(softpipe->draw,
+ PIPE_MAX_SAMPLERS, softpipe->tgsi.samplers_list);
+
softpipe->setup = sp_draw_render_stage(softpipe);
if (!softpipe->setup)
goto fail;
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 2b9a2a8ee52..790143aecc9 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -37,6 +37,7 @@
#include "draw/draw_vertex.h"
#include "sp_quad.h"
+#include "sp_tex_sample.h"
/**
@@ -139,6 +140,12 @@ struct softpipe_context {
struct quad_stage *first; /**< points to one of the above stages */
} quad[SP_NUM_QUAD_THREADS];
+ /** TGSI exec things */
+ struct {
+ struct sp_shader_sampler samplers[PIPE_MAX_SAMPLERS];
+ struct sp_shader_sampler *samplers_list[PIPE_MAX_SAMPLERS];
+ } tgsi;
+
/** The primitive drawing context */
struct draw_context *draw;
struct draw_stage *setup;
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 701ee4c72f2..453b0373f0f 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -39,11 +39,19 @@
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_parse.h"
-struct sp_exec_fragment_shader {
+struct sp_exec_fragment_shader
+{
struct sp_fragment_shader base;
};
+/** cast wrapper */
+static INLINE struct sp_exec_fragment_shader *
+sp_exec_fragment_shader(const struct sp_fragment_shader *base)
+{
+ return (struct sp_exec_fragment_shader *) base;
+}
+
/**
* Compute quad X,Y,Z,W for the four fragments in a quad.
@@ -84,12 +92,18 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef,
static void
exec_prepare( const struct sp_fragment_shader *base,
struct tgsi_exec_machine *machine,
- struct tgsi_sampler *samplers )
+ struct tgsi_sampler **samplers )
{
- tgsi_exec_machine_bind_shader( machine,
- base->shader.tokens,
- PIPE_MAX_SAMPLERS,
- samplers );
+ /*
+ * Bind tokens/shader to the interpreter's machine state.
+ * Avoid redundant binding.
+ */
+ if (machine->Tokens != base->shader.tokens) {
+ tgsi_exec_machine_bind_shader( machine,
+ base->shader.tokens,
+ PIPE_MAX_SAMPLERS,
+ samplers );
+ }
}
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 50eb2c07bcb..9a273c87643 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -40,7 +40,7 @@
#include "tgsi/tgsi_sse2.h"
-#ifdef PIPE_ARCH_X86
+#if defined(PIPE_ARCH_X86)
#include "rtasm/rtasm_x86sse.h"
@@ -69,7 +69,7 @@ struct sp_sse_fragment_shader {
static void
fs_sse_prepare( const struct sp_fragment_shader *base,
struct tgsi_exec_machine *machine,
- struct tgsi_sampler *samplers )
+ struct tgsi_sampler **samplers )
{
}
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index 425e13cd283..9cd5784e5bc 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -26,10 +26,10 @@
**************************************************************************/
/**
- * Post-transform vertex buffering. This is an optional part of the
- * softpipe rendering pipeline.
- * Probably not desired in general, but useful for testing/debuggin.
- * Enabled/Disabled with SP_VBUF env var.
+ * Interface between 'draw' module's output and the softpipe rasterizer/setup
+ * code. When the 'draw' module has finished filling a vertex buffer, the
+ * draw_arrays() functions below will be called. Loop over the vertices and
+ * call the point/line/tri setup functions.
*
* Authors
* Brian Paul
@@ -131,21 +131,23 @@ static INLINE cptrf4 get_vert( const void *vertex_buffer,
}
+/**
+ * draw elements / indexed primitives
+ */
static void
sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
struct softpipe_context *softpipe = cvbr->softpipe;
- unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
- unsigned i;
+ const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
const void *vertex_buffer = cvbr->vertex_buffer;
+ unsigned i;
/* XXX: break this dependency - make setup_context live under
* softpipe, rename the old "setup" draw stage to something else.
*/
struct draw_stage *setup = softpipe->setup;
- struct setup_context *setup_ctx = sp_draw_setup_context(softpipe->setup);
-
+ struct setup_context *setup_ctx = sp_draw_setup_context(setup);
switch (cvbr->prim) {
case PIPE_PRIM_POINTS:
@@ -258,13 +260,16 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
struct softpipe_context *softpipe = cvbr->softpipe;
- struct draw_stage *setup = softpipe->setup;
- const void *vertex_buffer = NULL;
const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
+ const void *vertex_buffer =
+ (void *) get_vert(cvbr->vertex_buffer, start, stride);
unsigned i;
- struct setup_context *setup_ctx = sp_draw_setup_context(setup);
- vertex_buffer = (void *)get_vert(cvbr->vertex_buffer, start, stride);
+ /* XXX: break this dependency - make setup_context live under
+ * softpipe, rename the old "setup" draw stage to something else.
+ */
+ struct draw_stage *setup = softpipe->setup;
+ struct setup_context *setup_ctx = sp_draw_setup_context(setup);
switch (cvbr->prim) {
case PIPE_PRIM_POINTS:
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index 1f0cb3e0355..963a2b44f55 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -50,8 +50,9 @@
struct quad_shade_stage
{
- struct quad_stage stage;
- struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS];
+ struct quad_stage stage; /**< base class */
+ struct sp_shader_sampler samplers[PIPE_MAX_SAMPLERS];
+ struct sp_shader_sampler *samplers_list[PIPE_MAX_SAMPLERS];
struct tgsi_exec_machine machine;
struct tgsi_exec_vector *inputs, *outputs;
};
@@ -147,18 +148,10 @@ static void shade_begin(struct quad_stage *qs)
{
struct quad_shade_stage *qss = quad_shade_stage(qs);
struct softpipe_context *softpipe = qs->softpipe;
- unsigned i;
- unsigned num = MAX2(softpipe->num_textures, softpipe->num_samplers);
-
- /* set TGSI sampler state that varies */
- for (i = 0; i < num; i++) {
- qss->samplers[i].state = softpipe->sampler[i];
- qss->samplers[i].texture = softpipe->texture[i];
- }
softpipe->fs->prepare( softpipe->fs,
&qss->machine,
- qss->samplers );
+ (struct tgsi_sampler **) qss->samplers_list );
qs->next->begin(qs->next);
}
@@ -191,12 +184,14 @@ struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
qss->stage.run = shade_quad;
qss->stage.destroy = shade_destroy;
- /* set TGSI sampler state that's constant */
+ /* setup TGSI sampler state */
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
assert(softpipe->tex_cache[i]);
- qss->samplers[i].get_samples = sp_get_samples;
- qss->samplers[i].pipe = &softpipe->pipe;
+ qss->samplers[i].base.get_samples = sp_get_samples;
+ qss->samplers[i].unit = i;
+ qss->samplers[i].sp = softpipe;
qss->samplers[i].cache = softpipe->tex_cache[i];
+ qss->samplers_list[i] = &qss->samplers[i];
}
tgsi_exec_machine_init( &qss->machine );
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 9644dbd168f..12f98c32f53 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -56,6 +56,8 @@ softpipe_get_param(struct pipe_screen *screen, int param)
switch (param) {
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return 8;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 8;
case PIPE_CAP_NPOT_TEXTURES:
return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 476ef3dc8fb..3eff41ffa5f 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -69,7 +69,7 @@ struct sp_fragment_shader {
void (*prepare)( const struct sp_fragment_shader *shader,
struct tgsi_exec_machine *machine,
- struct tgsi_sampler *samplers);
+ struct tgsi_sampler **samplers);
/* Run the shader - this interface will get cleaned up in the
* future:
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 49250ec084c..631c60966c2 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -35,6 +35,7 @@
#include "sp_context.h"
#include "sp_headers.h"
#include "sp_surface.h"
+#include "sp_texture.h"
#include "sp_tex_sample.h"
#include "sp_tile_cache.h"
#include "pipe/p_context.h"
@@ -57,7 +58,11 @@
/**
* Linear interpolation macro
*/
-#define LERP(T, A, B) ( (A) + (T) * ((B) - (A)) )
+static INLINE float
+lerp(float a, float v0, float v1)
+{
+ return v0 + a * (v1 - v0);
+}
/**
@@ -72,13 +77,28 @@ static INLINE float
lerp_2d(float a, float b,
float v00, float v10, float v01, float v11)
{
- const float temp0 = LERP(a, v00, v10);
- const float temp1 = LERP(a, v01, v11);
- return LERP(b, temp0, temp1);
+ const float temp0 = lerp(a, v00, v10);
+ const float temp1 = lerp(a, v01, v11);
+ return lerp(b, temp0, temp1);
}
/**
+ * As above, but 3D interpolation of 8 values.
+ */
+static INLINE float
+lerp_3d(float a, float b, float c,
+ float v000, float v100, float v010, float v110,
+ float v001, float v101, float v011, float v111)
+{
+ const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
+ const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
+ return lerp(c, temp0, temp1);
+}
+
+
+
+/**
* If A is a signed integer, A % B doesn't give the right value for A < 0
* (in terms of texture repeat). Just casting to unsigned fixes that.
*/
@@ -86,250 +106,275 @@ lerp_2d(float a, float b,
/**
- * Apply texture coord wrapping mode and return integer texture index.
+ * Apply texture coord wrapping mode and return integer texture indexes
+ * for a vector of four texcoords (S or T or P).
* \param wrapMode PIPE_TEX_WRAP_x
- * \param s the texcoord
+ * \param s the incoming texcoords
* \param size the texture image size
+ * \param icoord returns the integer texcoords
* \return integer texture index
*/
-static INLINE int
-nearest_texcoord(unsigned wrapMode, float s, unsigned size)
+static INLINE void
+nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
+ int icoord[4])
{
- int i;
+ uint ch;
switch (wrapMode) {
case PIPE_TEX_WRAP_REPEAT:
/* s limited to [0,1) */
/* i limited to [0,size-1] */
- i = util_ifloor(s * size);
- i = REMAINDER(i, size);
- return i;
+ for (ch = 0; ch < 4; ch++) {
+ int i = util_ifloor(s[ch] * size);
+ icoord[ch] = REMAINDER(i, size);
+ }
+ return;
case PIPE_TEX_WRAP_CLAMP:
/* s limited to [0,1] */
/* i limited to [0,size-1] */
- if (s <= 0.0F)
- i = 0;
- else if (s >= 1.0F)
- i = size - 1;
- else
- i = util_ifloor(s * size);
- return i;
+ for (ch = 0; ch < 4; ch++) {
+ if (s[ch] <= 0.0F)
+ icoord[ch] = 0;
+ else if (s[ch] >= 1.0F)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(s[ch] * size);
+ }
+ return;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
{
/* s limited to [min,max] */
/* i limited to [0, size-1] */
const float min = 1.0F / (2.0F * size);
const float max = 1.0F - min;
- if (s < min)
- i = 0;
- else if (s > max)
- i = size - 1;
- else
- i = util_ifloor(s * size);
+ for (ch = 0; ch < 4; ch++) {
+ if (s[ch] < min)
+ icoord[ch] = 0;
+ else if (s[ch] > max)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(s[ch] * size);
+ }
}
- return i;
+ return;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
{
/* s limited to [min,max] */
/* i limited to [-1, size] */
const float min = -1.0F / (2.0F * size);
const float max = 1.0F - min;
- if (s <= min)
- i = -1;
- else if (s >= max)
- i = size;
- else
- i = util_ifloor(s * size);
+ for (ch = 0; ch < 4; ch++) {
+ if (s[ch] <= min)
+ icoord[ch] = -1;
+ else if (s[ch] >= max)
+ icoord[ch] = size;
+ else
+ icoord[ch] = util_ifloor(s[ch] * size);
+ }
}
- return i;
+ return;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
{
const float min = 1.0F / (2.0F * size);
const float max = 1.0F - min;
- const int flr = util_ifloor(s);
- float u;
- if (flr & 1)
- u = 1.0F - (s - (float) flr);
- else
- u = s - (float) flr;
- if (u < min)
- i = 0;
- else if (u > max)
- i = size - 1;
- else
- i = util_ifloor(u * size);
+ for (ch = 0; ch < 4; ch++) {
+ const int flr = util_ifloor(s[ch]);
+ float u;
+ if (flr & 1)
+ u = 1.0F - (s[ch] - (float) flr);
+ else
+ u = s[ch] - (float) flr;
+ if (u < min)
+ icoord[ch] = 0;
+ else if (u > max)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(u * size);
+ }
}
- return i;
+ return;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
- {
+ for (ch = 0; ch < 4; ch++) {
/* s limited to [0,1] */
/* i limited to [0,size-1] */
- const float u = fabsf(s);
+ const float u = fabsf(s[ch]);
if (u <= 0.0F)
- i = 0;
+ icoord[ch] = 0;
else if (u >= 1.0F)
- i = size - 1;
+ icoord[ch] = size - 1;
else
- i = util_ifloor(u * size);
+ icoord[ch] = util_ifloor(u * size);
}
- return i;
+ return;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
{
/* s limited to [min,max] */
/* i limited to [0, size-1] */
const float min = 1.0F / (2.0F * size);
const float max = 1.0F - min;
- const float u = fabsf(s);
- if (u < min)
- i = 0;
- else if (u > max)
- i = size - 1;
- else
- i = util_ifloor(u * size);
+ for (ch = 0; ch < 4; ch++) {
+ const float u = fabsf(s[ch]);
+ if (u < min)
+ icoord[ch] = 0;
+ else if (u > max)
+ icoord[ch] = size - 1;
+ else
+ icoord[ch] = util_ifloor(u * size);
+ }
}
- return i;
+ return;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
{
/* s limited to [min,max] */
/* i limited to [0, size-1] */
const float min = -1.0F / (2.0F * size);
const float max = 1.0F - min;
- const float u = fabsf(s);
- if (u < min)
- i = -1;
- else if (u > max)
- i = size;
- else
- i = util_ifloor(u * size);
+ for (ch = 0; ch < 4; ch++) {
+ const float u = fabsf(s[ch]);
+ if (u < min)
+ icoord[ch] = -1;
+ else if (u > max)
+ icoord[ch] = size;
+ else
+ icoord[ch] = util_ifloor(u * size);
+ }
}
- return i;
+ return;
default:
assert(0);
- return 0;
}
}
/**
- * Used to compute texel locations for linear sampling.
+ * Used to compute texel locations for linear sampling for four texcoords.
* \param wrapMode PIPE_TEX_WRAP_x
- * \param s the texcoord
+ * \param s the texcoords
* \param size the texture image size
- * \param i0 returns first texture index
- * \param i1 returns second texture index (usually *i0 + 1)
- * \param a returns blend factor/weight between texture indexes
+ * \param icoord0 returns first texture indexes
+ * \param icoord1 returns second texture indexes (usually icoord0 + 1)
+ * \param w returns blend factor/weight between texture indexes
+ * \param icoord returns the computed integer texture coords
*/
static INLINE void
-linear_texcoord(unsigned wrapMode, float s, unsigned size,
- int *i0, int *i1, float *a)
+linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
{
- float u;
+ uint ch;
+
switch (wrapMode) {
case PIPE_TEX_WRAP_REPEAT:
- u = s * size - 0.5F;
- *i0 = REMAINDER(util_ifloor(u), size);
- *i1 = REMAINDER(*i0 + 1, size);
- break;
+ for (ch = 0; ch < 4; ch++) {
+ float u = s[ch] * size - 0.5F;
+ icoord0[ch] = REMAINDER(util_ifloor(u), size);
+ icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
+ w[ch] = FRAC(u);
+ }
+ break;;
case PIPE_TEX_WRAP_CLAMP:
- if (s <= 0.0F)
- u = 0.0F;
- else if (s >= 1.0F)
- u = (float) size;
- else
- u = s * size;
- u -= 0.5F;
- *i0 = util_ifloor(u);
- *i1 = *i0 + 1;
- break;
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], 0.0F, 1.0F);
+ u = u * size - 0.5f;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
+ break;;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- if (s <= 0.0F)
- u = 0.0F;
- else if (s >= 1.0F)
- u = (float) size;
- else
- u = s * size;
- u -= 0.5F;
- *i0 = util_ifloor(u);
- *i1 = *i0 + 1;
- if (*i0 < 0)
- *i0 = 0;
- if (*i1 >= (int) size)
- *i1 = size - 1;
- break;
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], 0.0F, 1.0F);
+ u = u * size - 0.5f;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord0[ch] < 0)
+ icoord0[ch] = 0;
+ if (icoord1[ch] >= (int) size)
+ icoord1[ch] = size - 1;
+ w[ch] = FRAC(u);
+ }
+ break;;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
{
const float min = -1.0F / (2.0F * size);
const float max = 1.0F - min;
- if (s <= min)
- u = min * size;
- else if (s >= max)
- u = max * size;
- else
- u = s * size;
- u -= 0.5F;
- *i0 = util_ifloor(u);
- *i1 = *i0 + 1;
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], min, max);
+ u = u * size - 0.5f;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
}
- break;
+ break;;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
- {
- const int flr = util_ifloor(s);
+ for (ch = 0; ch < 4; ch++) {
+ const int flr = util_ifloor(s[ch]);
+ float u;
if (flr & 1)
- u = 1.0F - (s - (float) flr);
+ u = 1.0F - (s[ch] - (float) flr);
else
- u = s - (float) flr;
- u = (u * size) - 0.5F;
- *i0 = util_ifloor(u);
- *i1 = *i0 + 1;
- if (*i0 < 0)
- *i0 = 0;
- if (*i1 >= (int) size)
- *i1 = size - 1;
+ u = s[ch] - (float) flr;
+ u = u * size - 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord0[ch] < 0)
+ icoord0[ch] = 0;
+ if (icoord1[ch] >= (int) size)
+ icoord1[ch] = size - 1;
+ w[ch] = FRAC(u);
}
- break;
+ break;;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
- u = fabsf(s);
- if (u >= 1.0F)
- u = (float) size;
- else
- u *= size;
- u -= 0.5F;
- *i0 = util_ifloor(u);
- *i1 = *i0 + 1;
- break;
+ for (ch = 0; ch < 4; ch++) {
+ float u = fabsf(s[ch]);
+ if (u >= 1.0F)
+ u = (float) size;
+ else
+ u *= size;
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
+ break;;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- u = fabsf(s);
- if (u >= 1.0F)
- u = (float) size;
- else
- u *= size;
- u -= 0.5F;
- *i0 = util_ifloor(u);
- *i1 = *i0 + 1;
- if (*i0 < 0)
- *i0 = 0;
- if (*i1 >= (int) size)
- *i1 = size - 1;
- break;
+ for (ch = 0; ch < 4; ch++) {
+ float u = fabsf(s[ch]);
+ if (u >= 1.0F)
+ u = (float) size;
+ else
+ u *= size;
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord0[ch] < 0)
+ icoord0[ch] = 0;
+ if (icoord1[ch] >= (int) size)
+ icoord1[ch] = size - 1;
+ w[ch] = FRAC(u);
+ }
+ break;;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
{
const float min = -1.0F / (2.0F * size);
const float max = 1.0F - min;
- u = fabsf(s);
- if (u <= min)
- u = min * size;
- else if (u >= max)
- u = max * size;
- else
- u *= size;
- u -= 0.5F;
- *i0 = util_ifloor(u);
- *i1 = *i0 + 1;
+ for (ch = 0; ch < 4; ch++) {
+ float u = fabsf(s[ch]);
+ if (u <= min)
+ u = min * size;
+ else if (u >= max)
+ u = max * size;
+ else
+ u *= size;
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
}
- break;
+ break;;
default:
assert(0);
}
- *a = FRAC(u);
}
@@ -337,21 +382,27 @@ linear_texcoord(unsigned wrapMode, float s, unsigned size,
* For RECT textures / unnormalized texcoords
* Only a subset of wrap modes supported.
*/
-static INLINE int
-nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size)
+static INLINE void
+nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
+ int icoord[4])
{
- int i;
+ uint ch;
switch (wrapMode) {
case PIPE_TEX_WRAP_CLAMP:
- i = util_ifloor(s);
- return CLAMP(i, 0, (int) size-1);
+ for (ch = 0; ch < 4; ch++) {
+ int i = util_ifloor(s[ch]);
+ icoord[ch]= CLAMP(i, 0, (int) size-1);
+ }
+ return;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
/* fall-through */
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- return util_ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) );
+ for (ch = 0; ch < 4; ch++) {
+ icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
+ }
+ return;
default:
assert(0);
- return 0;
}
}
@@ -361,30 +412,36 @@ nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size)
* Only a subset of wrap modes supported.
*/
static INLINE void
-linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size,
- int *i0, int *i1, float *a)
+linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
+ int icoord0[4], int icoord1[4], float w[4])
{
+ uint ch;
switch (wrapMode) {
case PIPE_TEX_WRAP_CLAMP:
- /* Not exactly what the spec says, but it matches NVIDIA output */
- s = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f);
- *i0 = util_ifloor(s);
- *i1 = *i0 + 1;
- break;
+ for (ch = 0; ch < 4; ch++) {
+ /* Not exactly what the spec says, but it matches NVIDIA output */
+ float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ w[ch] = FRAC(u);
+ }
+ return;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
/* fall-through */
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- s = CLAMP(s, 0.5F, (float) size - 0.5F);
- s -= 0.5F;
- *i0 = util_ifloor(s);
- *i1 = *i0 + 1;
- if (*i1 > (int) size - 1)
- *i1 = size - 1;
+ for (ch = 0; ch < 4; ch++) {
+ float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
+ u -= 0.5F;
+ icoord0[ch] = util_ifloor(u);
+ icoord1[ch] = icoord0[ch] + 1;
+ if (icoord1[ch] > (int) size - 1)
+ icoord1[ch] = size - 1;
+ w[ch] = FRAC(u);
+ }
break;
default:
assert(0);
}
- *a = FRAC(s);
}
@@ -463,7 +520,8 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
* This is only done for fragment shaders, not vertex shaders.
*/
static float
-compute_lambda(struct tgsi_sampler *sampler,
+compute_lambda(const struct pipe_texture *tex,
+ const struct pipe_sampler_state *sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
@@ -471,7 +529,7 @@ compute_lambda(struct tgsi_sampler *sampler,
{
float rho, lambda;
- assert(sampler->state->normalized_coords);
+ assert(sampler->normalized_coords);
assert(s);
{
@@ -479,7 +537,7 @@ compute_lambda(struct tgsi_sampler *sampler,
float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT];
dsdx = fabsf(dsdx);
dsdy = fabsf(dsdy);
- rho = MAX2(dsdx, dsdy) * sampler->texture->width[0];
+ rho = MAX2(dsdx, dsdy) * tex->width[0];
}
if (t) {
float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
@@ -487,7 +545,7 @@ compute_lambda(struct tgsi_sampler *sampler,
float max;
dtdx = fabsf(dtdx);
dtdy = fabsf(dtdy);
- max = MAX2(dtdx, dtdy) * sampler->texture->height[0];
+ max = MAX2(dtdx, dtdy) * tex->height[0];
rho = MAX2(rho, max);
}
if (p) {
@@ -496,13 +554,13 @@ compute_lambda(struct tgsi_sampler *sampler,
float max;
dpdx = fabsf(dpdx);
dpdy = fabsf(dpdy);
- max = MAX2(dpdx, dpdy) * sampler->texture->depth[0];
+ max = MAX2(dpdx, dpdy) * tex->depth[0];
rho = MAX2(rho, max);
}
lambda = util_fast_log2(rho);
- lambda += lodbias + sampler->state->lod_bias;
- lambda = CLAMP(lambda, sampler->state->min_lod, sampler->state->max_lod);
+ lambda += lodbias + sampler->lod_bias;
+ lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
return lambda;
}
@@ -514,9 +572,14 @@ compute_lambda(struct tgsi_sampler *sampler,
* 2. Determine if we're minifying or magnifying
* 3. If minifying, choose mipmap levels
* 4. Return image filter to use within mipmap images
+ * \param level0 Returns first mipmap level to sample from
+ * \param level1 Returns second mipmap level to sample from
+ * \param levelBlend Returns blend factor between levels, in [0,1]
+ * \param imgFilter Returns either the min or mag filter, depending on lambda
*/
static void
-choose_mipmap_levels(struct tgsi_sampler *sampler,
+choose_mipmap_levels(const struct pipe_texture *texture,
+ const struct pipe_sampler_state *sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
@@ -524,25 +587,25 @@ choose_mipmap_levels(struct tgsi_sampler *sampler,
unsigned *level0, unsigned *level1, float *levelBlend,
unsigned *imgFilter)
{
- if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+ if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
/* no mipmap selection needed */
- *level0 = *level1 = CLAMP((int) sampler->state->min_lod,
- 0, (int) sampler->texture->last_level);
+ *level0 = *level1 = CLAMP((int) sampler->min_lod,
+ 0, (int) texture->last_level);
- if (sampler->state->min_img_filter != sampler->state->mag_img_filter) {
+ if (sampler->min_img_filter != sampler->mag_img_filter) {
/* non-mipmapped texture, but still need to determine if doing
* minification or magnification.
*/
- float lambda = compute_lambda(sampler, s, t, p, lodbias);
+ float lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
if (lambda <= 0.0) {
- *imgFilter = sampler->state->mag_img_filter;
+ *imgFilter = sampler->mag_img_filter;
}
else {
- *imgFilter = sampler->state->min_img_filter;
+ *imgFilter = sampler->min_img_filter;
}
}
else {
- *imgFilter = sampler->state->mag_img_filter;
+ *imgFilter = sampler->mag_img_filter;
}
}
else {
@@ -550,32 +613,32 @@ choose_mipmap_levels(struct tgsi_sampler *sampler,
if (1)
/* fragment shader */
- lambda = compute_lambda(sampler, s, t, p, lodbias);
+ lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
else
/* vertex shader */
lambda = lodbias; /* not really a bias, but absolute LOD */
if (lambda <= 0.0) { /* XXX threshold depends on the filter */
/* magnifying */
- *imgFilter = sampler->state->mag_img_filter;
+ *imgFilter = sampler->mag_img_filter;
*level0 = *level1 = 0;
}
else {
/* minifying */
- *imgFilter = sampler->state->min_img_filter;
+ *imgFilter = sampler->min_img_filter;
/* choose mipmap level(s) and compute the blend factor between them */
- if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
+ if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
/* Nearest mipmap level */
const int lvl = (int) (lambda + 0.5);
*level0 =
- *level1 = CLAMP(lvl, 0, (int) sampler->texture->last_level);
+ *level1 = CLAMP(lvl, 0, (int) texture->last_level);
}
else {
/* Linear interpolation between mipmap levels */
const int lvl = (int) lambda;
- *level0 = CLAMP(lvl, 0, (int) sampler->texture->last_level);
- *level1 = CLAMP(lvl + 1, 0, (int) sampler->texture->last_level);
+ *level0 = CLAMP(lvl, 0, (int) texture->last_level);
+ *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
*levelBlend = FRAC(lambda); /* blending weight between levels */
}
}
@@ -598,23 +661,29 @@ choose_mipmap_levels(struct tgsi_sampler *sampler,
* sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1...
*/
static void
-get_texel(struct tgsi_sampler *sampler,
+get_texel(const struct tgsi_sampler *tgsi_sampler,
unsigned face, unsigned level, int x, int y, int z,
float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
{
- if (x < 0 || x >= (int) sampler->texture->width[level] ||
- y < 0 || y >= (int) sampler->texture->height[level] ||
- z < 0 || z >= (int) sampler->texture->depth[level]) {
- rgba[0][j] = sampler->state->border_color[0];
- rgba[1][j] = sampler->state->border_color[1];
- rgba[2][j] = sampler->state->border_color[2];
- rgba[3][j] = sampler->state->border_color[3];
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
+
+ if (x < 0 || x >= (int) texture->width[level] ||
+ y < 0 || y >= (int) texture->height[level] ||
+ z < 0 || z >= (int) texture->depth[level]) {
+ rgba[0][j] = sampler->border_color[0];
+ rgba[1][j] = sampler->border_color[1];
+ rgba[2][j] = sampler->border_color[2];
+ rgba[3][j] = sampler->border_color[3];
}
else {
const int tx = x % TILE_SIZE;
const int ty = y % TILE_SIZE;
const struct softpipe_cached_tile *tile
- = sp_get_cached_tile_tex(sampler->pipe, sampler->cache,
+ = sp_get_cached_tile_tex(sp, samp->cache,
x, y, z, face, level);
rgba[0][j] = tile->data.color[ty][tx][0];
rgba[1][j] = tile->data.color[ty][tx][1];
@@ -624,7 +693,7 @@ get_texel(struct tgsi_sampler *sampler,
{
debug_printf("Get texel %f %f %f %f from %s\n",
rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
- pf_name(sampler->texture->format));
+ pf_name(texture->format));
}
}
}
@@ -682,7 +751,7 @@ shadow_compare(uint compare_func,
* Could probably extend for 3D...
*/
static void
-sp_get_samples_2d_common(struct tgsi_sampler *sampler,
+sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
@@ -690,95 +759,115 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler,
float rgba[NUM_CHANNELS][QUAD_SIZE],
const unsigned faces[4])
{
- const uint compare_func = sampler->state->compare_func;
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ const struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
+ const uint compare_func = sampler->compare_func;
unsigned level0, level1, j, imgFilter;
int width, height;
float levelBlend;
- choose_mipmap_levels(sampler, s, t, p, lodbias,
+ choose_mipmap_levels(texture, sampler, s, t, p, lodbias,
&level0, &level1, &levelBlend, &imgFilter);
- assert(sampler->state->normalized_coords);
+ assert(sampler->normalized_coords);
- width = sampler->texture->width[level0];
- height = sampler->texture->height[level0];
+ width = texture->width[level0];
+ height = texture->height[level0];
assert(width > 0);
switch (imgFilter) {
case PIPE_TEX_FILTER_NEAREST:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = nearest_texcoord(sampler->state->wrap_s, s[j], width);
- int y = nearest_texcoord(sampler->state->wrap_t, t[j], height);
- get_texel(sampler, faces[j], level0, x, y, 0, rgba, j);
- if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare(compare_func, rgba, p, j);
- }
-
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- unsigned c;
- x = x / 2;
- y = y / 2;
- get_texel(sampler, faces[j], level1, x, y, 0, rgba2, j);
- if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
- shadow_compare(compare_func, rgba2, p, j);
+ {
+ int x[4], y[4];
+ nearest_texcoord_4(sampler->wrap_s, s, width, x);
+ nearest_texcoord_4(sampler->wrap_t, t, height, y);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ shadow_compare(compare_func, rgba, p, j);
}
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]);
+ if (level0 != level1) {
+ /* get texels from second mipmap level and blend */
+ float rgba2[4][4];
+ unsigned c;
+ x[j] /= 2;
+ y[j] /= 2;
+ get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0,
+ rgba2, j);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
+ shadow_compare(compare_func, rgba2, p, j);
+ }
+
+ for (c = 0; c < NUM_CHANNELS; c++) {
+ rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+ }
}
}
}
break;
case PIPE_TEX_FILTER_LINEAR:
case PIPE_TEX_FILTER_ANISO:
- for (j = 0; j < QUAD_SIZE; j++) {
- float tx[4][4], a, b;
- int x0, y0, x1, y1, c;
- linear_texcoord(sampler->state->wrap_s, s[j], width, &x0, &x1, &a);
- linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &b);
- get_texel(sampler, faces[j], level0, x0, y0, 0, tx, 0);
- get_texel(sampler, faces[j], level0, x1, y0, 0, tx, 1);
- get_texel(sampler, faces[j], level0, x0, y1, 0, tx, 2);
- get_texel(sampler, faces[j], level0, x1, y1, 0, tx, 3);
- if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare(compare_func, tx, p, 0);
- shadow_compare(compare_func, tx, p, 1);
- shadow_compare(compare_func, tx, p, 2);
- shadow_compare(compare_func, tx, p, 3);
- }
-
- for (c = 0; c < 4; c++) {
- rgba[c][j] = lerp_2d(a, b, tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
- }
-
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- x0 = x0 / 2;
- y0 = y0 / 2;
- x1 = x1 / 2;
- y1 = y1 / 2;
- get_texel(sampler, faces[j], level1, x0, y0, 0, tx, 0);
- get_texel(sampler, faces[j], level1, x1, y0, 0, tx, 1);
- get_texel(sampler, faces[j], level1, x0, y1, 0, tx, 2);
- get_texel(sampler, faces[j], level1, x1, y1, 0, tx, 3);
- if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
+ {
+ int x0[4], y0[4], x1[4], y1[4];
+ float xw[4], yw[4]; /* weights */
+
+ linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
+ linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ float tx[4][4]; /* texels */
+ int c;
+ get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0);
+ get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1);
+ get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2);
+ get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
shadow_compare(compare_func, tx, p, 0);
shadow_compare(compare_func, tx, p, 1);
shadow_compare(compare_func, tx, p, 2);
shadow_compare(compare_func, tx, p, 3);
}
+ /* interpolate R, G, B, A */
for (c = 0; c < 4; c++) {
- rgba2[c][j] = lerp_2d(a, b,
- tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+ rgba[c][j] = lerp_2d(xw[j], yw[j],
+ tx[c][0], tx[c][1],
+ tx[c][2], tx[c][3]);
}
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]);
+ if (level0 != level1) {
+ /* get texels from second mipmap level and blend */
+ float rgba2[4][4];
+ x0[j] /= 2;
+ y0[j] /= 2;
+ x1[j] /= 2;
+ y1[j] /= 2;
+ get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0);
+ get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1);
+ get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2);
+ get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
+ shadow_compare(compare_func, tx, p, 0);
+ shadow_compare(compare_func, tx, p, 1);
+ shadow_compare(compare_func, tx, p, 2);
+ shadow_compare(compare_func, tx, p, 3);
+ }
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba2[c][j] = lerp_2d(xw[j], yw[j],
+ tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+ }
+
+ for (c = 0; c < NUM_CHANNELS; c++) {
+ rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+ }
}
}
}
@@ -789,8 +878,8 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler,
}
-static void
-sp_get_samples_1d(struct tgsi_sampler *sampler,
+static INLINE void
+sp_get_samples_1d(const struct tgsi_sampler *sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
@@ -803,8 +892,8 @@ sp_get_samples_1d(struct tgsi_sampler *sampler,
}
-static void
-sp_get_samples_2d(struct tgsi_sampler *sampler,
+static INLINE void
+sp_get_samples_2d(const struct tgsi_sampler *sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
@@ -816,28 +905,33 @@ sp_get_samples_2d(struct tgsi_sampler *sampler,
}
-static void
-sp_get_samples_3d(struct tgsi_sampler *sampler,
+static INLINE void
+sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
float lodbias,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ const struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
/* get/map pipe_surfaces corresponding to 3D tex slices */
unsigned level0, level1, j, imgFilter;
int width, height, depth;
float levelBlend;
const uint face = 0;
- choose_mipmap_levels(sampler, s, t, p, lodbias,
+ choose_mipmap_levels(texture, sampler, s, t, p, lodbias,
&level0, &level1, &levelBlend, &imgFilter);
- assert(sampler->state->normalized_coords);
+ assert(sampler->normalized_coords);
- width = sampler->texture->width[level0];
- height = sampler->texture->height[level0];
- depth = sampler->texture->depth[level0];
+ width = texture->width[level0];
+ height = texture->height[level0];
+ depth = texture->depth[level0];
assert(width > 0);
assert(height > 0);
@@ -845,89 +939,89 @@ sp_get_samples_3d(struct tgsi_sampler *sampler,
switch (imgFilter) {
case PIPE_TEX_FILTER_NEAREST:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = nearest_texcoord(sampler->state->wrap_s, s[j], width);
- int y = nearest_texcoord(sampler->state->wrap_t, t[j], height);
- int z = nearest_texcoord(sampler->state->wrap_r, p[j], depth);
- get_texel(sampler, face, level0, x, y, z, rgba, j);
-
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- unsigned c;
- x /= 2;
- y /= 2;
- z /= 2;
- get_texel(sampler, face, level1, x, y, z, rgba2, j);
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = LERP(levelBlend, rgba2[c][j], rgba[c][j]);
+ {
+ int x[4], y[4], z[4];
+ nearest_texcoord_4(sampler->wrap_s, s, width, x);
+ nearest_texcoord_4(sampler->wrap_t, t, height, y);
+ nearest_texcoord_4(sampler->wrap_r, p, depth, z);
+ for (j = 0; j < QUAD_SIZE; j++) {
+ get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j);
+ if (level0 != level1) {
+ /* get texels from second mipmap level and blend */
+ float rgba2[4][4];
+ unsigned c;
+ x[j] /= 2;
+ y[j] /= 2;
+ z[j] /= 2;
+ get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j);
+ for (c = 0; c < NUM_CHANNELS; c++) {
+ rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]);
+ }
}
}
}
break;
case PIPE_TEX_FILTER_LINEAR:
case PIPE_TEX_FILTER_ANISO:
- for (j = 0; j < QUAD_SIZE; j++) {
- float texel0[4][4], texel1[4][4];
- float xw, yw, zw; /* interpolation weights */
- int x0, x1, y0, y1, z0, z1, c;
- linear_texcoord(sampler->state->wrap_s, s[j], width, &x0, &x1, &xw);
- linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &yw);
- linear_texcoord(sampler->state->wrap_r, p[j], depth, &z0, &z1, &zw);
- get_texel(sampler, face, level0, x0, y0, z0, texel0, 0);
- get_texel(sampler, face, level0, x1, y0, z0, texel0, 1);
- get_texel(sampler, face, level0, x0, y1, z0, texel0, 2);
- get_texel(sampler, face, level0, x1, y1, z0, texel0, 3);
- get_texel(sampler, face, level0, x0, y0, z1, texel1, 0);
- get_texel(sampler, face, level0, x1, y0, z1, texel1, 1);
- get_texel(sampler, face, level0, x0, y1, z1, texel1, 2);
- get_texel(sampler, face, level0, x1, y1, z1, texel1, 3);
-
- /* 3D lerp */
- for (c = 0; c < 4; c++) {
- float ctemp0[4][4], ctemp1[4][4];
- ctemp0[c][j] = lerp_2d(xw, yw,
- texel0[c][0], texel0[c][1],
- texel0[c][2], texel0[c][3]);
- ctemp1[c][j] = lerp_2d(xw, yw,
- texel1[c][0], texel1[c][1],
- texel1[c][2], texel1[c][3]);
- rgba[c][j] = LERP(zw, ctemp0[c][j], ctemp1[c][j]);
- }
-
- if (level0 != level1) {
- /* get texels from second mipmap level and blend */
- float rgba2[4][4];
- x0 /= 2;
- y0 /= 2;
- z0 /= 2;
- x1 /= 2;
- y1 /= 2;
- z1 /= 2;
- get_texel(sampler, face, level1, x0, y0, z0, texel0, 0);
- get_texel(sampler, face, level1, x1, y0, z0, texel0, 1);
- get_texel(sampler, face, level1, x0, y1, z0, texel0, 2);
- get_texel(sampler, face, level1, x1, y1, z0, texel0, 3);
- get_texel(sampler, face, level1, x0, y0, z1, texel1, 0);
- get_texel(sampler, face, level1, x1, y0, z1, texel1, 1);
- get_texel(sampler, face, level1, x0, y1, z1, texel1, 2);
- get_texel(sampler, face, level1, x1, y1, z1, texel1, 3);
-
- /* 3D lerp */
+ {
+ int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
+ float xw[4], yw[4], zw[4]; /* interpolation weights */
+ linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
+ linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
+ linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ int c;
+ float tx0[4][4], tx1[4][4];
+ get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0);
+ get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1);
+ get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2);
+ get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3);
+ get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0);
+ get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1);
+ get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2);
+ get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3);
+
+ /* interpolate R, G, B, A */
for (c = 0; c < 4; c++) {
- float ctemp0[4][4], ctemp1[4][4];
- ctemp0[c][j] = lerp_2d(xw, yw,
- texel0[c][0], texel0[c][1],
- texel0[c][2], texel0[c][3]);
- ctemp1[c][j] = lerp_2d(xw, yw,
- texel1[c][0], texel1[c][1],
- texel1[c][2], texel1[c][3]);
- rgba2[c][j] = LERP(zw, ctemp0[c][j], ctemp1[c][j]);
+ rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
+ tx0[c][0], tx0[c][1],
+ tx0[c][2], tx0[c][3],
+ tx1[c][0], tx1[c][1],
+ tx1[c][2], tx1[c][3]);
}
- /* blend mipmap levels */
- for (c = 0; c < NUM_CHANNELS; c++) {
- rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]);
+ if (level0 != level1) {
+ /* get texels from second mipmap level and blend */
+ float rgba2[4][4];
+ x0[j] /= 2;
+ y0[j] /= 2;
+ z0[j] /= 2;
+ x1[j] /= 2;
+ y1[j] /= 2;
+ z1[j] /= 2;
+ get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0);
+ get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1);
+ get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2);
+ get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3);
+ get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0);
+ get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1);
+ get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2);
+ get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3);
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j],
+ tx0[c][0], tx0[c][1],
+ tx0[c][2], tx0[c][3],
+ tx1[c][0], tx1[c][1],
+ tx1[c][2], tx1[c][3]);
+ }
+
+ /* blend mipmap levels */
+ for (c = 0; c < NUM_CHANNELS; c++) {
+ rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
+ }
}
}
}
@@ -939,7 +1033,7 @@ sp_get_samples_3d(struct tgsi_sampler *sampler,
static void
-sp_get_samples_cube(struct tgsi_sampler *sampler,
+sp_get_samples_cube(const struct tgsi_sampler *sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
@@ -956,62 +1050,73 @@ sp_get_samples_cube(struct tgsi_sampler *sampler,
static void
-sp_get_samples_rect(struct tgsi_sampler *sampler,
+sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
float lodbias,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- //sp_get_samples_2d_common(sampler, s, t, p, lodbias, rgba, faces);
- static const uint face = 0;
- const uint compare_func = sampler->state->compare_func;
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ const struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
+ const uint face = 0;
+ const uint compare_func = sampler->compare_func;
unsigned level0, level1, j, imgFilter;
int width, height;
float levelBlend;
- choose_mipmap_levels(sampler, s, t, p, lodbias,
+ choose_mipmap_levels(texture, sampler, s, t, p, lodbias,
&level0, &level1, &levelBlend, &imgFilter);
/* texture RECTS cannot be mipmapped */
assert(level0 == level1);
- width = sampler->texture->width[level0];
- height = sampler->texture->height[level0];
+ width = texture->width[level0];
+ height = texture->height[level0];
assert(width > 0);
switch (imgFilter) {
case PIPE_TEX_FILTER_NEAREST:
- for (j = 0; j < QUAD_SIZE; j++) {
- int x = nearest_texcoord_unnorm(sampler->state->wrap_s, s[j], width);
- int y = nearest_texcoord_unnorm(sampler->state->wrap_t, t[j], height);
- get_texel(sampler, face, level0, x, y, 0, rgba, j);
- if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare(compare_func, rgba, p, j);
+ {
+ int x[4], y[4];
+ nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x);
+ nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y);
+ for (j = 0; j < QUAD_SIZE; j++) {
+ get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ shadow_compare(compare_func, rgba, p, j);
+ }
}
}
break;
case PIPE_TEX_FILTER_LINEAR:
case PIPE_TEX_FILTER_ANISO:
- for (j = 0; j < QUAD_SIZE; j++) {
- float tx[4][4], a, b;
- int x0, y0, x1, y1, c;
- linear_texcoord_unnorm(sampler->state->wrap_s, s[j], width, &x0, &x1, &a);
- linear_texcoord_unnorm(sampler->state->wrap_t, t[j], height, &y0, &y1, &b);
- get_texel(sampler, face, level0, x0, y0, 0, tx, 0);
- get_texel(sampler, face, level0, x1, y0, 0, tx, 1);
- get_texel(sampler, face, level0, x0, y1, 0, tx, 2);
- get_texel(sampler, face, level0, x1, y1, 0, tx, 3);
- if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- shadow_compare(compare_func, tx, p, 0);
- shadow_compare(compare_func, tx, p, 1);
- shadow_compare(compare_func, tx, p, 2);
- shadow_compare(compare_func, tx, p, 3);
- }
-
- for (c = 0; c < 4; c++) {
- rgba[c][j] = lerp_2d(a, b, tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+ {
+ int x0[4], y0[4], x1[4], y1[4];
+ float xw[4], yw[4]; /* weights */
+ linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw);
+ linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw);
+ for (j = 0; j < QUAD_SIZE; j++) {
+ float tx[4][4]; /* texels */
+ int c;
+ get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0);
+ get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1);
+ get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2);
+ get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3);
+ if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ shadow_compare(compare_func, tx, p, 0);
+ shadow_compare(compare_func, tx, p, 1);
+ shadow_compare(compare_func, tx, p, 2);
+ shadow_compare(compare_func, tx, p, 3);
+ }
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp_2d(xw[j], yw[j],
+ tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
+ }
}
}
break;
@@ -1021,49 +1126,45 @@ sp_get_samples_rect(struct tgsi_sampler *sampler,
}
-
-
/**
* Called via tgsi_sampler::get_samples()
- * Use the sampler's state setting to get a filtered RGBA value
- * from the sampler's texture.
- *
- * XXX we can implement many versions of this function, each
- * tightly coded for a specific combination of sampler state
- * (nearest + repeat), (bilinear mipmap + clamp), etc.
- *
- * The update_samplers() function in st_atom_sampler.c could create
- * a new tgsi_sampler object for each state combo it finds....
+ * Get four filtered RGBA values from the sampler's texture.
*/
void
-sp_get_samples(struct tgsi_sampler *sampler,
+sp_get_samples(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
float lodbias,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
- if (!sampler->texture)
+ const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+ const struct softpipe_context *sp = samp->sp;
+ const uint unit = samp->unit;
+ const struct pipe_texture *texture = sp->texture[unit];
+ const struct pipe_sampler_state *sampler = sp->sampler[unit];
+
+ if (!texture)
return;
- switch (sampler->texture->target) {
+ switch (texture->target) {
case PIPE_TEXTURE_1D:
- assert(sampler->state->normalized_coords);
- sp_get_samples_1d(sampler, s, t, p, lodbias, rgba);
+ assert(sampler->normalized_coords);
+ sp_get_samples_1d(tgsi_sampler, s, t, p, lodbias, rgba);
break;
case PIPE_TEXTURE_2D:
- if (sampler->state->normalized_coords)
- sp_get_samples_2d(sampler, s, t, p, lodbias, rgba);
+ if (sampler->normalized_coords)
+ sp_get_samples_2d(tgsi_sampler, s, t, p, lodbias, rgba);
else
- sp_get_samples_rect(sampler, s, t, p, lodbias, rgba);
+ sp_get_samples_rect(tgsi_sampler, s, t, p, lodbias, rgba);
break;
case PIPE_TEXTURE_3D:
- assert(sampler->state->normalized_coords);
- sp_get_samples_3d(sampler, s, t, p, lodbias, rgba);
+ assert(sampler->normalized_coords);
+ sp_get_samples_3d(tgsi_sampler, s, t, p, lodbias, rgba);
break;
case PIPE_TEXTURE_CUBE:
- assert(sampler->state->normalized_coords);
- sp_get_samples_cube(sampler, s, t, p, lodbias, rgba);
+ assert(sampler->normalized_coords);
+ sp_get_samples_cube(tgsi_sampler, s, t, p, lodbias, rgba);
break;
default:
assert(0);
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 404bfd0c365..f0a8a787782 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -1,8 +1,56 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#ifndef SP_TEX_SAMPLE_H
#define SP_TEX_SAMPLE_H
-struct tgsi_sampler;
+#include "tgsi/tgsi_exec.h"
+
+
+/**
+ * Subclass of tgsi_sampler
+ */
+struct sp_shader_sampler
+{
+ struct tgsi_sampler base; /**< base class */
+
+ uint unit;
+ struct softpipe_context *sp;
+ struct softpipe_tile_cache *cache;
+};
+
+
+
+static INLINE const struct sp_shader_sampler *
+sp_shader_sampler(const struct tgsi_sampler *sampler)
+{
+ return (const struct sp_shader_sampler *) sampler;
+}
extern void
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index b50c9845133..78b0efa46d2 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -494,11 +494,11 @@ tex_cache_pos(int x, int y, int z, int face, int level)
* Tiles are read-only and indexed with more params.
*/
const struct softpipe_cached_tile *
-sp_get_cached_tile_tex(struct pipe_context *pipe,
+sp_get_cached_tile_tex(struct softpipe_context *sp,
struct softpipe_tile_cache *tc, int x, int y, int z,
int face, int level)
{
- struct pipe_screen *screen = pipe->screen;
+ struct pipe_screen *screen = sp->pipe.screen;
/* tile pos in framebuffer: */
const int tile_x = x & ~(TILE_SIZE - 1);
const int tile_y = y & ~(TILE_SIZE - 1);
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h
index bc96c941f61..a66bb50bcc1 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.h
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.h
@@ -96,7 +96,7 @@ sp_get_cached_tile(struct softpipe_context *softpipe,
struct softpipe_tile_cache *tc, int x, int y);
extern const struct softpipe_cached_tile *
-sp_get_cached_tile_tex(struct pipe_context *pipe,
+sp_get_cached_tile_tex(struct softpipe_context *softpipe,
struct softpipe_tile_cache *tc, int x, int y, int z,
int face, int level);