summaryrefslogtreecommitdiff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.c8
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.c13
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.c46
-rw-r--r--src/gallium/drivers/i915simple/i915_batch.h4
-rw-r--r--src/gallium/drivers/i915simple/i915_context.c16
-rw-r--r--src/gallium/drivers/i915simple/i915_fpc_translate.c3
-rw-r--r--src/gallium/drivers/i915simple/i915_screen.c2
-rw-r--r--src/gallium/drivers/i915simple/i915_texture.c30
-rw-r--r--src/gallium/drivers/i915simple/i915_winsys.h1
-rw-r--r--src/gallium/drivers/i965simple/brw_tex_layout.c5
-rw-r--r--src/gallium/drivers/i965simple/brw_vs_emit.c8
-rw-r--r--src/gallium/drivers/i965simple/brw_wm_glsl.c6
-rw-r--r--src/gallium/drivers/identity/id_context.c2
-rw-r--r--src/gallium/drivers/identity/id_drm.c4
-rw-r--r--src/gallium/drivers/identity/id_screen.c2
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c9
-rw-r--r--src/gallium/drivers/nv04/nv04_surface_2d.c102
-rw-r--r--src/gallium/drivers/nv20/nv20_vertprog.c8
-rw-r--r--src/gallium/drivers/nv30/nv30_fragprog.c8
-rw-r--r--src/gallium/drivers/nv30/nv30_screen.c2
-rw-r--r--src/gallium/drivers/nv30/nv30_vertprog.c8
-rw-r--r--src/gallium/drivers/nv40/nv40_fragprog.c8
-rw-r--r--src/gallium/drivers/nv40/nv40_vertprog.c8
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c18
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h1
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c28
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c46
-rw-r--r--src/gallium/drivers/nv50/nv50_query.c12
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c55
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c19
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c91
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c12
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c31
-rw-r--r--src/gallium/drivers/nv50/nv50_texture.h71
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.c76
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c87
-rw-r--r--src/gallium/drivers/r300/Makefile19
-rw-r--r--src/gallium/drivers/r300/r300_context.c2
-rw-r--r--src/gallium/drivers/r300/r300_context.h91
-rw-r--r--src/gallium/drivers/r300/r300_debug.c198
-rw-r--r--src/gallium/drivers/r300/r300_debug.h211
-rw-r--r--src/gallium/drivers/r300/r300_emit.c265
-rw-r--r--src/gallium/drivers/r300/r300_emit.h17
-rw-r--r--src/gallium/drivers/r300/r300_fs.c164
-rw-r--r--src/gallium/drivers/r300/r300_fs.h15
-rw-r--r--src/gallium/drivers/r300/r300_fs_inlines.h158
-rw-r--r--src/gallium/drivers/r300/r300_screen.c7
-rw-r--r--src/gallium/drivers/r300/r300_state.c29
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c3
-rw-r--r--src/gallium/drivers/r300/r300_state_invariant.c11
-rw-r--r--src/gallium/drivers/r300/r300_surface.c38
-rw-r--r--src/gallium/drivers/r300/r300_texture.c22
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c337
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.h41
-rw-r--r--src/gallium/drivers/r300/r300_vs.c520
-rw-r--r--src/gallium/drivers/r300/r300_vs.h137
-rw-r--r--src/gallium/drivers/r300/r3xx_fs.c100
-rw-r--r--src/gallium/drivers/r300/r3xx_fs.h50
-rw-r--r--src/gallium/drivers/r300/r5xx_fs.c540
-rw-r--r--src/gallium/drivers/r300/r5xx_fs.h106
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.c21
-rw-r--r--src/gallium/drivers/trace/tr_drm.c6
-rw-r--r--src/gallium/drivers/trace/tr_screen.c3
63 files changed, 1639 insertions, 2322 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 5a889a6119d..58a8b5d0b0f 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -1834,9 +1834,9 @@ emit_instruction(struct codegen *gen,
case TGSI_OPCODE_ENDIF:
return emit_ENDIF(gen, inst);
- case TGSI_OPCODE_BGNLOOP2:
+ case TGSI_OPCODE_BGNLOOP:
return emit_BGNLOOP(gen, inst);
- case TGSI_OPCODE_ENDLOOP2:
+ case TGSI_OPCODE_ENDLOOP:
return emit_ENDLOOP(gen, inst);
case TGSI_OPCODE_BRK:
return emit_BRK(gen, inst);
@@ -1875,9 +1875,9 @@ emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
assert(gen->num_imm < MAX_TEMPS);
for (ch = 0; ch < 4; ch++) {
- float val = immed->u.ImmediateFloat32[ch].Float;
+ float val = immed->u[ch].Float;
- if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) {
+ if (ch > 0 && val == immed->u[ch - 1].Float) {
/* re-use previous register */
gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
}
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index e26594448f0..6a63a0e6ced 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -44,13 +44,6 @@
-static unsigned
-minify(unsigned d)
-{
- return MAX2(1, d>>1);
-}
-
-
static void
cell_texture_layout(struct cell_texture *ct)
{
@@ -424,7 +417,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer)
if (!ctrans->map)
return NULL; /* out of memory */
- if (transfer->usage & PIPE_TRANSFER_READ) {
+ if (transfer->usage == PIPE_TRANSFER_READ ||
+ transfer->usage == PIPE_TRANSFER_READ_WRITE) {
/* need to untwiddle the texture to make a linear version */
const uint bpp = pf_get_size(ct->base.format);
if (bpp == 4) {
@@ -465,7 +459,8 @@ cell_transfer_unmap(struct pipe_screen *screen,
PIPE_BUFFER_USAGE_CPU_READ);
}
- if (transfer->usage & PIPE_TRANSFER_WRITE) {
+ if (transfer->usage == PIPE_TRANSFER_WRITE ||
+ transfer->usage == PIPE_TRANSFER_READ_WRITE) {
/* The user wrote new texture data into the mapped buffer.
* We need to convert the new linear data into the twiddled/tiled format.
*/
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index e27df2dfb38..6db9501128c 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -952,7 +952,6 @@ exec_instruction(
break;
case TGSI_OPCODE_RCP:
- /* TGSI_OPCODE_RECIP */
FETCH( &r[0], 0, CHAN_X );
r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -961,7 +960,6 @@ exec_instruction(
break;
case TGSI_OPCODE_RSQ:
- /* TGSI_OPCODE_RECIPSQRT */
FETCH( &r[0], 0, CHAN_X );
r[0].q = micro_sqrt(r[0].q);
r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
@@ -1115,7 +1113,6 @@ exec_instruction(
break;
case TGSI_OPCODE_MAD:
- /* TGSI_OPCODE_MADD */
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
@@ -1136,8 +1133,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
+ case TGSI_OPCODE_LRP:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
@@ -1158,21 +1154,11 @@ exec_instruction(
ASSERT (0);
break;
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
+ case TGSI_OPCODE_DP2A:
ASSERT (0);
break;
- case TGSI_OPCODE_INDEX:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_NEGATE:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
+ case TGSI_OPCODE_FRC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
r[0].q = micro_frc(r[0].q);
@@ -1184,8 +1170,7 @@ exec_instruction(
ASSERT (0);
break;
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
+ case TGSI_OPCODE_FLR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
r[0].q = micro_flr(r[0].q);
@@ -1201,8 +1186,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
+ case TGSI_OPCODE_EX2:
FETCH(&r[0], 0, CHAN_X);
r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
@@ -1212,8 +1196,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
+ case TGSI_OPCODE_LG2:
FETCH( &r[0], 0, CHAN_X );
r[0].q = micro_lg2(r[0].q);
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -1221,8 +1204,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
+ case TGSI_OPCODE_POW:
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 1, CHAN_X);
@@ -1233,7 +1215,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_CROSSPRODUCT:
+ case TGSI_OPCODE_XPD:
/* TGSI_OPCODE_XPD */
FETCH(&r[0], 0, CHAN_Y);
FETCH(&r[1], 1, CHAN_Z);
@@ -1275,10 +1257,6 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- ASSERT (0);
- break;
-
case TGSI_OPCODE_ABS:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
@@ -1780,9 +1758,9 @@ exec_instruction(
mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
break;
- case TGSI_OPCODE_LOOP:
+ case TGSI_OPCODE_BGNFOR:
/* fall-through (for now) */
- case TGSI_OPCODE_BGNLOOP2:
+ case TGSI_OPCODE_BGNLOOP:
/* push LoopMask and ContMasks */
ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
@@ -1790,9 +1768,9 @@ exec_instruction(
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
break;
- case TGSI_OPCODE_ENDLOOP:
+ case TGSI_OPCODE_ENDFOR:
/* fall-through (for now at least) */
- case TGSI_OPCODE_ENDLOOP2:
+ case TGSI_OPCODE_ENDLOOP:
/* Restore ContMask, but don't pop */
ASSERT(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h
index a433cf054de..c6e68ea38a2 100644
--- a/src/gallium/drivers/i915simple/i915_batch.h
+++ b/src/gallium/drivers/i915simple/i915_batch.h
@@ -50,8 +50,8 @@ i915_batchbuffer_check( struct i915_batchbuffer *batch,
size_t dwords,
size_t relocs )
{
- /** TODO JB: Check relocs */
- return dwords * 4 <= batch->size - (batch->ptr - batch->map);
+ return dwords * 4 <= batch->size - (batch->ptr - batch->map) &&
+ relocs <= (batch->max_relocs - batch->relocs);
}
static INLINE size_t
diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c
index ccf9bb31fb0..bf69c8e9f53 100644
--- a/src/gallium/drivers/i915simple/i915_context.c
+++ b/src/gallium/drivers/i915simple/i915_context.c
@@ -142,10 +142,14 @@ i915_is_texture_referenced( struct pipe_context *pipe,
unsigned face, unsigned level)
{
/**
- * FIXME: Optimize.
+ * FIXME: Return the corrent result. We can't alays return referenced
+ * since it causes a double flush within the vbo module.
*/
-
+#if 0
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+#else
+ return 0;
+#endif
}
static unsigned int
@@ -153,10 +157,14 @@ i915_is_buffer_referenced( struct pipe_context *pipe,
struct pipe_buffer *buf)
{
/**
- * FIXME: Optimize.
+ * FIXME: Return the corrent result. We can't alays return referenced
+ * since it causes a double flush within the vbo module.
*/
-
+#if 0
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+#else
+ return 0;
+#endif
}
diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c
index 961c1bf2134..89504ced276 100644
--- a/src/gallium/drivers/i915simple/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c
@@ -975,8 +975,9 @@ i915_translate_instructions(struct i915_fp_compile *p,
= &parse.FullToken.FullImmediate;
const uint pos = p->num_immediates++;
uint j;
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
- p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float;
+ p->immediates[pos][j] = imm->u[j].Float;
}
}
break;
diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c
index f4aa8e60d81..a3de38d5860 100644
--- a/src/gallium/drivers/i915simple/i915_screen.c
+++ b/src/gallium/drivers/i915simple/i915_screen.c
@@ -232,6 +232,8 @@ i915_get_tex_transfer(struct pipe_screen *screen,
if (trans) {
pipe_texture_reference(&trans->base.texture, texture);
trans->base.format = trans->base.format;
+ trans->base.x = x;
+ trans->base.y = y;
trans->base.width = w;
trans->base.height = h;
trans->base.block = texture->block;
diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c
index ca8e87af8d1..03f0e14e7c8 100644
--- a/src/gallium/drivers/i915simple/i915_texture.c
+++ b/src/gallium/drivers/i915simple/i915_texture.c
@@ -72,11 +72,6 @@ static const int step_offsets[6][2] = {
{-1, 1}
};
-static unsigned minify( unsigned d )
-{
- return MAX2(1, d>>1);
-}
-
static unsigned
power_of_two(unsigned x)
{
@@ -160,10 +155,10 @@ i915_miptree_set_image_offset(struct i915_texture *tex,
/**
- * Special case to deal with display targets.
+ * Special case to deal with scanout textures.
*/
static boolean
-i915_displaytarget_layout(struct i915_texture *tex)
+i915_scanout_layout(struct i915_texture *tex)
{
struct pipe_texture *pt = &tex->base;
@@ -177,9 +172,13 @@ i915_displaytarget_layout(struct i915_texture *tex)
i915_miptree_set_image_offset( tex, 0, 0, 0, 0 );
if (tex->base.width[0] >= 128) {
+#if 0
tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
+#else
+ tex->stride = 2048 * 4; /* TODO fix when backend is smarter */
+#endif
tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
-#if 0 /* used for tiled display targets */
+#if 0 /* used for tiled textures */
tex->tiled = 1;
#endif
} else {
@@ -209,9 +208,9 @@ i945_miptree_layout_2d( struct i915_texture *tex )
unsigned nblocksx = pt->nblocksx[0];
unsigned nblocksy = pt->nblocksy[0];
- /* used for tiled display targets */
- if (0)
- if (i915_displaytarget_layout(tex))
+ /* used for scanouts that need special layouts */
+ if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
+ if (i915_scanout_layout(tex))
return;
tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
@@ -584,6 +583,7 @@ i915_texture_create(struct pipe_screen *screen,
struct i915_screen *i915screen = i915_screen(screen);
struct i915_texture *tex = CALLOC_STRUCT(i915_texture);
size_t tex_size;
+ unsigned buf_usage = 0;
if (!tex)
return NULL;
@@ -605,9 +605,11 @@ i915_texture_create(struct pipe_screen *screen,
tex_size = tex->stride * tex->total_nblocksy;
- tex->buffer = screen->buffer_create(screen, 64,
- PIPE_BUFFER_USAGE_PIXEL,
- tex_size);
+ buf_usage = PIPE_BUFFER_USAGE_PIXEL;
+ if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
+ buf_usage |= I915_BUFFER_USAGE_SCANOUT;
+
+ tex->buffer = screen->buffer_create(screen, 64, buf_usage, tex_size);
if (!tex->buffer)
goto fail;
diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h
index ff5b34f193a..711db91c367 100644
--- a/src/gallium/drivers/i915simple/i915_winsys.h
+++ b/src/gallium/drivers/i915simple/i915_winsys.h
@@ -109,6 +109,7 @@ struct i915_winsys {
#define I915_BUFFER_ACCESS_READ 0x2
#define I915_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0)
+#define I915_BUFFER_USAGE_SCANOUT (PIPE_BUFFER_USAGE_CUSTOM << 1)
/**
diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c
index 8aea8c05581..998ffaeac4a 100644
--- a/src/gallium/drivers/i965simple/brw_tex_layout.c
+++ b/src/gallium/drivers/i965simple/brw_tex_layout.c
@@ -65,11 +65,6 @@ unsigned intel_compressed_alignment(unsigned internalFormat)
}
#endif
-static unsigned minify( unsigned d )
-{
- return MAX2(1, d>>1);
-}
-
static void intel_miptree_set_image_offset(struct brw_texture *tex,
unsigned level,
diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c
index e03d6534821..3ee82d95b3a 100644
--- a/src/gallium/drivers/i965simple/brw_vs_emit.c
+++ b/src/gallium/drivers/i965simple/brw_vs_emit.c
@@ -1294,10 +1294,10 @@ void brw_vs_emit(struct brw_vs_compile *c)
case TGSI_TOKEN_TYPE_IMMEDIATE: {
struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate;
assert(imm->Immediate.NrTokens == 4 + 1);
- c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float;
- c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float;
- c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float;
- c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u.ImmediateFloat32[3].Float;
+ c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u[0].Float;
+ c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u[1].Float;
+ c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u[2].Float;
+ c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u[3].Float;
c->prog_data.num_imm++;
}
break;
diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c
index ab6410aa607..db759639328 100644
--- a/src/gallium/drivers/i965simple/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c
@@ -947,7 +947,7 @@ static void brw_wm_emit_instruction( struct brw_wm_compile *c,
#endif
break;
- case TGSI_OPCODE_LOOP:
+ case TGSI_OPCODE_BGNFOR:
c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
break;
case TGSI_OPCODE_BRK:
@@ -958,11 +958,11 @@ static void brw_wm_emit_instruction( struct brw_wm_compile *c,
brw_CONT(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
- case TGSI_OPCODE_ENDLOOP:
+ case TGSI_OPCODE_ENDFOR:
c->loop_insn--;
c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]);
/* patch all the BREAK instructions from
- last BEGINLOOP */
+ last BGNFOR */
while (c->inst0 > c->loop_inst[c->loop_insn]) {
c->inst0--;
if (c->inst0->header.opcode == BRW_OPCODE_BREAK) {
diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index a500ec60454..4e700089e33 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -501,7 +501,7 @@ identity_set_sampler_textures(struct pipe_context *_pipe,
pipe->set_sampler_textures(pipe,
num_textures,
- _textures);
+ textures);
}
static void
diff --git a/src/gallium/drivers/identity/id_drm.c b/src/gallium/drivers/identity/id_drm.c
index 555220f8531..e5342ac06e3 100644
--- a/src/gallium/drivers/identity/id_drm.c
+++ b/src/gallium/drivers/identity/id_drm.c
@@ -60,7 +60,7 @@ identity_drm_create_screen(struct drm_api *_api, int fd,
screen = api->create_screen(api, fd, arg);
return identity_screen_create(screen);
-};
+}
static struct pipe_context *
identity_drm_create_context(struct drm_api *_api,
@@ -77,7 +77,7 @@ identity_drm_create_context(struct drm_api *_api,
pipe = identity_context_create(_screen, pipe);
return pipe;
-};
+}
static boolean
identity_drm_buffer_from_texture(struct drm_api *_api,
diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c
index 259f1be36e7..26439637d08 100644
--- a/src/gallium/drivers/identity/id_screen.c
+++ b/src/gallium/drivers/identity/id_screen.c
@@ -289,6 +289,7 @@ identity_screen_surface_buffer_create(struct pipe_screen *_screen,
unsigned height,
enum pipe_format format,
unsigned usage,
+ unsigned tex_usage,
unsigned *stride)
{
struct identity_screen *id_screen = identity_screen(_screen);
@@ -300,6 +301,7 @@ identity_screen_surface_buffer_create(struct pipe_screen *_screen,
height,
format,
usage,
+ tex_usage,
stride);
if (result)
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index 832366e6462..e4cf91c005c 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -4,6 +4,8 @@
#include <util/u_memory.h>
+#include <errno.h>
+
#include "nouveau/nouveau_bo.h"
#include "nouveau_winsys.h"
#include "nouveau_screen.h"
@@ -141,12 +143,13 @@ nouveau_screen_bo_map_range(struct pipe_screen *pscreen, struct pipe_buffer *pb,
unsigned offset, unsigned length, unsigned usage)
{
struct nouveau_bo *bo = nouveau_bo(pb);
+ uint32_t flags = nouveau_screen_map_flags(usage);
int ret;
- ret = nouveau_bo_map_range(bo, offset, length,
- nouveau_screen_map_flags(usage));
+ ret = nouveau_bo_map_range(bo, offset, length, flags);
if (ret) {
- debug_printf("map_range failed: %d\n", ret);
+ if (!(flags & NOUVEAU_BO_NOWAIT) || ret != -EBUSY)
+ debug_printf("map_range failed: %d\n", ret);
return NULL;
}
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index f315cf54f05..bbbcb54c467 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -110,10 +110,10 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
unsigned cx;
unsigned cy;
- /* POT or GTFO */
- assert(!(w & (w - 1)) && !(h & (h - 1)));
+#if 0
/* That's the way she likes it */
assert(src_pitch == ((struct nv04_surface *)dst)->pitch);
+#endif
BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
OUT_RELOCo(chan, dst_bo,
@@ -133,7 +133,7 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
for (cy = 0; cy < h; cy += sub_h) {
for (cx = 0; cx < w; cx += sub_w) {
BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
- OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(cx, cy) *
+ OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(cx+dx, cy+dy) *
dst->texture->block.size, NOUVEAU_BO_GART |
NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
@@ -153,8 +153,8 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
OUT_RING (chan, src_pitch |
NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
- OUT_RELOCl(chan, src_bo, src->offset + cy * src_pitch +
- cx * src->texture->block.size, NOUVEAU_BO_GART |
+ OUT_RELOCl(chan, src_bo, src->offset + (cy+sy) * src_pitch +
+ (cx+sx) * src->texture->block.size, NOUVEAU_BO_GART |
NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
OUT_RING (chan, 0);
}
@@ -210,6 +210,43 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
}
static int
+nv04_surface_copy_m2mf_swizzle(struct nv04_surface_2d *ctx,
+ struct pipe_surface *dst, int dx, int dy,
+ struct pipe_surface *src, int sx, int sy)
+{
+ struct nouveau_channel *chan = ctx->m2mf->channel;
+ struct nouveau_grobj *m2mf = ctx->m2mf;
+ struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src));
+ struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst));
+ unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
+ unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
+ unsigned dst_offset = dst->offset + nv04_swizzle_bits(dx, dy) *
+ dst->texture->block.size;
+ unsigned src_offset = src->offset + sy * src_pitch +
+ sx * src->texture->block.size;
+
+ BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
+ OUT_RELOCo(chan, src_bo,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCo(chan, dst_bo,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+ OUT_RELOCl(chan, src_bo, src_offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, dst_bo, dst_offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RING (chan, src_pitch);
+ OUT_RING (chan, dst_pitch);
+ OUT_RING (chan, 1 * src->texture->block.size);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0x0101);
+ OUT_RING (chan, 0);
+
+ return 0;
+}
+
+static int
nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
int dx, int dy, struct pipe_surface *src, int sx, int sy,
int w, int h)
@@ -258,8 +295,59 @@ nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
assert(src->format == dst->format);
/* Setup transfer to swizzle the texture to vram if needed */
- if (src_linear && !dst_linear && w > 1 && h > 1) {
- nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h);
+ if (src_linear && !dst_linear) {
+ int x,y;
+
+ if ((w>1) && (h>1)) {
+ int potWidth = 1<<log2i(w);
+ int potHeight = 1<<log2i(h);
+ int remainWidth = w-potWidth;
+ int remainHeight = h-potHeight;
+ int squareDim = (potWidth>potHeight ? potHeight : potWidth);
+
+ /* top left is always POT, but we can only swizzle squares */
+ for (y=0; y<potHeight; y+=squareDim) {
+ for (x=0; x<potWidth; x+= squareDim) {
+ nv04_surface_copy_swizzle(ctx, dst, dx+x, dy+y,
+ src, sx+x, sy+y,
+ squareDim, squareDim);
+ }
+ }
+
+ /* top right */
+ if (remainWidth>0) {
+ nv04_surface_copy(ctx, dst, dx+potWidth, dy,
+ src, sx+potWidth, sy,
+ remainWidth, potHeight);
+ }
+
+ /* bottom left */
+ if (remainHeight>0) {
+ nv04_surface_copy(ctx, dst, dx, dy+potHeight,
+ src, sx, sy+potHeight,
+ potWidth, remainHeight);
+ }
+
+ /* bottom right */
+ if ((remainWidth>0) && (remainHeight>0)) {
+ nv04_surface_copy(ctx, dst, dx+potWidth, dy+potHeight,
+ src, sx+potWidth, sy+potHeight,
+ remainWidth, remainHeight);
+ }
+ } else if (w==1) {
+ /* We have a column to copy to a swizzled texture */
+ for (y=0; y<h; y++) {
+ nv04_surface_copy_m2mf_swizzle(ctx, dst, dx, dy+y,
+ src, sx, sy+y);
+ }
+ } else if (h==1) {
+ /* We have a row to copy to a swizzled texture */
+ for (x=0; x<w; x++) {
+ nv04_surface_copy_m2mf_swizzle(ctx, dst, dx+x, dy,
+ src, sx+x, sy);
+ }
+ }
+
return;
}
diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c
index c1e588902b2..388245ecb04 100644
--- a/src/gallium/drivers/nv20/nv20_vertprog.c
+++ b/src/gallium/drivers/nv20/nv20_vertprog.c
@@ -617,10 +617,10 @@ nv20_vertprog_translate(struct nv20_context *nv20,
assert(imm->Immediate.NrTokens == 4 + 1);
vpc->imm[vpc->nr_imm++] =
constant(vpc, -1,
- imm->u.ImmediateFloat32[0].Float,
- imm->u.ImmediateFloat32[1].Float,
- imm->u.ImmediateFloat32[2].Float,
- imm->u.ImmediateFloat32[3].Float);
+ imm->u[0].Float,
+ imm->u[1].Float,
+ imm->u[2].Float,
+ imm->u[3].Float);
}
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index 1d1c556fb11..a48ba9782b3 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -704,10 +704,10 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc)
assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
assert(fpc->nr_imm < MAX_IMM);
- vals[0] = imm->u.ImmediateFloat32[0].Float;
- vals[1] = imm->u.ImmediateFloat32[1].Float;
- vals[2] = imm->u.ImmediateFloat32[2].Float;
- vals[3] = imm->u.ImmediateFloat32[3].Float;
+ vals[0] = imm->u[0].Float;
+ vals[1] = imm->u[1].Float;
+ vals[2] = imm->u[2].Float;
+ vals[3] = imm->u[3].Float;
fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
}
break;
diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
index c8b40784b05..f8285e4455f 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -95,7 +95,7 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
}
} else
if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
- switch (tex_usage) {
+ switch (format) {
case PIPE_FORMAT_Z24S8_UNORM:
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z16_UNORM:
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index c7514efcfea..14a5c0260d0 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -617,10 +617,10 @@ nv30_vertprog_translate(struct nv30_context *nv30,
assert(imm->Immediate.NrTokens == 4 + 1);
vpc->imm[vpc->nr_imm++] =
constant(vpc, -1,
- imm->u.ImmediateFloat32[0].Float,
- imm->u.ImmediateFloat32[1].Float,
- imm->u.ImmediateFloat32[2].Float,
- imm->u.ImmediateFloat32[3].Float);
+ imm->u[0].Float,
+ imm->u[1].Float,
+ imm->u[2].Float,
+ imm->u[3].Float);
}
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 680976da56b..32d9ed1a7f8 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -790,10 +790,10 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc)
assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
assert(fpc->nr_imm < MAX_IMM);
- vals[0] = imm->u.ImmediateFloat32[0].Float;
- vals[1] = imm->u.ImmediateFloat32[1].Float;
- vals[2] = imm->u.ImmediateFloat32[2].Float;
- vals[3] = imm->u.ImmediateFloat32[3].Float;
+ vals[0] = imm->u[0].Float;
+ vals[1] = imm->u[1].Float;
+ vals[2] = imm->u[2].Float;
+ vals[3] = imm->u[3].Float;
fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
}
break;
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index e75e8d3f424..0382dbba8f6 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -788,10 +788,10 @@ nv40_vertprog_translate(struct nv40_context *nv40,
assert(imm->Immediate.NrTokens == 4 + 1);
vpc->imm[vpc->nr_imm++] =
constant(vpc, -1,
- imm->u.ImmediateFloat32[0].Float,
- imm->u.ImmediateFloat32[1].Float,
- imm->u.ImmediateFloat32[2].Float,
- imm->u.ImmediateFloat32[3].Float);
+ imm->u[0].Float,
+ imm->u[1].Float,
+ imm->u[2].Float,
+ imm->u[3].Float);
}
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index e02afc4be99..6e8f4f9750d 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -31,15 +31,23 @@ static void
nv50_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
{
- struct nv50_context *nv50 = (struct nv50_context *)pipe;
-
- FIRE_RING(nv50->screen->base.channel);
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+
+ /* We need this in the ddx for reliable composite, not sure what we're
+ * actually flushing. We generate all our own flushes with flags = 0. */
+ WAIT_RING(chan, 3);
+ BEGIN_RING(chan, eng2d, 0x0110, 1);
+ OUT_RING (chan, 0);
+
+ FIRE_RING(chan);
}
static void
nv50_destroy(struct pipe_context *pipe)
{
- struct nv50_context *nv50 = (struct nv50_context *)pipe;
+ struct nv50_context *nv50 = nv50_context(pipe);
draw_destroy(nv50->draw);
FREE(nv50);
@@ -112,5 +120,3 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
return &nv50->pipe;
}
-
-
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 9b8cc4d37d0..5cbc2c8f823 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -71,6 +71,7 @@ struct nv50_sampler_stateobj {
struct nv50_miptree_level {
int *image_offset;
unsigned pitch;
+ unsigned tile_mode;
};
struct nv50_miptree {
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 22465e02274..dd1b0303bd5 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -42,9 +42,14 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
mt->base.screen = pscreen;
switch (pt->format) {
- case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z32_FLOAT:
+ tile_flags = 0x4800;
+ break;
case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_Z16_UNORM:
+ tile_flags = 0x1800;
+ break;
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
tile_flags = 0x2800;
break;
default:
@@ -82,20 +87,27 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
lvl->image_offset = CALLOC(mt->image_nr, sizeof(int));
lvl->pitch = align(pt->width[l] * pt->block.size, 64);
+ lvl->tile_mode = tile_mode;
width = MAX2(1, width >> 1);
height = MAX2(1, height >> 1);
depth = MAX2(1, depth >> 1);
+
+ if (tile_mode && height <= (tile_h >> 1)) {
+ tile_mode--;
+ tile_h >>= 1;
+ }
}
for (i = 0; i < mt->image_nr; i++) {
for (l = 0; l <= pt->last_level; l++) {
struct nv50_miptree_level *lvl = &mt->level[l];
int size;
+ tile_h = 1 << (lvl->tile_mode + 2);
size = align(pt->width[l], 8) * pt->block.size;
size = align(size, 64);
- size *= align(pt->height[l], tile_h) * pt->block.size;
+ size *= align(pt->height[l], tile_h);
lvl->image_offset[i] = mt->total_size;
@@ -104,12 +116,12 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
}
ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, mt->total_size,
- tile_mode, tile_flags, &mt->bo);
+ mt->level[0].tile_mode, tile_flags, &mt->bo);
if (ret) {
FREE(mt);
return NULL;
}
-
+
return &mt->base;
}
@@ -146,7 +158,7 @@ nv50_miptree_destroy(struct pipe_texture *pt)
struct nv50_miptree *mt = nv50_miptree(pt);
nouveau_bo_ref(NULL, &mt->bo);
- FREE(mt);
+ FREE(mt);
}
static struct pipe_surface *
@@ -189,8 +201,8 @@ nv50_miptree_surface_del(struct pipe_surface *ps)
{
struct nv50_surface *s = nv50_surface(ps);
- pipe_texture_reference(&ps->texture, NULL);
- FREE(s);
+ pipe_texture_reference(&ps->texture, NULL);
+ FREE(s);
}
void
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 5f7d06dbecb..289c3485e08 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -251,7 +251,7 @@ alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx)
if (pc->r_temp[idx] || pc->r_temp[idx + 1] ||
pc->r_temp[idx + 2] || pc->r_temp[idx + 3])
- return alloc_temp4(pc, dst, idx + 1);
+ return alloc_temp4(pc, dst, idx + 4);
for (i = 0; i < 4; i++) {
dst[i] = CALLOC_STRUCT(nv50_reg);
@@ -296,7 +296,7 @@ kill_temp_temp(struct nv50_pc *pc)
static int
ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
{
- pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)),
+ pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(float)),
(pc->immd_nr + 1) * 4 * sizeof(float));
pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
@@ -1014,6 +1014,7 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
break;
}
+ /* some cards need t[0]'s hw index to be a multiple of 4 */
alloc_temp4(pc, t, 0);
if (proj) {
@@ -1809,10 +1810,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
const struct tgsi_full_immediate *imm =
&p.FullToken.FullImmediate;
- ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
- imm->u.ImmediateFloat32[1].Float,
- imm->u.ImmediateFloat32[2].Float,
- imm->u.ImmediateFloat32[3].Float);
+ ctor_immd(pc, imm->u[0].Float,
+ imm->u[1].Float,
+ imm->u[2].Float,
+ imm->u[3].Float);
}
break;
case TGSI_TOKEN_TYPE_DECLARATION:
@@ -2221,9 +2222,9 @@ nv50_program_upload_data(struct nv50_context *nv50, float *map,
while (count) {
unsigned nr = count > 2047 ? 2047 : count;
- BEGIN_RING(chan, tesla, 0x00000f00, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
OUT_RING (chan, (cbuf << 0) | (start << 8));
- BEGIN_RING(chan, tesla, 0x40000f04, nr);
+ BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, nr);
OUT_RINGp (chan, map, nr);
map += nr;
@@ -2345,7 +2346,7 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
}
so = so_new(4,2);
- so_method(so, nv50->screen->tesla, 0x1280, 3);
+ so_method(so, nv50->screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_LOW, 0, 0);
so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
@@ -2364,9 +2365,9 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
continue;
}
- BEGIN_RING(chan, tesla, 0x0f00, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD);
- BEGIN_RING(chan, tesla, 0x40000f04, nr);
+ BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, nr);
OUT_RINGp (chan, up + start, nr);
start += nr;
@@ -2399,15 +2400,15 @@ nv50_vertprog_validate(struct nv50_context *nv50)
NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
NOUVEAU_BO_LOW, 0, 0);
- so_method(so, tesla, 0x1650, 2);
+ so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2);
so_data (so, p->cfg.vp.attr[0]);
so_data (so, p->cfg.vp.attr[1]);
- so_method(so, tesla, 0x16b8, 1);
+ so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
so_data (so, p->cfg.high_result);
- so_method(so, tesla, 0x16ac, 2);
+ so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2);
so_data (so, p->cfg.high_result); //8);
so_data (so, p->cfg.high_temp);
- so_method(so, tesla, 0x140c, 1);
+ so_method(so, tesla, NV50TCL_VP_START_ID, 1);
so_data (so, 0); /* program start offset */
so_ref(so, &nv50->state.vertprog);
so_ref(NULL, &so);
@@ -2436,24 +2437,24 @@ nv50_fragprog_validate(struct nv50_context *nv50)
NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
NOUVEAU_BO_LOW, 0, 0);
- so_method(so, tesla, 0x1904, 4);
+ so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
so_data (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */
so_data (so, 0x00000004);
so_data (so, 0x00000000);
so_data (so, 0x00000000);
- so_method(so, tesla, 0x16bc, p->cfg.fp.high_map);
+ so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), p->cfg.fp.high_map);
for (i = 0; i < p->cfg.fp.high_map; i++)
so_data(so, p->cfg.fp.map[i]);
- so_method(so, tesla, 0x1988, 2);
+ so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 2);
so_data (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */
so_data (so, p->cfg.high_temp);
- so_method(so, tesla, 0x1298, 1);
+ so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);
so_data (so, p->cfg.high_result);
- so_method(so, tesla, 0x19a8, 1);
+ so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1);
so_data (so, p->cfg.fp.regs[2]);
- so_method(so, tesla, 0x196c, 1);
+ so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1);
so_data (so, p->cfg.fp.regs[3]);
- so_method(so, tesla, 0x1414, 1);
+ so_method(so, tesla, NV50TCL_FP_START_ID, 1);
so_data (so, 0); /* program start offset */
so_ref(so, &nv50->state.fragprog);
so_ref(NULL, &so);
@@ -2478,4 +2479,3 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
p->translated = 0;
}
-
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 940e04365f2..5305c93d59a 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -94,7 +94,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
struct nv50_query *q = nv50_query(pq);
WAIT_RING (chan, 5);
- BEGIN_RING(chan, tesla, 0x1b00, 4);
+ BEGIN_RING(chan, tesla, NV50TCL_QUERY_ADDRESS_HIGH, 4);
OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RING (chan, 0x00000000);
@@ -107,13 +107,13 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
boolean wait, uint64_t *result)
{
struct nv50_query *q = nv50_query(pq);
-
- /*XXX: Want to be able to return FALSE here instead of blocking
- * until the result is available..
- */
+ int ret;
if (!q->ready) {
- nouveau_bo_map(q->bo, NOUVEAU_BO_RD);
+ ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD |
+ wait ? 0 : NOUVEAU_BO_NOWAIT);
+ if (ret)
+ return false;
q->result = ((uint32_t *)q->bo->map)[1];
q->ready = TRUE;
nouveau_bo_unmap(q->bo);
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index ce8f906b15f..c7f80a22037 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -44,9 +44,10 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
} else
if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
return TRUE;
default:
break;
@@ -188,7 +189,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
nv50_transfer_init_screen_functions(pscreen);
/* DMA engine object */
- ret = nouveau_grobj_alloc(chan, 0xbeef5039, 0x5039, &screen->m2mf);
+ ret = nouveau_grobj_alloc(chan, 0xbeef5039,
+ NV50_MEMORY_TO_MEMORY_FORMAT, &screen->m2mf);
if (ret) {
NOUVEAU_ERR("Error creating M2MF object: %d\n", ret);
nv50_screen_destroy(pscreen);
@@ -197,7 +199,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
BIND_RING(chan, screen->m2mf, 1);
/* 2D object */
- ret = nouveau_grobj_alloc(chan, 0xbeef502d, 0x502d, &screen->eng2d);
+ ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d);
if (ret) {
NOUVEAU_ERR("Error creating 2D object: %d\n", ret);
nv50_screen_destroy(pscreen);
@@ -208,14 +210,15 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
/* 3D object */
switch (chipset & 0xf0) {
case 0x50:
- tesla_class = 0x5097;
+ tesla_class = NV50TCL;
break;
case 0x80:
case 0x90:
- tesla_class = 0x8297;
+ /* this stupid name should be corrected. */
+ tesla_class = NV54TCL;
break;
case 0xa0:
- tesla_class = 0x8397;
+ tesla_class = NVA0TCL;
break;
default:
NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", chipset);
@@ -229,7 +232,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
return NULL;
}
- ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class, &screen->tesla);
+ ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class,
+ &screen->tesla);
if (ret) {
NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
nv50_screen_destroy(pscreen);
@@ -247,7 +251,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
/* Static M2MF init */
so = so_new(32, 0);
- so_method(so, screen->m2mf, 0x0180, 3);
+ so_method(so, screen->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3);
so_data (so, screen->sync->handle);
so_data (so, chan->vram->handle);
so_data (so, chan->vram->handle);
@@ -290,9 +294,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_method(so, screen->tesla, 0x13bc, 1);
so_data (so, 0x54);
+ /* origin is top left (set to 1 for bottom left) */
so_method(so, screen->tesla, 0x13ac, 1);
- so_data (so, 1);
- so_method(so, screen->tesla, 0x16b8, 1);
+ so_data (so, 0);
+ so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
so_data (so, 8);
/* constant buffers for immediates and VP/FP parameters */
@@ -330,33 +335,33 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_data (so, 0x000BBNP1);
*/
- so_method(so, screen->tesla, 0x1280, 3);
+ so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, (NV50_CB_PMISC << 16) | 0x00000800);
- so_method(so, screen->tesla, 0x1694, 1);
+ so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x00000001 | (NV50_CB_PMISC << 12));
- so_method(so, screen->tesla, 0x1694, 1);
+ so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x00000031 | (NV50_CB_PMISC << 12));
- so_method(so, screen->tesla, 0x1280, 3);
+ so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, (NV50_CB_PVP << 16) | 0x00000800);
- so_method(so, screen->tesla, 0x1694, 1);
+ so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x00000101 | (NV50_CB_PVP << 12));
- so_method(so, screen->tesla, 0x1280, 3);
+ so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, (NV50_CB_PFP << 16) | 0x00000800);
- so_method(so, screen->tesla, 0x1694, 1);
+ so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x00000131 | (NV50_CB_PFP << 12));
/* Texture sampler/image unit setup - we abuse the constant buffer
@@ -370,13 +375,13 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
return NULL;
}
- so_method(so, screen->tesla, 0x1280, 3);
+ so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, (NV50_CB_TIC << 16) | 0x0800);
- so_method(so, screen->tesla, 0x1574, 3);
+ so_method(so, screen->tesla, NV50TCL_TIC_ADDRESS_HIGH, 3);
so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
@@ -389,13 +394,13 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
return NULL;
}
- so_method(so, screen->tesla, 0x1280, 3);
+ so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, (NV50_CB_TSC << 16) | 0x0800);
- so_method(so, screen->tesla, 0x155c, 3);
+ so_method(so, screen->tesla, NV50TCL_TSC_ADDRESS_HIGH, 3);
so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
@@ -405,7 +410,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
/* Vertex array limits - max them out */
for (i = 0; i < 16; i++) {
- so_method(so, screen->tesla, 0x1080 + (i * 8), 2);
+ so_method(so, screen->tesla, NV50TCL_UNK1080_OFFSET_HIGH(i), 2);
so_data (so, 0x000000ff);
so_data (so, 0xffffffff);
}
@@ -417,6 +422,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_method(so, screen->tesla, 0x1234, 1);
so_data (so, 1);
+ /* activate first scissor rectangle */
+ so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1);
+ so_data (so, 1);
+
so_emit(chan, so);
so_ref (so, &screen->static_init);
so_ref (NULL, &so);
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 116866a8e78..ef4154d3036 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -205,11 +205,16 @@ nv50_sampler_state_create(struct pipe_context *pipe,
}
limit = CLAMP(cso->lod_bias, -16.0, 15.0);
- tsc[1] |= ((int)(limit * 256.0) & 0x1fff) << 11;
+ tsc[1] |= ((int)(limit * 256.0) & 0x1fff) << 12;
tsc[2] |= ((int)CLAMP(cso->max_lod, 0.0, 15.0) << 20) |
((int)CLAMP(cso->min_lod, 0.0, 15.0) << 8);
+ tsc[4] = fui(cso->border_color[0]);
+ tsc[5] = fui(cso->border_color[1]);
+ tsc[6] = fui(cso->border_color[2]);
+ tsc[7] = fui(cso->border_color[3]);
+
sso->normalized = cso->normalized_coords;
return (void *)sso;
}
@@ -404,35 +409,35 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
so_data (so, 0);
}
- /*XXX: yes, I know they're backwards.. header needs fixing */
+ /* XXX: keep hex values until header is updated (names reversed) */
if (cso->stencil[0].enabled) {
- so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5);
+ so_method(so, tesla, 0x1380, 8);
so_data (so, 1);
so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op));
so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
so_data (so, nvgl_comparison_op(cso->stencil[0].func));
- so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3);
so_data (so, cso->stencil[0].ref_value);
so_data (so, cso->stencil[0].writemask);
so_data (so, cso->stencil[0].valuemask);
} else {
- so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1);
+ so_method(so, tesla, 0x1380, 1);
so_data (so, 0);
}
if (cso->stencil[1].enabled) {
- so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8);
+ so_method(so, tesla, 0x1594, 5);
so_data (so, 1);
so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
so_data (so, nvgl_comparison_op(cso->stencil[1].func));
+ so_method(so, tesla, 0x0f54, 3);
so_data (so, cso->stencil[1].ref_value);
so_data (so, cso->stencil[1].writemask);
so_data (so, cso->stencil[1].valuemask);
} else {
- so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1);
+ so_method(so, tesla, 0x1594, 1);
so_data (so, 0);
}
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index d313e9de4fe..a879df2e6e2 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -55,15 +55,15 @@ nv50_state_validate_fb(struct nv50_context *nv50)
NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
switch (fb->cbufs[i]->format) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
- so_data(so, 0xcf);
+ so_data(so, NV50TCL_RT_FORMAT_A8R8G8B8_UNORM);
break;
case PIPE_FORMAT_R5G6B5_UNORM:
- so_data(so, 0xe8);
+ so_data(so, NV50TCL_RT_FORMAT_R5G6B5_UNORM);
break;
default:
NOUVEAU_ERR("AIIII unknown format %s\n",
pf_name(fb->cbufs[i]->format));
- so_data(so, 0xe6);
+ so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM);
break;
}
so_data(so, bo->tile_mode << 4);
@@ -92,17 +92,22 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
switch (fb->zsbuf->format) {
+ case PIPE_FORMAT_Z32_FLOAT:
+ so_data(so, NV50TCL_ZETA_FORMAT_Z32_FLOAT);
+ break;
case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_Z24X8_UNORM:
- so_data(so, 0x16);
+ so_data(so, NV50TCL_ZETA_FORMAT_Z24S8_UNORM);
+ break;
+ case PIPE_FORMAT_X8Z24_UNORM:
+ so_data(so, NV50TCL_ZETA_FORMAT_X8Z24_UNORM);
break;
- case PIPE_FORMAT_Z16_UNORM:
- so_data(so, 0x15);
+ case PIPE_FORMAT_S8Z24_UNORM:
+ so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM);
break;
default:
NOUVEAU_ERR("AIIII unknown format %s\n",
pf_name(fb->zsbuf->format));
- so_data(so, 0x16);
+ so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM);
break;
}
so_data(so, bo->tile_mode << 4);
@@ -110,7 +115,7 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_method(so, tesla, 0x1538, 1);
so_data (so, 1);
- so_method(so, tesla, 0x1228, 3);
+ so_method(so, tesla, NV50TCL_ZETA_HORIZ, 3);
so_data (so, fb->zsbuf->width);
so_data (so, fb->zsbuf->height);
so_data (so, 0x00010001);
@@ -119,12 +124,18 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2);
so_data (so, w << 16);
so_data (so, h << 16);
- so_method(so, tesla, 0x0e04, 2);
+ /* set window lower left corner */
+ so_method(so, tesla, NV50TCL_WINDOW_LEFT, 2);
+ so_data (so, 0);
+ so_data (so, 0);
+ /* set screen scissor rectangle */
+ so_method(so, tesla, NV50TCL_SCREEN_SCISSOR_HORIZ, 2);
so_data (so, w << 16);
so_data (so, h << 16);
- so_method(so, tesla, 0xdf8, 2);
- so_data (so, 0);
- so_data (so, h);
+
+ /* we set scissors to framebuffer size when they're 'turned off' */
+ nv50->dirty |= NV50_NEW_SCISSOR;
+ so_ref(NULL, &nv50->state.scissor);
so_ref(so, &nv50->state.fb);
so_ref(NULL, &so);
@@ -137,7 +148,32 @@ nv50_state_emit(struct nv50_context *nv50)
struct nouveau_channel *chan = screen->base.channel;
if (nv50->pctx_id != screen->cur_pctx) {
- nv50->state.dirty |= 0xffffffff;
+ if (nv50->state.fb)
+ nv50->state.dirty |= NV50_NEW_FRAMEBUFFER;
+ if (nv50->state.blend)
+ nv50->state.dirty |= NV50_NEW_BLEND;
+ if (nv50->state.zsa)
+ nv50->state.dirty |= NV50_NEW_ZSA;
+ if (nv50->state.vertprog)
+ nv50->state.dirty |= NV50_NEW_VERTPROG;
+ if (nv50->state.fragprog)
+ nv50->state.dirty |= NV50_NEW_FRAGPROG;
+ if (nv50->state.rast)
+ nv50->state.dirty |= NV50_NEW_RASTERIZER;
+ if (nv50->state.blend_colour)
+ nv50->state.dirty |= NV50_NEW_BLEND_COLOUR;
+ if (nv50->state.stipple)
+ nv50->state.dirty |= NV50_NEW_STIPPLE;
+ if (nv50->state.scissor)
+ nv50->state.dirty |= NV50_NEW_SCISSOR;
+ if (nv50->state.viewport)
+ nv50->state.dirty |= NV50_NEW_VIEWPORT;
+ if (nv50->state.tsc_upload)
+ nv50->state.dirty |= NV50_NEW_SAMPLER;
+ if (nv50->state.tic_upload)
+ nv50->state.dirty |= NV50_NEW_TEXTURE;
+ if (nv50->state.vtxfmt && nv50->state.vtxbuf)
+ nv50->state.dirty |= NV50_NEW_ARRAYS;
screen->cur_pctx = nv50->pctx_id;
}
@@ -233,13 +269,13 @@ nv50_state_validate(struct nv50_context *nv50)
nv50->state.scissor_enabled = rast->scissor;
so = so_new(3, 0);
- so_method(so, tesla, 0x0ff4, 2);
+ so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2);
if (nv50->state.scissor_enabled) {
- so_data(so, ((s->maxx - s->minx) << 16) | s->minx);
- so_data(so, ((s->maxy - s->miny) << 16) | s->miny);
+ so_data(so, (s->maxx << 16) | s->minx);
+ so_data(so, (s->maxy << 16) | s->miny);
} else {
- so_data(so, (8192 << 16));
- so_data(so, (8192 << 16));
+ so_data(so, (nv50->framebuffer.width << 16));
+ so_data(so, (nv50->framebuffer.height << 16));
}
so_ref(so, &nv50->state.scissor);
so_ref(NULL, &so);
@@ -263,20 +299,22 @@ scissor_uptodate:
so = so_new(12, 0);
if (!bypass) {
- so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3);
+ so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3);
so_data (so, fui(nv50->viewport.translate[0]));
so_data (so, fui(nv50->viewport.translate[1]));
so_data (so, fui(nv50->viewport.translate[2]));
- so_method(so, tesla, NV50TCL_VIEWPORT_UNK0(0), 3);
+ so_method(so, tesla, NV50TCL_VIEWPORT_SCALE(0), 3);
so_data (so, fui(nv50->viewport.scale[0]));
- so_data (so, fui(-nv50->viewport.scale[1]));
+ so_data (so, fui(nv50->viewport.scale[1]));
so_data (so, fui(nv50->viewport.scale[2]));
- so_method(so, tesla, 0x192c, 1);
+
+ so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
so_data (so, 1);
+ /* no idea what 0f90 does */
so_method(so, tesla, 0x0f90, 1);
so_data (so, 0);
} else {
- so_method(so, tesla, 0x192c, 1);
+ so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
so_data (so, 0);
so_method(so, tesla, 0x0f90, 1);
so_data (so, 1);
@@ -292,9 +330,10 @@ viewport_uptodate:
int i;
so = so_new(nv50->sampler_nr * 8 + 3, 0);
- so_method(so, tesla, 0x0f00, 1);
+ so_method(so, tesla, NV50TCL_CB_ADDR, 1);
so_data (so, NV50_CB_TSC);
- so_method(so, tesla, 0x40000f04, nv50->sampler_nr * 8);
+ so_method(so, tesla, NV50TCL_CB_DATA(0) | 0x40000000,
+ nv50->sampler_nr * 8);
for (i = 0; i < nv50->sampler_nr; i++)
so_datap (so, nv50->sampler[i]->tsc, 8);
so_ref(so, &nv50->state.tsc_upload);
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 3da9d6e7285..edaf4b055a1 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -35,13 +35,13 @@ nv50_format(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
- return NV50_2D_DST_FORMAT_32BPP;
+ return NV50_2D_DST_FORMAT_A8R8G8B8_UNORM;
case PIPE_FORMAT_X8R8G8B8_UNORM:
- return NV50_2D_DST_FORMAT_24BPP;
+ return NV50_2D_DST_FORMAT_X8R8G8B8_UNORM;
case PIPE_FORMAT_R5G6B5_UNORM:
- return NV50_2D_DST_FORMAT_16BPP;
+ return NV50_2D_DST_FORMAT_R5G6B5_UNORM;
case PIPE_FORMAT_A8_UNORM:
- return NV50_2D_DST_FORMAT_8BPP;
+ return NV50_2D_DST_FORMAT_R8_UNORM;
default:
return -1;
}
@@ -144,7 +144,7 @@ nv50_surface_copy(struct pipe_context *pipe,
struct pipe_surface *src, unsigned srcx, unsigned srcy,
unsigned width, unsigned height)
{
- struct nv50_context *nv50 = (struct nv50_context *)pipe;
+ struct nv50_context *nv50 = nv50_context(pipe);
struct nv50_screen *screen = nv50->screen;
assert(src->format == dest->format);
@@ -158,7 +158,7 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
unsigned destx, unsigned desty, unsigned width,
unsigned height, unsigned value)
{
- struct nv50_context *nv50 = (struct nv50_context *)pipe;
+ struct nv50_context *nv50 = nv50_context(pipe);
struct nv50_screen *screen = nv50->screen;
struct nouveau_channel *chan = screen->eng2d->channel;
struct nouveau_grobj *eng2d = screen->eng2d;
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index ff40c2ad81b..14c68b96e14 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -32,30 +32,30 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
switch (mt->base.format) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
NV50TIC_0_0_FMT_8_8_8_8);
break;
case PIPE_FORMAT_A1R5G5B5_UNORM:
so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
NV50TIC_0_0_FMT_1_5_5_5);
break;
case PIPE_FORMAT_A4R4G4B4_UNORM:
so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
NV50TIC_0_0_FMT_4_4_4_4);
break;
case PIPE_FORMAT_R5G6B5_UNORM:
so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
NV50TIC_0_0_FMT_5_6_5);
break;
case PIPE_FORMAT_L8_UNORM:
@@ -145,25 +145,28 @@ nv50_tex_validate(struct nv50_context *nv50)
push += MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2;
so = so_new(push, nv50->miptree_nr * 2);
- so_method(so, tesla, 0x0f00, 1);
+ so_method(so, tesla, NV50TCL_CB_ADDR, 1);
so_data (so, NV50_CB_TIC);
for (unit = 0; unit < nv50->miptree_nr; unit++) {
struct nv50_miptree *mt = nv50->miptree[unit];
- so_method(so, tesla, 0x40000f04, 8);
+ so_method(so, tesla, NV50TCL_CB_DATA(0) | 0x40000000, 8);
if (nv50_tex_construct(nv50, so, mt, unit)) {
NOUVEAU_ERR("failed tex validate\n");
so_ref(NULL, &so);
return;
}
- so_method(so, tesla, 0x1458, 1);
- so_data (so, (unit << 9) | (unit << 1) | 1);
+ so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
+ so_data (so, (unit << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) |
+ (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) |
+ NV50TCL_SET_SAMPLER_TEX_VALID);
}
for (; unit < nv50->state.miptree_nr; unit++) {
- so_method(so, tesla, 0x1458, 1);
- so_data (so, (unit << 1) | 0);
+ so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
+ so_data (so,
+ (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0);
}
so_ref(so, &nv50->state.tic_upload);
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
index aca622c73b1..207fb039f70 100644
--- a/src/gallium/drivers/nv50/nv50_texture.h
+++ b/src/gallium/drivers/nv50/nv50_texture.h
@@ -14,13 +14,13 @@
#define NV50TIC_0_0_MAPA_C2 0x20000000
#define NV50TIC_0_0_MAPA_C3 0x28000000
#define NV50TIC_0_0_MAPA_ONE 0x38000000
-#define NV50TIC_0_0_MAPR_MASK 0x07000000
-#define NV50TIC_0_0_MAPR_ZERO 0x00000000
-#define NV50TIC_0_0_MAPR_C0 0x02000000
-#define NV50TIC_0_0_MAPR_C1 0x03000000
-#define NV50TIC_0_0_MAPR_C2 0x04000000
-#define NV50TIC_0_0_MAPR_C3 0x05000000
-#define NV50TIC_0_0_MAPR_ONE 0x07000000
+#define NV50TIC_0_0_MAPB_MASK 0x07000000
+#define NV50TIC_0_0_MAPB_ZERO 0x00000000
+#define NV50TIC_0_0_MAPB_C0 0x02000000
+#define NV50TIC_0_0_MAPB_C1 0x03000000
+#define NV50TIC_0_0_MAPB_C2 0x04000000
+#define NV50TIC_0_0_MAPB_C3 0x05000000
+#define NV50TIC_0_0_MAPB_ONE 0x07000000
#define NV50TIC_0_0_MAPG_MASK 0x00e00000
#define NV50TIC_0_0_MAPG_ZERO 0x00000000
#define NV50TIC_0_0_MAPG_C0 0x00400000
@@ -28,31 +28,49 @@
#define NV50TIC_0_0_MAPG_C2 0x00800000
#define NV50TIC_0_0_MAPG_C3 0x00a00000
#define NV50TIC_0_0_MAPG_ONE 0x00e00000
-#define NV50TIC_0_0_MAPB_MASK 0x001c0000
-#define NV50TIC_0_0_MAPB_ZERO 0x00000000
-#define NV50TIC_0_0_MAPB_C0 0x00080000
-#define NV50TIC_0_0_MAPB_C1 0x000c0000
-#define NV50TIC_0_0_MAPB_C2 0x00100000
-#define NV50TIC_0_0_MAPB_C3 0x00140000
-#define NV50TIC_0_0_MAPB_ONE 0x001c0000
+#define NV50TIC_0_0_MAPR_MASK 0x001c0000
+#define NV50TIC_0_0_MAPR_ZERO 0x00000000
+#define NV50TIC_0_0_MAPR_C0 0x00080000
+#define NV50TIC_0_0_MAPR_C1 0x000c0000
+#define NV50TIC_0_0_MAPR_C2 0x00100000
+#define NV50TIC_0_0_MAPR_C3 0x00140000
+#define NV50TIC_0_0_MAPR_ONE 0x001c0000
#define NV50TIC_0_0_TYPEA_MASK 0x00038000
#define NV50TIC_0_0_TYPEA_UNORM 0x00010000
-#define NV50TIC_0_0_TYPER_MASK 0x00007000
-#define NV50TIC_0_0_TYPER_UNORM 0x00002000
+#define NV50TIC_0_0_TYPEA_SNORM 0x00008000
+#define NV50TIC_0_0_TYPEA_FLOAT 0x00038000
+#define NV50TIC_0_0_TYPEB_MASK 0x00007000
+#define NV50TIC_0_0_TYPEB_UNORM 0x00002000
+#define NV50TIC_0_0_TYPEB_SNORM 0x00001000
+#define NV50TIC_0_0_TYPEB_FLOAT 0x00007000
#define NV50TIC_0_0_TYPEG_MASK 0x00000e00
#define NV50TIC_0_0_TYPEG_UNORM 0x00000400
-#define NV50TIC_0_0_TYPEB_MASK 0x000001c0
-#define NV50TIC_0_0_TYPEB_UNORM 0x00000080
-#define NV50TIC_0_0_FMT_MASK 0x0000003c
+#define NV50TIC_0_0_TYPEG_SNORM 0x00000200
+#define NV50TIC_0_0_TYPEG_FLOAT 0x00000e00
+#define NV50TIC_0_0_TYPER_MASK 0x000001c0
+#define NV50TIC_0_0_TYPER_UNORM 0x00000080
+#define NV50TIC_0_0_TYPER_SNORM 0x00000040
+#define NV50TIC_0_0_TYPER_FLOAT 0x000001c0
+#define NV50TIC_0_0_FMT_MASK 0x0000003f
+#define NV50TIC_0_0_FMT_32_32_32_32 0x00000001
+#define NV50TIC_0_0_FMT_16_16_16_16 0x00000003
+#define NV50TIC_0_0_FMT_32_32 0x00000004
#define NV50TIC_0_0_FMT_8_8_8_8 0x00000008
+#define NV50TIC_0_0_FMT_2_10_10_10 0x00000009
+#define NV50TIC_0_0_FMT_32 0x0000000f
#define NV50TIC_0_0_FMT_4_4_4_4 0x00000012
-#define NV50TIC_0_0_FMT_1_5_5_5 0x00000013
+/* #define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 */
+#define NV50TIC_0_0_FMT_1_5_5_5 0x00000014
#define NV50TIC_0_0_FMT_5_6_5 0x00000015
#define NV50TIC_0_0_FMT_8_8 0x00000018
+#define NV50TIC_0_0_FMT_16 0x0000001b
#define NV50TIC_0_0_FMT_8 0x0000001d
+#define NV50TIC_0_0_FMT_10_11_11 0x00000021
#define NV50TIC_0_0_FMT_DXT1 0x00000024
#define NV50TIC_0_0_FMT_DXT3 0x00000025
#define NV50TIC_0_0_FMT_DXT5 0x00000026
+#define NV50TIC_0_0_FMT_RGTC1 0x00000027
+#define NV50TIC_0_0_FMT_RGTC2 0x00000028
#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff
#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0
@@ -102,6 +120,7 @@
#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_EDGE 0x00000140
#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_BORDER 0x00000180
#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP 0x000001c0
+#define NV50TSC_1_0_MAX_ANISOTROPY_MASK 0x00700000
#define NV50TSC_1_1_MAGF_MASK 0x00000003
#define NV50TSC_1_1_MAGF_NEAREST 0x00000001
@@ -113,17 +132,19 @@
#define NV50TSC_1_1_MIPF_NONE 0x00000040
#define NV50TSC_1_1_MIPF_NEAREST 0x00000080
#define NV50TSC_1_1_MIPF_LINEAR 0x000000c0
+#define NV50TSC_1_1_LOD_BIAS_MASK 0x01fff000
-#define NV50TSC_1_2_UNKNOWN_MASK 0xffffffff
+#define NV50TSC_1_2_MIN_LOD_MASK 0x00000f00
+#define NV50TSC_1_2_MAX_LOD_MASK 0x00f00000
#define NV50TSC_1_3_UNKNOWN_MASK 0xffffffff
-#define NV50TSC_1_4_UNKNOWN_MASK 0xffffffff
+#define NV50TSC_1_4_BORDER_COLOR_RED_MASK 0xffffffff
-#define NV50TSC_1_5_UNKNOWN_MASK 0xffffffff
+#define NV50TSC_1_5_BORDER_COLOR_GREEN_MASK 0xffffffff
-#define NV50TSC_1_6_UNKNOWN_MASK 0xffffffff
+#define NV50TSC_1_6_BORDER_COLOR_BLUE_MASK 0xffffffff
-#define NV50TSC_1_7_UNKNOWN_MASK 0xffffffff
+#define NV50TSC_1_7_BORDER_COLOR_ALPHA_MASK 0xffffffff
#endif
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index d0b7f0bef43..d2b5e4d75d4 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -8,6 +8,7 @@ struct nv50_transfer {
struct pipe_transfer base;
struct nouveau_bo *bo;
unsigned level_offset;
+ unsigned level_tiling;
int level_pitch;
int level_width;
int level_height;
@@ -16,11 +17,14 @@ struct nv50_transfer {
};
static void
-nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo,
- unsigned src_offset, int src_pitch, int sx, int sy,
- int sw, int sh, struct nouveau_bo *dst_bo,
- unsigned dst_offset, int dst_pitch, int dx, int dy,
- int dw, int dh, int cpp, int width, int height,
+nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
+ struct nouveau_bo *src_bo, unsigned src_offset,
+ int src_pitch, unsigned src_tile_mode,
+ int sx, int sy, int sw, int sh,
+ struct nouveau_bo *dst_bo, unsigned dst_offset,
+ int dst_pitch, unsigned dst_tile_mode,
+ int dx, int dy, int dw, int dh,
+ int cpp, int width, int height,
unsigned src_reloc, unsigned dst_reloc)
{
struct nv50_screen *screen = nv50_screen(pscreen);
@@ -33,15 +37,18 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo,
WAIT_RING (chan, 14);
if (!src_bo->tile_flags) {
- BEGIN_RING(chan, m2mf, 0x0200, 1);
+ BEGIN_RING(chan, m2mf,
+ NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1);
OUT_RING (chan, 1);
- BEGIN_RING(chan, m2mf, 0x0314, 1);
+ BEGIN_RING(chan, m2mf,
+ NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1);
OUT_RING (chan, src_pitch);
src_offset += (sy * src_pitch) + (sx * cpp);
} else {
- BEGIN_RING(chan, m2mf, 0x0200, 6);
+ BEGIN_RING(chan, m2mf,
+ NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 6);
OUT_RING (chan, 0);
- OUT_RING (chan, src_bo->tile_mode << 4);
+ OUT_RING (chan, src_tile_mode << 4);
OUT_RING (chan, sw * cpp);
OUT_RING (chan, sh);
OUT_RING (chan, 1);
@@ -49,15 +56,18 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo,
}
if (!dst_bo->tile_flags) {
- BEGIN_RING(chan, m2mf, 0x021c, 1);
+ BEGIN_RING(chan, m2mf,
+ NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1);
OUT_RING (chan, 1);
- BEGIN_RING(chan, m2mf, 0x0318, 1);
+ BEGIN_RING(chan, m2mf,
+ NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1);
OUT_RING (chan, dst_pitch);
dst_offset += (dy * dst_pitch) + (dx * cpp);
} else {
- BEGIN_RING(chan, m2mf, 0x021c, 6);
+ BEGIN_RING(chan, m2mf,
+ NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 6);
OUT_RING (chan, 0);
- OUT_RING (chan, dst_bo->tile_mode << 4);
+ OUT_RING (chan, dst_tile_mode << 4);
OUT_RING (chan, dw * cpp);
OUT_RING (chan, dh);
OUT_RING (chan, 1);
@@ -68,25 +78,30 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo,
int line_count = height > 2047 ? 2047 : height;
WAIT_RING (chan, 15);
- BEGIN_RING(chan, m2mf, 0x0238, 2);
+ BEGIN_RING(chan, m2mf,
+ NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH, 2);
OUT_RELOCh(chan, src_bo, src_offset, src_reloc);
OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc);
- BEGIN_RING(chan, m2mf, 0x030c, 2);
+ BEGIN_RING(chan, m2mf,
+ NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2);
OUT_RELOCl(chan, src_bo, src_offset, src_reloc);
OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc);
if (src_bo->tile_flags) {
- BEGIN_RING(chan, m2mf, 0x0218, 1);
- OUT_RING (chan, (dy << 16) | sx);
+ BEGIN_RING(chan, m2mf,
+ NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1);
+ OUT_RING (chan, (sy << 16) | sx);
} else {
src_offset += (line_count * src_pitch);
}
if (dst_bo->tile_flags) {
- BEGIN_RING(chan, m2mf, 0x0234, 1);
- OUT_RING (chan, (sy << 16) | dx);
+ BEGIN_RING(chan, m2mf,
+ NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1);
+ OUT_RING (chan, (dy << 16) | dx);
} else {
dst_offset += (line_count * dst_pitch);
}
- BEGIN_RING(chan, m2mf, 0x031c, 4);
+ BEGIN_RING(chan, m2mf,
+ NV50_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4);
OUT_RING (chan, width * cpp);
OUT_RING (chan, line_count);
OUT_RING (chan, 0x00000101);
@@ -136,6 +151,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->level_width = mt->base.width[level];
tx->level_height = mt->base.height[level];
tx->level_offset = lvl->image_offset[image];
+ tx->level_tiling = lvl->tile_mode;
tx->level_x = x;
tx->level_y = y;
ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
@@ -147,9 +163,11 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
if (usage != PIPE_TRANSFER_WRITE) {
nv50_transfer_rect_m2mf(pscreen, mt->bo, tx->level_offset,
- tx->level_pitch, x, y, tx->level_width,
- tx->level_height, tx->bo, 0,
- tx->base.stride, 0, 0,
+ tx->level_pitch, tx->level_tiling,
+ x, y,
+ tx->level_width, tx->level_height,
+ tx->bo, 0, tx->base.stride,
+ tx->bo->tile_mode, 0, 0,
tx->base.width, tx->base.height,
tx->base.block.size, w, h,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
@@ -168,12 +186,14 @@ nv50_transfer_del(struct pipe_transfer *ptx)
if (ptx->usage != PIPE_TRANSFER_READ) {
struct pipe_screen *pscreen = ptx->texture->screen;
nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, tx->base.stride,
- 0, 0, tx->base.width, tx->base.height,
- mt->bo, tx->level_offset,
- tx->level_pitch, tx->level_x,
- tx->level_y, tx->level_width,
- tx->level_height, tx->base.block.size,
+ tx->bo->tile_mode, 0, 0,
tx->base.width, tx->base.height,
+ mt->bo, tx->level_offset,
+ tx->level_pitch, tx->level_tiling,
+ tx->level_x, tx->level_y,
+ tx->level_width, tx->level_height,
+ tx->base.block.size, tx->base.width,
+ tx->base.height,
NOUVEAU_BO_GART, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART);
}
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index f81929f2387..17283f3f418 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -49,6 +49,57 @@ nv50_prim(unsigned mode)
return NV50TCL_VERTEX_BEGIN_POINTS;
}
+static INLINE unsigned
+nv50_vtxeltfmt(unsigned pf)
+{
+ static const uint8_t vtxelt_32[4] = { 0x90, 0x20, 0x10, 0x08 };
+ static const uint8_t vtxelt_16[4] = { 0xd8, 0x78, 0x28, 0x18 };
+ static const uint8_t vtxelt_08[4] = { 0xe8, 0xc0, 0x98, 0x50 };
+
+ unsigned nf, c = 0;
+
+ switch (pf_type(pf)) {
+ case PIPE_FORMAT_TYPE_FLOAT:
+ nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT; break;
+ case PIPE_FORMAT_TYPE_UNORM:
+ nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM; break;
+ case PIPE_FORMAT_TYPE_SNORM:
+ nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM; break;
+ case PIPE_FORMAT_TYPE_USCALED:
+ nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED; break;
+ case PIPE_FORMAT_TYPE_SSCALED:
+ nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED; break;
+ default:
+ NOUVEAU_ERR("invalid vbo type %d\n",pf_type(pf));
+ assert(0);
+ nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT;
+ break;
+ }
+
+ if (pf_size_y(pf)) c++;
+ if (pf_size_z(pf)) c++;
+ if (pf_size_w(pf)) c++;
+
+ if (pf_exp2(pf) == 3) {
+ switch (pf_size_x(pf)) {
+ case 1: return (nf | (vtxelt_08[c] << 16));
+ case 2: return (nf | (vtxelt_16[c] << 16));
+ case 4: return (nf | (vtxelt_32[c] << 16));
+ default:
+ break;
+ }
+ } else
+ if (pf_exp2(pf) == 6 && pf_size_x(pf) == 1) {
+ NOUVEAU_ERR("unsupported vbo component size 64\n");
+ assert(0);
+ return (nf | 0x08000000);
+ }
+
+ NOUVEAU_ERR("invalid vbo format %s\n",pf_name(pf));
+ assert(0);
+ return (nf | 0x08000000);
+}
+
boolean
nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
unsigned count)
@@ -139,7 +190,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
}
static INLINE void
-nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint8_t *map,
+nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
unsigned start, unsigned count)
{
struct nouveau_channel *chan = nv50->screen->tesla->channel;
@@ -208,9 +259,14 @@ nv50_vbo_validate(struct nv50_context *nv50)
struct nouveau_stateobj *vtxbuf, *vtxfmt;
int i;
+ /* don't validate if Gallium took away our buffers */
+ if (nv50->vtxbuf_nr == 0)
+ return;
+
vtxbuf = so_new(nv50->vtxelt_nr * 4, nv50->vtxelt_nr * 2);
vtxfmt = so_new(nv50->vtxelt_nr + 1, 0);
- so_method(vtxfmt, tesla, 0x1ac0, nv50->vtxelt_nr);
+ so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0),
+ nv50->vtxelt_nr);
for (i = 0; i < nv50->vtxelt_nr; i++) {
struct pipe_vertex_element *ve = &nv50->vtxelt[i];
@@ -218,32 +274,9 @@ nv50_vbo_validate(struct nv50_context *nv50)
&nv50->vtxbuf[ve->vertex_buffer_index];
struct nouveau_bo *bo = nouveau_bo(vb->buffer);
- switch (ve->src_format) {
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- so_data(vtxfmt, 0x7e080000 | i);
- break;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- so_data(vtxfmt, 0x7e100000 | i);
- break;
- case PIPE_FORMAT_R32G32_FLOAT:
- so_data(vtxfmt, 0x7e200000 | i);
- break;
- case PIPE_FORMAT_R32_FLOAT:
- so_data(vtxfmt, 0x7e900000 | i);
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- so_data(vtxfmt, 0x24500000 | i);
- break;
- default:
- {
- NOUVEAU_ERR("invalid vbo format %s\n",
- pf_name(ve->src_format));
- assert(0);
- return;
- }
- }
+ so_data(vtxfmt, nv50_vtxeltfmt(ve->src_format) | i);
- so_method(vtxbuf, tesla, 0x900 + (i * 16), 3);
+ so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
so_data (vtxbuf, 0x20000000 | vb->stride);
so_reloc (vtxbuf, bo, vb->buffer_offset +
ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index faceec9842f..d7a2c8c462c 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -9,7 +9,6 @@ C_SOURCES = \
r300_chipset.c \
r300_clear.c \
r300_context.c \
- r300_debug.c \
r300_emit.c \
r300_flush.c \
r300_fs.c \
@@ -21,6 +20,22 @@ C_SOURCES = \
r300_state_invariant.c \
r300_vs.c \
r300_surface.c \
- r300_texture.c
+ r300_texture.c \
+ r300_tgsi_to_rc.c
+
+LIBRARY_INCLUDES = \
+ -I$(TOP)/src/mesa/drivers/dri/r300/compiler \
+ -I$(TOP)/src/mesa \
+ -I$(TOP)/include
+
+COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a
+
+EXTRA_OBJECTS = \
+ $(COMPILER_ARCHIVE)
include ../../Makefile.template
+
+.PHONY : $(COMPILER_ARCHIVE)
+
+$(COMPILER_ARCHIVE):
+ cd $(TOP)/src/mesa/drivers/dri/r300/compiler; make
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 233a32b53c5..c8510bc63e4 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -52,7 +52,7 @@ static boolean r300_draw_range_elements(struct pipe_context* pipe,
draw_set_mapped_constant_buffer(r300->draw,
r300->shader_constants[PIPE_SHADER_VERTEX].constants,
- r300->shader_constants[PIPE_SHADER_VERTEX].user_count *
+ r300->shader_constants[PIPE_SHADER_VERTEX].count *
(sizeof(float) * 4));
draw_arrays(r300->draw, mode, start, count);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index d891fd6265f..fc8a4498933 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -34,6 +34,9 @@
#include "r300_screen.h"
#include "r300_winsys.h"
+struct r300_fragment_shader;
+struct r300_vertex_shader;
+
struct r300_blend_state {
uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */
uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */
@@ -143,71 +146,10 @@ struct r300_constant_buffer {
/* Buffer of constants */
/* XXX first number should be raised */
float constants[32][4];
- /* Number of user-defined constants */
- unsigned user_count;
/* Total number of constants */
unsigned count;
};
-struct r300_fragment_shader {
- /* Parent class */
- struct pipe_shader_state state;
- struct tgsi_shader_info info;
-
- /* Has this shader been translated yet? */
- boolean translated;
-
- /* Pixel stack size */
- int stack_size;
-
- /* Are there immediates in this shader?
- * If not, we can heavily optimize recompilation. */
- boolean uses_imms;
-};
-
-struct r3xx_fragment_shader {
- /* Parent class */
- struct r300_fragment_shader shader;
-
- /* Number of ALU instructions */
- int alu_instruction_count;
-
- /* Number of texture instructions */
- int tex_instruction_count;
-
- /* Number of texture indirections */
- int indirections;
-
- /* Indirection node offsets */
- int alu_offset[4];
-
- /* Machine instructions */
- struct {
- uint32_t alu_rgb_inst;
- uint32_t alu_rgb_addr;
- uint32_t alu_alpha_inst;
- uint32_t alu_alpha_addr;
- } instructions[64]; /* XXX magic num */
-};
-
-struct r5xx_fragment_shader {
- /* Parent class */
- struct r300_fragment_shader shader;
-
- /* Number of used instructions */
- int instruction_count;
-
- /* Machine instructions */
- struct {
- uint32_t inst0;
- uint32_t inst1;
- uint32_t inst2;
- uint32_t inst3;
- uint32_t inst4;
- uint32_t inst5;
- } instructions[256]; /*< XXX magic number */
-};
-
struct r300_texture {
/* Parent class */
struct pipe_texture tex;
@@ -242,33 +184,6 @@ struct r300_vertex_format {
int fs_tab[16];
};
-struct r300_vertex_shader {
- /* Parent class */
- struct pipe_shader_state state;
- struct tgsi_shader_info info;
-
- /* Fallback shader, because Draw has issues */
- struct draw_vertex_shader* draw;
-
- /* Has this shader been translated yet? */
- boolean translated;
-
- /* Are there immediates in this shader?
- * If not, we can heavily optimize recompilation. */
- boolean uses_imms;
-
- /* Number of used instructions */
- int instruction_count;
-
- /* Machine instructions */
- struct {
- uint32_t inst0;
- uint32_t inst1;
- uint32_t inst2;
- uint32_t inst3;
- } instructions[128]; /*< XXX magic number */
-};
-
static struct pipe_viewport_state r300_viewport_identity = {
.scale = {1.0, 1.0, 1.0, 1.0},
.translate = {0.0, 0.0, 0.0, 0.0},
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
deleted file mode 100644
index c83e8526cf7..00000000000
--- a/src/gallium/drivers/r300/r300_debug.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r300_debug.h"
-
-void r3xx_dump_fs(struct r3xx_fragment_shader* fs)
-{
- int i;
-
- for (i = 0; i < fs->alu_instruction_count; i++) {
- }
-}
-
-void r5xx_fs_dump(struct r5xx_fragment_shader* fs)
-{
- int i;
- uint32_t inst;
-
- for (i = 0; i < fs->instruction_count; i++) {
- inst = fs->instructions[i].inst0;
- debug_printf("%d: 0: CMN_INST 0x%08x:", i, inst);
- switch (inst & 0x3) {
- case R500_INST_TYPE_ALU:
- debug_printf("ALU ");
- break;
- case R500_INST_TYPE_OUT:
- debug_printf("OUT ");
- break;
- case R500_INST_TYPE_FC:
- debug_printf("FC ");
- break;
- case R500_INST_TYPE_TEX:
- debug_printf("TEX ");
- break;
- }
- debug_printf("%s %s %s %s ",
- inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
- inst & R500_INST_LAST ? "LAST" : "",
- inst & R500_INST_NOP ? "NOP" : "",
- inst & R500_INST_ALU_WAIT ? "ALU_WAIT" : "");
- debug_printf("wmask: %s omask: %s\n",
- r5xx_fs_mask[(inst >> 11) & 0xf],
- r5xx_fs_mask[(inst >> 15) & 0xf]);
- switch (inst & 0x3) {
- case R500_INST_TYPE_ALU:
- case R500_INST_TYPE_OUT:
- inst = fs->instructions[i].inst1;
- debug_printf(" 1: RGB_ADDR 0x%08x:", inst);
- debug_printf("Addr0: %d%c, Addr1: %d%c, "
- "Addr2: %d%c, srcp:%d\n",
- inst & 0xff, (inst & (1 << 8)) ? 'c' : 't',
- (inst >> 10) & 0xff, (inst & (1 << 18)) ? 'c' : 't',
- (inst >> 20) & 0xff, (inst & (1 << 28)) ? 'c' : 't',
- (inst >> 30));
-
- inst = fs->instructions[i].inst2;
- debug_printf(" 2: ALPHA_ADDR 0x%08x:", inst);
- debug_printf("Addr0: %d%c, Addr1: %d%c, "
- "Addr2: %d%c, srcp:%d\n",
- inst & 0xff, (inst & (1 << 8)) ? 'c' : 't',
- (inst >> 10) & 0xff, (inst & (1 << 18)) ? 'c' : 't',
- (inst >> 20) & 0xff, (inst & (1 << 28)) ? 'c' : 't',
- (inst >> 30));
-
- inst = fs->instructions[i].inst3;
- debug_printf(" 3: RGB_INST 0x%08x:", inst);
- debug_printf("rgb_A_src:%d %s/%s/%s %d "
- "rgb_B_src:%d %s/%s/%s %d\n",
- inst & 0x3, r5xx_fs_swiz[(inst >> 2) & 0x7],
- r5xx_fs_swiz[(inst >> 5) & 0x7],
- r5xx_fs_swiz[(inst >> 8) & 0x7],
- (inst >> 11) & 0x3, (inst >> 13) & 0x3,
- r5xx_fs_swiz[(inst >> 15) & 0x7],
- r5xx_fs_swiz[(inst >> 18) & 0x7],
- r5xx_fs_swiz[(inst >> 21) & 0x7],
- (inst >> 24) & 0x3);
-
- inst = fs->instructions[i].inst4;
- debug_printf(" 4: ALPHA_INST 0x%08x:", inst);
- debug_printf("%s dest:%d%s alp_A_src:%d %s %d "
- "alp_B_src:%d %s %d w:%d\n",
- r5xx_fs_op_alpha[inst & 0xf], (inst >> 4) & 0x7f,
- inst & (1<<11) ? "(rel)":"", (inst >> 12) & 0x3,
- r5xx_fs_swiz[(inst >> 14) & 0x7], (inst >> 17) & 0x3,
- (inst >> 19) & 0x3, r5xx_fs_swiz[(inst >> 21) & 0x7],
- (inst >> 24) & 0x3, (inst >> 31) & 0x1);
-
- inst = fs->instructions[i].inst5;
- debug_printf(" 5: RGBA_INST 0x%08x:", inst);
- debug_printf("%s dest:%d%s rgb_C_src:%d %s/%s/%s %d "
- "alp_C_src:%d %s %d\n",
- r5xx_fs_op_rgb[inst & 0xf], (inst >> 4) & 0x7f,
- inst & (1 << 11) ? "(rel)":"", (inst >> 12) & 0x3,
- r5xx_fs_swiz[(inst >> 14) & 0x7],
- r5xx_fs_swiz[(inst >> 17) & 0x7],
- r5xx_fs_swiz[(inst >> 20) & 0x7],
- (inst >> 23) & 0x3, (inst >> 25) & 0x3,
- r5xx_fs_swiz[(inst >> 27) & 0x7], (inst >> 30) & 0x3);
- break;
- case R500_INST_TYPE_FC:
- /* XXX don't even bother yet */
- break;
- case R500_INST_TYPE_TEX:
- inst = fs->instructions[i].inst1;
- debug_printf(" 1: TEX_INST 0x%08x: id: %d "
- "op:%s, %s, %s %s\n",
- inst, (inst >> 16) & 0xf,
- r5xx_fs_tex[(inst >> 22) & 0x7],
- (inst & (1 << 25)) ? "ACQ" : "",
- (inst & (1 << 26)) ? "IGNUNC" : "",
- (inst & (1 << 27)) ? "UNSCALED" : "SCALED");
-
- inst = fs->instructions[i].inst2;
- debug_printf(" 2: TEX_ADDR 0x%08x: "
- "src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n",
- inst, inst & 0x7f, inst & (1 << 7) ? "(rel)" : "",
- r5xx_fs_swiz[(inst >> 8) & 0x3],
- r5xx_fs_swiz[(inst >> 10) & 0x3],
- r5xx_fs_swiz[(inst >> 12) & 0x3],
- r5xx_fs_swiz[(inst >> 14) & 0x3],
- (inst >> 16) & 0x7f, inst & (1 << 23) ? "(rel)" : "",
- r5xx_fs_swiz[(inst >> 24) & 0x3],
- r5xx_fs_swiz[(inst >> 26) & 0x3],
- r5xx_fs_swiz[(inst >> 28) & 0x3],
- r5xx_fs_swiz[(inst >> 30) & 0x3]);
-
- inst = fs->instructions[i].inst3;
- debug_printf(" 3: TEX_DXDY 0x%08x\n", inst);
- break;
- }
- }
-}
-
-static void r300_vs_op_dump(uint32_t op)
-{
- debug_printf(" dst: %d%s op: ",
- (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
- if (op & 0x80) {
- if (op & 0x1) {
- debug_printf("PVS_MACRO_OP_2CLK_M2X_ADD\n");
- } else {
- debug_printf(" PVS_MACRO_OP_2CLK_MADD\n");
- }
- } else if (op & 0x40) {
- debug_printf("%s\n", r300_vs_me_ops[op & 0x1f]);
- } else {
- debug_printf("%s\n", r300_vs_ve_ops[op & 0x1f]);
- }
-}
-
-void r300_vs_src_dump(uint32_t src)
-{
- debug_printf(" reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n",
- (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3],
- src & (1 << 25) ? "-" : " ",
- r300_vs_swiz_debug[(src >> 13) & 0x7],
- src & (1 << 26) ? "-" : " ",
- r300_vs_swiz_debug[(src >> 16) & 0x7],
- src & (1 << 27) ? "-" : " ",
- r300_vs_swiz_debug[(src >> 19) & 0x7],
- src & (1 << 28) ? "-" : " ",
- r300_vs_swiz_debug[(src >> 22) & 0x7]);
-}
-
-void r300_vs_dump(struct r300_vertex_shader* vs)
-{
- int i;
-
- for (i = 0; i < vs->instruction_count; i++) {
- debug_printf("%d: op: 0x%08x", i, vs->instructions[i].inst0);
- r300_vs_op_dump(vs->instructions[i].inst0);
- debug_printf(" src0: 0x%08x", vs->instructions[i].inst1);
- r300_vs_src_dump(vs->instructions[i].inst1);
- debug_printf(" src1: 0x%08x", vs->instructions[i].inst2);
- r300_vs_src_dump(vs->instructions[i].inst2);
- debug_printf(" src2: 0x%08x", vs->instructions[i].inst3);
- r300_vs_src_dump(vs->instructions[i].inst3);
- }
-}
diff --git a/src/gallium/drivers/r300/r300_debug.h b/src/gallium/drivers/r300/r300_debug.h
deleted file mode 100644
index 6b58c1e2501..00000000000
--- a/src/gallium/drivers/r300/r300_debug.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_DEBUG_H
-#define R300_DEBUG_H
-
-#include "r300_reg.h"
-#include "r300_fs.h"
-#include "r300_vs.h"
-
-static char* r5xx_fs_swiz[] = {
- " R",
- " G",
- " B",
- " A",
- " 0",
- ".5",
- " 1",
- " U",
-};
-
-static char* r5xx_fs_op_rgb[] = {
- "MAD",
- "DP3",
- "DP4",
- "D2A",
- "MIN",
- "MAX",
- "---",
- "CND",
- "CMP",
- "FRC",
- "SOP",
- "MDH",
- "MDV",
-};
-
-static char* r5xx_fs_op_alpha[] = {
- "MAD",
- " DP",
- "MIN",
- "MAX",
- "---",
- "CND",
- "CMP",
- "FRC",
- "EX2",
- "LN2",
- "RCP",
- "RSQ",
- "SIN",
- "COS",
- "MDH",
- "MDV",
-};
-
-static char* r5xx_fs_mask[] = {
- "NONE",
- "R ",
- " G ",
- "RG ",
- " B ",
- "R B ",
- " GB ",
- "RGB ",
- " A",
- "R A",
- " G A",
- "RG A",
- " BA",
- "R BA",
- " GBA",
- "RGBA",
-};
-
-static char* r5xx_fs_tex[] = {
- " NOP",
- " LD",
- "TEXKILL",
- " PROJ",
- "LODBIAS",
- " LOD",
- " DXDY",
-};
-
-static char* r300_vs_ve_ops[] = {
- /* R300 vector ops */
- " VE_NO_OP",
- " VE_DOT_PRODUCT",
- " VE_MULTIPLY",
- " VE_ADD",
- " VE_MULTIPLY_ADD",
- " VE_DISTANCE_FACTOR",
- " VE_FRACTION",
- " VE_MAXIMUM",
- " VE_MINIMUM",
- "VE_SET_GREATER_THAN_EQUAL",
- " VE_SET_LESS_THAN",
- " VE_MULTIPLYX2_ADD",
- " VE_MULTIPLY_CLAMP",
- " VE_FLT2FIX_DX",
- " VE_FLT2FIX_DX_RND",
- /* R500 vector ops */
- " VE_PRED_SET_EQ_PUSH",
- " VE_PRED_SET_GT_PUSH",
- " VE_PRED_SET_GTE_PUSH",
- " VE_PRED_SET_NEQ_PUSH",
- " VE_COND_WRITE_EQ",
- " VE_COND_WRITE_GT",
- " VE_COND_WRITE_GTE",
- " VE_COND_WRITE_NEQ",
- " VE_SET_GREATER_THAN",
- " VE_SET_EQUAL",
- " VE_SET_NOT_EQUAL",
- " (reserved)",
- " (reserved)",
- " (reserved)",
-};
-
-static char* r300_vs_me_ops[] = {
- /* R300 math ops */
- " ME_NO_OP",
- " ME_EXP_BASE2_DX",
- " ME_LOG_BASE2_DX",
- " ME_EXP_BASEE_FF",
- " ME_LIGHT_COEFF_DX",
- " ME_POWER_FUNC_FF",
- " ME_RECIP_DX",
- " ME_RECIP_FF",
- " ME_RECIP_SQRT_DX",
- " ME_RECIP_SQRT_FF",
- " ME_MULTIPLY",
- " ME_EXP_BASE2_FULL_DX",
- " ME_LOG_BASE2_FULL_DX",
- " ME_POWER_FUNC_FF_CLAMP_B",
- "ME_POWER_FUNC_FF_CLAMP_B1",
- "ME_POWER_FUNC_FF_CLAMP_01",
- " ME_SIN",
- " ME_COS",
- /* R500 math ops */
- " ME_LOG_BASE2_IEEE",
- " ME_RECIP_IEEE",
- " ME_RECIP_SQRT_IEEE",
- " ME_PRED_SET_EQ",
- " ME_PRED_SET_GT",
- " ME_PRED_SET_GTE",
- " ME_PRED_SET_NEQ",
- " ME_PRED_SET_CLR",
- " ME_PRED_SET_INV",
- " ME_PRED_SET_POP",
- " ME_PRED_SET_RESTORE",
- " (reserved)",
- " (reserved)",
- " (reserved)",
-};
-
-/* XXX refactor to avoid clashing symbols */
-static char* r300_vs_src_debug[] = {
- "t",
- "i",
- "c",
- "a",
-};
-
-static char* r300_vs_dst_debug[] = {
- "t",
- "a0",
- "o",
- "ox",
- "a",
- "i",
- "u",
- "u",
-};
-
-static char* r300_vs_swiz_debug[] = {
- "X",
- "Y",
- "Z",
- "W",
- "0",
- "1",
- "U",
- "U",
-};
-
-void r5xx_fs_dump(struct r5xx_fragment_shader* fs);
-void r3xx_dump_fs(struct r3xx_fragment_shader* fs);
-
-void r300_vs_dump(struct r300_vertex_shader* vs);
-
-#endif /* R300_DEBUG_H */
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 7ba56cdc1d2..53256fc6dd3 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -24,6 +24,9 @@
#include "r300_emit.h"
+#include "r300_fs.h"
+#include "r300_vs.h"
+
void r300_emit_blend_state(struct r300_context* r300,
struct r300_blend_state* blend)
{
@@ -109,73 +112,158 @@ void r300_emit_dsa_state(struct r300_context* r300,
END_CS;
}
-void r300_emit_fragment_shader(struct r300_context* r300,
- struct r3xx_fragment_shader* fs)
+static const float * get_shader_constant(
+ struct r300_context * r300,
+ struct rc_constant * constant,
+ struct r300_constant_buffer * externals)
+{
+ static const float zero[4] = { 0.0, 0.0, 0.0, 0.0 };
+ switch(constant->Type) {
+ case RC_CONSTANT_EXTERNAL:
+ return externals->constants[constant->u.External];
+
+ case RC_CONSTANT_IMMEDIATE:
+ return constant->u.Immediate;
+
+ default:
+ debug_printf("r300: Implementation error: Unhandled constant type %i\n",
+ constant->Type);
+ return zero;
+ }
+}
+
+/* Convert a normal single-precision float into the 7.16 format
+ * used by the R300 fragment shader.
+ */
+static uint32_t pack_float24(float f)
{
+ union {
+ float fl;
+ uint32_t u;
+ } u;
+ float mantissa;
+ int exponent;
+ uint32_t float24 = 0;
+
+ if (f == 0.0)
+ return 0;
+
+ u.fl = f;
+
+ mantissa = frexpf(f, &exponent);
+
+ /* Handle -ve */
+ if (mantissa < 0) {
+ float24 |= (1 << 23);
+ mantissa = mantissa * -1.0;
+ }
+ /* Handle exponent, bias of 63 */
+ exponent += 62;
+ float24 |= (exponent << 16);
+ /* Kill 7 LSB of mantissa */
+ float24 |= (u.u & 0x7FFFFF) >> 7;
+
+ return float24;
+}
+
+void r300_emit_fragment_program_code(struct r300_context* r300,
+ struct rX00_fragment_program_code* generic_code,
+ struct r300_constant_buffer* externals)
+{
+ struct r300_fragment_program_code * code = &generic_code->code.r300;
+ struct rc_constant_list * constants = &generic_code->constants;
int i;
CS_LOCALS(r300);
- BEGIN_CS(22);
-
- OUT_CS_REG(R300_US_CONFIG, fs->indirections);
- OUT_CS_REG(R300_US_PIXSIZE, fs->shader.stack_size);
- /* XXX figure out exactly how big the sizes are on this reg */
- OUT_CS_REG(R300_US_CODE_OFFSET, 0x40);
- /* XXX figure these ones out a bit better kthnx */
- OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0);
- OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0);
- OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0);
- OUT_CS_REG(R300_US_CODE_ADDR_3, 0x40 | R300_RGBA_OUT);
-
- for (i = 0; i < fs->alu_instruction_count; i++) {
- OUT_CS_REG(R300_US_ALU_RGB_INST_0 + (4 * i),
- fs->instructions[i].alu_rgb_inst);
- OUT_CS_REG(R300_US_ALU_RGB_ADDR_0 + (4 * i),
- fs->instructions[i].alu_rgb_addr);
- OUT_CS_REG(R300_US_ALU_ALPHA_INST_0 + (4 * i),
- fs->instructions[i].alu_alpha_inst);
- OUT_CS_REG(R300_US_ALU_ALPHA_ADDR_0 + (4 * i),
- fs->instructions[i].alu_alpha_addr);
+ BEGIN_CS(15 +
+ code->alu.length * 4 +
+ (code->tex.length ? (1 + code->tex.length) : 0) +
+ (constants->Count ? (1 + constants->Count * 4) : 0));
+
+ OUT_CS_REG(R300_US_CONFIG, code->config);
+ OUT_CS_REG(R300_US_PIXSIZE, code->pixsize);
+ OUT_CS_REG(R300_US_CODE_OFFSET, code->code_offset);
+
+ OUT_CS_REG_SEQ(R300_US_CODE_ADDR_0, 4);
+ for(i = 0; i < 4; ++i)
+ OUT_CS(code->code_addr[i]);
+
+ OUT_CS_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++)
+ OUT_CS(code->alu.inst[i].rgb_inst);
+
+ OUT_CS_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++)
+ OUT_CS(code->alu.inst[i].rgb_addr);
+
+ OUT_CS_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++)
+ OUT_CS(code->alu.inst[i].alpha_inst);
+
+ OUT_CS_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++)
+ OUT_CS(code->alu.inst[i].alpha_addr);
+
+ if (code->tex.length) {
+ OUT_CS_REG_SEQ(R300_US_TEX_INST_0, code->tex.length);
+ for(i = 0; i < code->tex.length; ++i)
+ OUT_CS(code->tex.inst[i]);
+ }
+
+ if (constants->Count) {
+ OUT_CS_ONE_REG(R300_PFS_PARAM_0_X, constants->Count * 4);
+ for(i = 0; i < constants->Count; ++i) {
+ const float * data = get_shader_constant(r300, &constants->Constants[i], externals);
+ OUT_CS(pack_float24(data[0]));
+ OUT_CS(pack_float24(data[1]));
+ OUT_CS(pack_float24(data[2]));
+ OUT_CS(pack_float24(data[3]));
+ }
}
END_CS;
}
-void r500_emit_fragment_shader(struct r300_context* r300,
- struct r5xx_fragment_shader* fs)
+void r500_emit_fragment_program_code(struct r300_context* r300,
+ struct rX00_fragment_program_code* generic_code,
+ struct r300_constant_buffer* externals)
{
+ struct r500_fragment_program_code * code = &generic_code->code.r500;
+ struct rc_constant_list * constants = &generic_code->constants;
int i;
- struct r300_constant_buffer* constants =
- &r300->shader_constants[PIPE_SHADER_FRAGMENT];
CS_LOCALS(r300);
- BEGIN_CS(9 + (fs->instruction_count * 6) + (constants->count ? 3 : 0) +
- (constants->count * 4));
- OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
- OUT_CS_REG(R500_US_PIXSIZE, fs->shader.stack_size);
- OUT_CS_REG(R500_US_CODE_ADDR, R500_US_CODE_START_ADDR(0) |
- R500_US_CODE_END_ADDR(fs->instruction_count));
+ BEGIN_CS(13 +
+ ((code->inst_end + 1) * 6) +
+ (constants->Count ? (3 + (constants->Count * 4)) : 0));
+ OUT_CS_REG(R500_US_CONFIG, 0);
+ OUT_CS_REG(R500_US_PIXSIZE, code->max_temp_idx);
+ OUT_CS_REG(R500_US_CODE_RANGE,
+ R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end));
+ OUT_CS_REG(R500_US_CODE_OFFSET, 0);
+ OUT_CS_REG(R500_US_CODE_ADDR,
+ R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end));
OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR);
- OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, fs->instruction_count * 6);
- for (i = 0; i < fs->instruction_count; i++) {
- OUT_CS(fs->instructions[i].inst0);
- OUT_CS(fs->instructions[i].inst1);
- OUT_CS(fs->instructions[i].inst2);
- OUT_CS(fs->instructions[i].inst3);
- OUT_CS(fs->instructions[i].inst4);
- OUT_CS(fs->instructions[i].inst5);
- }
-
- if (constants->count) {
- OUT_CS_REG(R500_GA_US_VECTOR_INDEX,
- R500_GA_US_VECTOR_INDEX_TYPE_CONST);
- OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->count * 4);
- for (i = 0; i < constants->count; i++) {
- OUT_CS_32F(constants->constants[i][0]);
- OUT_CS_32F(constants->constants[i][1]);
- OUT_CS_32F(constants->constants[i][2]);
- OUT_CS_32F(constants->constants[i][3]);
+ OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, (code->inst_end + 1) * 6);
+ for (i = 0; i <= code->inst_end; i++) {
+ OUT_CS(code->inst[i].inst0);
+ OUT_CS(code->inst[i].inst1);
+ OUT_CS(code->inst[i].inst2);
+ OUT_CS(code->inst[i].inst3);
+ OUT_CS(code->inst[i].inst4);
+ OUT_CS(code->inst[i].inst5);
+ }
+
+ if (constants->Count) {
+ OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST);
+ OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->Count * 4);
+ for (i = 0; i < constants->Count; i++) {
+ const float * data = get_shader_constant(r300, &constants->Constants[i], externals);
+ OUT_CS_32F(data[0]);
+ OUT_CS_32F(data[1]);
+ OUT_CS_32F(data[2]);
+ OUT_CS_32F(data[3]);
}
}
@@ -190,7 +278,7 @@ void r300_emit_fb_state(struct r300_context* r300,
int i;
CS_LOCALS(r300);
- BEGIN_CS((8 * fb->nr_cbufs) + (fb->zsbuf ? 8 : 0) + 4);
+ BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 10 : 0) + 4);
for (i = 0; i < fb->nr_cbufs; i++) {
tex = (struct r300_texture*)fb->cbufs[i]->texture;
assert(tex && tex->buffer && "cbuf is marked, but NULL!");
@@ -199,8 +287,10 @@ void r300_emit_fb_state(struct r300_context* r300,
OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
- OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), pixpitch |
- r300_translate_colorformat(tex->tex.format));
+ OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1);
+ OUT_CS_RELOC(tex->buffer, pixpitch |
+ r300_translate_colorformat(tex->tex.format), 0,
+ RADEON_GEM_DOMAIN_VRAM, 0);
OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i),
r300_translate_out_fmt(fb->cbufs[i]->format));
@@ -216,7 +306,8 @@ void r300_emit_fb_state(struct r300_context* r300,
OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format));
- OUT_CS_REG(R300_ZB_DEPTHPITCH, pixpitch);
+ OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
+ OUT_CS_RELOC(tex->buffer, pixpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
}
OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
@@ -380,13 +471,13 @@ void r300_emit_vertex_format_state(struct r300_context* r300)
END_CS;
}
-void r300_emit_vertex_shader(struct r300_context* r300,
- struct r300_vertex_shader* vs)
+void r300_emit_vertex_program_code(struct r300_context* r300,
+ struct r300_vertex_program_code* code,
+ struct r300_constant_buffer* constants)
{
int i;
struct r300_screen* r300screen = r300_screen(r300->context.screen);
- struct r300_constant_buffer* constants =
- &r300->shader_constants[PIPE_SHADER_VERTEX];
+ unsigned instruction_count = code->length / 4;
CS_LOCALS(r300);
if (!r300screen->caps->has_tcl) {
@@ -395,10 +486,10 @@ void r300_emit_vertex_shader(struct r300_context* r300,
return;
}
- if (constants->count) {
- BEGIN_CS(14 + (vs->instruction_count * 4) + (constants->count * 4));
+ if (code->constants.Count) {
+ BEGIN_CS(14 + code->length + (code->constants.Count * 4));
} else {
- BEGIN_CS(11 + (vs->instruction_count * 4));
+ BEGIN_CS(11 + code->length);
}
/* R300_VAP_PVS_CODE_CNTL_0
@@ -408,30 +499,27 @@ void r300_emit_vertex_shader(struct r300_context* r300,
* XXX these could be optimized to select better values... */
OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
OUT_CS(R300_PVS_FIRST_INST(0) |
- R300_PVS_XYZW_VALID_INST(vs->instruction_count - 1) |
- R300_PVS_LAST_INST(vs->instruction_count - 1));
- OUT_CS(R300_PVS_MAX_CONST_ADDR(constants->count - 1));
- OUT_CS(vs->instruction_count - 1);
+ R300_PVS_XYZW_VALID_INST(instruction_count - 1) |
+ R300_PVS_LAST_INST(instruction_count - 1));
+ OUT_CS(R300_PVS_MAX_CONST_ADDR(code->constants.Count - 1));
+ OUT_CS(instruction_count - 1);
OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
- OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, vs->instruction_count * 4);
- for (i = 0; i < vs->instruction_count; i++) {
- OUT_CS(vs->instructions[i].inst0);
- OUT_CS(vs->instructions[i].inst1);
- OUT_CS(vs->instructions[i].inst2);
- OUT_CS(vs->instructions[i].inst3);
- }
+ OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length);
+ for (i = 0; i < code->length; i++)
+ OUT_CS(code->body.d[i]);
- if (constants->count) {
+ if (code->constants.Count) {
OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
(r300screen->caps->is_r500 ?
R500_PVS_CONST_START : R300_PVS_CONST_START));
- OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, constants->count * 4);
- for (i = 0; i < constants->count; i++) {
- OUT_CS_32F(constants->constants[i][0]);
- OUT_CS_32F(constants->constants[i][1]);
- OUT_CS_32F(constants->constants[i][2]);
- OUT_CS_32F(constants->constants[i][3]);
+ OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->constants.Count * 4);
+ for (i = 0; i < code->constants.Count; i++) {
+ const float * data = get_shader_constant(r300, &code->constants.Constants[i], constants);
+ OUT_CS_32F(data[0]);
+ OUT_CS_32F(data[1]);
+ OUT_CS_32F(data[2]);
+ OUT_CS_32F(data[3]);
}
}
@@ -443,6 +531,12 @@ void r300_emit_vertex_shader(struct r300_context* r300,
END_CS;
}
+void r300_emit_vertex_shader(struct r300_context* r300,
+ struct r300_vertex_shader* vs)
+{
+ r300_emit_vertex_program_code(r300, &vs->code, &r300->shader_constants[PIPE_SHADER_VERTEX]);
+}
+
void r300_emit_viewport_state(struct r300_context* r300,
struct r300_viewport_state* viewport)
{
@@ -531,10 +625,11 @@ validate:
} else {
debug_printf("No VBO while emitting dirty state!\n");
}
- if (r300->winsys->validate(r300->winsys)) {
+ if (!r300->winsys->validate(r300->winsys)) {
r300->context.flush(&r300->context, 0, NULL);
if (invalid) {
/* Well, hell. */
+ debug_printf("r300: Stuck in validation loop, gonna quit now.");
exit(1);
}
invalid = TRUE;
@@ -563,11 +658,9 @@ validate:
if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) {
if (r300screen->caps->is_r500) {
- r500_emit_fragment_shader(r300,
- (struct r5xx_fragment_shader*)r300->fs);
+ r500_emit_fragment_program_code(r300, &r300->fs->code, &r300->shader_constants[PIPE_SHADER_FRAGMENT]);
} else {
- r300_emit_fragment_shader(r300,
- (struct r3xx_fragment_shader*)r300->fs);
+ r300_emit_fragment_program_code(r300, &r300->fs->code, &r300->shader_constants[PIPE_SHADER_FRAGMENT]);
}
r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER;
}
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index fda26f39481..350691d592d 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -30,6 +30,9 @@
#include "r300_screen.h"
#include "r300_state_inlines.h"
+struct rX00_fragment_program_code;
+struct r300_vertex_program_code;
+
void r300_emit_blend_state(struct r300_context* r300,
struct r300_blend_state* blend);
@@ -42,11 +45,13 @@ void r300_emit_clip_state(struct r300_context* r300,
void r300_emit_dsa_state(struct r300_context* r300,
struct r300_dsa_state* dsa);
-void r300_emit_fragment_shader(struct r300_context* r300,
- struct r3xx_fragment_shader* fs);
+void r300_emit_fragment_program_code(struct r300_context* r300,
+ struct rX00_fragment_program_code* generic_code,
+ struct r300_constant_buffer* externals);
-void r500_emit_fragment_shader(struct r300_context* r300,
- struct r5xx_fragment_shader* fs);
+void r500_emit_fragment_program_code(struct r300_context* r300,
+ struct rX00_fragment_program_code* generic_code,
+ struct r300_constant_buffer* externals);
void r300_emit_fb_state(struct r300_context* r300,
struct pipe_framebuffer_state* fb);
@@ -68,6 +73,10 @@ void r300_emit_vertex_buffer(struct r300_context* r300);
void r300_emit_vertex_format_state(struct r300_context* r300);
+void r300_emit_vertex_program_code(struct r300_context* r300,
+ struct r300_vertex_program_code* code,
+ struct r300_constant_buffer* constants);
+
void r300_emit_vertex_shader(struct r300_context* r300,
struct r300_vertex_shader* vs);
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 4b304306d0f..36463b9a2eb 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -23,87 +23,115 @@
#include "r300_fs.h"
-void r300_translate_fragment_shader(struct r300_context* r300,
- struct r300_fragment_shader* fs)
+#include "r300_tgsi_to_rc.h"
+
+#include "radeon_compiler.h"
+
+static void find_output_registers(struct r300_fragment_program_compiler * compiler,
+ struct r300_fragment_shader * fs)
{
- struct tgsi_parse_context parser;
- int i;
- boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500;
- struct r300_constant_buffer* consts =
- &r300->shader_constants[PIPE_SHADER_FRAGMENT];
+ unsigned i;
- struct r300_fs_asm* assembler = CALLOC_STRUCT(r300_fs_asm);
- if (assembler == NULL) {
- return;
- }
- /* Setup starting offset for immediates. */
- assembler->imm_offset = consts->user_count;
- /* Enable depth writes, if needed. */
- assembler->writes_depth = fs->info.writes_z;
-
- /* Make sure we start at the beginning of the shader. */
- if (is_r500) {
- ((struct r5xx_fragment_shader*)fs)->instruction_count = 0;
- }
+ /* Mark the outputs as not present initially */
+ compiler->OutputColor = fs->info.num_outputs;
+ compiler->OutputDepth = fs->info.num_outputs;
- tgsi_parse_init(&parser, fs->state.tokens);
+ /* Now see where they really are. */
+ for(i = 0; i < fs->info.num_outputs; ++i) {
+ switch(fs->info.output_semantic_name[i]) {
+ case TGSI_SEMANTIC_COLOR:
+ compiler->OutputColor = i;
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ compiler->OutputDepth = i;
+ break;
+ }
+ }
+}
- while (!tgsi_parse_end_of_tokens(&parser)) {
- tgsi_parse_token(&parser);
+static void allocate_hardware_inputs(
+ struct r300_fragment_program_compiler * c,
+ void (*allocate)(void * data, unsigned input, unsigned hwreg),
+ void * mydata)
+{
+ struct tgsi_shader_info* info = &((struct r300_fragment_shader*)c->UserData)->info;
+ int total_colors = 0;
+ int colors = 0;
+ int total_generic = 0;
+ int generic = 0;
+ int i;
- /* This is seriously the lamest way to create fragment programs ever.
- * I blame TGSI. */
- switch (parser.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- /* Allocated registers sitting at the beginning
- * of the program. */
- r300_fs_declare(assembler, &parser.FullToken.FullDeclaration);
+ for (i = 0; i < info->num_inputs; i++) {
+ switch (info->input_semantic_name[i]) {
+ case TGSI_SEMANTIC_COLOR:
+ total_colors++;
break;
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- debug_printf("r300: Emitting immediate to constant buffer, "
- "position %d\n",
- assembler->imm_offset + assembler->imm_count);
- /* I am not amused by the length of these. */
- for (i = 0; i < 4; i++) {
- consts->constants[assembler->imm_offset +
- assembler->imm_count][i] =
- parser.FullToken.FullImmediate.u.ImmediateFloat32[i]
- .Float;
- }
- assembler->imm_count++;
+ case TGSI_SEMANTIC_FOG:
+ case TGSI_SEMANTIC_GENERIC:
+ total_generic++;
break;
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- if (is_r500) {
- r5xx_fs_instruction((struct r5xx_fragment_shader*)fs,
- assembler, &parser.FullToken.FullInstruction);
- } else {
- r3xx_fs_instruction((struct r3xx_fragment_shader*)fs,
- assembler, &parser.FullToken.FullInstruction);
- }
+ }
+ }
+
+ for(i = 0; i < info->num_inputs; i++) {
+ switch (info->input_semantic_name[i]) {
+ case TGSI_SEMANTIC_COLOR:
+ allocate(mydata, i, colors);
+ colors++;
+ break;
+ case TGSI_SEMANTIC_FOG:
+ case TGSI_SEMANTIC_GENERIC:
+ allocate(mydata, i, total_colors + generic);
+ generic++;
break;
}
}
+}
+
+void r300_translate_fragment_shader(struct r300_context* r300,
+ struct r300_fragment_shader* fs)
+{
+ struct r300_fragment_program_compiler compiler;
+ struct tgsi_to_rc ttr;
+
+ memset(&compiler, 0, sizeof(compiler));
+ rc_init(&compiler.Base);
+ compiler.Base.Debug = 1;
+
+ compiler.code = &fs->code;
+ compiler.is_r500 = r300_screen(r300->context.screen)->caps->is_r500;
+ compiler.AllocateHwInputs = &allocate_hardware_inputs;
+ compiler.UserData = fs;
+
+ /* TODO: Program compilation depends on texture compare modes,
+ * which are sampler state. Therefore, programs need to be recompiled
+ * depending on this state as in the classic Mesa driver.
+ *
+ * This is not yet handled correctly.
+ */
- debug_printf("r300: fs: %d texs and %d colors, first free reg is %d\n",
- assembler->tex_count, assembler->color_count,
- assembler->tex_count + assembler->color_count);
-
- consts->count = consts->user_count + assembler->imm_count;
- fs->uses_imms = assembler->imm_count;
- debug_printf("r300: fs: %d total constants, "
- "%d from user and %d from immediates\n", consts->count,
- consts->user_count, assembler->imm_count);
- r3xx_fs_finalize(fs, assembler);
- if (is_r500) {
- r5xx_fs_finalize((struct r5xx_fragment_shader*)fs, assembler);
+ find_output_registers(&compiler, fs);
+
+ if (compiler.Base.Debug) {
+ debug_printf("r300: Initial fragment program\n");
+ tgsi_dump(fs->state.tokens, 0);
}
- tgsi_dump(fs->state.tokens, 0);
- /* XXX finish r300 dumper too */
- if (is_r500) {
- r5xx_fs_dump((struct r5xx_fragment_shader*)fs);
+ /* Translate TGSI to our internal representation */
+ ttr.compiler = &compiler.Base;
+ ttr.info = &fs->info;
+
+ r300_tgsi_to_rc(&ttr, fs->state.tokens);
+
+ /* Invoke the compiler */
+ r3xx_compile_fragment_program(&compiler);
+ if (compiler.Base.Error) {
+ /* Todo: Fail gracefully */
+ fprintf(stderr, "r300 FP: Compiler error\n");
+ abort();
}
- tgsi_parse_free(&parser);
- FREE(assembler);
+ /* And, finally... */
+ rc_destroy(&compiler.Base);
+ fs->translated = TRUE;
}
diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
index 18deb7a05e4..9fab7894024 100644
--- a/src/gallium/drivers/r300/r300_fs.h
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -30,6 +30,21 @@
#include "r3xx_fs.h"
#include "r5xx_fs.h"
+#include "radeon_code.h"
+
+struct r300_fragment_shader {
+ /* Parent class */
+ struct pipe_shader_state state;
+ struct tgsi_shader_info info;
+
+ /* Has this shader been translated yet? */
+ boolean translated;
+
+ /* Compiled code */
+ struct rX00_fragment_program_code code;
+};
+
+
void r300_translate_fragment_shader(struct r300_context* r300,
struct r300_fragment_shader* fs);
diff --git a/src/gallium/drivers/r300/r300_fs_inlines.h b/src/gallium/drivers/r300/r300_fs_inlines.h
deleted file mode 100644
index be4be9465e6..00000000000
--- a/src/gallium/drivers/r300/r300_fs_inlines.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- * Joakim Sindholt <opensource@zhasha.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_FS_INLINES_H
-#define R300_FS_INLINES_H
-
-#include "tgsi/tgsi_parse.h"
-
-#include "r300_context.h"
-#include "r300_debug.h"
-#include "r300_reg.h"
-#include "r300_screen.h"
-#include "r300_shader_inlines.h"
-
-/* Temporary struct used to hold assembly state while putting together
- * fragment programs. */
-struct r300_fs_asm {
- /* Pipe context. */
- struct r300_context* r300;
- /* Number of colors. */
- unsigned color_count;
- /* Number of texcoords. */
- unsigned tex_count;
- /* Offset for temporary registers. Inputs and temporaries have no
- * distinguishing markings, so inputs start at 0 and the first usable
- * temporary register is after all inputs. */
- unsigned temp_offset;
- /* Number of requested temporary registers. */
- unsigned temp_count;
- /* Offset for immediate constants. Neither R300 nor R500 can do four
- * inline constants per source, so instead we copy immediates into the
- * constant buffer. */
- unsigned imm_offset;
- /* Number of immediate constants. */
- unsigned imm_count;
- /* Are depth writes enabled? */
- boolean writes_depth;
- /* Depth write offset. This is the TGSI output that corresponds to
- * depth writes. */
- unsigned depth_output;
-};
-
-static INLINE void r300_fs_declare(struct r300_fs_asm* assembler,
- struct tgsi_full_declaration* decl)
-{
- switch (decl->Declaration.File) {
- case TGSI_FILE_INPUT:
- switch (decl->Semantic.SemanticName) {
- case TGSI_SEMANTIC_COLOR:
- assembler->color_count++;
- break;
- case TGSI_SEMANTIC_FOG:
- case TGSI_SEMANTIC_GENERIC:
- assembler->tex_count++;
- break;
- default:
- debug_printf("r300: fs: Bad semantic declaration %d\n",
- decl->Semantic.SemanticName);
- break;
- }
- break;
- case TGSI_FILE_OUTPUT:
- /* Depth write. Mark the position of the output so we can
- * identify it later. */
- if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
- assembler->depth_output = decl->DeclarationRange.First;
- }
- break;
- case TGSI_FILE_CONSTANT:
- break;
- case TGSI_FILE_TEMPORARY:
- assembler->temp_count++;
- break;
- default:
- debug_printf("r300: fs: Bad file %d\n", decl->Declaration.File);
- break;
- }
-
- assembler->temp_offset = assembler->color_count + assembler->tex_count;
-}
-
-static INLINE unsigned r300_fs_src(struct r300_fs_asm* assembler,
- struct tgsi_src_register* src)
-{
- switch (src->File) {
- case TGSI_FILE_NULL:
- return 0;
- case TGSI_FILE_INPUT:
- /* XXX may be wrong */
- return src->Index;
- break;
- case TGSI_FILE_TEMPORARY:
- return src->Index + assembler->temp_offset;
- break;
- case TGSI_FILE_IMMEDIATE:
- return (src->Index + assembler->imm_offset) | (1 << 8);
- break;
- case TGSI_FILE_CONSTANT:
- /* XXX magic */
- return src->Index | (1 << 8);
- break;
- default:
- debug_printf("r300: fs: Unimplemented src %d\n", src->File);
- break;
- }
- return 0;
-}
-
-static INLINE unsigned r300_fs_dst(struct r300_fs_asm* assembler,
- struct tgsi_dst_register* dst)
-{
- switch (dst->File) {
- case TGSI_FILE_NULL:
- /* This happens during KIL instructions. */
- return 0;
- break;
- case TGSI_FILE_OUTPUT:
- return 0;
- break;
- case TGSI_FILE_TEMPORARY:
- return dst->Index + assembler->temp_offset;
- break;
- default:
- debug_printf("r300: fs: Unimplemented dst %d\n", dst->File);
- break;
- }
- return 0;
-}
-
-static INLINE boolean r300_fs_is_depr(struct r300_fs_asm* assembler,
- struct tgsi_dst_register* dst)
-{
- return (assembler->writes_depth &&
- (dst->File == TGSI_FILE_OUTPUT) &&
- (dst->Index == assembler->depth_output));
-}
-
-#endif /* R300_FS_INLINES_H */
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index da1d5ffe2fd..96a73046217 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -147,6 +147,8 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
case PIPE_CAP_TGSI_CONT_SUPPORTED:
/* XXX */
return 0;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ return 1;
default:
debug_printf("r300: Implementation error: Bad param %d\n",
param);
@@ -320,13 +322,14 @@ r300_get_tex_transfer(struct pipe_screen *screen,
trans = CALLOC_STRUCT(r300_transfer);
if (trans) {
pipe_texture_reference(&trans->transfer.texture, texture);
- trans->transfer.format = trans->transfer.format;
+ trans->transfer.format = texture->format;
trans->transfer.width = w;
trans->transfer.height = h;
trans->transfer.block = texture->block;
trans->transfer.nblocksx = texture->nblocksx[level];
trans->transfer.nblocksy = texture->nblocksy[level];
- trans->transfer.stride = tex->stride;
+ trans->transfer.stride = align(pf_get_stride(&trans->transfer.block,
+ texture->width[level]), 32);
trans->transfer.usage = usage;
trans->offset = offset;
}
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 68da0aa4cbb..a02fb34b2a7 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -32,6 +32,7 @@
#include "r300_reg.h"
#include "r300_state_inlines.h"
#include "r300_fs.h"
+#include "r300_vs.h"
/* r300_state: Functions used to intialize state context by translating
* Gallium state objects into semi-native r300 state objects. */
@@ -137,7 +138,6 @@ static void
const struct pipe_constant_buffer* buffer)
{
struct r300_context* r300 = r300_context(pipe);
- int i = r300->shader_constants[shader].user_count;
/* This entire chunk of code seems ever-so-slightly baked.
* It's as if I've got pipe_buffer* matryoshkas... */
@@ -148,26 +148,13 @@ static void
map, buffer->buffer->size);
pipe->winsys->buffer_unmap(pipe->winsys, buffer->buffer);
- r300->shader_constants[shader].user_count =
+ r300->shader_constants[shader].count =
buffer->buffer->size / (sizeof(float) * 4);
} else {
- r300->shader_constants[shader].user_count = 0;
+ r300->shader_constants[shader].count = 0;
}
r300->dirty_state |= R300_NEW_CONSTANTS;
-
- /* If the number of constants have changed, invalidate the shader. */
- if (r300->shader_constants[shader].user_count != i) {
- if (shader == PIPE_SHADER_FRAGMENT && r300->fs &&
- r300->fs->uses_imms) {
- r300->fs->translated = FALSE;
- r300_translate_fragment_shader(r300, r300->fs);
- } else if (shader == PIPE_SHADER_VERTEX && r300->vs &&
- r300->vs->uses_imms) {
- r300->vs->translated = FALSE;
- r300_translate_vertex_shader(r300, r300->vs);
- }
- }
}
/* Create a new depth, stencil, and alpha state based on the CSO dsa state.
@@ -284,14 +271,9 @@ static void
static void* r300_create_fs_state(struct pipe_context* pipe,
const struct pipe_shader_state* shader)
{
- struct r300_context* r300 = r300_context(pipe);
struct r300_fragment_shader* fs = NULL;
- if (r300_screen(r300->context.screen)->caps->is_r500) {
- fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r5xx_fragment_shader);
- } else {
- fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r3xx_fragment_shader);
- }
+ fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r300_fragment_shader);
/* Copy state directly into shader. */
fs->state = *shader;
@@ -315,7 +297,6 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
r300_translate_fragment_shader(r300, fs);
}
- fs->translated = TRUE;
r300->fs = fs;
r300->dirty_state |= R300_NEW_FRAGMENT_SHADER;
@@ -325,6 +306,7 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
static void r300_delete_fs_state(struct pipe_context* pipe, void* shader)
{
struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
+ rc_constants_destroy(&fs->code.constants);
FREE(fs->state.tokens);
FREE(shader);
}
@@ -688,6 +670,7 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
if (r300_screen(pipe->screen)->caps->has_tcl) {
struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
+ rc_constants_destroy(&vs->code.constants);
draw_delete_vertex_shader(r300->draw, vs->draw);
FREE(vs->state.tokens);
FREE(shader);
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 2477b30822b..ea670f41fb5 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -22,6 +22,9 @@
#include "r300_state_derived.h"
+#include "r300_fs.h"
+#include "r300_vs.h"
+
/* r300_state_derived: Various bits of state which are dependent upon
* currently bound CSO data. */
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index 430129d5bd2..1e92374a4e9 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -34,7 +34,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
CS_LOCALS(r300);
- BEGIN_CS(22 + (caps->has_tcl ? 2: 0));
+ BEGIN_CS(24 + (caps->has_tcl ? 2: 0));
/*** Graphics Backend (GB) ***/
/* Various GB enables */
@@ -56,6 +56,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0);
OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0);
OUT_CS_REG(R300_FG_DEPTH_SRC, 0x0);
+ OUT_CS_REG(R300_US_W_FMT, 0x0);
/*** VAP ***/
/* Max and min vertex index clamp. */
@@ -72,7 +73,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
END_CS;
/* XXX unsorted stuff from surface_fill */
- BEGIN_CS(71 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
+ BEGIN_CS(64 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
/* Flush PVS. */
OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
@@ -132,11 +133,5 @@ void r300_emit_invariant_state(struct r300_context* r300)
/* XXX */
OUT_CS_REG(R300_SC_CLIP_RULE, 0xaaaa);
- OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4);
- OUT_CS(R300_C0_SEL_B | R300_C1_SEL_G | R300_C2_SEL_R | R300_C3_SEL_A);
- OUT_CS(R300_US_OUT_FMT_UNUSED);
- OUT_CS(R300_US_OUT_FMT_UNUSED);
- OUT_CS(R300_US_OUT_FMT_UNUSED);
- OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0);
END_CS;
}
diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c
index fdabe4d9cfe..a093f839454 100644
--- a/src/gallium/drivers/r300/r300_surface.c
+++ b/src/gallium/drivers/r300/r300_surface.c
@@ -37,7 +37,7 @@ static void r300_surface_setup(struct r300_context* r300,
r300_emit_dsa_state(r300, &dsa_clear_state);
r300_emit_rs_state(r300, &rs_clear_state);
- BEGIN_CS(24);
+ BEGIN_CS(26);
/* Viewport setup */
OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
@@ -78,8 +78,10 @@ static void r300_surface_setup(struct r300_context* r300,
/* Setup colorbuffer. */
OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1);
OUT_CS_RELOC(dest->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
- OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch |
- r300_translate_colorformat(dest->tex.format));
+ OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0, 1);
+ OUT_CS_RELOC(dest->buffer, pixpitch |
+ r300_translate_colorformat(dest->tex.format), 0,
+ RADEON_GEM_DOMAIN_VRAM, 0);
OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0xf);
END_CS;
@@ -125,9 +127,10 @@ validate:
r300->context.flush(&r300->context, 0, NULL);
goto validate;
}
- if (r300->winsys->validate(r300->winsys)) {
+ if (!r300->winsys->validate(r300->winsys)) {
r300->context.flush(&r300->context, 0, NULL);
if (invalid) {
+ debug_printf("r300: Stuck in validation loop, gonna fallback.");
goto fallback;
}
invalid = TRUE;
@@ -138,10 +141,14 @@ validate:
/* Vertex shader setup */
if (caps->has_tcl) {
- r300_emit_vertex_shader(r300, &r300_passthrough_vertex_shader);
+ r300_emit_vertex_program_code(r300, &r300_passthrough_vertex_shader, 0);
} else {
BEGIN_CS(4);
- OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VAP_TCL_BYPASS);
+ OUT_CS_REG(R300_VAP_CNTL_STATUS,
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ R300_VC_32BIT_SWAP |
+#endif
+ R300_VAP_TCL_BYPASS);
OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) |
R300_PVS_NUM_CNTLRS(5) |
R300_PVS_NUM_FPUS(caps->num_vert_fpus) |
@@ -151,10 +158,10 @@ validate:
/* Fragment shader setup */
if (caps->is_r500) {
- r500_emit_fragment_shader(r300, &r5xx_passthrough_fragment_shader);
+ r500_emit_fragment_program_code(r300, &r5xx_passthrough_fragment_shader, 0);
r300_emit_rs_block_state(r300, &r5xx_rs_block_clear_state);
} else {
- r300_emit_fragment_shader(r300, &r3xx_passthrough_fragment_shader);
+ r300_emit_fragment_program_code(r300, &r3xx_passthrough_fragment_shader, 0);
r300_emit_rs_block_state(r300, &r3xx_rs_block_clear_state);
}
@@ -256,9 +263,10 @@ validate:
r300->context.flush(&r300->context, 0, NULL);
goto validate;
}
- if (r300->winsys->validate(r300->winsys)) {
+ if (!r300->winsys->validate(r300->winsys)) {
r300->context.flush(&r300->context, 0, NULL);
if (invalid) {
+ debug_printf("r300: Stuck in validation loop, gonna fallback.");
goto fallback;
}
invalid = TRUE;
@@ -275,10 +283,14 @@ validate:
/* Vertex shader setup */
if (caps->has_tcl) {
- r300_emit_vertex_shader(r300, &r300_passthrough_vertex_shader);
+ r300_emit_vertex_program_code(r300, &r300_passthrough_vertex_shader, 0);
} else {
BEGIN_CS(4);
- OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VAP_TCL_BYPASS);
+ OUT_CS_REG(R300_VAP_CNTL_STATUS,
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ R300_VC_32BIT_SWAP |
+#endif
+ R300_VAP_TCL_BYPASS);
OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) |
R300_PVS_NUM_CNTLRS(5) |
R300_PVS_NUM_FPUS(caps->num_vert_fpus) |
@@ -288,10 +300,10 @@ validate:
/* Fragment shader setup */
if (caps->is_r500) {
- r500_emit_fragment_shader(r300, &r5xx_texture_fragment_shader);
+ r500_emit_fragment_program_code(r300, &r5xx_texture_fragment_shader, 0);
r300_emit_rs_block_state(r300, &r5xx_rs_block_copy_state);
} else {
- r300_emit_fragment_shader(r300, &r3xx_texture_fragment_shader);
+ r300_emit_fragment_program_code(r300, &r3xx_texture_fragment_shader, 0);
r300_emit_rs_block_state(r300, &r3xx_rs_block_copy_state);
}
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 11c7858d422..590052509cc 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -22,13 +22,6 @@
#include "r300_texture.h"
-/* XXX maths need to go to util */
-
-static int minify(int i)
-{
- return MAX2(1, i >> 1);
-}
-
static void r300_setup_texture_state(struct r300_texture* tex,
unsigned width,
unsigned height,
@@ -55,6 +48,9 @@ static void r300_setup_texture_state(struct r300_texture* tex,
if (height > 2048) {
state->format2 |= R500_TXHEIGHT_BIT11;
}
+
+ debug_printf("r300: Set texture state (%dx%d, pitch %d, %d levels)\n",
+ width, height, pitch, levels);
}
static void r300_setup_miptree(struct r300_texture* tex)
@@ -71,19 +67,23 @@ static void r300_setup_miptree(struct r300_texture* tex)
}
base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]);
- base->nblocksy[i] = pf_get_nblocksy(&base->block, base->width[i]);
+ base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]);
/* Radeons enjoy things in multiples of 64.
*
* XXX
* POT, uncompressed, unmippmapped textures can be aligned to 32,
* instead of 64. */
- stride = align(base->nblocksx[i] * base->block.size, 64);
+ stride = align(pf_get_stride(&base->block, base->width[i]), 32);
size = stride * base->nblocksy[i] * base->depth[i];
- tex->offset[i] = align(tex->size, 64);
+ tex->offset[i] = align(tex->size, 32);
tex->size = tex->offset[i] + size;
+ debug_printf("r300: Texture miptree: Level %d "
+ "(%dx%dx%d px, pitch %d bytes)\n",
+ i, base->width[i], base->height[i], base->depth[i],
+ stride);
/* Save stride of first level to the texture. */
if (i == 0) {
tex->stride = stride;
@@ -111,7 +111,7 @@ static struct pipe_texture*
r300_setup_texture_state(tex, template->width[0], template->height[0],
template->width[0], template->last_level);
- tex->buffer = screen->buffer_create(screen, 64,
+ tex->buffer = screen->buffer_create(screen, 1024,
PIPE_BUFFER_USAGE_PIXEL,
tex->size);
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
new file mode 100644
index 00000000000..3adbb715f37
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -0,0 +1,337 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_tgsi_to_rc.h"
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_util.h"
+
+
+static unsigned translate_opcode(unsigned opcode)
+{
+ switch(opcode) {
+ case TGSI_OPCODE_ARL: return OPCODE_ARL;
+ case TGSI_OPCODE_MOV: return OPCODE_MOV;
+ case TGSI_OPCODE_LIT: return OPCODE_LIT;
+ case TGSI_OPCODE_RCP: return OPCODE_RCP;
+ case TGSI_OPCODE_RSQ: return OPCODE_RSQ;
+ case TGSI_OPCODE_EXP: return OPCODE_EXP;
+ case TGSI_OPCODE_LOG: return OPCODE_LOG;
+ case TGSI_OPCODE_MUL: return OPCODE_MUL;
+ case TGSI_OPCODE_ADD: return OPCODE_ADD;
+ case TGSI_OPCODE_DP3: return OPCODE_DP3;
+ case TGSI_OPCODE_DP4: return OPCODE_DP4;
+ case TGSI_OPCODE_DST: return OPCODE_DST;
+ case TGSI_OPCODE_MIN: return OPCODE_MIN;
+ case TGSI_OPCODE_MAX: return OPCODE_MAX;
+ case TGSI_OPCODE_SLT: return OPCODE_SLT;
+ case TGSI_OPCODE_SGE: return OPCODE_SGE;
+ case TGSI_OPCODE_MAD: return OPCODE_MAD;
+ case TGSI_OPCODE_SUB: return OPCODE_SUB;
+ case TGSI_OPCODE_LRP: return OPCODE_LRP;
+ /* case TGSI_OPCODE_CND: return OPCODE_CND; */
+ /* case TGSI_OPCODE_CND0: return OPCODE_CND0; */
+ case TGSI_OPCODE_DP2A: return OPCODE_DP2A;
+ /* gap */
+ case TGSI_OPCODE_FRC: return OPCODE_FRC;
+ /* case TGSI_OPCODE_CLAMP: return OPCODE_CLAMP; */
+ case TGSI_OPCODE_FLR: return OPCODE_FLR;
+ /* case TGSI_OPCODE_ROUND: return OPCODE_ROUND; */
+ case TGSI_OPCODE_EX2: return OPCODE_EX2;
+ case TGSI_OPCODE_LG2: return OPCODE_LG2;
+ case TGSI_OPCODE_POW: return OPCODE_POW;
+ case TGSI_OPCODE_XPD: return OPCODE_XPD;
+ /* gap */
+ case TGSI_OPCODE_ABS: return OPCODE_ABS;
+ case TGSI_OPCODE_RCC: return OPCODE_RCC;
+ case TGSI_OPCODE_DPH: return OPCODE_DPH;
+ case TGSI_OPCODE_COS: return OPCODE_COS;
+ case TGSI_OPCODE_DDX: return OPCODE_DDX;
+ case TGSI_OPCODE_DDY: return OPCODE_DDY;
+ /* case TGSI_OPCODE_KILP: return OPCODE_KILP; */
+ case TGSI_OPCODE_PK2H: return OPCODE_PK2H;
+ case TGSI_OPCODE_PK2US: return OPCODE_PK2US;
+ case TGSI_OPCODE_PK4B: return OPCODE_PK4B;
+ case TGSI_OPCODE_PK4UB: return OPCODE_PK4UB;
+ case TGSI_OPCODE_RFL: return OPCODE_RFL;
+ case TGSI_OPCODE_SEQ: return OPCODE_SEQ;
+ case TGSI_OPCODE_SFL: return OPCODE_SFL;
+ case TGSI_OPCODE_SGT: return OPCODE_SGT;
+ case TGSI_OPCODE_SIN: return OPCODE_SIN;
+ case TGSI_OPCODE_SLE: return OPCODE_SLE;
+ case TGSI_OPCODE_SNE: return OPCODE_SNE;
+ case TGSI_OPCODE_STR: return OPCODE_STR;
+ case TGSI_OPCODE_TEX: return OPCODE_TEX;
+ case TGSI_OPCODE_TXD: return OPCODE_TXD;
+ case TGSI_OPCODE_TXP: return OPCODE_TXP;
+ case TGSI_OPCODE_UP2H: return OPCODE_UP2H;
+ case TGSI_OPCODE_UP2US: return OPCODE_UP2US;
+ case TGSI_OPCODE_UP4B: return OPCODE_UP4B;
+ case TGSI_OPCODE_UP4UB: return OPCODE_UP4UB;
+ case TGSI_OPCODE_X2D: return OPCODE_X2D;
+ case TGSI_OPCODE_ARA: return OPCODE_ARA;
+ case TGSI_OPCODE_ARR: return OPCODE_ARR;
+ case TGSI_OPCODE_BRA: return OPCODE_BRA;
+ case TGSI_OPCODE_CAL: return OPCODE_CAL;
+ case TGSI_OPCODE_RET: return OPCODE_RET;
+ case TGSI_OPCODE_SSG: return OPCODE_SSG;
+ case TGSI_OPCODE_CMP: return OPCODE_CMP;
+ case TGSI_OPCODE_SCS: return OPCODE_SCS;
+ case TGSI_OPCODE_TXB: return OPCODE_TXB;
+ /* case TGSI_OPCODE_NRM: return OPCODE_NRM; */
+ /* case TGSI_OPCODE_DIV: return OPCODE_DIV; */
+ case TGSI_OPCODE_DP2: return OPCODE_DP2;
+ case TGSI_OPCODE_TXL: return OPCODE_TXL;
+ case TGSI_OPCODE_BRK: return OPCODE_BRK;
+ case TGSI_OPCODE_IF: return OPCODE_IF;
+ /* case TGSI_OPCODE_LOOP: return OPCODE_LOOP; */
+ /* case TGSI_OPCODE_REP: return OPCODE_REP; */
+ case TGSI_OPCODE_ELSE: return OPCODE_ELSE;
+ case TGSI_OPCODE_ENDIF: return OPCODE_ENDIF;
+ case TGSI_OPCODE_ENDLOOP: return OPCODE_ENDLOOP;
+ /* case TGSI_OPCODE_ENDREP: return OPCODE_ENDREP; */
+ case TGSI_OPCODE_PUSHA: return OPCODE_PUSHA;
+ case TGSI_OPCODE_POPA: return OPCODE_POPA;
+ /* case TGSI_OPCODE_CEIL: return OPCODE_CEIL; */
+ /* case TGSI_OPCODE_I2F: return OPCODE_I2F; */
+ case TGSI_OPCODE_NOT: return OPCODE_NOT;
+ case TGSI_OPCODE_TRUNC: return OPCODE_TRUNC;
+ /* case TGSI_OPCODE_SHL: return OPCODE_SHL; */
+ /* case TGSI_OPCODE_SHR: return OPCODE_SHR; */
+ case TGSI_OPCODE_AND: return OPCODE_AND;
+ case TGSI_OPCODE_OR: return OPCODE_OR;
+ /* case TGSI_OPCODE_MOD: return OPCODE_MOD; */
+ case TGSI_OPCODE_XOR: return OPCODE_XOR;
+ /* case TGSI_OPCODE_SAD: return OPCODE_SAD; */
+ /* case TGSI_OPCODE_TXF: return OPCODE_TXF; */
+ /* case TGSI_OPCODE_TXQ: return OPCODE_TXQ; */
+ case TGSI_OPCODE_CONT: return OPCODE_CONT;
+ /* case TGSI_OPCODE_EMIT: return OPCODE_EMIT; */
+ /* case TGSI_OPCODE_ENDPRIM: return OPCODE_ENDPRIM; */
+ /* case TGSI_OPCODE_BGNLOOP2: return OPCODE_BGNLOOP2; */
+ case TGSI_OPCODE_BGNSUB: return OPCODE_BGNSUB;
+ /* case TGSI_OPCODE_ENDLOOP2: return OPCODE_ENDLOOP2; */
+ case TGSI_OPCODE_ENDSUB: return OPCODE_ENDSUB;
+ case TGSI_OPCODE_NOISE1: return OPCODE_NOISE1;
+ case TGSI_OPCODE_NOISE2: return OPCODE_NOISE2;
+ case TGSI_OPCODE_NOISE3: return OPCODE_NOISE3;
+ case TGSI_OPCODE_NOISE4: return OPCODE_NOISE4;
+ case TGSI_OPCODE_NOP: return OPCODE_NOP;
+ /* gap */
+ case TGSI_OPCODE_NRM4: return OPCODE_NRM4;
+ /* case TGSI_OPCODE_CALLNZ: return OPCODE_CALLNZ; */
+ /* case TGSI_OPCODE_IFC: return OPCODE_IFC; */
+ /* case TGSI_OPCODE_BREAKC: return OPCODE_BREAKC; */
+ case TGSI_OPCODE_KIL: return OPCODE_KIL;
+ case TGSI_OPCODE_END: return OPCODE_END;
+ case TGSI_OPCODE_SWZ: return OPCODE_SWZ;
+ }
+
+ fprintf(stderr, "Unknown opcode: %i\n", opcode);
+ abort();
+}
+
+static unsigned translate_saturate(unsigned saturate)
+{
+ switch(saturate) {
+ case TGSI_SAT_NONE: return SATURATE_OFF;
+ case TGSI_SAT_ZERO_ONE: return SATURATE_ZERO_ONE;
+ case TGSI_SAT_MINUS_PLUS_ONE: return SATURATE_PLUS_MINUS_ONE;
+ }
+
+ fprintf(stderr, "Unknown saturate mode: %i\n", saturate);
+ abort();
+}
+
+static unsigned translate_register_file(unsigned file)
+{
+ switch(file) {
+ case TGSI_FILE_CONSTANT: return PROGRAM_CONSTANT;
+ case TGSI_FILE_IMMEDIATE: return PROGRAM_CONSTANT;
+ case TGSI_FILE_INPUT: return PROGRAM_INPUT;
+ case TGSI_FILE_OUTPUT: return PROGRAM_OUTPUT;
+ case TGSI_FILE_TEMPORARY: return PROGRAM_TEMPORARY;
+ case TGSI_FILE_ADDRESS: return PROGRAM_ADDRESS;
+ }
+
+ fprintf(stderr, "Unhandled register file: %i\n", file);
+ abort();
+}
+
+static int translate_register_index(
+ struct tgsi_to_rc * ttr,
+ unsigned file,
+ int index)
+{
+ if (file == TGSI_FILE_IMMEDIATE)
+ return ttr->immediate_offset + index;
+
+ return index;
+}
+
+static void transform_dstreg(
+ struct tgsi_to_rc * ttr,
+ struct prog_dst_register * dst,
+ struct tgsi_full_dst_register * src)
+{
+ dst->File = translate_register_file(src->DstRegister.File);
+ dst->Index = translate_register_index(ttr, src->DstRegister.File, src->DstRegister.Index);
+ dst->WriteMask = src->DstRegister.WriteMask;
+ dst->RelAddr = src->DstRegister.Indirect;
+}
+
+static void transform_srcreg(
+ struct tgsi_to_rc * ttr,
+ struct prog_src_register * dst,
+ struct tgsi_full_src_register * src)
+{
+ dst->File = translate_register_file(src->SrcRegister.File);
+ dst->Index = translate_register_index(ttr, src->SrcRegister.File, src->SrcRegister.Index);
+ dst->RelAddr = src->SrcRegister.Indirect;
+ dst->Swizzle = tgsi_util_get_full_src_register_extswizzle(src, 0);
+ dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 1) << 3;
+ dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 2) << 6;
+ dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 3) << 9;
+ dst->Abs = src->SrcRegisterExtMod.Absolute;
+ dst->Negate =
+ src->SrcRegisterExtSwz.NegateX |
+ (src->SrcRegisterExtSwz.NegateY << 1) |
+ (src->SrcRegisterExtSwz.NegateZ << 2) |
+ (src->SrcRegisterExtSwz.NegateW << 3);
+ dst->Negate ^= src->SrcRegister.Negate ? NEGATE_XYZW : 0;
+}
+
+static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_ext_texture src)
+{
+ switch(src.Texture) {
+ case TGSI_TEXTURE_1D:
+ dst->I.TexSrcTarget = TEXTURE_1D_INDEX;
+ break;
+ case TGSI_TEXTURE_2D:
+ dst->I.TexSrcTarget = TEXTURE_2D_INDEX;
+ break;
+ case TGSI_TEXTURE_3D:
+ dst->I.TexSrcTarget = TEXTURE_3D_INDEX;
+ break;
+ case TGSI_TEXTURE_CUBE:
+ dst->I.TexSrcTarget = TEXTURE_CUBE_INDEX;
+ break;
+ case TGSI_TEXTURE_RECT:
+ dst->I.TexSrcTarget = TEXTURE_RECT_INDEX;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ dst->I.TexSrcTarget = TEXTURE_1D_INDEX;
+ dst->I.TexShadow = 1;
+ break;
+ case TGSI_TEXTURE_SHADOW2D:
+ dst->I.TexSrcTarget = TEXTURE_2D_INDEX;
+ dst->I.TexShadow = 1;
+ break;
+ case TGSI_TEXTURE_SHADOWRECT:
+ dst->I.TexSrcTarget = TEXTURE_RECT_INDEX;
+ dst->I.TexShadow = 1;
+ break;
+ }
+}
+
+static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src)
+{
+ if (src->Instruction.Opcode == TGSI_OPCODE_END)
+ return;
+
+ struct rc_instruction * dst = rc_insert_new_instruction(ttr->compiler, ttr->compiler->Program.Instructions.Prev);
+ int i;
+
+ dst->I.Opcode = translate_opcode(src->Instruction.Opcode);
+ dst->I.SaturateMode = translate_saturate(src->Instruction.Saturate);
+
+ if (src->Instruction.NumDstRegs)
+ transform_dstreg(ttr, &dst->I.DstReg, &src->FullDstRegisters[0]);
+
+ for(i = 0; i < src->Instruction.NumSrcRegs; ++i) {
+ if (src->FullSrcRegisters[i].SrcRegister.File == TGSI_FILE_SAMPLER)
+ dst->I.TexSrcUnit = src->FullSrcRegisters[i].SrcRegister.Index;
+ else
+ transform_srcreg(ttr, &dst->I.SrcReg[i], &src->FullSrcRegisters[i]);
+ }
+
+ /* Texturing. */
+ transform_texture(dst, src->InstructionExtTexture);
+}
+
+static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm)
+{
+ struct rc_constant constant;
+ int i;
+
+ constant.Type = RC_CONSTANT_IMMEDIATE;
+ constant.Size = 4;
+ for(i = 0; i < 4; ++i)
+ constant.u.Immediate[i] = imm->u[i].Float;
+ rc_constants_add(&ttr->compiler->Program.Constants, &constant);
+}
+
+void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens)
+{
+ struct tgsi_parse_context parser;
+ int i;
+
+ /* Allocate constants placeholders.
+ *
+ * Note: What if declared constants are not contiguous? */
+ for(i = 0; i <= ttr->info->file_max[TGSI_FILE_CONSTANT]; ++i) {
+ struct rc_constant constant;
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_EXTERNAL;
+ constant.Size = 4;
+ constant.u.External = i;
+ rc_constants_add(&ttr->compiler->Program.Constants, &constant);
+ }
+
+ ttr->immediate_offset = ttr->compiler->Program.Constants.Count;
+
+ tgsi_parse_init(&parser, tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parser)) {
+ tgsi_parse_token(&parser);
+
+ switch (parser.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ handle_immediate(ttr, &parser.FullToken.FullImmediate);
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ transform_instruction(ttr, &parser.FullToken.FullInstruction);
+ break;
+ }
+ }
+
+ tgsi_parse_free(&parser);
+
+ rc_calculate_inputs_outputs(ttr->compiler);
+}
+
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h
new file mode 100644
index 00000000000..93e90ec6d2c
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_TGSI_TO_RC_H
+#define R300_TGSI_TO_RC_H
+
+struct radeon_compiler;
+
+struct tgsi_full_declaration;
+struct tgsi_shader_info;
+struct tgsi_token;
+
+struct tgsi_to_rc {
+ struct radeon_compiler * compiler;
+ const struct tgsi_shader_info * info;
+
+ int immediate_offset;
+};
+
+void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens);
+
+#endif /* R300_TGSI_TO_RC_H */
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index f87435f9f07..2cb903bba2f 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -22,391 +22,213 @@
#include "r300_vs.h"
-static void r300_vs_declare(struct r300_vs_asm* assembler,
- struct tgsi_full_declaration* decl)
-{
- switch (decl->Declaration.File) {
- case TGSI_FILE_INPUT:
- break;
- case TGSI_FILE_OUTPUT:
- switch (decl->Semantic.SemanticName) {
- case TGSI_SEMANTIC_POSITION:
- assembler->tab[decl->DeclarationRange.First] = 0;
- break;
- case TGSI_SEMANTIC_COLOR:
- assembler->tab[decl->DeclarationRange.First] =
- (assembler->point_size ? 1 : 0) +
- assembler->out_colors;
- break;
- case TGSI_SEMANTIC_FOG:
- case TGSI_SEMANTIC_GENERIC:
- /* XXX multiple? */
- assembler->tab[decl->DeclarationRange.First] =
- (assembler->point_size ? 1 : 0) +
- assembler->out_colors +
- assembler->out_texcoords;
- break;
- case TGSI_SEMANTIC_PSIZE:
- assembler->tab[decl->DeclarationRange.First] = 1;
- break;
- default:
- debug_printf("r300: vs: Bad semantic declaration %d\n",
- decl->Semantic.SemanticName);
- break;
- }
- break;
- case TGSI_FILE_CONSTANT:
- break;
- case TGSI_FILE_TEMPORARY:
- assembler->temp_count++;
- break;
- default:
- debug_printf("r300: vs: Bad file %d\n", decl->Declaration.File);
- break;
- }
-}
+#include "r300_context.h"
+#include "r300_tgsi_to_rc.h"
-static INLINE unsigned r300_vs_src_type(struct r300_vs_asm* assembler,
- struct tgsi_src_register* src)
-{
- switch (src->File) {
- case TGSI_FILE_NULL:
- case TGSI_FILE_INPUT:
- /* Probably a zero or one swizzle */
- return R300_PVS_SRC_REG_INPUT;
- case TGSI_FILE_TEMPORARY:
- return R300_PVS_SRC_REG_TEMPORARY;
- case TGSI_FILE_CONSTANT:
- case TGSI_FILE_IMMEDIATE:
- return R300_PVS_SRC_REG_CONSTANT;
- default:
- debug_printf("r300: vs: Unimplemented src type %d\n", src->File);
- break;
- }
- return 0;
-}
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
-static INLINE unsigned r300_vs_src(struct r300_vs_asm* assembler,
- struct tgsi_src_register* src)
-{
- switch (src->File) {
- case TGSI_FILE_NULL:
- case TGSI_FILE_INPUT:
- case TGSI_FILE_TEMPORARY:
- case TGSI_FILE_CONSTANT:
- return src->Index;
- case TGSI_FILE_IMMEDIATE:
- return src->Index + assembler->imm_offset;
- default:
- debug_printf("r300: vs: Unimplemented src type %d\n", src->File);
- break;
- }
- return 0;
-}
+#include "radeon_compiler.h"
-static INLINE unsigned r300_vs_dst_type(struct r300_vs_asm* assembler,
- struct tgsi_dst_register* dst)
-{
- switch (dst->File) {
- case TGSI_FILE_TEMPORARY:
- return R300_PVS_DST_REG_TEMPORARY;
- case TGSI_FILE_OUTPUT:
- return R300_PVS_DST_REG_OUT;
- default:
- debug_printf("r300: vs: Unimplemented dst type %d\n", dst->File);
- break;
- }
- return 0;
-}
-static INLINE unsigned r300_vs_dst(struct r300_vs_asm* assembler,
- struct tgsi_dst_register* dst)
+static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
{
- switch (dst->File) {
- case TGSI_FILE_TEMPORARY:
- return dst->Index;
- case TGSI_FILE_OUTPUT:
- return assembler->tab[dst->Index];
- default:
- debug_printf("r300: vs: Unimplemented dst %d\n", dst->File);
- break;
- }
- return 0;
-}
+ struct r300_vertex_shader * vs = c->UserData;
+ struct tgsi_shader_info* info = &vs->info;
+ boolean pointsize = false;
+ int out_colors = 0;
+ int colors = 0;
+ int out_generic = 0;
+ int generic = 0;
+ int i;
-static uint32_t r300_vs_op(unsigned op)
-{
- switch (op) {
- case TGSI_OPCODE_DP3:
- case TGSI_OPCODE_DP4:
- return R300_VE_DOT_PRODUCT;
- case TGSI_OPCODE_MUL:
- return R300_VE_MULTIPLY;
- case TGSI_OPCODE_ADD:
- case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SUB:
- case TGSI_OPCODE_SWZ:
- return R300_VE_ADD;
- case TGSI_OPCODE_MAX:
- return R300_VE_MAXIMUM;
- case TGSI_OPCODE_SLT:
- return R300_VE_SET_LESS_THAN;
- case TGSI_OPCODE_RSQ:
- return R300_PVS_DST_MATH_INST | R300_ME_RECIP_DX;
- case TGSI_OPCODE_MAD:
- return R300_PVS_DST_MACRO_INST | R300_PVS_MACRO_OP_2CLK_MADD;
- default:
- break;
- }
- return 0;
-}
+ /* Fill in the input mapping */
+ for (i = 0; i < info->num_inputs; i++)
+ c->code->inputs[i] = i;
-static uint32_t r300_vs_swiz(struct tgsi_full_src_register* reg)
-{
- if (reg->SrcRegister.Extended) {
- return (reg->SrcRegister.Negate ? (0xf << 12) : 0) |
- reg->SrcRegisterExtSwz.ExtSwizzleX |
- (reg->SrcRegisterExtSwz.ExtSwizzleY << 3) |
- (reg->SrcRegisterExtSwz.ExtSwizzleZ << 6) |
- (reg->SrcRegisterExtSwz.ExtSwizzleW << 9);
- } else {
- return (reg->SrcRegister.Negate ? (0xf << 12) : 0) |
- reg->SrcRegister.SwizzleX |
- (reg->SrcRegister.SwizzleY << 3) |
- (reg->SrcRegister.SwizzleZ << 6) |
- (reg->SrcRegister.SwizzleW << 9);
+ /* Fill in the output mapping */
+ for (i = 0; i < info->num_outputs; i++) {
+ switch (info->output_semantic_name[i]) {
+ case TGSI_SEMANTIC_PSIZE:
+ pointsize = true;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ out_colors++;
+ break;
+ case TGSI_SEMANTIC_FOG:
+ case TGSI_SEMANTIC_GENERIC:
+ out_generic++;
+ break;
+ }
}
-}
-/* XXX icky icky icky icky */
-static uint32_t r300_vs_scalar_swiz(struct tgsi_full_src_register* reg)
-{
- if (reg->SrcRegister.Extended) {
- return (reg->SrcRegister.Negate ? (0xf << 12) : 0) |
- reg->SrcRegisterExtSwz.ExtSwizzleX |
- (reg->SrcRegisterExtSwz.ExtSwizzleX << 3) |
- (reg->SrcRegisterExtSwz.ExtSwizzleX << 6) |
- (reg->SrcRegisterExtSwz.ExtSwizzleX << 9);
- } else {
- return (reg->SrcRegister.Negate ? (0xf << 12) : 0) |
- reg->SrcRegister.SwizzleX |
- (reg->SrcRegister.SwizzleX << 3) |
- (reg->SrcRegister.SwizzleX << 6) |
- (reg->SrcRegister.SwizzleX << 9);
- }
-}
+ struct tgsi_parse_context parser;
-/* XXX scalar stupidity */
-static void r300_vs_emit_inst(struct r300_vertex_shader* vs,
- struct r300_vs_asm* assembler,
- struct tgsi_full_src_register* src,
- struct tgsi_full_dst_register* dst,
- unsigned op,
- unsigned count,
- boolean is_scalar)
-{
- int i = vs->instruction_count;
- vs->instructions[i].inst0 = R300_PVS_DST_OPCODE(r300_vs_op(op)) |
- R300_PVS_DST_REG_TYPE(r300_vs_dst_type(assembler, &dst->DstRegister)) |
- R300_PVS_DST_OFFSET(r300_vs_dst(assembler, &dst->DstRegister)) |
- R300_PVS_DST_WE(dst->DstRegister.WriteMask);
- switch (count) {
- case 3:
- vs->instructions[i].inst3 =
- R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler,
- &src[2].SrcRegister)) |
- R300_PVS_SRC_OFFSET(r300_vs_src(assembler,
- &src[2].SrcRegister)) |
- R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[2]));
- /* Fall through */
- case 2:
- vs->instructions[i].inst2 =
- R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler,
- &src[1].SrcRegister)) |
- R300_PVS_SRC_OFFSET(r300_vs_src(assembler,
- &src[1].SrcRegister)) |
- R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[1]));
- /* Fall through */
- case 1:
- vs->instructions[i].inst1 =
- R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler,
- &src[0].SrcRegister)) |
- R300_PVS_SRC_OFFSET(r300_vs_src(assembler,
- &src[0].SrcRegister)) |
- /* XXX the icky, it burns */
- R300_PVS_SRC_SWIZZLE(is_scalar ? r300_vs_scalar_swiz(&src[0])
- : r300_vs_swiz(&src[0]));
- break;
- }
- vs->instruction_count++;
-}
+ tgsi_parse_init(&parser, vs->state.tokens);
-static void r300_vs_instruction(struct r300_vertex_shader* vs,
- struct r300_vs_asm* assembler,
- struct tgsi_full_instruction* inst)
-{
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_RSQ:
- r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], inst->Instruction.Opcode,
- 1, TRUE);
- break;
- case TGSI_OPCODE_SUB:
- inst->FullSrcRegisters[1].SrcRegister.Negate =
- !inst->FullSrcRegisters[1].SrcRegister.Negate;
- /* Fall through */
- case TGSI_OPCODE_ADD:
- case TGSI_OPCODE_MUL:
- case TGSI_OPCODE_MAX:
- case TGSI_OPCODE_SLT:
- r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], inst->Instruction.Opcode,
- 2, FALSE);
- break;
- case TGSI_OPCODE_DP3:
- /* Set alpha swizzle to zero for src0 and src1 */
- if (!inst->FullSrcRegisters[0].SrcRegister.Extended) {
- inst->FullSrcRegisters[0].SrcRegister.Extended = TRUE;
- inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX =
- inst->FullSrcRegisters[0].SrcRegister.SwizzleX;
- inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleY =
- inst->FullSrcRegisters[0].SrcRegister.SwizzleY;
- inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleZ =
- inst->FullSrcRegisters[0].SrcRegister.SwizzleZ;
- }
- inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW =
- TGSI_EXTSWIZZLE_ZERO;
- if (!inst->FullSrcRegisters[1].SrcRegister.Extended) {
- inst->FullSrcRegisters[1].SrcRegister.Extended = TRUE;
- inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleX =
- inst->FullSrcRegisters[1].SrcRegister.SwizzleX;
- inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleY =
- inst->FullSrcRegisters[1].SrcRegister.SwizzleY;
- inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleZ =
- inst->FullSrcRegisters[1].SrcRegister.SwizzleZ;
- }
- inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleW =
- TGSI_EXTSWIZZLE_ZERO;
- /* Fall through */
- case TGSI_OPCODE_DP4:
- r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], inst->Instruction.Opcode,
- 2, FALSE);
- break;
- case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
- inst->FullSrcRegisters[1] = r300_constant_zero;
- r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], inst->Instruction.Opcode,
- 2, FALSE);
- break;
- case TGSI_OPCODE_MAD:
- r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], inst->Instruction.Opcode,
- 3, FALSE);
- break;
- case TGSI_OPCODE_END:
- break;
- default:
- debug_printf("r300: vs: Bad opcode %d\n",
- inst->Instruction.Opcode);
- break;
- }
-}
+ while (!tgsi_parse_end_of_tokens(&parser)) {
+ tgsi_parse_token(&parser);
-static void r300_vs_init(struct r300_vertex_shader* vs,
- struct r300_vs_asm* assembler)
-{
- struct tgsi_shader_info* info = &vs->info;
- int i;
+ if (parser.FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
+ continue;
- for (i = 0; i < info->num_outputs; i++) {
- switch (info->output_semantic_name[i]) {
+ struct tgsi_full_declaration * decl = &parser.FullToken.FullDeclaration;
+
+ if (decl->Declaration.File != TGSI_FILE_OUTPUT)
+ continue;
+
+ switch (decl->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ c->code->outputs[decl->DeclarationRange.First] = 0;
+ break;
case TGSI_SEMANTIC_PSIZE:
- assembler->point_size = TRUE;
+ c->code->outputs[decl->DeclarationRange.First] = 1;
break;
case TGSI_SEMANTIC_COLOR:
- assembler->out_colors++;
+ c->code->outputs[decl->DeclarationRange.First] = 1 +
+ (pointsize ? 1 : 0) +
+ colors++;
break;
case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_GENERIC:
- assembler->out_texcoords++;
+ c->code->outputs[decl->DeclarationRange.First] = 1 +
+ (pointsize ? 1 : 0) +
+ out_colors +
+ generic++;
+ break;
+ default:
+ debug_printf("r300: vs: Bad semantic declaration %d\n",
+ decl->Semantic.SemanticName);
break;
}
}
- vs->instruction_count = 0;
+ tgsi_parse_free(&parser);
}
+
void r300_translate_vertex_shader(struct r300_context* r300,
struct r300_vertex_shader* vs)
{
- struct tgsi_parse_context parser;
- int i;
- struct r300_constant_buffer* consts =
- &r300->shader_constants[PIPE_SHADER_VERTEX];
+ struct r300_vertex_program_compiler compiler;
+ struct tgsi_to_rc ttr;
- struct r300_vs_asm* assembler = CALLOC_STRUCT(r300_vs_asm);
- if (assembler == NULL) {
- return;
- }
+ /* Setup the compiler */
+ rc_init(&compiler.Base);
- /* Init assembler. */
- r300_vs_init(vs, assembler);
+ compiler.Base.Debug = 1;
+ compiler.code = &vs->code;
+ compiler.UserData = vs;
- /* Setup starting offset for immediates. */
- assembler->imm_offset = consts->user_count;
+ if (compiler.Base.Debug) {
+ debug_printf("r300: Initial vertex program\n");
+ tgsi_dump(vs->state.tokens, 0);
+ }
- tgsi_parse_init(&parser, vs->state.tokens);
+ /* Translate TGSI to our internal representation */
+ ttr.compiler = &compiler.Base;
+ ttr.info = &vs->info;
- while (!tgsi_parse_end_of_tokens(&parser)) {
- tgsi_parse_token(&parser);
+ r300_tgsi_to_rc(&ttr, vs->state.tokens);
- /* This is seriously the lamest way to create fragment programs ever.
- * I blame TGSI. */
- switch (parser.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- /* Allocated registers sitting at the beginning
- * of the program. */
- r300_vs_declare(assembler, &parser.FullToken.FullDeclaration);
- break;
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- debug_printf("r300: Emitting immediate to constant buffer, "
- "position %d\n",
- assembler->imm_offset + assembler->imm_count);
- /* I am not amused by the length of these. */
- for (i = 0; i < 4; i++) {
- consts->constants[assembler->imm_offset +
- assembler->imm_count][i] =
- parser.FullToken.FullImmediate.u.ImmediateFloat32[i]
- .Float;
- }
- assembler->imm_count++;
- break;
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- r300_vs_instruction(vs, assembler,
- &parser.FullToken.FullInstruction);
- break;
- }
- }
+ compiler.RequiredOutputs = ~(~0 << vs->info.num_outputs);
+ compiler.SetHwInputOutput = &set_vertex_inputs_outputs;
- debug_printf("r300: vs: %d texs and %d colors, first free reg is %d\n",
- assembler->tex_count, assembler->color_count,
- assembler->tex_count + assembler->color_count);
+ /* Invoke the compiler */
+ r3xx_compile_vertex_program(&compiler);
+ if (compiler.Base.Error) {
+ /* Todo: Fail gracefully */
+ fprintf(stderr, "r300 VP: Compiler error\n");
+ abort();
+ }
- consts->count = consts->user_count + assembler->imm_count;
- vs->uses_imms = assembler->imm_count;
- debug_printf("r300: vs: %d total constants, "
- "%d from user and %d from immediates\n", consts->count,
- consts->user_count, assembler->imm_count);
+ /* And, finally... */
+ rc_destroy(&compiler.Base);
+ vs->translated = TRUE;
+}
- debug_printf("r300: vs: tab: %d %d %d %d\n", assembler->tab[0],
- assembler->tab[1], assembler->tab[2], assembler->tab[3]);
- tgsi_dump(vs->state.tokens, 0);
- /* XXX finish r300 vertex shader dumper */
- r300_vs_dump(vs);
+/* XXX get these to r300_reg */
+#define R300_PVS_DST_OPCODE(x) ((x) << 0)
+# define R300_VE_DOT_PRODUCT 1
+# define R300_VE_MULTIPLY 2
+# define R300_VE_ADD 3
+# define R300_VE_MAXIMUM 7
+# define R300_VE_SET_LESS_THAN 10
+#define R300_PVS_DST_MATH_INST (1 << 6)
+# define R300_ME_RECIP_DX 6
+#define R300_PVS_DST_MACRO_INST (1 << 7)
+# define R300_PVS_MACRO_OP_2CLK_MADD 0
+#define R300_PVS_DST_REG_TYPE(x) ((x) << 8)
+# define R300_PVS_DST_REG_TEMPORARY 0
+# define R300_PVS_DST_REG_A0 1
+# define R300_PVS_DST_REG_OUT 2
+# define R300_PVS_DST_REG_OUT_REPL_X 3
+# define R300_PVS_DST_REG_ALT_TEMPORARY 4
+# define R300_PVS_DST_REG_INPUT 5
+#define R300_PVS_DST_OFFSET(x) ((x) << 13)
+#define R300_PVS_DST_WE(x) ((x) << 20)
+#define R300_PVS_DST_WE_XYZW (0xf << 20)
+
+#define R300_PVS_SRC_REG_TYPE(x) ((x) << 0)
+# define R300_PVS_SRC_REG_TEMPORARY 0
+# define R300_PVS_SRC_REG_INPUT 1
+# define R300_PVS_SRC_REG_CONSTANT 2
+# define R300_PVS_SRC_REG_ALT_TEMPORARY 3
+#define R300_PVS_SRC_OFFSET(x) ((x) << 5)
+#define R300_PVS_SRC_SWIZZLE(x) ((x) << 13)
+# define R300_PVS_SRC_SELECT_X 0
+# define R300_PVS_SRC_SELECT_Y 1
+# define R300_PVS_SRC_SELECT_Z 2
+# define R300_PVS_SRC_SELECT_W 3
+# define R300_PVS_SRC_SELECT_FORCE_0 4
+# define R300_PVS_SRC_SELECT_FORCE_1 5
+# define R300_PVS_SRC_SWIZZLE_XYZW \
+ ((R300_PVS_SRC_SELECT_X | (R300_PVS_SRC_SELECT_Y << 3) | \
+ (R300_PVS_SRC_SELECT_Z << 6) | (R300_PVS_SRC_SELECT_W << 9)) << 13)
+# define R300_PVS_SRC_SWIZZLE_ZERO \
+ ((R300_PVS_SRC_SELECT_FORCE_0 | (R300_PVS_SRC_SELECT_FORCE_0 << 3) | \
+ (R300_PVS_SRC_SELECT_FORCE_0 << 6) | \
+ (R300_PVS_SRC_SELECT_FORCE_0 << 9)) << 13)
+# define R300_PVS_SRC_SWIZZLE_ONE \
+ ((R300_PVS_SRC_SELECT_FORCE_1 | (R300_PVS_SRC_SELECT_FORCE_1 << 3) | \
+ (R300_PVS_SRC_SELECT_FORCE_1 << 6) | \
+ (R300_PVS_SRC_SELECT_FORCE_1 << 9)) << 13)
+#define R300_PVS_MODIFIER_X (1 << 25)
+#define R300_PVS_MODIFIER_Y (1 << 26)
+#define R300_PVS_MODIFIER_Z (1 << 27)
+#define R300_PVS_MODIFIER_W (1 << 28)
+#define R300_PVS_NEGATE_XYZW \
+ (R300_PVS_MODIFIER_X | R300_PVS_MODIFIER_Y | \
+ R300_PVS_MODIFIER_Z | R300_PVS_MODIFIER_W)
+
+struct r300_vertex_program_code r300_passthrough_vertex_shader = {
+ .length = 8, /* two instructions */
+
+ /* MOV out[0], in[0] */
+ .body.d[0] = R300_PVS_DST_OPCODE(R300_VE_ADD) |
+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+ R300_PVS_DST_OFFSET(0) | R300_PVS_DST_WE_XYZW,
+ .body.d[1] = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+ R300_PVS_SRC_OFFSET(0) | R300_PVS_SRC_SWIZZLE_XYZW,
+ .body.d[2] = R300_PVS_SRC_SWIZZLE_ZERO,
+ .body.d[3] = 0x0,
+
+ /* MOV out[1], in[1] */
+ .body.d[4] = R300_PVS_DST_OPCODE(R300_VE_ADD) |
+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+ R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW,
+ .body.d[5] = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+ R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW,
+ .body.d[6] = R300_PVS_SRC_SWIZZLE_ZERO,
+ .body.d[7] = 0x0,
+
+ .inputs[0] = 0,
+ .inputs[1] = 1,
+ .outputs[0] = 0,
+ .outputs[1] = 1,
+
+ .InputsRead = 3,
+ .OutputsWritten = 3
+};
- tgsi_parse_free(&parser);
- FREE(assembler);
-}
diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h
index 165d7178122..2a4ce315e32 100644
--- a/src/gallium/drivers/r300/r300_vs.h
+++ b/src/gallium/drivers/r300/r300_vs.h
@@ -23,134 +23,31 @@
#ifndef R300_VS_H
#define R300_VS_H
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_dump.h"
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
-#include "r300_context.h"
-#include "r300_debug.h"
-#include "r300_reg.h"
-#include "r300_screen.h"
-#include "r300_shader_inlines.h"
+#include "radeon_code.h"
-/* XXX get these to r300_reg */
-#define R300_PVS_DST_OPCODE(x) ((x) << 0)
-# define R300_VE_DOT_PRODUCT 1
-# define R300_VE_MULTIPLY 2
-# define R300_VE_ADD 3
-# define R300_VE_MAXIMUM 7
-# define R300_VE_SET_LESS_THAN 10
-#define R300_PVS_DST_MATH_INST (1 << 6)
-# define R300_ME_RECIP_DX 6
-#define R300_PVS_DST_MACRO_INST (1 << 7)
-# define R300_PVS_MACRO_OP_2CLK_MADD 0
-#define R300_PVS_DST_REG_TYPE(x) ((x) << 8)
-# define R300_PVS_DST_REG_TEMPORARY 0
-# define R300_PVS_DST_REG_A0 1
-# define R300_PVS_DST_REG_OUT 2
-# define R300_PVS_DST_REG_OUT_REPL_X 3
-# define R300_PVS_DST_REG_ALT_TEMPORARY 4
-# define R300_PVS_DST_REG_INPUT 5
-#define R300_PVS_DST_OFFSET(x) ((x) << 13)
-#define R300_PVS_DST_WE(x) ((x) << 20)
-#define R300_PVS_DST_WE_XYZW (0xf << 20)
+struct r300_context;
-#define R300_PVS_SRC_REG_TYPE(x) ((x) << 0)
-# define R300_PVS_SRC_REG_TEMPORARY 0
-# define R300_PVS_SRC_REG_INPUT 1
-# define R300_PVS_SRC_REG_CONSTANT 2
-# define R300_PVS_SRC_REG_ALT_TEMPORARY 3
-#define R300_PVS_SRC_OFFSET(x) ((x) << 5)
-#define R300_PVS_SRC_SWIZZLE(x) ((x) << 13)
-# define R300_PVS_SRC_SELECT_X 0
-# define R300_PVS_SRC_SELECT_Y 1
-# define R300_PVS_SRC_SELECT_Z 2
-# define R300_PVS_SRC_SELECT_W 3
-# define R300_PVS_SRC_SELECT_FORCE_0 4
-# define R300_PVS_SRC_SELECT_FORCE_1 5
-# define R300_PVS_SRC_SWIZZLE_XYZW \
- ((R300_PVS_SRC_SELECT_X | (R300_PVS_SRC_SELECT_Y << 3) | \
- (R300_PVS_SRC_SELECT_Z << 6) | (R300_PVS_SRC_SELECT_W << 9)) << 13)
-# define R300_PVS_SRC_SWIZZLE_ZERO \
- ((R300_PVS_SRC_SELECT_FORCE_0 | (R300_PVS_SRC_SELECT_FORCE_0 << 3) | \
- (R300_PVS_SRC_SELECT_FORCE_0 << 6) | \
- (R300_PVS_SRC_SELECT_FORCE_0 << 9)) << 13)
-# define R300_PVS_SRC_SWIZZLE_ONE \
- ((R300_PVS_SRC_SELECT_FORCE_1 | (R300_PVS_SRC_SELECT_FORCE_1 << 3) | \
- (R300_PVS_SRC_SELECT_FORCE_1 << 6) | \
- (R300_PVS_SRC_SELECT_FORCE_1 << 9)) << 13)
-#define R300_PVS_MODIFIER_X (1 << 25)
-#define R300_PVS_MODIFIER_Y (1 << 26)
-#define R300_PVS_MODIFIER_Z (1 << 27)
-#define R300_PVS_MODIFIER_W (1 << 28)
-#define R300_PVS_NEGATE_XYZW \
- (R300_PVS_MODIFIER_X | R300_PVS_MODIFIER_Y | \
- R300_PVS_MODIFIER_Z | R300_PVS_MODIFIER_W)
+struct r300_vertex_shader {
+ /* Parent class */
+ struct pipe_shader_state state;
+ struct tgsi_shader_info info;
-/* Temporary struct used to hold assembly state while putting together
- * fragment programs. */
-struct r300_vs_asm {
- /* Pipe context. */
- struct r300_context* r300;
- /* Number of colors. */
- unsigned color_count;
- /* Number of texcoords. */
- unsigned tex_count;
- /* Number of requested temporary registers. */
- unsigned temp_count;
- /* Offset for immediate constants. Neither R300 nor R500 can do four
- * inline constants per source, so instead we copy immediates into the
- * constant buffer. */
- unsigned imm_offset;
- /* Number of immediate constants. */
- unsigned imm_count;
- /* Number of colors to write. */
- unsigned out_colors;
- /* Number of texcoords to write. */
- unsigned out_texcoords;
- /* Whether to emit point size. */
- boolean point_size;
- /* Tab of declared outputs to OVM outputs. */
- unsigned tab[16];
-};
+ /* Fallback shader, because Draw has issues */
+ struct draw_vertex_shader* draw;
-static struct r300_vertex_shader r300_passthrough_vertex_shader = {
- /* XXX translate these back into normal instructions */
- .instruction_count = 2,
- .instructions[0].inst0 = R300_PVS_DST_OPCODE(R300_VE_ADD) |
- R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
- R300_PVS_DST_OFFSET(0) | R300_PVS_DST_WE_XYZW,
- .instructions[0].inst1 = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
- R300_PVS_SRC_OFFSET(0) | R300_PVS_SRC_SWIZZLE_XYZW,
- .instructions[0].inst2 = R300_PVS_SRC_SWIZZLE_ZERO,
- .instructions[0].inst3 = 0x0,
- .instructions[1].inst0 = R300_PVS_DST_OPCODE(R300_VE_ADD) |
- R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
- R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW,
- .instructions[1].inst1 = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
- R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW,
- .instructions[1].inst2 = R300_PVS_SRC_SWIZZLE_ZERO,
- .instructions[1].inst3 = 0x0,
-};
+ /* Has this shader been translated yet? */
+ boolean translated;
-static struct r300_vertex_shader r300_texture_vertex_shader = {
- /* XXX translate these back into normal instructions */
- .instruction_count = 2,
- .instructions[0].inst0 = R300_PVS_DST_OPCODE(R300_VE_ADD) |
- R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
- R300_PVS_DST_OFFSET(0) | R300_PVS_DST_WE_XYZW,
- .instructions[0].inst1 = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
- R300_PVS_SRC_OFFSET(0) | R300_PVS_SRC_SWIZZLE_XYZW,
- .instructions[0].inst2 = R300_PVS_SRC_SWIZZLE_ZERO,
- .instructions[0].inst3 = 0x0,
- .instructions[1].inst0 = R300_PVS_DST_OPCODE(R300_VE_ADD) |
- R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
- R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW,
- .instructions[1].inst1 = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
- R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW,
- .instructions[1].inst2 = R300_PVS_SRC_SWIZZLE_ZERO,
- .instructions[1].inst3 = 0x0,
+ /* Machine code (if translated) */
+ struct r300_vertex_program_code code;
};
+
+extern struct r300_vertex_program_code r300_passthrough_vertex_shader;
+
void r300_translate_vertex_shader(struct r300_context* r300,
struct r300_vertex_shader* vs);
diff --git a/src/gallium/drivers/r300/r3xx_fs.c b/src/gallium/drivers/r300/r3xx_fs.c
index 6e05d769773..c1c1194d58e 100644
--- a/src/gallium/drivers/r300/r3xx_fs.c
+++ b/src/gallium/drivers/r300/r3xx_fs.c
@@ -23,74 +23,52 @@
#include "r3xx_fs.h"
-static INLINE uint32_t r3xx_rgb_op(unsigned op)
-{
- switch (op) {
- case TGSI_OPCODE_MOV:
- return R300_ALU_OUTC_CMP;
- default:
- return 0;
- }
-}
+#include "r300_reg.h"
-static INLINE uint32_t r3xx_alpha_op(unsigned op)
-{
- switch (op) {
- case TGSI_OPCODE_MOV:
- return R300_ALU_OUTA_CMP;
- default:
- return 0;
- }
-}
+struct rX00_fragment_program_code r3xx_passthrough_fragment_shader = {
+ .code.r300.alu.length = 1,
+ .code.r300.tex.length = 0,
-static INLINE void r3xx_emit_maths(struct r3xx_fragment_shader* fs,
- struct r300_fs_asm* assembler,
- struct tgsi_full_src_register* src,
- struct tgsi_full_dst_register* dst,
- unsigned op,
- unsigned count)
-{
- int i = fs->alu_instruction_count;
+ .code.r300.config = 0,
+ .code.r300.pixsize = 0,
+ .code.r300.code_offset = 0,
+ .code.r300.code_addr[3] = R300_RGBA_OUT,
- fs->instructions[i].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) |
+ .code.r300.alu.inst[0].rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) |
R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) |
R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) |
- r3xx_rgb_op(op);
- fs->instructions[i].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) |
- R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ;
- fs->instructions[i].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) |
+ R300_ALU_OUTC_CMP,
+ .code.r300.alu.inst[0].rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) |
+ R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ,
+ .code.r300.alu.inst[0].alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) |
R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) |
R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) |
- r3xx_alpha_op(op);
- fs->instructions[i].alu_alpha_addr = R300_ALPHA_ADDR0(0) |
- R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT;
+ R300_ALU_OUTA_CMP,
+ .code.r300.alu.inst[0].alpha_addr = R300_ALPHA_ADDR0(0) |
+ R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT,
+};
- fs->alu_instruction_count++;
-}
+struct rX00_fragment_program_code r3xx_texture_fragment_shader = {
+ .code.r300.alu.length = 1,
+ .code.r300.tex.length = 1,
-void r3xx_fs_finalize(struct r300_fragment_shader* fs,
- struct r300_fs_asm* assembler)
-{
- fs->stack_size = assembler->temp_count + assembler->temp_offset + 1;
-}
+ .code.r300.config = R300_PFS_CNTL_FIRST_NODE_HAS_TEX,
+ .code.r300.pixsize = 0,
+ .code.r300.code_offset = 0,
+ .code.r300.code_addr[3] = R300_RGBA_OUT,
-void r3xx_fs_instruction(struct r3xx_fragment_shader* fs,
- struct r300_fs_asm* assembler,
- struct tgsi_full_instruction* inst)
-{
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_MOV:
- /* src0 -> src1 and src2 forced to zero */
- inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0];
- inst->FullSrcRegisters[2] = r300_constant_zero;
- r3xx_emit_maths(fs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3);
- break;
- case TGSI_OPCODE_END:
- break;
- default:
- debug_printf("r300: fs: Bad opcode %d\n",
- inst->Instruction.Opcode);
- break;
- }
-}
+ .code.r300.tex.inst[0] = R300_TEX_OP_LD << R300_TEX_INST_SHIFT,
+
+ .code.r300.alu.inst[0].rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) |
+ R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) |
+ R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) |
+ R300_ALU_OUTC_CMP,
+ .code.r300.alu.inst[0].rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) |
+ R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ,
+ .code.r300.alu.inst[0].alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) |
+ R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) |
+ R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) |
+ R300_ALU_OUTA_CMP,
+ .code.r300.alu.inst[0].alpha_addr = R300_ALPHA_ADDR0(0) |
+ R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT,
+};
diff --git a/src/gallium/drivers/r300/r3xx_fs.h b/src/gallium/drivers/r300/r3xx_fs.h
index 3da39ec2526..51cd245724d 100644
--- a/src/gallium/drivers/r300/r3xx_fs.h
+++ b/src/gallium/drivers/r300/r3xx_fs.h
@@ -24,53 +24,9 @@
#ifndef R3XX_FS_H
#define R3XX_FS_H
-#include "r300_fs_inlines.h"
+#include "radeon_code.h"
-static struct r3xx_fragment_shader r3xx_passthrough_fragment_shader = {
- .alu_instruction_count = 1,
- .tex_instruction_count = 0,
- .indirections = 0,
- .shader.stack_size = 1,
-
- .instructions[0].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) |
- R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) |
- R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) |
- R300_ALU_OUTC_CMP,
- .instructions[0].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) |
- R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ,
- .instructions[0].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) |
- R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) |
- R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) |
- R300_ALU_OUTA_CMP,
- .instructions[0].alu_alpha_addr = R300_ALPHA_ADDR0(0) |
- R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT,
-};
-
-static struct r3xx_fragment_shader r3xx_texture_fragment_shader = {
- .alu_instruction_count = 1,
- .tex_instruction_count = 0,
- .indirections = 0,
- .shader.stack_size = 1,
-
- .instructions[0].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) |
- R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) |
- R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) |
- R300_ALU_OUTC_CMP,
- .instructions[0].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) |
- R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ,
- .instructions[0].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) |
- R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) |
- R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) |
- R300_ALU_OUTA_CMP,
- .instructions[0].alu_alpha_addr = R300_ALPHA_ADDR0(0) |
- R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT,
-};
-
-void r3xx_fs_finalize(struct r300_fragment_shader* fs,
- struct r300_fs_asm* assembler);
-
-void r3xx_fs_instruction(struct r3xx_fragment_shader* fs,
- struct r300_fs_asm* assembler,
- struct tgsi_full_instruction* inst);
+struct rX00_fragment_program_code r3xx_passthrough_fragment_shader;
+struct rX00_fragment_program_code r3xx_texture_fragment_shader;
#endif /* R3XX_FS_H */
diff --git a/src/gallium/drivers/r300/r5xx_fs.c b/src/gallium/drivers/r300/r5xx_fs.c
index 99d826278ce..f072deab0d9 100644
--- a/src/gallium/drivers/r300/r5xx_fs.c
+++ b/src/gallium/drivers/r300/r5xx_fs.c
@@ -23,445 +23,103 @@
#include "r5xx_fs.h"
-static INLINE unsigned r5xx_fix_swiz(unsigned s)
-{
- /* For historical reasons, the swizzle values x, y, z, w, and 0 are
- * equivalent to the actual machine code, but 1 is not. Thus, we just
- * adjust it a bit... */
- if (s == TGSI_EXTSWIZZLE_ONE) {
- return R500_SWIZZLE_ONE;
- } else {
- return s;
- }
-}
-
-static uint32_t r5xx_rgba_swiz(struct tgsi_full_src_register* reg)
-{
- if (reg->SrcRegister.Extended) {
- return r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleX) |
- (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleY) << 3) |
- (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleZ) << 6) |
- (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleW) << 9);
- } else {
- return reg->SrcRegister.SwizzleX |
- (reg->SrcRegister.SwizzleY << 3) |
- (reg->SrcRegister.SwizzleZ << 6) |
- (reg->SrcRegister.SwizzleW << 9);
- }
-}
-
-static uint32_t r5xx_strq_swiz(struct tgsi_full_src_register* reg)
-{
- return reg->SrcRegister.SwizzleX |
- (reg->SrcRegister.SwizzleY << 2) |
- (reg->SrcRegister.SwizzleZ << 4) |
- (reg->SrcRegister.SwizzleW << 6);
-}
-
-static INLINE uint32_t r5xx_rgb_swiz(struct tgsi_full_src_register* reg)
-{
- /* Only the first 9 bits... */
- return (r5xx_rgba_swiz(reg) & 0x1ff) |
- (reg->SrcRegister.Negate ? (1 << 9) : 0) |
- (reg->SrcRegisterExtMod.Absolute ? (1 << 10) : 0);
-}
-
-static INLINE uint32_t r5xx_alpha_swiz(struct tgsi_full_src_register* reg)
-{
- /* Only the last 3 bits... */
- return (r5xx_rgba_swiz(reg) >> 9) |
- (reg->SrcRegister.Negate ? (1 << 9) : 0) |
- (reg->SrcRegisterExtMod.Absolute ? (1 << 10) : 0);
-}
-
-static INLINE uint32_t r5xx_rgba_op(unsigned op)
-{
- switch (op) {
- case TGSI_OPCODE_COS:
- case TGSI_OPCODE_EX2:
- case TGSI_OPCODE_LG2:
- case TGSI_OPCODE_RCP:
- case TGSI_OPCODE_RSQ:
- case TGSI_OPCODE_SIN:
- return R500_ALU_RGBA_OP_SOP;
- case TGSI_OPCODE_DDX:
- return R500_ALU_RGBA_OP_MDH;
- case TGSI_OPCODE_DDY:
- return R500_ALU_RGBA_OP_MDV;
- case TGSI_OPCODE_FRC:
- return R500_ALU_RGBA_OP_FRC;
- case TGSI_OPCODE_DP3:
- return R500_ALU_RGBA_OP_DP3;
- case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_DPH:
- return R500_ALU_RGBA_OP_DP4;
- case TGSI_OPCODE_ABS:
- case TGSI_OPCODE_CMP:
- case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
- return R500_ALU_RGBA_OP_CMP;
- case TGSI_OPCODE_ADD:
- case TGSI_OPCODE_MAD:
- case TGSI_OPCODE_MUL:
- case TGSI_OPCODE_SUB:
- return R500_ALU_RGBA_OP_MAD;
- default:
- return 0;
- }
-}
-
-static INLINE uint32_t r5xx_alpha_op(unsigned op)
-{
- switch (op) {
- case TGSI_OPCODE_COS:
- return R500_ALPHA_OP_COS;
- case TGSI_OPCODE_EX2:
- return R500_ALPHA_OP_EX2;
- case TGSI_OPCODE_LG2:
- return R500_ALPHA_OP_LN2;
- case TGSI_OPCODE_RCP:
- return R500_ALPHA_OP_RCP;
- case TGSI_OPCODE_RSQ:
- return R500_ALPHA_OP_RSQ;
- case TGSI_OPCODE_FRC:
- return R500_ALPHA_OP_FRC;
- case TGSI_OPCODE_SIN:
- return R500_ALPHA_OP_SIN;
- case TGSI_OPCODE_DDX:
- return R500_ALPHA_OP_MDH;
- case TGSI_OPCODE_DDY:
- return R500_ALPHA_OP_MDV;
- case TGSI_OPCODE_DP3:
- case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_DPH:
- return R500_ALPHA_OP_DP;
- case TGSI_OPCODE_ABS:
- case TGSI_OPCODE_CMP:
- case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
- return R500_ALPHA_OP_CMP;
- case TGSI_OPCODE_ADD:
- case TGSI_OPCODE_MAD:
- case TGSI_OPCODE_MUL:
- case TGSI_OPCODE_SUB:
- return R500_ALPHA_OP_MAD;
- default:
- return 0;
- }
-}
-
-static INLINE uint32_t r5xx_tex_op(unsigned op)
-{
- switch (op) {
- case TGSI_OPCODE_KIL:
- return R500_TEX_INST_TEXKILL;
- case TGSI_OPCODE_TEX:
- return R500_TEX_INST_LD;
- case TGSI_OPCODE_TXB:
- return R500_TEX_INST_LODBIAS;
- case TGSI_OPCODE_TXP:
- return R500_TEX_INST_PROJ;
- default:
- return 0;
- }
-}
-
-/* Setup an ALU operation. */
-static INLINE void r5xx_emit_maths(struct r5xx_fragment_shader* fs,
- struct r300_fs_asm* assembler,
- struct tgsi_full_src_register* src,
- struct tgsi_full_dst_register* dst,
- unsigned op,
- unsigned count)
-{
- int i = fs->instruction_count;
-
- if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
- fs->instructions[i].inst0 = R500_INST_TYPE_OUT;
- if (r300_fs_is_depr(assembler, dst)) {
- fs->instructions[i].inst4 = R500_W_OMASK;
- } else {
- fs->instructions[i].inst0 |=
- R500_ALU_OMASK(dst->DstRegister.WriteMask);
- }
- } else {
- fs->instructions[i].inst0 = R500_INST_TYPE_ALU |
- R500_ALU_WMASK(dst->DstRegister.WriteMask);
- }
-
- fs->instructions[i].inst0 |= R500_INST_TEX_SEM_WAIT;
-
- fs->instructions[i].inst4 |=
- R500_ALPHA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister));
- fs->instructions[i].inst5 =
- R500_ALU_RGBA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister));
-
- switch (count) {
- case 3:
- fs->instructions[i].inst1 =
- R500_RGB_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister));
- fs->instructions[i].inst2 =
- R500_ALPHA_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister));
- fs->instructions[i].inst5 |=
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_SWIZ_RGBA_C(r5xx_rgb_swiz(&src[2])) |
- R500_ALU_RGBA_ALPHA_SEL_C_SRC2 |
- R500_SWIZ_ALPHA_C(r5xx_alpha_swiz(&src[2]));
- case 2:
- fs->instructions[i].inst1 |=
- R500_RGB_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister));
- fs->instructions[i].inst2 |=
- R500_ALPHA_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister));
- fs->instructions[i].inst3 =
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_SWIZ_RGB_B(r5xx_rgb_swiz(&src[1]));
- fs->instructions[i].inst4 |=
- R500_ALPHA_SEL_B_SRC1 |
- R500_SWIZ_ALPHA_B(r5xx_alpha_swiz(&src[1]));
- case 1:
- case 0:
- default:
- fs->instructions[i].inst1 |=
- R500_RGB_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister));
- fs->instructions[i].inst2 |=
- R500_ALPHA_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister));
- fs->instructions[i].inst3 |=
- R500_ALU_RGB_SEL_A_SRC0 |
- R500_SWIZ_RGB_A(r5xx_rgb_swiz(&src[0]));
- fs->instructions[i].inst4 |=
- R500_ALPHA_SEL_A_SRC0 |
- R500_SWIZ_ALPHA_A(r5xx_alpha_swiz(&src[0]));
- break;
- }
-
- fs->instructions[i].inst4 |= r5xx_alpha_op(op);
- fs->instructions[i].inst5 |= r5xx_rgba_op(op);
-
- fs->instruction_count++;
-}
-
-static INLINE void r5xx_emit_tex(struct r5xx_fragment_shader* fs,
- struct r300_fs_asm* assembler,
- struct tgsi_full_src_register* src,
- struct tgsi_full_dst_register* dst,
- uint32_t op)
-{
- int i = fs->instruction_count;
-
- fs->instructions[i].inst0 = R500_INST_TYPE_TEX |
- R500_TEX_WMASK(dst->DstRegister.WriteMask) |
- R500_INST_TEX_SEM_WAIT;
- fs->instructions[i].inst1 = R500_TEX_ID(0) |
- R500_TEX_SEM_ACQUIRE | //R500_TEX_IGNORE_UNCOVERED |
- r5xx_tex_op(op);
- fs->instructions[i].inst2 =
- R500_TEX_SRC_ADDR(r300_fs_src(assembler, &src->SrcRegister)) |
- R500_SWIZ_TEX_STRQ(r5xx_strq_swiz(src)) |
- R500_TEX_DST_ADDR(r300_fs_dst(assembler, &dst->DstRegister)) |
+#include "r300_reg.h"
+
+/* XXX this all should find its way back to r300_reg */
+/* Swizzle tools */
+#define R500_SWIZZLE_ZERO 4
+#define R500_SWIZZLE_HALF 5
+#define R500_SWIZZLE_ONE 6
+#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
+#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
+#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))
+#define R500_SWIZ_MOD_NEG 1
+#define R500_SWIZ_MOD_ABS 2
+#define R500_SWIZ_MOD_NEG_ABS 3
+/* Swizzles for inst2 */
+#define R500_SWIZ_TEX_STRQ(x) ((x) << 8)
+#define R500_SWIZ_TEX_RGBA(x) ((x) << 24)
+/* Swizzles for inst3 */
+#define R500_SWIZ_RGB_A(x) ((x) << 2)
+#define R500_SWIZ_RGB_B(x) ((x) << 15)
+/* Swizzles for inst4 */
+#define R500_SWIZ_ALPHA_A(x) ((x) << 14)
+#define R500_SWIZ_ALPHA_B(x) ((x) << 21)
+/* Swizzle for inst5 */
+#define R500_SWIZ_RGBA_C(x) ((x) << 14)
+#define R500_SWIZ_ALPHA_C(x) ((x) << 27)
+/* Writemasks */
+#define R500_TEX_WMASK(x) ((x) << 11)
+#define R500_ALU_WMASK(x) ((x) << 11)
+#define R500_ALU_OMASK(x) ((x) << 15)
+#define R500_W_OMASK (1 << 31)
+
+struct rX00_fragment_program_code r5xx_passthrough_fragment_shader = {
+ .code.r500.max_temp_idx = 0,
+ .code.r500.inst_end = 0,
+
+ .code.r500.inst[0].inst0 = R500_INST_TYPE_OUT |
+ R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
+ R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK |
+ R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
+ .code.r500.inst[0].inst1 =
+ R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST |
+ R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST,
+ .code.r500.inst[0].inst2 =
+ R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST |
+ R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST,
+ .code.r500.inst[0].inst3 =
+ R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B,
+ .code.r500.inst[0].inst4 =
+ R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A,
+ .code.r500.inst[0].inst5 =
+ R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 |
+ R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 |
+ R500_ALU_RGBA_A_SWIZ_0,
+};
+
+struct rX00_fragment_program_code r5xx_texture_fragment_shader = {
+ .code.r500.max_temp_idx = 0,
+ .code.r500.inst_end = 1,
+
+ .code.r500.inst[0].inst0 = R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_RGB | R500_INST_ALPHA_WMASK |
+ R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
+ .code.r500.inst[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED,
+ .code.r500.inst[0].inst2 = R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A |
+ R500_TEX_DST_ADDR(0) |
R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
-
- if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
- fs->instructions[i].inst2 |=
- R500_TEX_DST_ADDR(assembler->temp_count +
- assembler->temp_offset);
-
- fs->instruction_count++;
-
- /* Setup and emit a MOV. */
- src[0].SrcRegister.Index = assembler->temp_count;
- src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-
- src[1] = src[0];
- src[2] = r300_constant_zero;
- r5xx_emit_maths(fs, assembler, src, dst, TGSI_OPCODE_MOV, 3);
- } else {
- fs->instruction_count++;
- }
-}
-
-void r5xx_fs_finalize(struct r5xx_fragment_shader* fs,
- struct r300_fs_asm* assembler)
-{
- /* XXX should this just go with OPCODE_END? */
- fs->instructions[fs->instruction_count - 1].inst0 |=
- R500_INST_LAST;
-}
-
-void r5xx_fs_instruction(struct r5xx_fragment_shader* fs,
- struct r300_fs_asm* assembler,
- struct tgsi_full_instruction* inst)
-{
- /* Switch between opcodes. When possible, prefer using the official
- * AMD/ATI names for opcodes, please, as it facilitates using the
- * documentation. */
- switch (inst->Instruction.Opcode) {
- /* XXX trig needs extra prep */
- case TGSI_OPCODE_COS:
- case TGSI_OPCODE_SIN:
- /* The simple scalar ops. */
- case TGSI_OPCODE_EX2:
- case TGSI_OPCODE_LG2:
- case TGSI_OPCODE_RCP:
- case TGSI_OPCODE_RSQ:
- /* Copy red swizzle to alpha for src0 */
- inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW =
- inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX;
- inst->FullSrcRegisters[0].SrcRegister.SwizzleW =
- inst->FullSrcRegisters[0].SrcRegister.SwizzleX;
- /* Fall through */
- case TGSI_OPCODE_DDX:
- case TGSI_OPCODE_DDY:
- case TGSI_OPCODE_FRC:
- r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], inst->Instruction.Opcode, 1);
- break;
-
- /* The dot products. */
- case TGSI_OPCODE_DPH:
- /* Set alpha swizzle to one for src0 */
- if (!inst->FullSrcRegisters[0].SrcRegister.Extended) {
- inst->FullSrcRegisters[0].SrcRegister.Extended = TRUE;
- inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX =
- inst->FullSrcRegisters[0].SrcRegister.SwizzleX;
- inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleY =
- inst->FullSrcRegisters[0].SrcRegister.SwizzleY;
- inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleZ =
- inst->FullSrcRegisters[0].SrcRegister.SwizzleZ;
- }
- inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW =
- TGSI_EXTSWIZZLE_ONE;
- /* Fall through */
- case TGSI_OPCODE_DP3:
- case TGSI_OPCODE_DP4:
- r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], inst->Instruction.Opcode, 2);
- break;
-
- /* Simple three-source operations. */
- case TGSI_OPCODE_CMP:
- /* Swap src0 and src2 */
- inst->FullSrcRegisters[3] = inst->FullSrcRegisters[2];
- inst->FullSrcRegisters[2] = inst->FullSrcRegisters[0];
- inst->FullSrcRegisters[0] = inst->FullSrcRegisters[3];
- r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3);
- break;
-
- /* The MAD variants. */
- case TGSI_OPCODE_SUB:
- /* Just like ADD, but flip the negation on src1 first */
- inst->FullSrcRegisters[1].SrcRegister.Negate =
- !inst->FullSrcRegisters[1].SrcRegister.Negate;
- /* Fall through */
- case TGSI_OPCODE_ADD:
- /* Force src0 to one, move all registers over */
- inst->FullSrcRegisters[2] = inst->FullSrcRegisters[1];
- inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0];
- inst->FullSrcRegisters[0] = r300_constant_one;
- r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3);
- break;
- case TGSI_OPCODE_MUL:
- /* Force our src2 to zero */
- inst->FullSrcRegisters[2] = r300_constant_zero;
- r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3);
- break;
- case TGSI_OPCODE_MAD:
- r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3);
- break;
-
- /* The MOV variants. */
- case TGSI_OPCODE_ABS:
- /* Set absolute value modifiers. */
- inst->FullSrcRegisters[0].SrcRegisterExtMod.Absolute = TRUE;
- /* Fall through */
- case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
- /* src0 -> src1 and src2 forced to zero */
- inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0];
- inst->FullSrcRegisters[2] = r300_constant_zero;
- r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3);
- break;
-
- /* The compound and hybrid insts. */
- case TGSI_OPCODE_LRP:
- /* LRP DST A, B, C -> MAD TMP -A, C, C; MAD DST A, B, TMP */
- inst->FullSrcRegisters[3] = inst->FullSrcRegisters[1];
- inst->FullSrcRegisters[1] = inst->FullSrcRegisters[2];
- inst->FullSrcRegisters[0].SrcRegister.Negate =
- !(inst->FullSrcRegisters[0].SrcRegister.Negate);
- inst->FullDstRegisters[1] = inst->FullDstRegisters[0];
- inst->FullDstRegisters[0].DstRegister.Index =
- assembler->temp_count;
- inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
- r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3);
- inst->FullSrcRegisters[2].SrcRegister.Index =
- assembler->temp_count;
- inst->FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
- inst->FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
- inst->FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
- inst->FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
- inst->FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
- inst->FullSrcRegisters[1] = inst->FullSrcRegisters[3];
- inst->FullSrcRegisters[0].SrcRegister.Negate =
- !(inst->FullSrcRegisters[0].SrcRegister.Negate);
- inst->FullDstRegisters[0] = inst->FullDstRegisters[1];
- r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3);
- break;
- case TGSI_OPCODE_POW:
- /* POW DST A, B -> LG2 TMP A; MUL TMP TMP, B; EX2 DST TMP */
- inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW =
- inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX;
- inst->FullSrcRegisters[0].SrcRegister.SwizzleW =
- inst->FullSrcRegisters[0].SrcRegister.SwizzleX;
- inst->FullDstRegisters[1] = inst->FullDstRegisters[0];
- inst->FullDstRegisters[0].DstRegister.Index =
- assembler->temp_count;
- inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
- r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], TGSI_OPCODE_LG2, 1);
- inst->FullSrcRegisters[0].SrcRegister.Index =
- assembler->temp_count;
- inst->FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
- inst->FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
- inst->FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
- inst->FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
- inst->FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
- inst->FullSrcRegisters[2] = r300_constant_zero;
- r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], TGSI_OPCODE_MUL, 3);
- inst->FullDstRegisters[0] = inst->FullDstRegisters[1];
- r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters,
- &inst->FullDstRegisters[0], TGSI_OPCODE_EX2, 1);
- break;
-
- /* The texture instruction set. */
- case TGSI_OPCODE_KIL:
- case TGSI_OPCODE_TEX:
- case TGSI_OPCODE_TXB:
- case TGSI_OPCODE_TXP:
- r5xx_emit_tex(fs, assembler, &inst->FullSrcRegisters[0],
- &inst->FullDstRegisters[0], inst->Instruction.Opcode);
- break;
-
- /* This is the end. My only friend, the end. */
- case TGSI_OPCODE_END:
- break;
- default:
- debug_printf("r300: fs: Bad opcode %d\n",
- inst->Instruction.Opcode);
- break;
- }
-
- /* Clamp, if saturation flags are set. */
- if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) {
- fs->instructions[fs->instruction_count - 1].inst0 |=
- R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
- }
-}
+ R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A,
+ .code.r500.inst[0].inst3 = 0x0,
+ .code.r500.inst[0].inst4 = 0x0,
+ .code.r500.inst[0].inst5 = 0x0,
+
+ .code.r500.inst[1].inst0 = R500_INST_TYPE_OUT |
+ R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
+ R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK |
+ R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
+ .code.r500.inst[1].inst1 =
+ R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST |
+ R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST,
+ .code.r500.inst[1].inst2 =
+ R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST |
+ R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST,
+ .code.r500.inst[1].inst3 =
+ R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B,
+ .code.r500.inst[1].inst4 =
+ R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A,
+ .code.r500.inst[1].inst5 =
+ R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 |
+ R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 |
+ R500_ALU_RGBA_A_SWIZ_0,
+};
diff --git a/src/gallium/drivers/r300/r5xx_fs.h b/src/gallium/drivers/r300/r5xx_fs.h
index 629e587be4d..a4addde32b2 100644
--- a/src/gallium/drivers/r300/r5xx_fs.h
+++ b/src/gallium/drivers/r300/r5xx_fs.h
@@ -24,109 +24,9 @@
#ifndef R5XX_FS_H
#define R5XX_FS_H
-#include "r300_fs_inlines.h"
+#include "radeon_code.h"
-/* XXX this all should find its way back to r300_reg */
-/* Swizzle tools */
-#define R500_SWIZZLE_ZERO 4
-#define R500_SWIZZLE_HALF 5
-#define R500_SWIZZLE_ONE 6
-#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
-#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
-#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))
-#define R500_SWIZ_MOD_NEG 1
-#define R500_SWIZ_MOD_ABS 2
-#define R500_SWIZ_MOD_NEG_ABS 3
-/* Swizzles for inst2 */
-#define R500_SWIZ_TEX_STRQ(x) ((x) << 8)
-#define R500_SWIZ_TEX_RGBA(x) ((x) << 24)
-/* Swizzles for inst3 */
-#define R500_SWIZ_RGB_A(x) ((x) << 2)
-#define R500_SWIZ_RGB_B(x) ((x) << 15)
-/* Swizzles for inst4 */
-#define R500_SWIZ_ALPHA_A(x) ((x) << 14)
-#define R500_SWIZ_ALPHA_B(x) ((x) << 21)
-/* Swizzle for inst5 */
-#define R500_SWIZ_RGBA_C(x) ((x) << 14)
-#define R500_SWIZ_ALPHA_C(x) ((x) << 27)
-/* Writemasks */
-#define R500_TEX_WMASK(x) ((x) << 11)
-#define R500_ALU_WMASK(x) ((x) << 11)
-#define R500_ALU_OMASK(x) ((x) << 15)
-#define R500_W_OMASK (1 << 31)
-
-static struct r5xx_fragment_shader r5xx_passthrough_fragment_shader = {
- .shader.stack_size = 0,
- .instruction_count = 1,
- .instructions[0].inst0 = R500_INST_TYPE_OUT |
- R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
- R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK |
- R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
- .instructions[0].inst1 =
- R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST |
- R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST,
- .instructions[0].inst2 =
- R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST |
- R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST,
- .instructions[0].inst3 =
- R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B,
- .instructions[0].inst4 =
- R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A,
- .instructions[0].inst5 =
- R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 |
- R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 |
- R500_ALU_RGBA_A_SWIZ_0,
-};
-
-static struct r5xx_fragment_shader r5xx_texture_fragment_shader = {
- .shader.stack_size = 1,
- .instruction_count = 2,
- .instructions[0].inst0 = R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_RGB | R500_INST_ALPHA_WMASK |
- R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
- .instructions[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED,
- .instructions[0].inst2 = R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A |
- R500_TEX_DST_ADDR(0) |
- R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A,
- .instructions[0].inst3 = 0x0,
- .instructions[0].inst4 = 0x0,
- .instructions[0].inst5 = 0x0,
- .instructions[1].inst0 = R500_INST_TYPE_OUT |
- R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
- R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK |
- R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
- .instructions[1].inst1 =
- R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST |
- R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST,
- .instructions[1].inst2 =
- R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST |
- R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST,
- .instructions[1].inst3 =
- R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B,
- .instructions[1].inst4 =
- R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A,
- .instructions[1].inst5 =
- R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 |
- R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 |
- R500_ALU_RGBA_A_SWIZ_0,
-};
-
-void r5xx_fs_finalize(struct r5xx_fragment_shader* fs,
- struct r300_fs_asm* assembler);
-
-void r5xx_fs_instruction(struct r5xx_fragment_shader* fs,
- struct r300_fs_asm* assembler,
- struct tgsi_full_instruction* inst);
+struct rX00_fragment_program_code r5xx_passthrough_fragment_shader;
+struct rX00_fragment_program_code r5xx_texture_fragment_shader;
#endif /* R5XX_FS_H */
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index 7a533dad9f0..70f09324311 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -48,11 +48,6 @@
/* Simple, maximally packed layout.
*/
-static unsigned minify( unsigned d )
-{
- return MAX2(1, d>>1);
-}
-
/* Conventional allocation path for non-display textures:
*/
@@ -100,6 +95,7 @@ softpipe_displaytarget_layout(struct pipe_screen *screen,
{
unsigned usage = (PIPE_BUFFER_USAGE_CPU_READ_WRITE |
PIPE_BUFFER_USAGE_GPU_READ_WRITE);
+ unsigned tex_usage = spt->base.tex_usage;
spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]);
spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]);
@@ -109,6 +105,7 @@ softpipe_displaytarget_layout(struct pipe_screen *screen,
spt->base.height[0],
spt->base.format,
usage,
+ tex_usage,
&spt->stride[0]);
return spt->buffer != NULL;
@@ -130,7 +127,8 @@ softpipe_texture_create(struct pipe_screen *screen,
pipe_reference_init(&spt->base.reference, 1);
spt->base.screen = screen;
- if (spt->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) {
+ if (spt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
+ PIPE_TEXTURE_USAGE_PRIMARY)) {
if (!softpipe_displaytarget_layout(screen, spt))
goto fail;
}
@@ -224,12 +222,6 @@ softpipe_get_tex_surface(struct pipe_screen *screen,
if (ps->usage & PIPE_BUFFER_USAGE_GPU_READ)
ps->usage |= PIPE_BUFFER_USAGE_CPU_READ;
- if (ps->usage & (PIPE_BUFFER_USAGE_CPU_WRITE |
- PIPE_BUFFER_USAGE_GPU_WRITE)) {
- /* Mark the surface as dirty. The tile cache will look for this. */
- spt->modified = TRUE;
- }
-
ps->face = face;
ps->level = level;
ps->zslice = zslice;
@@ -376,6 +368,11 @@ softpipe_transfer_unmap(struct pipe_screen *screen,
spt = softpipe_texture(transfer->texture);
pipe_buffer_unmap( screen, spt->buffer );
+
+ if (transfer->usage != PIPE_TRANSFER_READ) {
+ /* Mark the texture as dirty to expire the tile caches. */
+ spt->modified = TRUE;
+ }
}
diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c
index 98ac75e3fa3..93c569c73a8 100644
--- a/src/gallium/drivers/trace/tr_drm.c
+++ b/src/gallium/drivers/trace/tr_drm.c
@@ -63,7 +63,7 @@ trace_drm_create_screen(struct drm_api *_api, int fd,
screen = api->create_screen(api, fd, arg);
return trace_screen_create(screen);
-};
+}
static struct pipe_context *
trace_drm_create_context(struct drm_api *_api,
@@ -82,7 +82,7 @@ trace_drm_create_context(struct drm_api *_api,
pipe = trace_context_create(_screen, pipe);
return pipe;
-};
+}
static boolean
trace_drm_buffer_from_texture(struct drm_api *_api,
@@ -102,7 +102,7 @@ trace_drm_buffer_from_texture(struct drm_api *_api,
result = api->buffer_from_texture(api, texture, &buffer, stride);
if (result && _buffer)
- buffer = trace_buffer_create(trace_screen(texture->screen), buffer);
+ buffer = trace_buffer_create(trace_screen(_texture->screen), buffer);
if (_buffer)
*_buffer = buffer;
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index 5b1e26a52d7..26f1c04594f 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -462,6 +462,7 @@ trace_screen_surface_buffer_create(struct pipe_screen *_screen,
unsigned width, unsigned height,
enum pipe_format format,
unsigned usage,
+ unsigned tex_usage,
unsigned *pstride)
{
struct trace_screen *tr_scr = trace_screen(_screen);
@@ -476,11 +477,13 @@ trace_screen_surface_buffer_create(struct pipe_screen *_screen,
trace_dump_arg(uint, height);
trace_dump_arg(format, format);
trace_dump_arg(uint, usage);
+ trace_dump_arg(uint, tex_usage);
result = screen->surface_buffer_create(screen,
width, height,
format,
usage,
+ tex_usage,
pstride);
stride = *pstride;