summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Nicolai Haehnle <nhaehnle@gmail.com> 2008-07-05 20:01:20 +0200
committer Nicolai Haehnle <nhaehnle@gmail.com> 2008-07-05 20:01:20 +0200
commit 77fdfaa23adeaaf6a217ef1ee751410c6a5b0d21 (patch)
tree d16e32914e301b68f293b6584f9e1905beaf16fd /src
parent 364d45a3e1629f32c6ab5407f92618a16c9d45e0 (diff)
r300: Correctly scan for used temporary registers
This fixes a regression introduced by dea8719f0...
Diffstat (limited to 'src')
-rw-r--r-- src/mesa/drivers/dri/r300/r300_fragprog.c | 28
-rw-r--r-- src/mesa/drivers/dri/r300/r500_fragprog.c | 18
-rw-r--r-- src/mesa/drivers/dri/r300/radeon_program.c | 52
-rw-r--r-- src/mesa/drivers/dri/r300/radeon_program.h | 20
-rw-r--r-- src/mesa/drivers/dri/r300/radeon_program_alu.c | 84
-rw-r--r-- src/mesa/drivers/dri/r300/radeon_program_alu.h | 3
6 files changed, 126 insertions, 79 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
index 8c49e8ada68..6a8ef0ef5fc 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
@@ -68,7 +68,7 @@ static void reset_srcreg(struct prog_src_register* reg)
* be reused.
*/
static GLboolean transform_TEX(
- GLcontext *ctx, struct gl_program *p,
+ struct radeon_transform_context *t,
struct prog_instruction* orig_inst, void* data)
{
struct r300_fragment_program_compiler *compiler =
@@ -84,11 +84,11 @@ static GLboolean transform_TEX(
return GL_FALSE;
if (inst.Opcode != OPCODE_KIL &&
- p->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+ t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- tgt = radeonAppendInstructions(p, 1);
+ tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg = inst.DstReg;
@@ -98,7 +98,7 @@ static GLboolean transform_TEX(
}
inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+ inst.DstReg.Index = radeonFindFreeTemporary(t);
inst.DstReg.WriteMask = WRITEMASK_XYZW;
}
@@ -113,7 +113,7 @@ static GLboolean transform_TEX(
0
};
- int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+ int tempreg = radeonFindFreeTemporary(t);
int factor_index;
tokens[2] = inst.TexSrcUnit;
@@ -121,7 +121,7 @@ static GLboolean transform_TEX(
_mesa_add_state_reference(
compiler->fp->mesa_program.Base.Parameters, tokens);
- tgt = radeonAppendInstructions(p, 1);
+ tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MUL;
tgt->DstReg.File = PROGRAM_TEMPORARY;
@@ -140,9 +140,9 @@ static GLboolean transform_TEX(
*/
if (inst.SrcReg[0].Swizzle != SWIZZLE_NOOP ||
inst.SrcReg[0].Abs || inst.SrcReg[0].NegateBase || inst.SrcReg[0].NegateAbs) {
- int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+ int tempreg = radeonFindFreeTemporary(t);
- tgt = radeonAppendInstructions(p, 1);
+ tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg.File = PROGRAM_TEMPORARY;
@@ -157,7 +157,7 @@ static GLboolean transform_TEX(
if (inst.Opcode != OPCODE_KIL) {
if (inst.DstReg.File != PROGRAM_TEMPORARY ||
inst.DstReg.WriteMask != WRITEMASK_XYZW) {
- int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+ int tempreg = radeonFindFreeTemporary(t);
inst.DstReg.File = PROGRAM_TEMPORARY;
inst.DstReg.Index = tempreg;
@@ -166,16 +166,16 @@ static GLboolean transform_TEX(
}
}
- tgt = radeonAppendInstructions(p, 1);
+ tgt = radeonAppendInstructions(t->Program, 1);
_mesa_copy_instructions(tgt, &inst, 1);
if (inst.Opcode != OPCODE_KIL &&
- p->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+ t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
- int rcptemp = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+ int rcptemp = radeonFindFreeTemporary(t);
- tgt = radeonAppendInstructions(p, 3);
+ tgt = radeonAppendInstructions(t->Program, 3);
tgt[0].Opcode = OPCODE_RCP;
tgt[0].DstReg.File = PROGRAM_TEMPORARY;
@@ -222,7 +222,7 @@ static GLboolean transform_TEX(
tgt[2].SrcReg[2].Swizzle = SWIZZLE_1111;
}
} else if (destredirect) {
- tgt = radeonAppendInstructions(p, 1);
+ tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg = orig_inst->DstReg;
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c
index b46e924ac7b..7ee84947225 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r500_fragprog.c
@@ -38,7 +38,7 @@
*
*/
static GLboolean transform_TEX(
- GLcontext *ctx, struct gl_program *p,
+ struct radeon_transform_context *t,
struct prog_instruction* orig_inst, void* data)
{
struct r500_fragment_program_compiler *compiler =
@@ -55,11 +55,11 @@ static GLboolean transform_TEX(
/* ARB_shadow & EXT_shadow_funcs */
if (inst.Opcode != OPCODE_KIL &&
- p->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+ t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- tgt = radeonAppendInstructions(p, 1);
+ tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg.File = inst.DstReg.File;
@@ -71,20 +71,20 @@ static GLboolean transform_TEX(
}
inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+ inst.DstReg.Index = radeonFindFreeTemporary(t);
inst.DstReg.WriteMask = WRITEMASK_XYZW;
}
- tgt = radeonAppendInstructions(p, 1);
+ tgt = radeonAppendInstructions(t->Program, 1);
_mesa_copy_instructions(tgt, &inst, 1);
if (inst.Opcode != OPCODE_KIL &&
- p->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+ t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
- int rcptemp = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+ int rcptemp = radeonFindFreeTemporary(t);
- tgt = radeonAppendInstructions(p, 3);
+ tgt = radeonAppendInstructions(t->Program, 3);
tgt[0].Opcode = OPCODE_RCP;
tgt[0].DstReg.File = PROGRAM_TEMPORARY;
@@ -131,7 +131,7 @@ static GLboolean transform_TEX(
tgt[2].SrcReg[2].Swizzle = SWIZZLE_1111;
}
} else if (destredirect) {
- tgt = radeonAppendInstructions(p, 1);
+ tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg = orig_inst->DstReg;
diff --git a/src/mesa/drivers/dri/r300/radeon_program.c b/src/mesa/drivers/dri/r300/radeon_program.c
index 3112339f81c..da5e7aefce5 100644
--- a/src/mesa/drivers/dri/r300/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/radeon_program.c
@@ -46,29 +46,30 @@
* one instruction at a time.
*/
void radeonLocalTransform(
- GLcontext *ctx,
+ GLcontext *Ctx,
struct gl_program *program,
int num_transformations,
struct radeon_program_transformation* transformations)
{
- struct prog_instruction *source;
- int numinstructions;
+ struct radeon_transform_context ctx;
int ip;
- source = program->Instructions;
- numinstructions = program->NumInstructions;
+ ctx.Ctx = Ctx;
+ ctx.Program = program;
+ ctx.OldInstructions = program->Instructions;
+ ctx.OldNumInstructions = program->NumInstructions;
program->Instructions = 0;
program->NumInstructions = 0;
- for(ip = 0; ip < numinstructions; ++ip) {
- struct prog_instruction *instr = source + ip;
+ for(ip = 0; ip < ctx.OldNumInstructions; ++ip) {
+ struct prog_instruction *instr = ctx.OldInstructions + ip;
int i;
for(i = 0; i < num_transformations; ++i) {
struct radeon_program_transformation* t = transformations + i;
- if (t->function(ctx, program, instr, t->userData))
+ if (t->function(&ctx, instr, t->userData))
break;
}
@@ -78,7 +79,40 @@ void radeonLocalTransform(
}
}
- _mesa_free_instructions(source, numinstructions);
+ _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions);
+}
+
+
+static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count)
+{
+ GLuint i;
+ for (i = 0; i < count; i++) {
+ const struct prog_instruction *inst = insts + i;
+ const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
+ GLuint k;
+
+ for (k = 0; k < n; k++) {
+ if (inst->SrcReg[k].File == PROGRAM_TEMPORARY)
+ used[inst->SrcReg[k].Index] = GL_TRUE;
+ }
+ }
+}
+
+GLint radeonFindFreeTemporary(struct radeon_transform_context *t)
+{
+ GLboolean used[MAX_PROGRAM_TEMPS];
+ GLuint i;
+
+ _mesa_memset(used, 0, sizeof(used));
+ scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions);
+ scan_instructions(used, t->OldInstructions, t->OldNumInstructions);
+
+ for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+ if (!used[i])
+ return i;
+ }
+
+ return -1;
}
diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/radeon_program.h
index 012104fa5ae..ba76bc47cfb 100644
--- a/src/mesa/drivers/dri/r300/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/radeon_program.h
@@ -49,6 +49,19 @@ enum {
#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE)
/**
+ * Transformation context that is passed to local transformations.
+ *
+ * Care must be taken with some operations during transformation,
+ * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary
+ */
+struct radeon_transform_context {
+ GLcontext *Ctx;
+ struct gl_program *Program;
+ struct prog_instruction *OldInstructions;
+ GLuint OldNumInstructions;
+};
+
+/**
* A transformation that can be passed to \ref radeonLocalTransform.
*
* The function will be called once for each instruction.
@@ -60,8 +73,7 @@ enum {
*/
struct radeon_program_transformation {
GLboolean (*function)(
- GLcontext*,
- struct gl_program*,
+ struct radeon_transform_context*,
struct prog_instruction*,
void*);
void *userData;
@@ -73,6 +85,10 @@ void radeonLocalTransform(
int num_transformations,
struct radeon_program_transformation* transformations);
+/**
+ * Find a usable free temporary register during program transformation
+ */
+GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx);
struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count);
diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c
index 3104d07facd..d6d016d7c12 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/radeon_program_alu.c
@@ -148,24 +148,24 @@ static struct prog_src_register scalar(struct prog_src_register reg)
return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
}
-static void transform_ABS(struct gl_program* p,
+static void transform_ABS(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
struct prog_src_register src = inst->SrcReg[0];
src.Abs = 1;
src.NegateBase = 0;
src.NegateAbs = 0;
- emit1(p, OPCODE_MOV, inst->DstReg, src);
+ emit1(t->Program, OPCODE_MOV, inst->DstReg, src);
}
-static void transform_DPH(struct gl_program* p,
+static void transform_DPH(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
struct prog_src_register src0 = inst->SrcReg[0];
if (src0.NegateAbs) {
if (src0.Abs) {
- int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
- emit1(p, OPCODE_MOV, dstreg(PROGRAM_TEMPORARY, tempreg), src0);
+ int tempreg = radeonFindFreeTemporary(t);
+ emit1(t->Program, OPCODE_MOV, dstreg(PROGRAM_TEMPORARY, tempreg), src0);
src0 = srcreg(src0.File, src0.Index);
} else {
src0.NegateAbs = 0;
@@ -174,70 +174,70 @@ static void transform_DPH(struct gl_program* p,
}
set_swizzle(&src0, 3, SWIZZLE_ONE);
set_negate_base(&src0, 3, 0);
- emit2(p, OPCODE_DP4, inst->DstReg, src0, inst->SrcReg[1]);
+ emit2(t->Program, OPCODE_DP4, inst->DstReg, src0, inst->SrcReg[1]);
}
-static void transform_FLR(struct gl_program* p,
+static void transform_FLR(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
- int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
- emit1(p, OPCODE_FRC, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
- emit2(p, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+ int tempreg = radeonFindFreeTemporary(t);
+ emit1(t->Program, OPCODE_FRC, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
+ emit2(t->Program, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
}
-static void transform_POW(struct gl_program* p,
+static void transform_POW(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
- int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+ int tempreg = radeonFindFreeTemporary(t);
struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
tempdst.WriteMask = WRITEMASK_W;
tempsrc.Swizzle = SWIZZLE_WWWW;
- emit1(p, OPCODE_LG2, tempdst, scalar(inst->SrcReg[0]));
- emit2(p, OPCODE_MUL, tempdst, tempsrc, scalar(inst->SrcReg[1]));
- emit1(p, OPCODE_EX2, inst->DstReg, tempsrc);
+ emit1(t->Program, OPCODE_LG2, tempdst, scalar(inst->SrcReg[0]));
+ emit2(t->Program, OPCODE_MUL, tempdst, tempsrc, scalar(inst->SrcReg[1]));
+ emit1(t->Program, OPCODE_EX2, inst->DstReg, tempsrc);
}
-static void transform_SGE(struct gl_program* p,
+static void transform_SGE(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
- int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+ int tempreg = radeonFindFreeTemporary(t);
- emit2(p, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
- emit3(p, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
+ emit2(t->Program, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
+ emit3(t->Program, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
}
-static void transform_SLT(struct gl_program* p,
+static void transform_SLT(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
- int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+ int tempreg = radeonFindFreeTemporary(t);
- emit2(p, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
- emit3(p, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
+ emit2(t->Program, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
+ emit3(t->Program, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
}
-static void transform_SUB(struct gl_program* p,
+static void transform_SUB(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
- emit2(p, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
+ emit2(t->Program, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
}
-static void transform_SWZ(struct gl_program* p,
+static void transform_SWZ(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
- emit1(p, OPCODE_MOV, inst->DstReg, inst->SrcReg[0]);
+ emit1(t->Program, OPCODE_MOV, inst->DstReg, inst->SrcReg[0]);
}
-static void transform_XPD(struct gl_program* p,
+static void transform_XPD(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
- int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+ int tempreg = radeonFindFreeTemporary(t);
- emit2(p, OPCODE_MUL, dstreg(PROGRAM_TEMPORARY, tempreg),
+ emit2(t->Program, OPCODE_MUL, dstreg(PROGRAM_TEMPORARY, tempreg),
swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
- emit3(p, OPCODE_MAD, inst->DstReg,
+ emit3(t->Program, OPCODE_MAD, inst->DstReg,
swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
@@ -257,22 +257,20 @@ static void transform_XPD(struct gl_program* p,
*
* @todo add LIT here as well?
*/
-GLboolean radeonTransformALU(
- GLcontext* ctx,
- struct gl_program* prog,
+GLboolean radeonTransformALU(struct radeon_transform_context* t,
struct prog_instruction* inst,
void* unused)
{
switch(inst->Opcode) {
- case OPCODE_ABS: transform_ABS(prog, inst); return GL_TRUE;
- case OPCODE_DPH: transform_DPH(prog, inst); return GL_TRUE;
- case OPCODE_FLR: transform_FLR(prog, inst); return GL_TRUE;
- case OPCODE_POW: transform_POW(prog, inst); return GL_TRUE;
- case OPCODE_SGE: transform_SGE(prog, inst); return GL_TRUE;
- case OPCODE_SLT: transform_SLT(prog, inst); return GL_TRUE;
- case OPCODE_SUB: transform_SUB(prog, inst); return GL_TRUE;
- case OPCODE_SWZ: transform_SWZ(prog, inst); return GL_TRUE;
- case OPCODE_XPD: transform_XPD(prog, inst); return GL_TRUE;
+ case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;
+ case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
+ case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
+ case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
+ case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
+ case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;
+ case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
+ case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
+ case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
default:
return GL_FALSE;
}
diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/radeon_program_alu.h
index f5beb9f8c30..858c5ed0b8c 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_alu.h
+++ b/src/mesa/drivers/dri/r300/radeon_program_alu.h
@@ -31,8 +31,7 @@
#include "radeon_program.h"
GLboolean radeonTransformALU(
- GLcontext*,
- struct gl_program*,
+ struct radeon_transform_context *t,
struct prog_instruction*,
void*);