summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <tstellar@gmail.com>2010-07-13 21:25:27 -0700
committerTom Stellard <tstellar@gmail.com>2010-09-10 18:18:08 -0700
commit63432ecfce5415fbf07f1781ec77b5ea3efff599 (patch)
tree903818a193704d2053ce79cf0c8a2d43c3cbaa88
parentd8a36620089e72d431ae853ec168f193f3376782 (diff)
r300/compiler: Enable presubtract sources
The r300 compiler can now emit instructions that select from the presubtract source. A peephole optimization has been added to convert instructions like: ADD Temp[0].x, none.1, -Temp[1].x into the INV (1 - src0) presubtract operation.
-rw-r--r--src/gallium/drivers/r300/r300_fs.c1
-rw-r--r--src/gallium/drivers/r300/r300_vs.c1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog.c76
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c49
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c33
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c37
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c128
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c245
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c158
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c53
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.h11
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h36
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c185
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h33
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_print.c87
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c34
17 files changed, 1074 insertions, 94 deletions
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index b9f4d77dea7..b8dab88ef09 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -387,6 +387,7 @@ static void r300_translate_fragment_shader(
compiler.Base.is_r500 = r300->screen->caps.is_r500;
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
compiler.Base.has_half_swizzles = TRUE;
+ compiler.Base.has_presub = TRUE;
compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 22c94adbda0..e2b9af9d018 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -208,6 +208,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
compiler.Base.is_r500 = r300->screen->caps.is_r500;
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
compiler.Base.has_half_swizzles = FALSE;
+ compiler.Base.has_presub = FALSE;
compiler.Base.max_temp_regs = 32;
compiler.Base.max_constants = 256;
compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256;
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
index 3c83deffcb5..782671bac01 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
@@ -31,6 +31,24 @@
#include "../r300_reg.h"
+static void presub_string(char out[10], unsigned int inst)
+{
+ switch(inst & 0x600000){
+ case R300_ALU_SRCP_1_MINUS_2_SRC0:
+ sprintf(out, "bias");
+ break;
+ case R300_ALU_SRCP_SRC1_MINUS_SRC0:
+ sprintf(out, "sub");
+ break;
+ case R300_ALU_SRCP_SRC1_PLUS_SRC0:
+ sprintf(out, "add");
+ break;
+ case R300_ALU_SRCP_1_MINUS_SRC0:
+ sprintf(out, "inv ");
+ break;
+ }
+}
+
/* just some random things... */
void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
{
@@ -98,8 +116,8 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
for (i = alu_offset;
i <= alu_offset + alu_end; ++i) {
- char srcc[3][10], dstc[20];
- char srca[3][10], dsta[20];
+ char srcc[4][10], dstc[20];
+ char srca[4][10], dsta[20];
char argc[3][20];
char arga[3][20];
char flags[5], tmp[10];
@@ -142,6 +160,9 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
flags);
strcat(dstc, tmp);
}
+ /* Presub */
+ presub_string(srcc[3], code->alu.inst[i].rgb_inst);
+ presub_string(srca[3], code->alu.inst[i].alpha_inst);
dsta[0] = 0;
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
@@ -160,11 +181,12 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
}
fprintf(stderr,
- "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
- " w: %3s %3s %3s -> %-20s (%08x)\n", i,
- srcc[0], srcc[1], srcc[2], dstc,
+ "%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n"
+ " w: %3s %3s %3s %5s-> %-20s (%08x)\n", i,
+ srcc[0], srcc[1], srcc[2], srcc[3], dstc,
code->alu.inst[i].rgb_addr, srca[0], srca[1],
- srca[2], dsta, code->alu.inst[i].alpha_addr);
+ srca[2], srca[3], dsta,
+ code->alu.inst[i].alpha_addr);
for (j = 0; j < 3; ++j) {
int regc = code->alu.inst[i].rgb_inst >> (j * 7);
@@ -194,6 +216,24 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
}
} else if (d < 15) {
sprintf(buf, "%s.www", srca[d - 12]);
+ } else if (d < 20 ) {
+ switch(d) {
+ case R300_ALU_ARGC_SRCP_XYZ:
+ sprintf(buf, "srcp.xyz");
+ break;
+ case R300_ALU_ARGC_SRCP_XXX:
+ sprintf(buf, "srcp.xxx");
+ break;
+ case R300_ALU_ARGC_SRCP_YYY:
+ sprintf(buf, "srcp.yyy");
+ break;
+ case R300_ALU_ARGC_SRCP_ZZZ:
+ sprintf(buf, "srcp.zzz");
+ break;
+ case R300_ALU_ARGC_SRCP_WWW:
+ sprintf(buf, "srcp.www");
+ break;
+ }
} else if (d == 20) {
sprintf(buf, "0.0");
} else if (d == 21) {
@@ -231,6 +271,21 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
'x' + (char)(d % 3));
} else if (d < 12) {
sprintf(buf, "%s.w", srca[d - 9]);
+ } else if (d < 16) {
+ switch(d) {
+ case R300_ALU_ARGA_SRCP_X:
+ sprintf(buf, "srcp.x");
+ break;
+ case R300_ALU_ARGA_SRCP_Y:
+ sprintf(buf, "srcp.y");
+ break;
+ case R300_ALU_ARGA_SRCP_Z:
+ sprintf(buf, "srcp.z");
+ break;
+ case R300_ALU_ARGA_SRCP_W:
+ sprintf(buf, "srcp.w");
+ break;
+ }
} else if (d == 16) {
sprintf(buf, "0.0");
} else if (d == 17) {
@@ -247,11 +302,14 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
buf, (rega & 64) ? "|" : "");
}
- fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
+ fprintf(stderr, " xyz: %8s %8s %8s op: %08x %s\n"
" w: %8s %8s %8s op: %08x\n",
argc[0], argc[1], argc[2],
- code->alu.inst[i].rgb_inst, arga[0], arga[1],
- arga[2], code->alu.inst[i].alpha_inst);
+ code->alu.inst[i].rgb_inst,
+ code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ?
+ "NOP" : "",
+ arga[0], arga[1],arga[2],
+ code->alu.inst[i].alpha_inst);
}
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index eead2ea4260..3b2b06fc2bd 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -164,6 +164,53 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
code->alu.inst[ip].alpha_inst |= arg << (7*j);
}
+ /* Presubtract */
+ if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+ switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+ case RC_PRESUB_BIAS:
+ code->alu.inst[ip].rgb_inst |=
+ R300_ALU_SRCP_1_MINUS_2_SRC0;
+ break;
+ case RC_PRESUB_ADD:
+ code->alu.inst[ip].rgb_inst |=
+ R300_ALU_SRCP_SRC1_PLUS_SRC0;
+ break;
+ case RC_PRESUB_SUB:
+ code->alu.inst[ip].rgb_inst |=
+ R300_ALU_SRCP_SRC1_MINUS_SRC0;
+ break;
+ case RC_PRESUB_INV:
+ code->alu.inst[ip].rgb_inst |=
+ R300_ALU_SRCP_1_MINUS_SRC0;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+ switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+ case RC_PRESUB_BIAS:
+ code->alu.inst[ip].alpha_inst |=
+ R300_ALU_SRCP_1_MINUS_2_SRC0;
+ break;
+ case RC_PRESUB_ADD:
+ code->alu.inst[ip].alpha_inst |=
+ R300_ALU_SRCP_SRC1_PLUS_SRC0;
+ break;
+ case RC_PRESUB_SUB:
+ code->alu.inst[ip].alpha_inst |=
+ R300_ALU_SRCP_SRC1_MINUS_SRC0;
+ break;
+ case RC_PRESUB_INV:
+ code->alu.inst[ip].alpha_inst |=
+ R300_ALU_SRCP_1_MINUS_SRC0;
+ break;
+ default:
+ break;
+ }
+ }
+
if (inst->RGB.Saturate)
code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
if (inst->Alpha.Saturate)
@@ -198,6 +245,8 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
emit->node_flags |= R300_W_OUT;
c->code->writes_depth = 1;
}
+ if (inst->Nop)
+ code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
return 1;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index 5d5de2f1b2a..caa48fe478c 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -44,25 +44,25 @@ struct swizzle_data {
unsigned int hash; /**< swizzle value this matches */
unsigned int base; /**< base value for hw swizzle */
unsigned int stride; /**< difference in base between arg0/1/2 */
+ unsigned int srcp_stride; /**< difference in base between arg0/srcp */
};
static const struct swizzle_data native_swizzles[] = {
- {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4},
- {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4},
- {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4},
- {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4},
- {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1},
- {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1},
- {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1},
- {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1},
- {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0},
- {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0},
- {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0}
+ {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15},
+ {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15},
+ {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15},
+ {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15},
+ {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7},
+ {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0},
+ {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0},
+ {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0},
+ {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0},
+ {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0},
+ {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}
};
static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
-
/**
* Find a native RGB swizzle that matches the given swizzle.
* Returns 0 if none found.
@@ -205,7 +205,11 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
return 0;
}
- return sd->base + src*sd->stride;
+ if (src == RC_PAIR_PRESUB_SRC) {
+ return sd->base + sd->srcp_stride;
+ } else {
+ return sd->base + src*sd->stride;
+ }
}
@@ -215,6 +219,9 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
*/
unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
{
+ if (src == RC_PAIR_PRESUB_SRC) {
+ return R300_ALU_ARGA_SRCP_X + swizzle;
+ }
if (swizzle < 3)
return swizzle + 3*src;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 3220349f26c..54cff9169a4 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -260,6 +260,9 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
+ if (inst->Nop) {
+ code->inst[ip].inst0 |= R500_INST_NOP;
+ }
if (inst->Alpha.DepthWriteMask) {
code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
c->code->writes_depth = 1;
@@ -275,6 +278,40 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
if (inst->Alpha.Saturate)
code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
+ /* Set the presubtract operation. */
+ switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+ case RC_PRESUB_BIAS:
+ code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
+ break;
+ case RC_PRESUB_SUB:
+ code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
+ break;
+ case RC_PRESUB_ADD:
+ code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
+ break;
+ case RC_PRESUB_INV:
+ code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
+ break;
+ default:
+ break;
+ }
+ switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+ case RC_PRESUB_BIAS:
+ code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
+ break;
+ case RC_PRESUB_SUB:
+ code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
+ break;
+ case RC_PRESUB_ADD:
+ code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
+ break;
+ case RC_PRESUB_INV:
+ code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
+ break;
+ default:
+ break;
+ }
+
code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 85c2e43ad67..6d96ac9fdd9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -42,6 +42,7 @@ struct radeon_compiler {
/* Hardware specification. */
unsigned is_r500:1;
unsigned has_half_swizzles:1;
+ unsigned has_presub:1;
unsigned disable_optimizations:1;
unsigned max_temp_regs;
unsigned max_constants;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
index 0e6c62541fa..e73700f84a2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -29,6 +29,25 @@
#include "radeon_program.h"
+static void reads_normal_callback(
+ rc_read_write_chan_fn cb,
+ struct rc_instruction * fullinst,
+ struct rc_src_register src,
+ void * userdata)
+{
+ unsigned int refmask = 0;
+ unsigned int chan;
+ for(chan = 0; chan < 4; chan++) {
+ refmask |= 1 << GET_SWZ(src.Swizzle, chan);
+ }
+ refmask &= RC_MASK_XYZW;
+
+ if (refmask)
+ cb(userdata, fullinst, src.File, src.Index, refmask);
+
+ if (refmask && src.RelAddr)
+ cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
+}
static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata)
{
@@ -36,47 +55,81 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
- unsigned int refmask = 0;
if (inst->SrcReg[src].File == RC_FILE_NONE)
return;
- for(unsigned int chan = 0; chan < 4; ++chan)
- refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan);
-
- refmask &= RC_MASK_XYZW;
-
- if (refmask)
- cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask);
-
- if (refmask && inst->SrcReg[src].RelAddr)
- cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
+ if (inst->SrcReg[src].File == RC_FILE_PRESUB) {
+ unsigned int i;
+ unsigned int srcp_regs = rc_presubtract_src_reg_count(
+ inst->PreSub.Opcode);
+ for( i = 0; i < srcp_regs; i++) {
+ reads_normal_callback(cb, fullinst,
+ inst->PreSub.SrcReg[i],
+ userdata);
+ }
+ } else {
+ reads_normal_callback(cb, fullinst,
+ inst->SrcReg[src], userdata);
+ }
}
}
-static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+static void pair_get_src_refmasks(unsigned int * refmasks,
+ struct rc_pair_instruction * inst,
+ unsigned int swz, unsigned int src)
{
- struct rc_pair_instruction * inst = &fullinst->U.P;
- unsigned int refmasks[3] = { 0, 0, 0 };
-
- if (inst->RGB.Opcode != RC_OPCODE_NOP) {
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+ if (swz >= 4)
+ return;
+
+ if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) {
+ if(src == RC_PAIR_PRESUB_SRC) {
+ unsigned int i;
+ int srcp_regs =
+ rc_presubtract_src_reg_count(
+ inst->RGB.Src[src].Index);
+ for(i = 0; i < srcp_regs; i++) {
+ refmasks[i] |= 1 << swz;
+ }
+ }
+ else {
+ refmasks[src] |= 1 << swz;
+ }
+ }
- for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
- for(unsigned int chan = 0; chan < 3; ++chan) {
- unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
- if (swz < 4)
- refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz;
+ if (swz == RC_SWIZZLE_W) {
+ if (src == RC_PAIR_PRESUB_SRC) {
+ unsigned int i;
+ int srcp_regs = rc_presubtract_src_reg_count(
+ inst->Alpha.Src[src].Index);
+ for(i = 0; i < srcp_regs; i++) {
+ refmasks[i] |= 1 << swz;
}
}
+ else {
+ refmasks[src] |= 1 << swz;
+ }
}
+}
- if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+ unsigned int refmasks[3] = { 0, 0, 0 };
- for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
- if (inst->Alpha.Arg[arg].Swizzle < 4)
- refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle;
+ unsigned int arg;
+
+ for(arg = 0; arg < 3; ++arg) {
+ unsigned int chan;
+ for(chan = 0; chan < 3; ++chan) {
+ unsigned int swz_rgb =
+ GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
+ unsigned int swz_alpha =
+ GET_SWZ(inst->Alpha.Arg[arg].Swizzle, chan);
+ pair_get_src_refmasks(refmasks, inst, swz_rgb,
+ inst->RGB.Arg[arg].Source);
+ pair_get_src_refmasks(refmasks, inst, swz_alpha,
+ inst->Alpha.Arg[arg].Source);
}
}
@@ -212,10 +265,25 @@ static void remap_normal_instruction(struct rc_instruction * fullinst,
rc_register_file file = inst->SrcReg[src].File;
unsigned int index = inst->SrcReg[src].Index;
- cb(userdata, fullinst, &file, &index);
+ if (file == RC_FILE_PRESUB) {
+ unsigned int i;
+ unsigned int srcp_srcs = rc_presubtract_src_reg_count(
+ inst->PreSub.Opcode);
+ for(i = 0; i < srcp_srcs; i++) {
+ file = inst->PreSub.SrcReg[i].File;
+ index = inst->PreSub.SrcReg[i].Index;
+ cb(userdata, fullinst, &file, &index);
+ inst->PreSub.SrcReg[i].File = file;
+ inst->PreSub.SrcReg[i].Index = index;
+ }
- inst->SrcReg[src].File = file;
- inst->SrcReg[src].Index = index;
+ }
+ else {
+ cb(userdata, fullinst, &file, &index);
+
+ inst->SrcReg[src].File = file;
+ inst->SrcReg[src].Index = index;
+ }
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index 8e994671064..e01ba85aa56 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -30,6 +30,13 @@
#include "radeon_compiler.h"
#include "radeon_swizzle.h"
+struct peephole_state {
+ struct rc_instruction * Inst;
+ /** Stores a bitmask of the components that are still "alive" (i.e.
+ * they have not been written to since Inst was executed.)
+ */
+ unsigned int WriteMask;
+};
static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
{
@@ -54,7 +61,7 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct
return combine;
}
-struct peephole_state {
+struct copy_propagate_state {
struct radeon_compiler * C;
struct rc_instruction * Mov;
unsigned int Conflict:1;
@@ -84,10 +91,10 @@ struct peephole_state {
* @param index The index of the source register.
* @param mask The components of the source register that are being read from.
*/
-static void peephole_scan_read(void * data, struct rc_instruction * inst,
+static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
- struct peephole_state * s = data;
+ struct copy_propagate_state * s = data;
/* XXX This could probably be handled better. */
if (file == RC_FILE_ADDRESS) {
@@ -123,10 +130,10 @@ static void peephole_scan_read(void * data, struct rc_instruction * inst,
}
}
-static void peephole_scan_write(void * data, struct rc_instruction * inst,
+static void copy_propagate_scan_write(void * data, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
- struct peephole_state * s = data;
+ struct copy_propagate_state * s = data;
if (s->BranchDepth < 0)
return;
@@ -146,9 +153,9 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst,
}
}
-static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov)
+static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
{
- struct peephole_state s;
+ struct copy_propagate_state s;
if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
inst_mov->U.I.DstReg.RelAddr ||
@@ -170,14 +177,23 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
for(struct rc_instruction * inst = inst_mov->Next;
inst != &c->Program.Instructions;
inst = inst->Next) {
+ const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
/* XXX In the future we might be able to make the optimizer
* smart enough to handle loops. */
if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
|| inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
return;
}
- rc_for_all_reads_mask(inst, peephole_scan_read, &s);
- rc_for_all_writes_mask(inst, peephole_scan_write, &s);
+
+ /* It is possible to do copy propagation in this situation,
+ * just not right now, see peephole_add_presub_inv() */
+ if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
+ info->NumSrcRegs > 2) {
+ return;
+ }
+
+ rc_for_all_reads_mask(inst, copy_propagate_scan_read, &s);
+ rc_for_all_writes_mask(inst, copy_propagate_scan_write, &s);
if (s.Conflict)
return;
@@ -206,7 +222,6 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
inst != &c->Program.Instructions;
inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
-
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
@@ -217,8 +232,11 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
refmask |= (1 << swz) & RC_MASK_XYZW;
}
- if ((refmask & s.MovMask) == refmask)
+ if ((refmask & s.MovMask) == refmask) {
inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
+ if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
+ inst->U.I.PreSub = s.Mov->U.I.PreSub;
+ }
}
}
@@ -283,7 +301,6 @@ static int is_src_uniform_constant(struct rc_src_register src,
return 1;
}
-
static void constant_folding_mad(struct rc_instruction * inst)
{
rc_swizzle swz;
@@ -379,7 +396,6 @@ static void constant_folding_add(struct rc_instruction * inst)
}
}
-
/**
* Replace 0.0, 1.0 and 0.5 immediate constants by their
* respective swizzles. Simplify instructions like ADD dst, src, 0;
@@ -454,6 +470,204 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction *
constant_folding_add(inst);
}
+/**
+ * This function returns a writemask that indicates which components are
+ * read by src and also written by dst.
+ */
+static unsigned int src_reads_dst_mask(struct rc_src_register src,
+ struct rc_dst_register dst)
+{
+ unsigned int mask = 0;
+ unsigned int i;
+ if (dst.File != src.File || dst.Index != src.Index) {
+ return 0;
+ }
+
+ for(i = 0; i < 4; i++) {
+ mask |= 1 << GET_SWZ(src.Swizzle, i);
+ }
+ mask &= RC_MASK_XYZW;
+
+ return mask;
+}
+
+/* Return 1 if the source register has a constant swizzle (e.g. 0, 0.5, 1.0)
+ * in any of its channels. Return 0 otherwise. */
+static int src_has_const_swz(struct rc_src_register src) {
+ int chan;
+ for(chan = 0; chan < 4; chan++) {
+ unsigned int swz = GET_SWZ(src.Swizzle, chan);
+ if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
+ || swz == RC_SWIZZLE_ONE) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static void peephole_scan_write(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct peephole_state * s = data;
+ if(s->Inst->U.I.DstReg.File == file
+ && s->Inst->U.I.DstReg.Index == index) {
+ unsigned int common_mask = s->WriteMask & mask;
+ s->WriteMask &= ~common_mask;
+ }
+}
+
+/**
+ * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
+ * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
+ * of the add instruction must have the constant 1 swizzle. This function
+ * does not check const registers to see if their value is 1.0, so it should
+ * be called after the constant_folding optimization.
+ * @return
+ * 0 if the ADD instruction is still part of the program.
+ * 1 if the ADD instruction is no longer part of the program.
+ */
+static int peephole_add_presub_inv(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst_add)
+{
+ unsigned int i, swz, mask;
+ unsigned int can_remove = 0;
+ unsigned int cant_sub = 0;
+ struct rc_instruction * inst;
+ struct peephole_state s;
+
+ if (inst_add->U.I.SaturateMode)
+ return 0;
+
+ mask = inst_add->U.I.DstReg.WriteMask;
+
+ /* Check if src0 is 1. */
+ /* XXX It would be nice to use is_src_uniform_constant here, but that
+ * function only works if the register's file is RC_FILE_NONE */
+ for(i = 0; i < 4; i++ ) {
+ swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
+ if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
+ && swz != RC_SWIZZLE_ONE) {
+ return 0;
+ }
+ }
+
+ /* Check src1. */
+ if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
+ inst_add->U.I.DstReg.WriteMask
+ || inst_add->U.I.SrcReg[1].Abs
+ || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
+ && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
+ || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
+
+ return 0;
+ }
+
+ /* Setup the peephole_state information. */
+ s.Inst = inst_add;
+ s.WriteMask = inst_add->U.I.DstReg.WriteMask;
+
+ /* For all instructions that read inst_add->U.I.DstReg before it is
+ * written again, use the 1 - src0 presubtract instead. */
+ for(inst = inst_add->Next; inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ const struct rc_opcode_info * info =
+ rc_get_opcode_info(inst->U.I.Opcode);
+
+ for(i = 0; i < info->NumSrcRegs; i++) {
+ if(inst_add->U.I.DstReg.WriteMask !=
+ src_reads_dst_mask(inst->U.I.SrcReg[i],
+ inst_add->U.I.DstReg)) {
+ continue;
+ }
+ if (cant_sub) {
+ can_remove = 0;
+ break;
+ }
+ /* XXX: There are some situations where instructions
+ * with more than 2 src registers can use the
+ * presubtract select, but to keep things simple we
+ * will disable presubtract on these instructions for
+ * now. Note: This if statement should not be pulled
+ * outside of the loop, because it only applies to
+ * instructions that could potentially use the
+ * presubtract source. */
+ if (info->NumSrcRegs > 2) {
+ can_remove = 0;
+ break;
+ }
+
+ /* We can't use more than one presubtract value in an
+ * instruction, unless the two presubtract operations
+ * are the same and read from the same registers. */
+ if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+ if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV
+ || inst->U.I.PreSub.SrcReg[0].File !=
+ inst_add->U.I.SrcReg[1].File
+ || inst->U.I.PreSub.SrcReg[0].Index !=
+ inst_add->U.I.SrcReg[1].Index) {
+
+ can_remove = 0;
+ break;
+ }
+ }
+ /* We must be careful not to modify inst_add, since it
+ * is possible it will remain part of the program. */
+ inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+ inst->U.I.PreSub.SrcReg[0].Negate = 0;
+ inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
+ inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i],
+ inst->U.I.PreSub.SrcReg[0]);
+
+ inst->U.I.SrcReg[i].File = RC_FILE_PRESUB;
+ inst->U.I.SrcReg[i].Index = RC_PRESUB_INV;
+ can_remove = 1;
+ }
+ if(!can_remove)
+ break;
+ rc_for_all_writes_mask(inst, peephole_scan_write, &s);
+ /* If all components of inst_add's destination register have
+ * been written to by subsequent instructions, the original
+ * value of the destination register is no longer valid and
+ * we can't keep doing substitutions. */
+ if (!s.WriteMask){
+ break;
+ }
+ /* Make sure this instruction doesn't write to the presubtract source. */
+ if (inst->U.I.DstReg.WriteMask &
+ src_reads_dst_mask(inst_add->U.I.SrcReg[1],
+ inst->U.I.DstReg)
+ || info->IsFlowControl) {
+ cant_sub = 1;
+ }
+ }
+ if(can_remove) {
+ rc_remove_instruction(inst_add);
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * @return
+ * 0 if inst is still part of the program.
+ * 1 if inst is no longer part of the program.
+ */
+static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+ switch(inst->U.I.Opcode){
+ case RC_OPCODE_ADD:
+ if (c->has_presub) {
+ if(peephole_add_presub_inv(c, inst))
+ return 1;
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
void rc_optimize(struct radeon_compiler * c, void *user)
{
struct rc_instruction * inst = c->Program.Instructions.Next;
@@ -463,8 +677,11 @@ void rc_optimize(struct radeon_compiler * c, void *user)
constant_folding(c, cur);
+ if(peephole(c, cur))
+ continue;
+
if (cur->U.I.Opcode == RC_OPCODE_MOV) {
- peephole(c, cur);
+ copy_propagate(c, cur);
/* cur may no longer be part of the program */
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index 8e232bb2436..32c54fd74bc 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -279,11 +279,118 @@ static int destructive_merge_instructions(
struct rc_pair_instruction * rgb,
struct rc_pair_instruction * alpha)
{
+ const struct rc_opcode_info * opcode;
assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
+ /* Presubtract registers need to be merged first so that registers
+ * needed by the presubtract operation can be placed in src0 and/or
+ * src1. */
+
+ /* Merge the rgb presubtract registers. */
+ const struct rc_opcode_info * rgb_info =
+ rc_get_opcode_info(rgb->RGB.Opcode);
+ if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+ unsigned int srcp_src;
+ unsigned int srcp_regs;
+ if (rgb->RGB.Src[RC_PAIR_PRESUB_SRC].Used)
+ return 0;
+ srcp_regs = rc_presubtract_src_reg_count(
+ alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Index);
+ for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
+ unsigned int arg;
+ int free_source;
+ struct radeon_pair_instruction_source srcp =
+ alpha->RGB.Src[srcp_src];
+ struct radeon_pair_instruction_source temp;
+ /* 2nd arg of 1 means this is an rgb source.
+ * 3rd arg of 0 means this is not an alpha source. */
+ free_source = rc_pair_alloc_source(rgb, 1, 0,
+ srcp.File, srcp.Index);
+ /* If free_source == srcp_src, then the
+ * presubtract source is already in the correct place. */
+ if (free_source == srcp_src)
+ continue;
+ /* If free_source < 0 then there are no free source
+ * slots. */
+ if (free_source < 0)
+ return 0;
+ /* Shuffle the sources, so we can put the
+ * presubtract source in the correct place. */
+ for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
+ /*If this arg does not read from an rgb source,
+ * do nothing. */
+ if (rc_source_type_that_arg_reads(
+ rgb->RGB.Arg[arg].Source,
+ rgb->RGB.Arg[arg].Swizzle, 3)
+ != RC_PAIR_SOURCE_RGB) {
+ continue;
+ }
+ if (rgb->RGB.Arg[arg].Source == srcp_src)
+ rgb->RGB.Arg[arg].Source = free_source;
+ /* We need to do this just in case register
+ * is one of the sources already, but in the
+ * wrong spot. */
+ else if(rgb->RGB.Arg[arg].Source == free_source)
+ rgb->RGB.Arg[arg].Source = srcp_src;
+ }
+ temp = rgb->RGB.Src[srcp_src];
+ rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
+ rgb->RGB.Src[free_source] = temp;
+ }
+ }
+
+ /* Merge the alpha presubtract registers */
+ if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+ unsigned int srcp_src;
+ unsigned int srcp_regs;
+ if(rgb->Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
+ return 0;
+
+ srcp_regs = rc_presubtract_src_reg_count(
+ alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
+ for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
+ unsigned int arg;
+ int free_source;
+ struct radeon_pair_instruction_source srcp =
+ alpha->Alpha.Src[srcp_src];
+ struct radeon_pair_instruction_source temp;
+ /* 2nd arg of 0 means this is not an rgb source.
+ * 3rd arg of 1 means this is an alpha source. */
+ free_source = rc_pair_alloc_source(rgb, 0, 1,
+ srcp.File, srcp.Index);
+ /* If free_source == srcp_src, then the
+ * presubtract source is already in the correct place. */
+ if (free_source == srcp_src)
+ continue;
+ /* If free_source < 0 then there are no free source
+ * slots. */
+ if (free_source < 0)
+ return 0;
+ /* Shuffle the sources, so we can put the
+ * presubtract source in the correct place. */
+ for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
+ /*If this arg does not read from an alpha
+ * source, do nothing. */
+ if (rc_source_type_that_arg_reads(
+ rgb->RGB.Arg[arg].Source,
+ rgb->RGB.Arg[arg].Swizzle, 3)
+ != RC_PAIR_SOURCE_ALPHA) {
+ continue;
+ }
+ if (rgb->RGB.Arg[arg].Source == srcp_src)
+ rgb->RGB.Arg[arg].Source = free_source;
+ else if (rgb->RGB.Arg[arg].Source == free_source)
+ rgb->RGB.Arg[arg].Source = srcp_src;
+ }
+ temp = rgb->Alpha.Src[srcp_src];
+ rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
+ rgb->Alpha.Src[free_source] = temp;
+ }
+ }
+
/* Copy alpha args into rgb */
- const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
+ opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
unsigned int srcrgb = 0;
@@ -351,7 +458,52 @@ static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_i
return 0;
}
+/**
+ * Flag the instruction before \p emitted as needing a nop when it writes a
+ * temporary register that \p emitted reads through a presubtract source.
+ *
+ * Nothing is done when the previous instruction is not a pair instruction
+ * or when \p emitted uses neither an rgb nor an alpha presubtract source.
+ *
+ * \param emitted The instruction that was just emitted; its U.P (pair)
+ * member is read and its predecessor's U.P.Nop flag may be set.
+ */
+static void presub_nop(struct rc_instruction * emitted) {
+	int prev_rgb_index, prev_alpha_index, i, num_src;
+
+	/* We don't need a nop if the previous instruction is a TEX. */
+	if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
+		return;
+	}
+
+	/* A value of -1 means the previous instruction does not write that
+	 * half, so it can never match a presubtract source index. */
+	if (emitted->Prev->U.P.RGB.WriteMask)
+		prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
+	else
+		prev_rgb_index = -1;
+	if (emitted->Prev->U.P.Alpha.WriteMask)
+		prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
+	else
+		prev_alpha_index = -1;
+
+	/* Check the rgb presubtract sources against the previous writes. */
+	if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+		num_src = rc_presubtract_src_reg_count(
+				emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
+		for (i = 0; i < num_src; i++) {
+			unsigned int index = emitted->U.P.RGB.Src[i].Index;
+			if (emitted->U.P.RGB.Src[i].File != RC_FILE_TEMPORARY)
+				continue;
+			/* Test the sentinel explicitly instead of relying on
+			 * the signed -> unsigned conversion of -1. */
+			if ((prev_rgb_index >= 0
+				&& index == (unsigned int)prev_rgb_index)
+			    || (prev_alpha_index >= 0
+				&& index == (unsigned int)prev_alpha_index)) {
+				emitted->Prev->U.P.Nop = 1;
+				return;
+			}
+		}
+	}
+
+	/* Check the alpha presubtract sources against the previous writes. */
+	if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
+		return;
+
+	num_src = rc_presubtract_src_reg_count(
+			emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
+	for (i = 0; i < num_src; i++) {
+		unsigned int index = emitted->U.P.Alpha.Src[i].Index;
+		if (emitted->U.P.Alpha.Src[i].File != RC_FILE_TEMPORARY)
+			continue;
+		if ((prev_rgb_index >= 0
+			&& index == (unsigned int)prev_rgb_index)
+		    || (prev_alpha_index >= 0
+			&& index == (unsigned int)prev_alpha_index)) {
+			emitted->Prev->U.P.Nop = 1;
+			return;
+		}
+	}
+}
/**
* Find a good ALU instruction or pair of ALU instruction and emit it.
*
@@ -408,6 +560,10 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
commit_alu_instruction(s, sinst);
success: ;
}
+ /* If the instruction we just emitted uses a presubtract value, and
+ * the presubtract sources were written by the previous instruction,
+ * the previous instruction needs a nop. */
+ presub_nop(before->Prev);
}
static void scan_read(void * data, struct rc_instruction * inst,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
index 9fe39344f8e..4cdb7ea748e 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -127,6 +127,18 @@ static void classify_instruction(struct rc_sub_instruction * inst,
}
}
+/* Record which halves of a register the swizzle of src selects from.
+ * For each of the four swizzle channels, a selector below 3 sets *rgb to 1
+ * and a selector equal to 3 sets *alpha to 1; larger selectors are ignored.
+ * The flags are only ever set, never cleared, so callers must initialize
+ * them. */
+static void src_uses(struct rc_src_register src, unsigned int * rgb,
+						unsigned int * alpha)
+{
+	int chan = 0;
+	while (chan < 4) {
+		const unsigned int selector = GET_SWZ(src.Swizzle, chan);
+		chan++;
+		if (selector >= 4)
+			continue;
+		if (selector == 3)
+			*alpha = 1;
+		else
+			*rgb = 1;
+	}
+}
/**
* Fill the given ALU instruction's opcodes and source operands into the given pair,
@@ -158,12 +170,51 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
int i;
+ /* Presubtract handling:
+ * We need to make sure that the values used by the presubtract
+ * operation end up in src0 or src1. */
+ if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
+ /* rc_pair_alloc_source() will fill in data for
+ * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
+ int j;
+ for(j = 0; j < 3; j++) {
+ int src_regs;
+ if(inst->SrcReg[j].File != RC_FILE_PRESUB)
+ continue;
+
+ src_regs = rc_presubtract_src_reg_count(
+ inst->PreSub.Opcode);
+ for(i = 0; i < src_regs; i++) {
+ unsigned int rgb = 0;
+ unsigned int alpha = 0;
+ src_uses(inst->SrcReg[j], &rgb, &alpha);
+ if(rgb) {
+ pair->RGB.Src[i].File =
+ inst->PreSub.SrcReg[i].File;
+ pair->RGB.Src[i].Index =
+ inst->PreSub.SrcReg[i].Index;
+ pair->RGB.Src[i].Used = 1;
+ }
+ if(alpha) {
+ pair->Alpha.Src[i].File =
+ inst->PreSub.SrcReg[i].File;
+ pair->Alpha.Src[i].Index =
+ inst->PreSub.SrcReg[i].Index;
+ pair->Alpha.Src[i].Used = 1;
+ }
+ }
+ }
+ }
+
for(i = 0; i < opcode->NumSrcRegs; ++i) {
int source;
if (needrgb && !istranscendent) {
unsigned int srcrgb = 0;
unsigned int srcalpha = 0;
int j;
+ /* We don't care about the alpha channel here. We only
+ * want the part of the swizzle that writes to rgb,
+ * since we are creating an rgb instruction. */
for(j = 0; j < 3; ++j) {
unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
if (swz < 3)
@@ -173,6 +224,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
}
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
inst->SrcReg[i].File, inst->SrcReg[i].Index);
+ assert(source != -1);
pair->RGB.Arg[i].Source = source;
pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
@@ -188,6 +240,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
srcalpha = 1;
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
inst->SrcReg[i].File, inst->SrcReg[i].Index);
+ assert(source != -1);
pair->Alpha.Arg[i].Source = source;
pair->Alpha.Arg[i].Swizzle = swz;
pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index 5582f56d921..f0a77d7b539 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -39,7 +39,7 @@
struct radeon_compiler;
struct rc_src_register {
- unsigned int File:3;
+ unsigned int File:4;
/** Negative values may be used for relative addressing. */
signed int Index:(RC_REGISTER_INDEX_BITS+1);
@@ -64,6 +64,11 @@ struct rc_dst_register {
unsigned int WriteMask:4;
};
+/**
+ * Describes a presubtract operation attached to an instruction: the
+ * operation itself plus the registers it takes as inputs.
+ */
+struct rc_presub_instruction {
+ /* Which presubtract operation to perform (RC_PRESUB_NONE for none). */
+ rc_presubtract_op Opcode;
+ /* Inputs of the operation; rc_presubtract_src_reg_count(Opcode) gives
+ * how many of the two entries are meaningful. */
+ struct rc_src_register SrcReg[2];
+};
+
/**
* Instructions are maintained by the compiler in a doubly linked list
* of these structures.
@@ -108,6 +113,10 @@ struct rc_sub_instruction {
/** True if tex instruction should do shadow comparison */
unsigned int TexShadow:1;
/*@}*/
+
+ /** This holds information about the presubtract operation used by
+ * this instruction. */
+ struct rc_presub_instruction PreSub;
};
typedef enum {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
index 2ddf60b6774..9dcd44c522d 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
@@ -79,7 +79,13 @@ typedef enum {
/**
* Indicates a special register, see RC_SPECIAL_xxx.
*/
- RC_FILE_SPECIAL
+ RC_FILE_SPECIAL,
+
+ /**
+ * Indicates this register should use the result of the presubtract
+ * operation.
+ */
+ RC_FILE_PRESUB
} rc_register_file;
enum {
@@ -147,4 +153,32 @@ typedef enum {
RC_ALURESULT_W
} rc_write_aluresult;
+/**
+ * Presubtract operations. For registers with the RC_FILE_PRESUB file, the
+ * register index stores one of these values.
+ */
+typedef enum {
+ /** No presubtract operation is used. */
+ RC_PRESUB_NONE = 0,
+
+ /** 1 - 2 * src0 */
+ RC_PRESUB_BIAS,
+
+ /** src1 - src0 */
+ RC_PRESUB_SUB,
+
+ /** src1 + src0 */
+ RC_PRESUB_ADD,
+
+ /** 1 - src0 */
+ RC_PRESUB_INV
+} rc_presubtract_op;
+
+/**
+ * Return how many source registers the presubtract operation op reads:
+ * 2 for ADD and SUB, 1 for BIAS and INV, and 0 for anything else
+ * (including RC_PRESUB_NONE).
+ */
+static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
+	if (op == RC_PRESUB_ADD || op == RC_PRESUB_SUB)
+		return 2;
+	if (op == RC_PRESUB_BIAS || op == RC_PRESUB_INV)
+		return 1;
+	return 0;
+}
#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index ee839596aab..5a50584b725 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -38,26 +38,52 @@ int rc_pair_alloc_source(struct rc_pair_instruction *pair,
{
int candidate = -1;
int candidate_quality = -1;
+ unsigned int alpha_used = 0;
+ unsigned int rgb_used = 0;
int i;
if ((!rgb && !alpha) || file == RC_FILE_NONE)
return 0;
+ if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+ if (file == RC_FILE_PRESUB) {
+ if (index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+ return -1;
+ }
+ } else {
+ rgb_used++;
+ }
+ }
+
+ if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+ if (file == RC_FILE_PRESUB) {
+ if (index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+ return -1;
+ }
+ } else {
+ alpha_used++;
+ }
+ }
+
for(i = 0; i < 3; ++i) {
int q = 0;
if (rgb) {
if (pair->RGB.Src[i].Used) {
if (pair->RGB.Src[i].File != file ||
- pair->RGB.Src[i].Index != index)
+ pair->RGB.Src[i].Index != index) {
+ rgb_used++;
continue;
+ }
q++;
}
}
if (alpha) {
if (pair->Alpha.Src[i].Used) {
if (pair->Alpha.Src[i].File != file ||
- pair->Alpha.Src[i].Index != index)
+ pair->Alpha.Src[i].Index != index) {
+ alpha_used++;
continue;
+ }
q++;
}
}
@@ -66,19 +92,156 @@ int rc_pair_alloc_source(struct rc_pair_instruction *pair,
candidate = i;
}
}
+ if (candidate < 0 || (rgb && rgb_used > 2) || (alpha && alpha_used > 2))
+ return -1;
- if (candidate >= 0) {
- if (rgb) {
- pair->RGB.Src[candidate].Used = 1;
- pair->RGB.Src[candidate].File = file;
- pair->RGB.Src[candidate].Index = index;
+ /* candidate >= 0 */
+
+ /* Even if we have a presub src, the above loop needs to run,
+ * because we still need to make sure there is a free source.
+ */
+ if (file == RC_FILE_PRESUB)
+ candidate = RC_PAIR_PRESUB_SRC;
+
+ if (rgb) {
+ pair->RGB.Src[candidate].Used = 1;
+ pair->RGB.Src[candidate].File = file;
+ pair->RGB.Src[candidate].Index = index;
+ if (candidate == RC_PAIR_PRESUB_SRC) {
+ /* For registers with the RC_FILE_PRESUB file,
+ * the index stores the presubtract op. */
+ int src_regs = rc_presubtract_src_reg_count(index);
+ for(i = 0; i < src_regs; i++) {
+ pair->RGB.Src[i].Used = 1;
+ }
}
- if (alpha) {
- pair->Alpha.Src[candidate].Used = 1;
- pair->Alpha.Src[candidate].File = file;
- pair->Alpha.Src[candidate].Index = index;
+ }
+ if (alpha) {
+ pair->Alpha.Src[candidate].Used = 1;
+ pair->Alpha.Src[candidate].File = file;
+ pair->Alpha.Src[candidate].Index = index;
+ if (candidate == RC_PAIR_PRESUB_SRC) {
+ /* For registers with the RC_FILE_PRESUB file,
+ * the index stores the presubtract op. */
+ int src_regs = rc_presubtract_src_reg_count(index);
+ for(i=0; i < src_regs; i++) {
+ pair->Alpha.Src[i].Used = 1;
+ }
}
}
return candidate;
}
+
+/* Invoke cb for the source(s) behind one argument of a pair instruction.
+ *
+ * swz selects the source bank: RC_SWIZZLE_W picks pair->Alpha.Src, any other
+ * value up to 3 picks pair->RGB.Src. When src is RC_PAIR_PRESUB_SRC, cb is
+ * called once for each register consumed by the presubtract operation stored
+ * in that bank's presubtract slot; otherwise cb is called once for slot src.
+ */
+static void pair_foreach_source_callback(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb,
+	unsigned int swz,
+	unsigned int src)
+{
+	struct radeon_pair_instruction_source * bank;
+	unsigned int reg;
+	unsigned int reg_count;
+
+	/* swz > 3 means that the swizzle is either not used, or a constant
+	 * swizzle (e.g. 0, 1, 0.5). */
+	if (swz > 3)
+		return;
+
+	bank = (swz == RC_SWIZZLE_W) ? pair->Alpha.Src : pair->RGB.Src;
+
+	if (src != RC_PAIR_PRESUB_SRC) {
+		cb(data, &bank[src]);
+		return;
+	}
+
+	/* The presubtract slot's Index holds the operation, which determines
+	 * how many of the leading slots feed it. */
+	reg_count = rc_presubtract_src_reg_count(
+				bank[RC_PAIR_PRESUB_SRC].Index);
+	for (reg = 0; reg < reg_count; reg++)
+		cb(data, &bank[reg]);
+}
+
+/**
+ * Call cb (with data) for the sources read by each argument of the alpha
+ * instruction in pair. The number of arguments comes from the opcode info of
+ * pair->Alpha.Opcode, and each argument is classified by channel 0 of its
+ * swizzle.
+ */
+void rc_pair_foreach_source_that_alpha_reads(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb)
+{
+	const struct rc_opcode_info * const opinfo =
+				rc_get_opcode_info(pair->Alpha.Opcode);
+	unsigned int arg = 0;
+
+	while (arg < opinfo->NumSrcRegs) {
+		const unsigned int arg_swz =
+				GET_SWZ(pair->Alpha.Arg[arg].Swizzle, 0);
+		const unsigned int arg_src = pair->Alpha.Arg[arg].Source;
+
+		pair_foreach_source_callback(pair, data, cb, arg_swz, arg_src);
+		arg++;
+	}
+}
+
+/**
+ * Call cb (with data) for the sources read by each argument of the rgb
+ * instruction in pair. The number of arguments comes from the opcode info of
+ * pair->RGB.Opcode.
+ *
+ * Each argument is classified by the first of its swizzle channels that
+ * selects X, Y, Z or W. Arguments whose channels select none of these are
+ * passed on as RC_SWIZZLE_UNUSED, which the callback helper ignores.
+ */
+void rc_pair_foreach_source_that_rgb_reads(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb)
+{
+	unsigned int i;
+	const struct rc_opcode_info * info =
+				rc_get_opcode_info(pair->RGB.Opcode);
+
+	for(i = 0; i < info->NumSrcRegs; i++) {
+		unsigned int chan;
+		unsigned int swz = RC_SWIZZLE_UNUSED;
+		/* Find a swizzle that is either X,Y,Z,or W. We assume here
+		 * that if one channel swizzles X,Y, or Z, then none of the
+		 * other channels swizzle W, and vice-versa.
+		 * NOTE(review): only three channels are scanned because rgb
+		 * argument swizzles appear to carry three channels (they are
+		 * built with a 0x1ff mask) -- confirm against the struct. */
+		for(chan = 0; chan < 3; chan++) {
+			const unsigned int chan_swz =
+				GET_SWZ(pair->RGB.Arg[i].Swizzle, chan);
+			if(chan_swz == RC_SWIZZLE_X || chan_swz == RC_SWIZZLE_Y
+			|| chan_swz == RC_SWIZZLE_Z || chan_swz == RC_SWIZZLE_W) {
+				/* Stop at the first match; continuing would
+				 * overwrite the result with later channels. */
+				swz = chan_swz;
+				break;
+			}
+		}
+		pair_foreach_source_callback(pair, data, cb,
+					swz,
+					pair->RGB.Arg[i].Source);
+	}
+}
+
+/**
+ * Classify what an argument's swizzle reads.
+ *
+ * The first `channels` channels of `swizzle` are examined. Returns
+ * RC_PAIR_SOURCE_RGB when a channel selects X, Y or Z, RC_PAIR_SOURCE_ALPHA
+ * when a channel selects W, and RC_PAIR_SOURCE_NONE when no channel selects
+ * any of those. Mixing X/Y/Z with W in one swizzle is not expected and trips
+ * an assertion. The `source` parameter is not used by this function.
+ */
+rc_pair_source_type rc_source_type_that_arg_reads(
+	unsigned int source,
+	unsigned int swizzle,
+	unsigned int channels)
+{
+	unsigned int idx;
+	int reads_rgb = 0;
+	int reads_alpha = 0;
+
+	for (idx = 0; idx < channels; idx++) {
+		switch (GET_SWZ(swizzle, idx)) {
+		case RC_SWIZZLE_X:
+		case RC_SWIZZLE_Y:
+		case RC_SWIZZLE_Z:
+			reads_rgb = 1;
+			break;
+		case RC_SWIZZLE_W:
+			reads_alpha = 1;
+			break;
+		default:
+			/* Unused or constant swizzle: reads no source. */
+			break;
+		}
+	}
+	assert(!(reads_rgb && reads_alpha));
+
+	if (reads_rgb)
+		return RC_PAIR_SOURCE_RGB;
+	if (reads_alpha)
+		return RC_PAIR_SOURCE_ALPHA;
+	return RC_PAIR_SOURCE_NONE;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index ef5a0347009..e0061e454bf 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -49,6 +49,11 @@ struct radeon_compiler;
* see \ref rc_pair_translate
*/
+/* For rgb and alpha instructions when arg[n].Source = RC_PAIR_PRESUB_SRC, then
+ * the presubtract value will be used, and
+ * {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB.
+ */
+#define RC_PAIR_PRESUB_SRC 3
struct radeon_pair_instruction_source {
unsigned int Used:1;
@@ -64,7 +69,7 @@ struct radeon_pair_instruction_rgb {
unsigned int OutputWriteMask:3;
unsigned int Saturate:1;
- struct radeon_pair_instruction_source Src[3];
+ struct radeon_pair_instruction_source Src[4];
struct {
unsigned int Source:2;
@@ -83,7 +88,7 @@ struct radeon_pair_instruction_alpha {
unsigned int DepthWriteMask:1;
unsigned int Saturate:1;
- struct radeon_pair_instruction_source Src[3];
+ struct radeon_pair_instruction_source Src[4];
struct {
unsigned int Source:2;
@@ -99,8 +104,17 @@ struct rc_pair_instruction {
unsigned int WriteALUResult:2;
unsigned int ALUResultCompare:3;
+ unsigned int Nop:1;
};
+/* Callback type used by the rc_pair_foreach_source_that_*_reads() helpers.
+ * The first argument is the caller-supplied data pointer, the second is the
+ * source being visited. */
+typedef void (*rc_pair_foreach_src_fn)
+ (void *, struct radeon_pair_instruction_source *);
+
+/* Result of rc_source_type_that_arg_reads(): which kind of source, if any,
+ * an argument's swizzle reads from. */
+typedef enum {
+ RC_PAIR_SOURCE_NONE = 0,
+ RC_PAIR_SOURCE_RGB,
+ RC_PAIR_SOURCE_ALPHA
+} rc_pair_source_type;
/**
* General helper functions for dealing with the paired instruction format.
@@ -109,6 +123,21 @@ struct rc_pair_instruction {
int rc_pair_alloc_source(struct rc_pair_instruction *pair,
unsigned int rgb, unsigned int alpha,
rc_register_file file, unsigned int index);
+
+/* Calls cb(data, source) for the sources read by the arguments of the alpha
+ * instruction in pair. */
+void rc_pair_foreach_source_that_alpha_reads(
+ struct rc_pair_instruction * pair,
+ void * data,
+ rc_pair_foreach_src_fn cb);
+
+/* Calls cb(data, source) for the sources read by the arguments of the rgb
+ * instruction in pair. */
+void rc_pair_foreach_source_that_rgb_reads(
+ struct rc_pair_instruction * pair,
+ void * data,
+ rc_pair_foreach_src_fn cb);
+
+/* Classifies the first `channels` channels of `swizzle` as reading an rgb
+ * source, an alpha source, or no source at all. */
+rc_pair_source_type rc_source_type_that_arg_reads(
+ unsigned int source,
+ unsigned int swizzle,
+ unsigned int channels);
/*@}*/
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
index a356e94e032..01612195810 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
@@ -38,6 +38,24 @@ static const char * textarget_to_string(rc_texture_target target)
}
}
+/* Return a human-readable formula for a presubtract operation, or
+ * "BAD_PRESUBTRACT_OP" when op is not one of the known enum values. */
+static const char * presubtract_op_to_string(rc_presubtract_op op)
+{
+	static const char * const formulas[] = {
+		[RC_PRESUB_NONE] = "NONE",
+		[RC_PRESUB_BIAS] = "(1 - 2 * src0)",
+		[RC_PRESUB_SUB] = "(src1 - src0)",
+		[RC_PRESUB_ADD] = "(src1 + src0)",
+		[RC_PRESUB_INV] = "(1 - src0)",
+	};
+	const unsigned int slot = (unsigned int)op;
+
+	/* The unsigned comparison also rejects negative enum values. */
+	if (slot >= sizeof(formulas) / sizeof(formulas[0]))
+		return "BAD_PRESUBTRACT_OP";
+	return formulas[slot];
+}
+
static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
{
if (func == RC_COMPARE_FUNC_NEVER) {
@@ -125,7 +143,43 @@ static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate
}
}
-static void rc_print_src_register(FILE * f, struct rc_src_register src)
+/* Print a presubtract operation to f as a parenthesized expression built
+ * from the operation's source registers, e.g. "(1 - <src0>)" for
+ * RC_PRESUB_INV. The operand order follows the definitions of the
+ * rc_presubtract_op values; in particular RC_PRESUB_SUB is src1 - src0.
+ * Unknown operations (and RC_PRESUB_NONE) print as "()". */
+static void rc_print_presub_instruction(FILE * f,
+					struct rc_presub_instruction inst)
+{
+	fprintf(f,"(");
+	switch(inst.Opcode){
+	case RC_PRESUB_BIAS:
+		fprintf(f, "1 - 2 * ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	case RC_PRESUB_SUB:
+		/* src1 - src0: the second register is the minuend. */
+		rc_print_register(f, inst.SrcReg[1].File,
+				inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
+		fprintf(f, " - ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	case RC_PRESUB_ADD:
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		fprintf(f, " + ");
+		rc_print_register(f, inst.SrcReg[1].File,
+				inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
+		break;
+	case RC_PRESUB_INV:
+		fprintf(f, "1 - ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	default:
+		break;
+	}
+	fprintf(f, ")");
+}
+
+static void rc_print_src_register(FILE * f, struct rc_instruction * inst,
+ struct rc_src_register src)
{
int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
@@ -134,7 +188,10 @@ static void rc_print_src_register(FILE * f, struct rc_src_register src)
if (src.Abs)
fprintf(f, "|");
- rc_print_register(f, src.File, src.Index, src.RelAddr);
+ if(src.File == RC_FILE_PRESUB)
+ rc_print_presub_instruction(f, inst->U.I.PreSub);
+ else
+ rc_print_register(f, src.File, src.Index, src.RelAddr);
if (src.Abs && !trivial_negate)
fprintf(f, "|");
@@ -198,7 +255,7 @@ static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst,
if (reg > 0)
fprintf(f, ",");
fprintf(f, " ");
- rc_print_src_register(f, inst->U.I.SrcReg[reg]);
+ rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]);
}
if (opcode->HasTexture) {
@@ -247,6 +304,16 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
printedsrc = 1;
}
}
+ if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+ fprintf(f, ", srcp.xyz = %s",
+ presubtract_op_to_string(
+ inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index));
+ }
+ if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+ fprintf(f, ", srcp.w = %s",
+ presubtract_op_to_string(
+ inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));
+ }
fprintf(f, "\n");
if (inst->RGB.Opcode != RC_OPCODE_NOP) {
@@ -272,7 +339,12 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
- fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source,
+ fprintf(f, ", %s%ssrc", neg, abs);
+ if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
+ fprintf(f,"p");
+ else
+ fprintf(f,"%d", inst->RGB.Arg[arg].Source);
+ fprintf(f,".%c%c%c%s",
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
@@ -300,7 +372,12 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
- fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source,
+ fprintf(f, ", %s%ssrc", neg, abs);
+ if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
+ fprintf(f,"p");
+ else
+ fprintf(f,"%d", inst->Alpha.Arg[arg].Source);
+ fprintf(f,".%c%s",
rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs);
}
fprintf(f, "\n");
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
index 9281feecfa1..facea382f4e 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
@@ -27,6 +27,16 @@
#include "radeon_remove_constants.h"
+/* Register-remapping callback: when the register is in the constant file,
+ * replace its index with the entry that userdata (an array of unsigned
+ * indices) holds for the old index. Other register files are left alone.
+ * The inst argument is not used. */
+static void remap_regs(void * userdata, struct rc_instruction * inst,
+			rc_register_file * pfile, unsigned int * pindex)
+{
+	const unsigned * new_index_of = userdata;
+
+	if (*pfile != RC_FILE_CONSTANT)
+		return;
+
+	*pindex = new_index_of[*pindex];
+}
+
void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
{
unsigned **out_remap_table = (unsigned**)user;
@@ -51,6 +61,10 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
inst != &c->Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ /* XXX: This loop and the if statement after it should be
+ * replaced by a call to one of the rc_for_all_reads_* functions.
+ * The reason it does not use one of those functions now is
+ * because none of them have RelAddr as an argument. */
for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) {
if (inst->U.I.SrcReg[i].RelAddr) {
@@ -60,6 +74,18 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
}
}
}
+ if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+ unsigned int i;
+ unsigned int srcp_regs = rc_presubtract_src_reg_count(
+ inst->U.I.PreSub.Opcode);
+ for( i = 0; i < srcp_regs; i++) {
+ if (inst->U.I.PreSub.SrcReg[i].File ==
+ RC_FILE_CONSTANT) {
+ const_used[
+ inst->U.I.PreSub.SrcReg[i].Index] = 1;
+ }
+ }
+ }
}
/* Pass 2: If there is relative addressing, mark all externals as used. */
@@ -100,13 +126,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
if (!is_identity) {
for (struct rc_instruction *inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
- const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
-
- for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
- if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) {
- inst->U.I.SrcReg[i].Index = inv_remap_table[inst->U.I.SrcReg[i].Index];
- }
- }
+ rc_remap_registers(inst, remap_regs, inv_remap_table);
}
}