summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolai Hähnle <nhaehnle@gmail.com>2009-09-06 11:47:40 +0200
committerNicolai Hähnle <nhaehnle@gmail.com>2009-09-06 11:47:40 +0200
commite95e76e1255a3ad0ce604271301d090337b2e82b (patch)
tree2f164b0c45576b27d70130e6f2ea58b3bb62d880
parentd1b4351e603522be11061522cb6b685da9ef1fee (diff)
r300/compiler: New dataflow structures and passes
This replaces the old NQSSADCE code with the same functionality, but with a quite different design. Instead of doing a single integrated pass, we now build explicit data structures representing the dataflow. This will enable analysis of flow control instructions, and could potentially open an avenue for several dataflow based optimizations, such as peephole optimization, fusing MUL+ADD to MAD, and so on.
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c35
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h7
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c38
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c50
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c24
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.h6
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c35
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h26
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c106
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h113
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c365
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c150
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c126
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c267
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h92
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c87
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.c150
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.h110
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h128
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_print.c214
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h57
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.c1
24 files changed, 1486 insertions, 717 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index 080c79898bf..53fb7caa95d 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -8,11 +8,15 @@ LIBNAME = r300compiler
C_SOURCES = \
radeon_code.c \
radeon_compiler.c \
- radeon_nqssadce.c \
radeon_program.c \
- radeon_opcodes.c \
+ radeon_program_print.c \
+ radeon_opcodes.c \
radeon_program_alu.c \
radeon_program_pair.c \
+ radeon_dataflow.c \
+ radeon_dataflow_annotate.c \
+ radeon_dataflow_dealias.c \
+ radeon_dataflow_swizzles.c \
r3xx_fragprog.c \
r300_fragprog.c \
r300_fragprog_swizzle.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index ded6966d084..cfa48a59e3a 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -36,7 +36,6 @@
#include <stdio.h>
#include "../r300_reg.h"
-#include "radeon_nqssadce.h"
#include "radeon_compiler.h"
#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO))
@@ -92,7 +91,7 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
* Check whether the given instruction supports the swizzle and negate
* combinations in the given source register.
*/
-int r300FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg)
+static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
if (reg.Abs)
reg.Negate = RC_MASK_NONE;
@@ -134,15 +133,16 @@ int r300FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg)
}
-/**
- * Generate MOV dst, src using only native swizzles.
- */
-void r300FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, struct rc_src_register src)
+static void r300_swizzle_split(
+ struct rc_src_register src, unsigned int mask,
+ struct rc_swizzle_split * split)
{
if (src.Abs)
src.Negate = RC_MASK_NONE;
- while(dst.WriteMask) {
+ split->NumPhases = 0;
+
+ while(mask) {
const struct swizzle_data *best_swizzle = 0;
unsigned int best_matchcount = 0;
unsigned int best_matchmask = 0;
@@ -153,7 +153,7 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, st
unsigned int matchcount = 0;
unsigned int matchmask = 0;
for(comp = 0; comp < 3; ++comp) {
- if (!GET_BIT(dst.WriteMask, comp))
+ if (!GET_BIT(mask, comp))
continue;
unsigned int swz = GET_SWZ(src.Swizzle, comp);
if (swz == RC_SWIZZLE_UNUSED)
@@ -172,23 +172,24 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, st
best_swizzle = sd;
best_matchcount = matchcount;
best_matchmask = matchmask;
- if (matchmask == (dst.WriteMask & RC_MASK_XYZ))
+ if (matchmask == (mask & RC_MASK_XYZ))
break;
}
}
- struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
- inst->I.Opcode = RC_OPCODE_MOV;
- inst->I.DstReg = dst;
- inst->I.DstReg.WriteMask &= (best_matchmask | RC_MASK_W);
- inst->I.SrcReg[0] = src;
- inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? RC_MASK_XYZW : RC_MASK_NONE;
- /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */
+ if (mask & RC_MASK_W)
+ best_matchmask |= RC_MASK_W;
- dst.WriteMask &= ~inst->I.DstReg.WriteMask;
+ split->Phase[split->NumPhases++] = best_matchmask;
+ mask &= ~best_matchmask;
}
}
+struct rc_swizzle_caps r300_swizzle_caps = {
+ .IsNative = r300_swizzle_is_native,
+ .Split = r300_swizzle_split
+};
+
/**
* Translate an RGB (XYZ) swizzle into the hardware code for the given
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
index 728c2cd972f..118476af132 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
@@ -28,12 +28,9 @@
#ifndef __R300_FRAGPROG_SWIZZLE_H_
#define __R300_FRAGPROG_SWIZZLE_H_
-#include "radeon_program.h"
+#include "radeon_swizzle.h"
-struct nqssadce_state;
-
-int r300FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg);
-void r300FPBuildSwizzle(struct nqssadce_state*, struct rc_dst_register dst, struct rc_src_register src);
+extern struct rc_swizzle_caps r300_swizzle_caps;
unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle);
unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle);
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 0aa40c05876..bf9bea685ab 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -24,18 +24,18 @@
#include <stdio.h>
-#include "radeon_nqssadce.h"
#include "radeon_program_alu.h"
#include "r300_fragprog.h"
#include "r300_fragprog_swizzle.h"
#include "r500_fragprog.h"
-static void nqssadce_init(struct nqssadce_state* s)
+static void dataflow_outputs_mark_use(void * userdata, void * data,
+ void (*callback)(void *, unsigned int, unsigned int))
{
- struct r300_fragment_program_compiler * c = s->UserData;
- s->Outputs[c->OutputColor].Sourced = RC_MASK_XYZW;
- s->Outputs[c->OutputDepth].Sourced = RC_MASK_W;
+ struct r300_fragment_program_compiler * c = userdata;
+ callback(data, c->OutputColor, RC_MASK_XYZW);
+ callback(data, c->OutputDepth, RC_MASK_W);
}
static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
@@ -92,6 +92,8 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{ &radeonTransformTrigScale, 0 }
};
radeonLocalTransform(&c->Base, 4, transformations);
+
+ c->Base.SwizzleCaps = &r500_swizzle_caps;
} else {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_TEX, c },
@@ -99,33 +101,23 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{ &radeonTransformTrigSimple, 0 }
};
radeonLocalTransform(&c->Base, 3, transformations);
+
+ c->Base.SwizzleCaps = &r300_swizzle_caps;
}
if (c->Base.Debug) {
fprintf(stderr, "Fragment Program: After native rewrite:\n");
- rc_print_program(&c->Base.Program);
+ rc_print_program(&c->Base.Program, 0);
fflush(stderr);
}
- if (c->is_r500) {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r500FPIsNativeSwizzle,
- .BuildSwizzle = &r500FPBuildSwizzle
- };
- radeonNqssaDce(&c->Base, &nqssadce, c);
- } else {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r300FPIsNativeSwizzle,
- .BuildSwizzle = &r300FPBuildSwizzle
- };
- radeonNqssaDce(&c->Base, &nqssadce, c);
- }
+ rc_dataflow_annotate(&c->Base, &dataflow_outputs_mark_use, c);
+ rc_dataflow_dealias(&c->Base);
+ rc_dataflow_swizzles(&c->Base);
if (c->Base.Debug) {
- fprintf(stderr, "Compiler: after NqSSA-DCE:\n");
- rc_print_program(&c->Base.Program);
+ fprintf(stderr, "Compiler: after dataflow passes:\n");
+ rc_print_program(&c->Base.Program, 0);
fflush(stderr);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 0efd2c91e6a..c64648ff3b5 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -26,9 +26,9 @@
#include "../r300_reg.h"
-#include "radeon_nqssadce.h"
-#include "radeon_program.h"
+#include "radeon_dataflow.h"
#include "radeon_program_alu.h"
+#include "radeon_swizzle.h"
/*
@@ -545,18 +545,19 @@ static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
}
}
-static void nqssadceInit(struct nqssadce_state* s)
+static void dataflow_outputs_mark_used(void * userdata, void * data,
+ void (*callback)(void *, unsigned int, unsigned int))
{
- struct r300_vertex_program_compiler * compiler = s->UserData;
+ struct r300_vertex_program_compiler * c = userdata;
int i;
for(i = 0; i < 32; ++i) {
- if (compiler->RequiredOutputs & (1 << i))
- s->Outputs[i].Sourced = RC_MASK_XYZW;
+ if (c->RequiredOutputs & (1 << i))
+ callback(data, i, RC_MASK_XYZW);
}
}
-static int swizzleIsNative(rc_opcode opcode, struct rc_src_register reg)
+static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
(void) opcode;
(void) reg;
@@ -565,9 +566,16 @@ static int swizzleIsNative(rc_opcode opcode, struct rc_src_register reg)
}
+static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
+ .IsNative = &swizzle_is_native,
+ .Split = 0 /* should never be called */
+};
+
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
+ compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
+
addArtificialOutputs(compiler);
{
@@ -579,7 +587,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
if (compiler->Base.Debug) {
fprintf(stderr, "Vertex program after native rewrite:\n");
- rc_print_program(&compiler->Base.Program);
+ rc_print_program(&compiler->Base.Program, 0);
fflush(stderr);
}
@@ -596,26 +604,22 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
if (compiler->Base.Debug) {
fprintf(stderr, "Vertex program after source conflict resolve:\n");
- rc_print_program(&compiler->Base.Program);
+ rc_print_program(&compiler->Base.Program, 0);
fflush(stderr);
}
- {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadceInit,
- .IsNativeSwizzle = &swizzleIsNative,
- .BuildSwizzle = NULL
- };
- radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
+ rc_dataflow_annotate(&compiler->Base, &dataflow_outputs_mark_used, compiler);
+ rc_dataflow_dealias(&compiler->Base);
+ rc_dataflow_swizzles(&compiler->Base);
- /* We need this step for reusing temporary registers */
- allocate_temporary_registers(compiler);
+ /* This invalidates dataflow annotations and should be replaced
+ * by a future generic register allocation pass. */
+ allocate_temporary_registers(compiler);
- if (compiler->Base.Debug) {
- fprintf(stderr, "Vertex program after NQSSADCE:\n");
- rc_print_program(&compiler->Base.Program);
- fflush(stderr);
- }
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after dataflow:\n");
+ rc_print_program(&compiler->Base.Program, 0);
+ fflush(stderr);
}
translate_vertex_program(compiler);
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 3e994ebd1b5..971465e3591 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -169,7 +169,7 @@ int r500_transform_TEX(
return 1;
}
-int r500FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg)
+static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
unsigned int relevant;
int i;
@@ -227,36 +227,38 @@ int r500FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg)
}
/**
- * Implement a MOV with a potentially non-native swizzle.
+ * Split source register access.
*
* The only thing we *cannot* do in an ALU instruction is per-component
- * negation. Therefore, we split the MOV into two instructions when necessary.
+ * negation.
*/
-void r500FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, struct rc_src_register src)
+static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,
+ struct rc_swizzle_split * split)
{
unsigned int negatebase[2] = { 0, 0 };
int i;
for(i = 0; i < 4; ++i) {
unsigned int swz = GET_SWZ(src.Swizzle, i);
- if (swz == RC_SWIZZLE_UNUSED)
+ if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i))
continue;
negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
}
+ split->NumPhases = 0;
+
for(i = 0; i <= 1; ++i) {
if (!negatebase[i])
continue;
- struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
- inst->I.Opcode = RC_OPCODE_MOV;
- inst->I.DstReg = dst;
- inst->I.DstReg.WriteMask = negatebase[i];
- inst->I.SrcReg[0] = src;
- inst->I.SrcReg[0].Negate = (i == 0) ? RC_MASK_NONE : RC_MASK_XYZW;
+ split->Phase[split->NumPhases++] = negatebase[i];
}
}
+struct rc_swizzle_caps r500_swizzle_caps = {
+ .IsNative = r500_swizzle_is_native,
+ .Split = r500_swizzle_split
+};
static char *toswiz(int swiz_val) {
switch(swiz_val) {
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index 887d4abbd25..92ac75d5fd4 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -34,15 +34,13 @@
#define __R500_FRAGPROG_H_
#include "radeon_compiler.h"
-#include "radeon_nqssadce.h"
+#include "radeon_swizzle.h"
extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c);
-extern int r500FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg);
-
-extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, struct rc_src_register src);
+extern struct rc_swizzle_caps r500_swizzle_caps;
extern int r500_transform_TEX(
struct radeon_compiler * c,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index babdcffd3ab..d0b78ec1c88 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -93,6 +93,41 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...)
}
}
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
+{
+ rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
+ return 1;
+}
+
+/**
+ * Recompute c->Program.InputsRead and c->Program.OutputsWritten
+ * based on which inputs and outputs are actually referenced
+ * in program instructions.
+ */
+void rc_calculate_inputs_outputs(struct radeon_compiler * c)
+{
+ struct rc_instruction *inst;
+
+ c->Program.InputsRead = 0;
+ c->Program.OutputsWritten = 0;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
+ {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
+ int i;
+
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->I.SrcReg[i].File == RC_FILE_INPUT)
+ c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index;
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->I.DstReg.File == RC_FILE_OUTPUT)
+ c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index;
+ }
+ }
+}
+
/**
* Rewrite the program such that everything that source the given input
* register will source new_input instead.
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 018f9bba06a..87a732cd90d 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -27,6 +27,7 @@
#include "radeon_code.h"
#include "radeon_program.h"
+struct rc_swizzle_caps;
struct radeon_compiler {
struct memory_pool Pool;
@@ -34,6 +35,14 @@ struct radeon_compiler {
unsigned Debug:1;
unsigned Error:1;
char * ErrorMsg;
+
+ /**
+ * Variables used internally, not to be touched by callers
+ * of the compiler
+ */
+ /*@{*/
+ struct rc_swizzle_caps * SwizzleCaps;
+ /*@}*/
};
void rc_init(struct radeon_compiler * c);
@@ -42,6 +51,23 @@ void rc_destroy(struct radeon_compiler * c);
void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
void rc_error(struct radeon_compiler * c, const char * fmt, ...);
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion);
+
+/**
+ * This macro acts like an if-statement that can be used to implement
+ * non-aborting assertions in the compiler.
+ *
+ * It checks whether \p cond is true. If not, an internal compiler error is
+ * flagged and the if-clause is run.
+ *
+ * A typical use-case would be:
+ *
+ * if (rc_assert(c, condition-that-must-be-true))
+ * return;
+ */
+#define rc_assert(c, cond) \
+ (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
+
void rc_calculate_inputs_outputs(struct radeon_compiler * c);
void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
new file mode 100644
index 00000000000..af6777a7bd3
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+static void add_ref_to_vector(struct rc_dataflow_ref * ref, struct rc_dataflow_vector * vector)
+{
+ ref->Vector = vector;
+ ref->Prev = &vector->Refs;
+ ref->Next = vector->Refs.Next;
+ ref->Prev->Next = ref;
+ ref->Next->Prev = ref;
+}
+
+struct rc_dataflow_ref * rc_dataflow_create_ref(struct radeon_compiler * c,
+ struct rc_dataflow_vector * vector, struct rc_instruction * inst)
+{
+ struct rc_dataflow_ref * ref = memory_pool_malloc(&c->Pool, sizeof(struct rc_dataflow_ref));
+ ref->ReadInstruction = inst;
+ ref->UseMask = 0;
+
+ add_ref_to_vector(ref, vector);
+
+ return ref;
+}
+
+struct rc_dataflow_vector * rc_dataflow_create_vector(struct radeon_compiler * c,
+ rc_register_file file, unsigned int index, struct rc_instruction * inst)
+{
+ struct rc_dataflow_vector * vec = memory_pool_malloc(&c->Pool, sizeof(struct rc_dataflow_vector));
+
+ memset(vec, 0, sizeof(struct rc_dataflow_vector));
+ vec->File = file;
+ vec->Index = index;
+ vec->WriteInstruction = inst;
+
+ vec->Refs.Next = vec->Refs.Prev = &vec->Refs;
+
+ return vec;
+}
+
+void rc_dataflow_remove_ref(struct rc_dataflow_ref * ref)
+{
+ ref->Prev->Next = ref->Next;
+ ref->Next->Prev = ref->Prev;
+}
+
+void rc_dataflow_remove_instruction(struct rc_instruction * inst)
+{
+ for(unsigned int i = 0; i < 3; ++i) {
+ if (inst->Dataflow.SrcReg[i]) {
+ rc_dataflow_remove_ref(inst->Dataflow.SrcReg[i]);
+ inst->Dataflow.SrcReg[i] = 0;
+ }
+ if (inst->Dataflow.SrcRegAddress[i]) {
+ rc_dataflow_remove_ref(inst->Dataflow.SrcRegAddress[i]);
+ inst->Dataflow.SrcRegAddress[i] = 0;
+ }
+ }
+
+ if (inst->Dataflow.DstReg) {
+ while(inst->Dataflow.DstReg->Refs.Next != &inst->Dataflow.DstReg->Refs) {
+ struct rc_dataflow_ref * ref = inst->Dataflow.DstReg->Refs.Next;
+ rc_dataflow_remove_ref(ref);
+ if (inst->Dataflow.DstRegPrev)
+ add_ref_to_vector(ref, inst->Dataflow.DstRegPrev->Vector);
+ }
+
+ inst->Dataflow.DstReg->WriteInstruction = 0;
+ inst->Dataflow.DstReg = 0;
+ }
+
+ if (inst->Dataflow.DstRegPrev) {
+ rc_dataflow_remove_ref(inst->Dataflow.DstRegPrev);
+ inst->Dataflow.DstRegPrev = 0;
+ }
+
+ inst->Dataflow.DstRegAliased = 0;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
new file mode 100644
index 00000000000..c9856affe88
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_DATAFLOW_H
+#define RADEON_DATAFLOW_H
+
+#include "radeon_program_constants.h"
+
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_swizzle_caps;
+
+struct rc_dataflow_vector;
+
+struct rc_dataflow_ref {
+ struct rc_dataflow_vector * Vector;
+
+ /**
+ * Linked list of references to the above-mentioned vector.
+ * The linked list is \em not sorted.
+ */
+ /*@{*/
+ struct rc_dataflow_ref * Prev;
+ struct rc_dataflow_ref * Next;
+ /*@}*/
+
+ unsigned int UseMask:4;
+ struct rc_instruction * ReadInstruction;
+};
+
+struct rc_dataflow_vector {
+ rc_register_file File:3;
+ unsigned int Index:RC_REGISTER_INDEX_BITS;
+
+ /** For private use in compiler passes. MUST BE RESET TO 0 by the end of each pass.
+ * The annotate pass uses this bit to track whether a vector is in the
+ * update stack.
+ */
+ unsigned int PassBit:1;
+ /** Which of the components have been written with useful values */
+ unsigned int ValidMask:4;
+ /** Which of the components are used downstream */
+ unsigned int UseMask:4;
+ /** The instruction that produced this vector */
+ struct rc_instruction * WriteInstruction;
+
+ /** Linked list of references to this vector */
+ struct rc_dataflow_ref Refs;
+};
+
+struct rc_instruction_dataflow {
+ struct rc_dataflow_ref * SrcReg[3];
+ struct rc_dataflow_ref * SrcRegAddress[3];
+
+ /** Reference the components of the destination register
+ * that are carried over without being overwritten */
+ struct rc_dataflow_ref * DstRegPrev;
+ /** Indicates whether the destination register was in use
+ * before this instruction */
+ unsigned int DstRegAliased:1;
+ struct rc_dataflow_vector * DstReg;
+};
+
+/**
+ * General functions for manipulating the dataflow structures.
+ */
+/*@{*/
+struct rc_dataflow_ref * rc_dataflow_create_ref(struct radeon_compiler * c,
+ struct rc_dataflow_vector * vector, struct rc_instruction * inst);
+struct rc_dataflow_vector * rc_dataflow_create_vector(struct radeon_compiler * c,
+ rc_register_file file, unsigned int index, struct rc_instruction * inst);
+void rc_dataflow_remove_ref(struct rc_dataflow_ref * ref);
+
+void rc_dataflow_remove_instruction(struct rc_instruction * inst);
+/*@}*/
+
+
+/**
+ * Compiler passes based on dataflow structures.
+ */
+/*@{*/
+typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data,
+ void (*mark_fn)(void * data, unsigned int index, unsigned int mask));
+void rc_dataflow_annotate(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata);
+void rc_dataflow_dealias(struct radeon_compiler * c);
+void rc_dataflow_swizzles(struct radeon_compiler * c);
+/*@}*/
+
+#endif /* RADEON_DATAFLOW_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c
new file mode 100644
index 00000000000..41d175a22f2
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+/**
+ * Working state of the annotation pass.
+ */
+struct dataflow_state {
+ struct radeon_compiler * C;
+ /** Set when a mark-outputs callback was supplied, i.e. uses are only
+ * marked on demand instead of unconditionally */
+ unsigned int DCE:1;
+ /** Set while run_update is draining the update stack, so that nested
+ * mark_vector_use calls only queue work instead of recursing */
+ unsigned int UpdateRunning:1;
+
+ /** The vector currently held by each register, or 0 if none yet */
+ struct rc_dataflow_vector * Input[RC_REGISTER_MAX_INDEX];
+ struct rc_dataflow_vector * Output[RC_REGISTER_MAX_INDEX];
+ struct rc_dataflow_vector * Temporary[RC_REGISTER_MAX_INDEX];
+ struct rc_dataflow_vector * Address;
+
+ /** Stack of vectors whose UseMask grew and still needs propagating */
+ struct rc_dataflow_vector ** UpdateStack;
+ /** Number of entries currently on the stack */
+ unsigned int UpdateStackSize;
+ /** Number of entries the current stack allocation can hold */
+ unsigned int UpdateStackReserved;
+};
+
+/* Forward declaration: mark_ref_use and mark_vector_use call each other
+ * (indirectly, via run_update). */
+static void mark_vector_use(struct dataflow_state * s, struct rc_dataflow_vector * vector, unsigned int mask);
+
+/**
+ * Look up the vector currently held by a register.
+ *
+ * \return the tracked vector (possibly 0 if the register holds nothing yet),
+ * or 0 for register files that are not tracked (e.g. constants) and for
+ * out-of-range indices.
+ */
+static struct rc_dataflow_vector * get_register_contents(struct dataflow_state * s,
+	rc_register_file file, unsigned int index)
+{
+	switch(file) {
+	case RC_FILE_TEMPORARY:
+		/* the range checks cannot fail, but be defensive */
+		return index < RC_REGISTER_MAX_INDEX ? s->Temporary[index] : 0;
+	case RC_FILE_INPUT:
+		return index < RC_REGISTER_MAX_INDEX ? s->Input[index] : 0;
+	case RC_FILE_OUTPUT:
+		return index < RC_REGISTER_MAX_INDEX ? s->Output[index] : 0;
+	case RC_FILE_ADDRESS:
+		/* there is a single address register; the index is ignored */
+		return s->Address;
+	default:
+		return 0; /* can happen, constant register file */
+	}
+}
+
+/**
+ * Add \p mask to the components used through \p ref and, if that added
+ * anything new, forward the reference's accumulated use to its vector.
+ */
+static void mark_ref_use(struct dataflow_state * s, struct rc_dataflow_ref * ref, unsigned int mask)
+{
+	unsigned int newbits = mask & ~ref->UseMask;
+
+	if (newbits) {
+		ref->UseMask |= mask;
+		mark_vector_use(s, ref->Vector, ref->UseMask);
+	}
+}
+
+/**
+ * Mark the register components that source operand \p src of \p inst reads
+ * when the operand channels in \p srcmask are consumed.
+ *
+ * The operand's swizzle is applied to translate operand channels into
+ * register components. If any register component is read, the address
+ * register's X component is marked as well for relatively addressed operands.
+ */
+static void mark_source_use(struct dataflow_state * s, struct rc_instruction * inst,
+	unsigned int src, unsigned int srcmask)
+{
+	struct rc_dataflow_ref * regref;
+	struct rc_dataflow_ref * addrref;
+	unsigned int refmask = 0;
+	unsigned int chan;
+
+	for(chan = 0; chan < 4; ++chan) {
+		if (!GET_BIT(srcmask, chan))
+			continue;
+
+		refmask |= 1 << GET_SWZ(inst->I.SrcReg[src].Swizzle, chan);
+	}
+
+	/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
+	refmask &= RC_MASK_XYZW;
+
+	/* nothing is read from the register if only constant swizzles remain */
+	if (!refmask)
+		return;
+
+	regref = inst->Dataflow.SrcReg[src];
+	if (regref)
+		mark_ref_use(s, regref, refmask);
+
+	addrref = inst->Dataflow.SrcRegAddress[src];
+	if (addrref)
+		mark_ref_use(s, addrref, RC_MASK_X);
+}
+
+/**
+ * Compute, for each of the three source operands of \p inst, which operand
+ * channels are read in order to produce the destination components in
+ * \p writemask.
+ *
+ * \param srcmasks array of three masks, overwritten with the result
+ */
+static void compute_sources_for_writemask(
+	struct rc_instruction * inst,
+	unsigned int writemask,
+	unsigned int *srcmasks)
+{
+	const struct rc_opcode_info * info = rc_get_opcode_info(inst->I.Opcode);
+	unsigned int i;
+
+	for(i = 0; i < 3; ++i)
+		srcmasks[i] = 0;
+
+	/* KIL reads its operand even though nothing is written */
+	if (inst->I.Opcode == RC_OPCODE_KIL)
+		srcmasks[0] |= RC_MASK_XYZW;
+
+	if (!writemask)
+		return;
+
+	if (info->IsComponentwise || info->IsStandardScalar) {
+		/* componentwise: channel n feeds component n;
+		 * standard scalar: only X of every source is read */
+		unsigned int mask = info->IsComponentwise ? writemask : RC_MASK_X;
+
+		for(i = 0; i < info->NumSrcRegs; ++i)
+			srcmasks[i] |= mask;
+		return;
+	}
+
+	switch(inst->I.Opcode) {
+	case RC_OPCODE_ARL:
+		srcmasks[0] |= RC_MASK_X;
+		break;
+	case RC_OPCODE_DP3:
+		srcmasks[0] |= RC_MASK_XYZ;
+		srcmasks[1] |= RC_MASK_XYZ;
+		break;
+	case RC_OPCODE_DP4:
+		srcmasks[0] |= RC_MASK_XYZW;
+		srcmasks[1] |= RC_MASK_XYZW;
+		break;
+	case RC_OPCODE_TEX:
+	case RC_OPCODE_TXB:
+	case RC_OPCODE_TXP:
+		srcmasks[0] |= RC_MASK_XYZW;
+		break;
+	case RC_OPCODE_DST:
+		srcmasks[0] |= 0x6; /* bits 1 and 2 */
+		srcmasks[1] |= 0xa; /* bits 1 and 3 */
+		break;
+	case RC_OPCODE_EXP:
+	case RC_OPCODE_LOG:
+		srcmasks[0] |= RC_MASK_XY;
+		break;
+	case RC_OPCODE_LIT:
+		srcmasks[0] |= 0xb; /* bits 0, 1 and 3 */
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * Mark everything \p inst reads in order to produce the destination
+ * components in \p writemask.
+ */
+static void mark_instruction_source_use(struct dataflow_state * s,
+	struct rc_instruction * inst, unsigned int writemask)
+{
+	unsigned int masks[3];
+	unsigned int i = 0;
+
+	compute_sources_for_writemask(inst, writemask, masks);
+
+	while(i < 3) {
+		mark_source_use(s, inst, i, masks[i]);
+		++i;
+	}
+}
+
+/**
+ * Drain the update stack: for every queued vector, propagate its current
+ * UseMask backwards to whatever the writing instruction reads.
+ *
+ * Marking sources may queue further vectors (see mark_vector_use); the loop
+ * runs until the stack is empty.
+ */
+static void run_update(struct dataflow_state * s)
+{
+ /* nested mark_vector_use calls must only queue, not re-enter this loop */
+ s->UpdateRunning = 1;
+
+ while(s->UpdateStackSize) {
+ struct rc_dataflow_vector * vector = s->UpdateStack[--s->UpdateStackSize];
+ /* the vector is off the stack again and may be re-queued later */
+ vector->PassBit = 0;
+
+ /* vectors without a writer (program inputs) have nothing to propagate to */
+ if (vector->WriteInstruction) {
+ struct rc_instruction * inst = vector->WriteInstruction;
+
+ if (inst->Dataflow.DstRegPrev) {
+ /* used components this instruction did not write come from the
+ * previous contents of the destination register */
+ unsigned int carryover = vector->UseMask & ~inst->I.DstReg.WriteMask;
+
+ if (carryover)
+ mark_ref_use(s, inst->Dataflow.DstRegPrev, carryover);
+ }
+
+ /* used components this instruction did write require its sources */
+ mark_instruction_source_use(
+ s, vector->WriteInstruction,
+ vector->UseMask & inst->I.DstReg.WriteMask);
+ }
+ }
+
+ s->UpdateRunning = 0;
+}
+
+/**
+ * Record that the components in \p mask of \p vector are used downstream.
+ *
+ * If this adds new bits to the vector's UseMask, the vector is queued on the
+ * update stack (unless its PassBit shows it is already queued) so that
+ * run_update can propagate the use to the sources of the writing
+ * instruction. The queue is drained right away unless a drain is already
+ * in progress.
+ */
+static void mark_vector_use(struct dataflow_state * s, struct rc_dataflow_vector * vector, unsigned int mask)
+{
+	if (!(mask & ~vector->UseMask))
+		return; /* no new used bits */
+
+	vector->UseMask |= mask;
+	if (vector->PassBit)
+		return; /* already queued; run_update will see the enlarged mask */
+
+	if (s->UpdateStackSize >= s->UpdateStackReserved) {
+		/* grow geometrically, starting with 16 entries */
+		unsigned int new_reserve = 2 * s->UpdateStackReserved;
+		struct rc_dataflow_vector ** new_stack;
+
+		if (!new_reserve)
+			new_reserve = 16;
+
+		new_stack = memory_pool_malloc(&s->C->Pool, new_reserve * sizeof(struct rc_dataflow_vector *));
+
+		/* On the first growth UpdateStack is still a null pointer;
+		 * memcpy must not be handed one, even for a zero-length copy. */
+		if (s->UpdateStackSize)
+			memcpy(new_stack, s->UpdateStack, s->UpdateStackSize * sizeof(struct rc_dataflow_vector *));
+
+		/* the old stack is not released here; presumably the memory
+		 * pool reclaims it -- TODO confirm */
+		s->UpdateStack = new_stack;
+		s->UpdateStackReserved = new_reserve;
+	}
+
+	s->UpdateStack[s->UpdateStackSize++] = vector;
+	vector->PassBit = 1;
+
+	if (!s->UpdateRunning)
+		run_update(s);
+}
+
+/**
+ * Build the dataflow annotations for one instruction and update the
+ * register-contents tables in \p s accordingly.
+ *
+ * Must be called for the instructions in program order, since the
+ * source lookups rely on the tables reflecting all earlier writes.
+ */
+static void annotate_instruction(struct dataflow_state * s, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
+ unsigned int src;
+
+ /* create references from each source operand to the vector it reads */
+ for(src = 0; src < opcode->NumSrcRegs; ++src) {
+ struct rc_dataflow_vector * vector = get_register_contents(s, inst->I.SrcReg[src].File, inst->I.SrcReg[src].Index);
+ if (vector) {
+ inst->Dataflow.SrcReg[src] = rc_dataflow_create_ref(s->C, vector, inst);
+ }
+ if (inst->I.SrcReg[src].RelAddr) {
+ struct rc_dataflow_vector * addr = get_register_contents(s, RC_FILE_ADDRESS, 0);
+ if (addr)
+ inst->Dataflow.SrcRegAddress[src] = rc_dataflow_create_ref(s->C, addr, inst);
+ }
+ }
+
+ /* with an empty writemask only unconditional reads are marked */
+ mark_instruction_source_use(s, inst, 0); /* for KIL */
+
+ if (opcode->HasDstReg) {
+ struct rc_dataflow_vector * oldvec = get_register_contents(s, inst->I.DstReg.File, inst->I.DstReg.Index);
+ struct rc_dataflow_vector * newvec = rc_dataflow_create_vector(s->C, inst->I.DstReg.File, inst->I.DstReg.Index, inst);
+
+ newvec->ValidMask = inst->I.DstReg.WriteMask;
+
+ if (oldvec) {
+ /* components of the old contents that survive this write */
+ unsigned int carryover = oldvec->ValidMask & ~inst->I.DstReg.WriteMask;
+
+ if (oldvec->ValidMask)
+ inst->Dataflow.DstRegAliased = 1;
+
+ if (carryover) {
+ inst->Dataflow.DstRegPrev = rc_dataflow_create_ref(s->C, oldvec, inst);
+ newvec->ValidMask |= carryover;
+
+ /* without DCE everything is considered used */
+ if (!s->DCE)
+ mark_ref_use(s, inst->Dataflow.DstRegPrev, carryover);
+ }
+ }
+
+ inst->Dataflow.DstReg = newvec;
+
+ /* the new vector is now the current contents of the register;
+ * any file other than temporary/output is treated as the address register */
+ if (newvec->File == RC_FILE_TEMPORARY)
+ s->Temporary[newvec->Index] = newvec;
+ else if (newvec->File == RC_FILE_OUTPUT)
+ s->Output[newvec->Index] = newvec;
+ else
+ s->Address = newvec;
+
+ /* without DCE everything is considered used */
+ if (!s->DCE)
+ mark_vector_use(s, newvec, inst->I.DstReg.WriteMask);
+ }
+}
+
+/**
+ * Create a fully valid vector for every input register that the program's
+ * InputsRead bitmask reports as read.
+ */
+static void init_inputs(struct dataflow_state * s)
+{
+	unsigned int index;
+
+	for(index = 0; index < 32; ++index) {
+		/* unsigned constant: shifting a signed 1 into bit 31 is undefined */
+		if (s->C->Program.InputsRead & (1u << index)) {
+			/* inputs have no writing instruction */
+			s->Input[index] = rc_dataflow_create_vector(s->C, RC_FILE_INPUT, index, 0);
+			s->Input[index]->ValidMask = RC_MASK_XYZW;
+		}
+	}
+}
+
+/**
+ * Callback handed to the mark-outputs function: marks the components in
+ * \p mask of output register \p index as used.
+ *
+ * \param data the dataflow_state, passed through the callback as void *
+ *
+ * Outputs that were never written, and indices outside the tracked range,
+ * are ignored.
+ */
+static void mark_output_use(void * data, unsigned int index, unsigned int mask)
+{
+	struct dataflow_state * s = data;
+	struct rc_dataflow_vector * vec;
+
+	/* the index is supplied by external code; never read past the table */
+	if (index >= RC_REGISTER_MAX_INDEX)
+		return;
+
+	vec = s->Output[index];
+	if (vec)
+		mark_vector_use(s, vec, mask);
+}
+
+/**
+ * Build the dataflow structures for the whole program.
+ *
+ * \param dce if non-null, called once after annotation to mark the used
+ * outputs; afterwards unused writes, instructions and source components
+ * are eliminated. If null, everything that is written counts as used.
+ * \param userdata passed through to \p dce unchanged
+ */
+void rc_dataflow_annotate(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata)
+{
+ struct dataflow_state s;
+ struct rc_instruction * inst;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+ s.DCE = dce ? 1 : 0;
+
+ init_inputs(&s);
+
+ /* annotate in program order; the instruction list is circular with
+ * c->Program.Instructions as the sentinel */
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ annotate_instruction(&s, inst);
+ }
+
+ if (s.DCE) {
+ /* let the caller mark the live outputs; use masks propagate
+ * backwards through the program from there */
+ dce(userdata, &s, &mark_output_use);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
+
+ if (opcode->HasDstReg) {
+ /* drop writes to components nobody uses */
+ unsigned int redundant_writes = inst->I.DstReg.WriteMask & ~inst->Dataflow.DstReg->UseMask;
+
+ inst->I.DstReg.WriteMask &= ~redundant_writes;
+
+ if (!inst->I.DstReg.WriteMask) {
+ /* step back first so that the loop increment continues
+ * with the successor of the removed instruction */
+ /* NOTE(review): confirm that rc_remove_instruction also
+ * detaches this instruction's dataflow references */
+ struct rc_instruction * todelete = inst;
+ inst = inst->Prev;
+ rc_remove_instruction(todelete);
+ continue;
+ }
+ }
+
+ /* NOTE(review): for opcodes without a destination this still
+ * passes DstReg.WriteMask -- confirm that value is harmless */
+ unsigned int srcmasks[3];
+ compute_sources_for_writemask(inst, inst->I.DstReg.WriteMask, srcmasks);
+
+ for(unsigned int src = 0; src < 3; ++src) {
+ /* blank out swizzle channels that are no longer read */
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (!GET_BIT(srcmasks[src], chan))
+ SET_SWZ(inst->I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
+ }
+
+ /* drop references through which nothing is used */
+ if (inst->Dataflow.SrcReg[src]) {
+ if (!inst->Dataflow.SrcReg[src]->UseMask) {
+ rc_dataflow_remove_ref(inst->Dataflow.SrcReg[src]);
+ inst->Dataflow.SrcReg[src] = 0;
+ }
+ }
+
+ if (inst->Dataflow.SrcRegAddress[src]) {
+ if (!inst->Dataflow.SrcRegAddress[src]->UseMask) {
+ rc_dataflow_remove_ref(inst->Dataflow.SrcRegAddress[src]);
+ inst->Dataflow.SrcRegAddress[src] = 0;
+ }
+ }
+ }
+ }
+
+ /* instructions may have been removed, so the program's input/output
+ * summary has to be recomputed */
+ rc_calculate_inputs_outputs(c);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c
new file mode 100644
index 00000000000..4596636970a
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+/** Maximum number of chained vectors a single rename can cover */
+#define DEALIAS_LIST_SIZE 128
+
+/**
+ * Working state for renaming one chain of vectors from OldIndex to NewIndex.
+ */
+struct dealias_state {
+ struct radeon_compiler * C;
+
+ /** Temporary register index that is being renamed */
+ unsigned int OldIndex:RC_REGISTER_INDEX_BITS;
+ /** Temporary register index that replaces it */
+ unsigned int NewIndex:RC_REGISTER_INDEX_BITS;
+ /** Set when the chain does not fit into List; the rename is then skipped */
+ unsigned int DealiasFail:1;
+
+ /** Vectors that have to be renamed together */
+ struct rc_dataflow_vector * List[DEALIAS_LIST_SIZE];
+ /** Number of valid entries in List */
+ unsigned int Length;
+};
+
+/**
+ * Append \p vec to the list of vectors to be renamed.
+ *
+ * Flags DealiasFail when the list is full. A vector that is not the
+ * temporary currently being renamed is rejected via rc_assert.
+ */
+static void push_dealias_vector(struct dealias_state * s, struct rc_dataflow_vector * vec)
+{
+	if (s->Length < DEALIAS_LIST_SIZE) {
+		if (rc_assert(s->C, vec->File == RC_FILE_TEMPORARY && vec->Index == s->OldIndex))
+			return;
+
+		s->List[s->Length] = vec;
+		s->Length += 1;
+	} else {
+		rc_debug(s->C, "%s: list size exceeded\n", __FUNCTION__);
+		s->DealiasFail = 1;
+	}
+}
+
+/**
+ * Rename the vector chain that starts with the entries already in the list.
+ *
+ * Phase one extends the list transitively: every instruction that carries
+ * components of a listed vector over into its own destination (its
+ * DstRegPrev refers to the listed vector) contributes its destination
+ * vector. Phase two rewrites the register index of every listed vector, of
+ * the instruction writing it, and of every source operand reading it.
+ *
+ * Nothing is rewritten if the list overflowed during phase one.
+ */
+static void run_dealias(struct dealias_state * s)
+{
+	unsigned int i;
+
+	/* s->Length may grow while this loop runs; that is intended */
+	for(i = 0; i < s->Length && !s->DealiasFail; ++i) {
+		struct rc_dataflow_vector * vec = s->List[i];
+		struct rc_dataflow_ref * ref;
+
+		for(ref = vec->Refs.Next; ref != &vec->Refs; ref = ref->Next) {
+			if (ref->ReadInstruction->Dataflow.DstRegPrev == ref)
+				push_dealias_vector(s, ref->ReadInstruction->Dataflow.DstReg);
+		}
+	}
+
+	if (s->DealiasFail)
+		return;
+
+	for(i = 0; i < s->Length; ++i) {
+		struct rc_dataflow_vector * vec = s->List[i];
+		struct rc_dataflow_ref * ref;
+
+		vec->Index = s->NewIndex;
+		vec->WriteInstruction->I.DstReg.Index = s->NewIndex;
+
+		for(ref = vec->Refs.Next; ref != &vec->Refs; ref = ref->Next) {
+			struct rc_instruction * inst = ref->ReadInstruction;
+			unsigned int src; /* distinct name: must not shadow the list index */
+
+			for(src = 0; src < 3; ++src) {
+				if (inst->Dataflow.SrcReg[src] == ref) {
+					if (rc_assert(s->C, inst->I.SrcReg[src].File == RC_FILE_TEMPORARY &&
+							inst->I.SrcReg[src].Index == s->OldIndex))
+						return;
+
+					inst->I.SrcReg[src].Index = s->NewIndex;
+				}
+			}
+		}
+	}
+}
+
+/**
+ * Breaks register aliasing to reduce multiple assignments to a single register.
+ *
+ * This affects sequences like:
+ * MUL r0, ...;
+ * MAD r0, r1, r2, r0;
+ * In this example, a new register will be used for the destination of the
+ * second instruction (the MAD), and for all later reads of its result.
+ *
+ * The purpose of this dealiasing is to make the resulting code more SSA-like
+ * and therefore make it easier to move instructions around.
+ * This is of crucial importance for R300 fragment programs, where de-aliasing
+ * can help to reduce texture indirections, but other targets can benefit from
+ * it as well.
+ *
+ * \note When compiling GLSL, there may be some benefit gained from breaking
+ * up vectors whose components are unrelated. This is not done yet and should
+ * be investigated at some point (of course, a matching pass to re-merge
+ * components would be required).
+ */
+void rc_dataflow_dealias(struct radeon_compiler * c)
+{
+ struct dealias_state s;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+
+ /* walk the program backwards, from the last instruction to the first */
+ struct rc_instruction * inst;
+ for(inst = c->Program.Instructions.Prev; inst != &c->Program.Instructions; inst = inst->Prev) {
+ /* only temporaries that held something before this write qualify */
+ if (!inst->Dataflow.DstRegAliased || inst->Dataflow.DstReg->File != RC_FILE_TEMPORARY)
+ continue;
+
+ /* a used component that this instruction does not write is carried
+ * over from the old contents, so the register cannot be split here */
+ if (inst->Dataflow.DstReg->UseMask & ~inst->I.DstReg.WriteMask)
+ continue;
+
+ s.OldIndex = inst->I.DstReg.Index;
+ s.NewIndex = rc_find_free_temporary(c);
+ s.DealiasFail = 0;
+ s.Length = 0;
+
+ /* NOTE(review): the flag is cleared before run_dealias has succeeded;
+ * if the rename fails the register is still aliased -- confirm that
+ * nothing relies on the flag afterwards */
+ inst->Dataflow.DstRegAliased = 0;
+ /* no carried-over component is used, so the link to the previous
+ * contents is no longer needed */
+ if (inst->Dataflow.DstRegPrev) {
+ rc_dataflow_remove_ref(inst->Dataflow.DstRegPrev);
+ inst->Dataflow.DstRegPrev = 0;
+ }
+
+ push_dealias_vector(&s, inst->Dataflow.DstReg);
+ run_dealias(&s);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
new file mode 100644
index 00000000000..1aa91eff7c1
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+
+/**
+ * Replace source operand \p src of \p inst by a read from a fresh temporary
+ * that is filled by one or more MOV instructions inserted before \p inst.
+ *
+ * The swizzle capabilities' Split callback decides how the operand's used
+ * channels are distributed over the MOVs ("phases"); each phase writes the
+ * channels in its mask. The dataflow structures are kept up to date for the
+ * new MOVs and for the rewritten operand.
+ */
+static void rewrite_source(struct radeon_compiler * c,
+ struct rc_instruction * inst, unsigned src)
+{
+ struct rc_swizzle_split split;
+ unsigned int tempreg = rc_find_free_temporary(c);
+ unsigned int usemask;
+ struct rc_dataflow_ref * oldref = inst->Dataflow.SrcReg[src];
+ /* vector written by the most recently emitted MOV, 0 before the first */
+ struct rc_dataflow_vector * vector = 0;
+
+ /* operand channels that are actually read by the instruction */
+ usemask = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (GET_SWZ(inst->I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
+ usemask |= 1 << chan;
+ }
+
+ c->SwizzleCaps->Split(inst->I.SrcReg[src], usemask, &split);
+
+ for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
+ /* inserting after inst->Prev keeps the MOVs in phase order
+ * directly in front of inst */
+ struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
+ unsigned int phase_refmask;
+ unsigned int masked_negate;
+
+ mov->I.Opcode = RC_OPCODE_MOV;
+ mov->I.DstReg.File = RC_FILE_TEMPORARY;
+ mov->I.DstReg.Index = tempreg;
+ mov->I.DstReg.WriteMask = split.Phase[phase];
+ mov->I.SrcReg[0] = inst->I.SrcReg[src];
+
+ /* restrict the MOV's swizzle to this phase and collect the
+ * register components it reads */
+ phase_refmask = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (!GET_BIT(split.Phase[phase], chan))
+ SET_SWZ(mov->I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
+ else
+ phase_refmask |= 1 << GET_SWZ(mov->I.SrcReg[0].Swizzle, chan);
+ }
+
+ /* drop bits that stem from constant swizzles */
+ phase_refmask &= RC_MASK_XYZW;
+
+ /* normalize the negate mask when the phase's channels are negated
+ * uniformly; a mixed negate is left unchanged */
+ masked_negate = split.Phase[phase] & mov->I.SrcReg[0].Negate;
+ if (masked_negate == 0)
+ mov->I.SrcReg[0].Negate = 0;
+ else if (masked_negate == split.Phase[phase])
+ mov->I.SrcReg[0].Negate = RC_MASK_XYZW;
+
+ /* the MOV reads the vector the original operand used to read */
+ if (oldref) {
+ mov->Dataflow.SrcReg[0] = rc_dataflow_create_ref(c, oldref->Vector, mov);
+ mov->Dataflow.SrcReg[0]->UseMask = phase_refmask;
+ }
+
+ mov->Dataflow.DstReg = rc_dataflow_create_vector(c, RC_FILE_TEMPORARY, tempreg, mov);
+ mov->Dataflow.DstReg->ValidMask = split.Phase[phase];
+
+ /* later phases carry over what the earlier phases wrote */
+ if (vector) {
+ mov->Dataflow.DstRegPrev = rc_dataflow_create_ref(c, vector, mov);
+ mov->Dataflow.DstRegPrev->UseMask = vector->ValidMask;
+ mov->Dataflow.DstReg->ValidMask |= vector->ValidMask;
+ mov->Dataflow.DstRegAliased = 1;
+ }
+
+ mov->Dataflow.DstReg->UseMask = mov->Dataflow.DstReg->ValidMask;
+ vector = mov->Dataflow.DstReg;
+ }
+
+ /* NOTE(review): if Split ever reports zero phases, vector is still 0
+ * here -- confirm that rc_dataflow_create_ref is never reached that way */
+ if (oldref)
+ rc_dataflow_remove_ref(oldref);
+ inst->Dataflow.SrcReg[src] = rc_dataflow_create_ref(c, vector, inst);
+ inst->Dataflow.SrcReg[src]->UseMask = usemask;
+
+ /* the operand now reads the temporary with an identity swizzle on the
+ * used channels; negate and abs have been applied by the MOVs */
+ inst->I.SrcReg[src].File = RC_FILE_TEMPORARY;
+ inst->I.SrcReg[src].Index = tempreg;
+ inst->I.SrcReg[src].Swizzle = 0;
+ inst->I.SrcReg[src].Negate = RC_MASK_NONE;
+ inst->I.SrcReg[src].Abs = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ SET_SWZ(inst->I.SrcReg[src].Swizzle, chan,
+ GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
+ }
+}
+
+/**
+ * Walk the program and rewrite every source operand whose swizzle the
+ * target cannot handle natively for the instruction's opcode.
+ */
+void rc_dataflow_swizzles(struct radeon_compiler * c)
+{
+	struct rc_instruction * inst = c->Program.Instructions.Next;
+
+	while(inst != &c->Program.Instructions) {
+		const struct rc_opcode_info * info = rc_get_opcode_info(inst->I.Opcode);
+		unsigned int src;
+
+		for(src = 0; src < info->NumSrcRegs; ++src) {
+			if (c->SwizzleCaps->IsNative(inst->I.Opcode, inst->I.SrcReg[src]))
+				continue;
+
+			/* the helper MOVs are inserted in front of inst */
+			rewrite_source(c, inst, src);
+		}
+
+		inst = inst->Next;
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
deleted file mode 100644
index 3e02ebee81f..00000000000
--- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/**
- * @file
- *
- * "Not-quite SSA" and Dead-Code Elimination.
- */
-
-#include "radeon_nqssadce.h"
-
-#include "radeon_compiler.h"
-
-
-/**
- * Return the @ref register_state for the given register (or 0 for untracked
- * registers, i.e. constants).
- */
-static struct register_state *get_reg_state(struct nqssadce_state* s, rc_register_file file, unsigned int index)
-{
- if (index >= RC_REGISTER_MAX_INDEX)
- return 0;
-
- switch(file) {
- case RC_FILE_TEMPORARY: return &s->Temps[index];
- case RC_FILE_OUTPUT: return &s->Outputs[index];
- case RC_FILE_ADDRESS: return &s->Address;
- default: return 0;
- }
-}
-
-
-static void track_used_srcreg(struct nqssadce_state* s,
- int src, unsigned int sourced)
-{
- struct rc_sub_instruction * inst = &s->IP->I;
- int i;
- unsigned int deswz_source = 0;
-
- for(i = 0; i < 4; ++i) {
- if (GET_BIT(sourced, i)) {
- unsigned int swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
- deswz_source |= 1 << swz;
- } else {
- inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
- inst->SrcReg[src].Swizzle |= RC_SWIZZLE_UNUSED << (3*i);
- }
- }
-
- if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
- struct rc_dst_register dstreg = inst->DstReg;
- dstreg.File = RC_FILE_TEMPORARY;
- dstreg.Index = rc_find_free_temporary(s->Compiler);
- dstreg.WriteMask = sourced;
-
- s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
-
- inst->SrcReg[src].File = RC_FILE_TEMPORARY;
- inst->SrcReg[src].Index = dstreg.Index;
- inst->SrcReg[src].Swizzle = 0;
- inst->SrcReg[src].Negate = RC_MASK_NONE;
- inst->SrcReg[src].Abs = 0;
- for(i = 0; i < 4; ++i) {
- if (GET_BIT(sourced, i))
- inst->SrcReg[src].Swizzle |= i << (3*i);
- else
- inst->SrcReg[src].Swizzle |= RC_SWIZZLE_UNUSED << (3*i);
- }
- deswz_source = sourced;
- }
-
- struct register_state *regstate;
-
- if (inst->SrcReg[src].RelAddr) {
- regstate = get_reg_state(s, RC_FILE_ADDRESS, 0);
- if (regstate)
- regstate->Sourced |= RC_MASK_X;
- } else {
- regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
- if (regstate)
- regstate->Sourced |= deswz_source & 0xf;
- }
-}
-
-static void unalias_srcregs(struct rc_instruction *inst, unsigned int oldindex, unsigned int newindex)
-{
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
- int i;
- for(i = 0; i < opcode->NumSrcRegs; ++i)
- if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY && inst->I.SrcReg[i].Index == oldindex)
- inst->I.SrcReg[i].Index = newindex;
-}
-
-static void unalias_temporary(struct nqssadce_state* s, unsigned int oldindex)
-{
- unsigned int newindex = rc_find_free_temporary(s->Compiler);
- struct rc_instruction * inst;
- for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) {
- if (inst->I.DstReg.File == RC_FILE_TEMPORARY && inst->I.DstReg.Index == oldindex)
- inst->I.DstReg.Index = newindex;
- unalias_srcregs(inst, oldindex, newindex);
- }
- unalias_srcregs(s->IP, oldindex, newindex);
-}
-
-
-/**
- * Handle one instruction.
- */
-static void process_instruction(struct nqssadce_state* s)
-{
- struct rc_sub_instruction *inst = &s->IP->I;
- unsigned int WriteMask;
-
- if (inst->Opcode != RC_OPCODE_KIL) {
- struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
- if (!regstate) {
- rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n",
- inst->DstReg.File, inst->DstReg.Index);
- return;
- }
-
- inst->DstReg.WriteMask &= regstate->Sourced;
- regstate->Sourced &= ~inst->DstReg.WriteMask;
-
- if (inst->DstReg.WriteMask == 0) {
- struct rc_instruction * inst_remove = s->IP;
- s->IP = s->IP->Prev;
- rc_remove_instruction(inst_remove);
- return;
- }
-
- if (inst->DstReg.File == RC_FILE_TEMPORARY && !regstate->Sourced)
- unalias_temporary(s, inst->DstReg.Index);
- }
-
- WriteMask = inst->DstReg.WriteMask;
-
- switch (inst->Opcode) {
- case RC_OPCODE_ARL:
- case RC_OPCODE_DDX:
- case RC_OPCODE_DDY:
- case RC_OPCODE_FRC:
- case RC_OPCODE_MOV:
- track_used_srcreg(s, 0, WriteMask);
- break;
- case RC_OPCODE_ADD:
- case RC_OPCODE_MAX:
- case RC_OPCODE_MIN:
- case RC_OPCODE_MUL:
- case RC_OPCODE_SGE:
- case RC_OPCODE_SLT:
- track_used_srcreg(s, 0, WriteMask);
- track_used_srcreg(s, 1, WriteMask);
- break;
- case RC_OPCODE_CMP:
- case RC_OPCODE_MAD:
- track_used_srcreg(s, 0, WriteMask);
- track_used_srcreg(s, 1, WriteMask);
- track_used_srcreg(s, 2, WriteMask);
- break;
- case RC_OPCODE_COS:
- case RC_OPCODE_EX2:
- case RC_OPCODE_LG2:
- case RC_OPCODE_RCP:
- case RC_OPCODE_RSQ:
- case RC_OPCODE_SIN:
- track_used_srcreg(s, 0, 0x1);
- break;
- case RC_OPCODE_DP3:
- track_used_srcreg(s, 0, 0x7);
- track_used_srcreg(s, 1, 0x7);
- break;
- case RC_OPCODE_DP4:
- track_used_srcreg(s, 0, 0xf);
- track_used_srcreg(s, 1, 0xf);
- break;
- case RC_OPCODE_KIL:
- case RC_OPCODE_TEX:
- case RC_OPCODE_TXB:
- case RC_OPCODE_TXP:
- track_used_srcreg(s, 0, 0xf);
- break;
- case RC_OPCODE_DST:
- track_used_srcreg(s, 0, 0x6);
- track_used_srcreg(s, 1, 0xa);
- break;
- case RC_OPCODE_EXP:
- case RC_OPCODE_LOG:
- case RC_OPCODE_POW:
- track_used_srcreg(s, 0, 0x3);
- break;
- case RC_OPCODE_LIT:
- track_used_srcreg(s, 0, 0xb);
- break;
- default:
- rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
- return;
- }
-
- s->IP = s->IP->Prev;
-}
-
-void rc_calculate_inputs_outputs(struct radeon_compiler * c)
-{
- struct rc_instruction *inst;
-
- c->Program.InputsRead = 0;
- c->Program.OutputsWritten = 0;
-
- for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
- {
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
- int i;
-
- for (i = 0; i < opcode->NumSrcRegs; ++i) {
- if (inst->I.SrcReg[i].File == RC_FILE_INPUT)
- c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index;
- }
-
- if (opcode->HasDstReg) {
- if (inst->I.DstReg.File == RC_FILE_OUTPUT)
- c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index;
- }
- }
-}
-
-void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data)
-{
- struct nqssadce_state s;
-
- memset(&s, 0, sizeof(s));
- s.Compiler = c;
- s.Descr = descr;
- s.UserData = data;
- s.Descr->Init(&s);
- s.IP = c->Program.Instructions.Prev;
-
- while(s.IP != &c->Program.Instructions && !c->Error)
- process_instruction(&s);
-
- rc_calculate_inputs_outputs(c);
-}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
deleted file mode 100644
index a2aa1eb8ca3..00000000000
--- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __RADEON_PROGRAM_NQSSADCE_H_
-#define __RADEON_PROGRAM_NQSSADCE_H_
-
-#include "radeon_program.h"
-
-struct register_state {
- /**
- * Bitmask indicating which components of the register are sourced
- * by later instructions.
- */
- unsigned int Sourced : 4;
-};
-
-/**
- * Maintain state such as which registers are used, which registers are
- * read from, etc.
- */
-struct nqssadce_state {
- struct radeon_compiler *Compiler;
- struct radeon_nqssadce_descr *Descr;
-
- /**
- * All instructions after this instruction pointer have been dealt with.
- */
- struct rc_instruction * IP;
-
- /**
- * Which registers are read by subsequent instructions?
- */
- struct register_state Temps[RC_REGISTER_MAX_INDEX];
- struct register_state Outputs[RC_REGISTER_MAX_INDEX];
- struct register_state Address;
-
- void * UserData;
-};
-
-
-/**
- * This structure contains a description of the hardware in-so-far as
- * it is required for the NqSSA-DCE pass.
- */
-struct radeon_nqssadce_descr {
- /**
- * Fill in which outputs
- */
- void (*Init)(struct nqssadce_state *);
-
- /**
- * Check whether the given swizzle, absolute and negate combination
- * can be implemented natively by the hardware for this opcode.
- *
- * \return 1 if the swizzle is native for the given opcode
- */
- int (*IsNativeSwizzle)(rc_opcode opcode, struct rc_src_register reg);
-
- /**
- * Emit (at the current IP) the instruction MOV dst, src;
- * The transformation will work recursively on the emitted instruction(s).
- */
- void (*BuildSwizzle)(struct nqssadce_state*, struct rc_dst_register dst, struct rc_src_register src);
-};
-
-void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data);
-
-#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index ffe2de1a873..b7200990c22 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -40,13 +40,15 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.Opcode = RC_OPCODE_ABS,
.Name = "ABS",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_ADD,
.Name = "ADD",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_ARL,
@@ -58,25 +60,29 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.Opcode = RC_OPCODE_CMP,
.Name = "CMP",
.NumSrcRegs = 3,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_COS,
.Name = "COS",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_DDX,
.Name = "DDX",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_DDY,
.Name = "DDY",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_DP3,
@@ -106,7 +112,8 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.Opcode = RC_OPCODE_EX2,
.Name = "EX2",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_EXP,
@@ -118,13 +125,15 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.Opcode = RC_OPCODE_FLR,
.Name = "FLR",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_FRC,
.Name = "FRC",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_KIL,
@@ -135,7 +144,8 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.Opcode = RC_OPCODE_LG2,
.Name = "LG2",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_LIT,
@@ -153,55 +163,64 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.Opcode = RC_OPCODE_LRP,
.Name = "LRP",
.NumSrcRegs = 3,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MAD,
.Name = "MAD",
.NumSrcRegs = 3,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MAX,
.Name = "MAX",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MIN,
.Name = "MIN",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MOV,
.Name = "MOV",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MUL,
.Name = "MUL",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_POW,
.Name = "POW",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_RCP,
.Name = "RCP",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_RSQ,
.Name = "RSQ",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_SCS,
@@ -213,61 +232,71 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.Opcode = RC_OPCODE_SEQ,
.Name = "SEQ",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SFL,
.Name = "SFL",
.NumSrcRegs = 0,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SGE,
.Name = "SGE",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SGT,
.Name = "SGT",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SIN,
.Name = "SIN",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_SLE,
.Name = "SLE",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SLT,
.Name = "SLT",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SNE,
.Name = "SNE",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SUB,
.Name = "SUB",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SWZ,
.Name = "SWZ",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_XPD,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 4eb9be3e556..8e30bef1e3e 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -187,6 +187,14 @@ struct rc_opcode_info {
unsigned int NumSrcRegs:2;
unsigned int HasDstReg:1;
+
+ /** true if this is a vector instruction that operates on components in parallel
+ * without any cross-component interaction */
+ unsigned int IsComponentwise:1;
+
+ /** true if this instruction sources only its operands' X components
+ * to compute one result which is smeared across all output channels */
+ unsigned int IsStandardScalar:1;
};
extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE];
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
index 0e0c1f68e64..b97c48084b4 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -154,155 +154,7 @@ struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, str
void rc_remove_instruction(struct rc_instruction * inst)
{
+ rc_dataflow_remove_instruction(inst);
inst->Prev->Next = inst->Next;
inst->Next->Prev = inst->Prev;
}
-
-static const char * textarget_to_string(rc_texture_target target)
-{
- switch(target) {
- case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY";
- case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY";
- case RC_TEXTURE_CUBE: return "CUBE";
- case RC_TEXTURE_3D: return "3D";
- case RC_TEXTURE_RECT: return "RECT";
- case RC_TEXTURE_2D: return "2D";
- case RC_TEXTURE_1D: return "1D";
- default: return "BAD_TEXTURE_TARGET";
- }
-}
-
-static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
-{
- if (file == RC_FILE_NONE) {
- fprintf(f, "none");
- } else {
- const char * filename;
- switch(file) {
- case RC_FILE_TEMPORARY: filename = "temp"; break;
- case RC_FILE_INPUT: filename = "input"; break;
- case RC_FILE_OUTPUT: filename = "output"; break;
- case RC_FILE_ADDRESS: filename = "addr"; break;
- case RC_FILE_CONSTANT: filename = "const"; break;
- default: filename = "BAD FILE"; break;
- }
- fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : "");
- }
-}
-
-static void rc_print_mask(FILE * f, unsigned int mask)
-{
- if (mask & RC_MASK_X) fprintf(f, "x");
- if (mask & RC_MASK_Y) fprintf(f, "y");
- if (mask & RC_MASK_Z) fprintf(f, "z");
- if (mask & RC_MASK_W) fprintf(f, "w");
-}
-
-static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
-{
- rc_print_register(f, dst.File, dst.Index, dst.RelAddr);
- if (dst.WriteMask != RC_MASK_XYZW) {
- fprintf(f, ".");
- rc_print_mask(f, dst.WriteMask);
- }
-}
-
-static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
-{
- unsigned int comp;
- for(comp = 0; comp < 4; ++comp) {
- rc_swizzle swz = GET_SWZ(swizzle, comp);
- if (GET_BIT(negate, comp))
- fprintf(f, "-");
- switch(swz) {
- case RC_SWIZZLE_X: fprintf(f, "x"); break;
- case RC_SWIZZLE_Y: fprintf(f, "y"); break;
- case RC_SWIZZLE_Z: fprintf(f, "z"); break;
- case RC_SWIZZLE_W: fprintf(f, "w"); break;
- case RC_SWIZZLE_ZERO: fprintf(f, "0"); break;
- case RC_SWIZZLE_ONE: fprintf(f, "1"); break;
- case RC_SWIZZLE_HALF: fprintf(f, "H"); break;
- case RC_SWIZZLE_UNUSED: fprintf(f, "_"); break;
- }
- }
-}
-
-static void rc_print_src_register(FILE * f, struct rc_src_register src)
-{
- int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
-
- if (src.Negate == RC_MASK_XYZW)
- fprintf(f, "-");
- if (src.Abs)
- fprintf(f, "|");
-
- rc_print_register(f, src.File, src.Index, src.RelAddr);
-
- if (src.Abs && !trivial_negate)
- fprintf(f, "|");
-
- if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) {
- fprintf(f, ".");
- rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate);
- }
-
- if (src.Abs && trivial_negate)
- fprintf(f, "|");
-}
-
-static void rc_print_instruction(FILE * f, struct rc_instruction * inst)
-{
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
- unsigned int reg;
-
- fprintf(f, "%s", opcode->Name);
-
- switch(inst->I.SaturateMode) {
- case RC_SATURATE_NONE: break;
- case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break;
- case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break;
- default: fprintf(f, "_BAD_SAT"); break;
- }
-
- if (opcode->HasDstReg) {
- fprintf(f, " ");
- rc_print_dst_register(f, inst->I.DstReg);
- if (opcode->NumSrcRegs)
- fprintf(f, ",");
- }
-
- for(reg = 0; reg < opcode->NumSrcRegs; ++reg) {
- if (reg > 0)
- fprintf(f, ",");
- fprintf(f, " ");
- rc_print_src_register(f, inst->I.SrcReg[reg]);
- }
-
- if (opcode->HasTexture) {
- fprintf(f, ", %s%s[%u]",
- textarget_to_string(inst->I.TexSrcTarget),
- inst->I.TexShadow ? "SHADOW" : "",
- inst->I.TexSrcUnit);
- }
-
- fprintf(f, ";\n");
-}
-
-/**
- * Print program to stderr, default options.
- */
-void rc_print_program(const struct rc_program *prog)
-{
- unsigned int linenum = 0;
- struct rc_instruction *inst;
-
- fprintf(stderr, "# Radeon Compiler Program\n");
-
- for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
- fprintf(stderr, "%3d: ", linenum);
-
- rc_print_instruction(stderr, inst);
-
- linenum++;
- }
-}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index a2ab757fec6..d38c9a420c6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -33,102 +33,11 @@
#include "radeon_opcodes.h"
#include "radeon_code.h"
+#include "radeon_program_constants.h"
+#include "radeon_dataflow.h"
struct radeon_compiler;
-typedef enum {
- RC_SATURATE_NONE = 0,
- RC_SATURATE_ZERO_ONE,
- RC_SATURATE_MINUS_PLUS_ONE
-} rc_saturate_mode;
-
-typedef enum {
- RC_TEXTURE_2D_ARRAY,
- RC_TEXTURE_1D_ARRAY,
- RC_TEXTURE_CUBE,
- RC_TEXTURE_3D,
- RC_TEXTURE_RECT,
- RC_TEXTURE_2D,
- RC_TEXTURE_1D
-} rc_texture_target;
-
-typedef enum {
- /**
- * Used to indicate unused register descriptions and
- * source register that use a constant swizzle.
- */
- RC_FILE_NONE = 0,
- RC_FILE_TEMPORARY,
-
- /**
- * Input register.
- *
- * \note The compiler attaches no implicit semantics to input registers.
- * Fragment/vertex program specific semantics must be defined explicitly
- * using the appropriate compiler interfaces.
- */
- RC_FILE_INPUT,
-
- /**
- * Output register.
- *
- * \note The compiler attaches no implicit semantics to input registers.
- * Fragment/vertex program specific semantics must be defined explicitly
- * using the appropriate compiler interfaces.
- */
- RC_FILE_OUTPUT,
- RC_FILE_ADDRESS,
-
- /**
- * Indicates a constant from the \ref rc_constant_list .
- */
- RC_FILE_CONSTANT
-} rc_register_file;
-
-#define RC_REGISTER_INDEX_BITS 10
-#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
-
-typedef enum {
- RC_SWIZZLE_X = 0,
- RC_SWIZZLE_Y,
- RC_SWIZZLE_Z,
- RC_SWIZZLE_W,
- RC_SWIZZLE_ZERO,
- RC_SWIZZLE_ONE,
- RC_SWIZZLE_HALF,
- RC_SWIZZLE_UNUSED
-} rc_swizzle;
-
-#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
-#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))
-#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7)
-#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1)
-
-#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
-#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
-#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
-#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
-#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
-#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
-#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
-
-/**
- * \name Bitmasks for components of vectors.
- *
- * Used for write masks, negation masks, etc.
- */
-/*@{*/
-#define RC_MASK_NONE 0
-#define RC_MASK_X 1
-#define RC_MASK_Y 2
-#define RC_MASK_Z 4
-#define RC_MASK_W 8
-#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y)
-#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
-#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
-#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
-/*@}*/
-
struct rc_src_register {
rc_register_file File:3;
@@ -198,6 +107,15 @@ struct rc_instruction {
struct rc_instruction * Next;
struct rc_sub_instruction I;
+
+ /**
+ * Dataflow annotations.
+ *
+ * These are not supplied by the caller of the compiler,
+ * but filled in during compilation stages that make use of
+ * dataflow analysis.
+ */
+ struct rc_instruction_dataflow Dataflow;
};
struct rc_program {
@@ -292,6 +210,10 @@ struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
void rc_remove_instruction(struct rc_instruction * inst);
-void rc_print_program(const struct rc_program *prog);
+enum {
+ RC_PRINT_DATAFLOW = 0x1
+};
+
+void rc_print_program(const struct rc_program *prog, unsigned int flags);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
new file mode 100644
index 00000000000..69994f9880f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_PROGRAM_CONSTANTS_H
+#define RADEON_PROGRAM_CONSTANTS_H
+
+typedef enum {
+ RC_SATURATE_NONE = 0,
+ RC_SATURATE_ZERO_ONE,
+ RC_SATURATE_MINUS_PLUS_ONE
+} rc_saturate_mode;
+
+typedef enum {
+ RC_TEXTURE_2D_ARRAY,
+ RC_TEXTURE_1D_ARRAY,
+ RC_TEXTURE_CUBE,
+ RC_TEXTURE_3D,
+ RC_TEXTURE_RECT,
+ RC_TEXTURE_2D,
+ RC_TEXTURE_1D
+} rc_texture_target;
+
+typedef enum {
+ /**
+ * Used to indicate unused register descriptions and
+ * source registers that use a constant swizzle.
+ */
+ RC_FILE_NONE = 0,
+ RC_FILE_TEMPORARY,
+
+ /**
+ * Input register.
+ *
+ * \note The compiler attaches no implicit semantics to input registers.
+ * Fragment/vertex program specific semantics must be defined explicitly
+ * using the appropriate compiler interfaces.
+ */
+ RC_FILE_INPUT,
+
+ /**
+ * Output register.
+ *
+ * \note The compiler attaches no implicit semantics to output registers.
+ * Fragment/vertex program specific semantics must be defined explicitly
+ * using the appropriate compiler interfaces.
+ */
+ RC_FILE_OUTPUT,
+ RC_FILE_ADDRESS,
+
+ /**
+ * Indicates a constant from the \ref rc_constant_list .
+ */
+ RC_FILE_CONSTANT
+} rc_register_file;
+
+#define RC_REGISTER_INDEX_BITS 10
+#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
+
+typedef enum {
+ RC_SWIZZLE_X = 0,
+ RC_SWIZZLE_Y,
+ RC_SWIZZLE_Z,
+ RC_SWIZZLE_W,
+ RC_SWIZZLE_ZERO,
+ RC_SWIZZLE_ONE,
+ RC_SWIZZLE_HALF,
+ RC_SWIZZLE_UNUSED
+} rc_swizzle;
+
+#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
+#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))
+#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7)
+#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1)
+#define SET_SWZ(swz, idx, newv) \
+ do { \
+ (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \
+ } while(0)
+
+#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
+#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
+#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
+#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
+#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
+#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
+
+/**
+ * \name Bitmasks for components of vectors.
+ *
+ * Used for write masks, negation masks, etc.
+ */
+/*@{*/
+#define RC_MASK_NONE 0
+#define RC_MASK_X 1
+#define RC_MASK_Y 2
+#define RC_MASK_Z 4
+#define RC_MASK_W 8
+#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y)
+#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
+#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
+#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
+/*@}*/
+
+#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
new file mode 100644
index 00000000000..38060ea3adf
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+static void print_comment(FILE * f)
+{
+ fprintf(f, " # ");
+}
+
+static const char * textarget_to_string(rc_texture_target target)
+{
+ switch(target) {
+ case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY";
+ case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY";
+ case RC_TEXTURE_CUBE: return "CUBE";
+ case RC_TEXTURE_3D: return "3D";
+ case RC_TEXTURE_RECT: return "RECT";
+ case RC_TEXTURE_2D: return "2D";
+ case RC_TEXTURE_1D: return "1D";
+ default: return "BAD_TEXTURE_TARGET";
+ }
+}
+
+static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
+{
+ if (file == RC_FILE_NONE) {
+ fprintf(f, "none");
+ } else {
+ const char * filename;
+ switch(file) {
+ case RC_FILE_TEMPORARY: filename = "temp"; break;
+ case RC_FILE_INPUT: filename = "input"; break;
+ case RC_FILE_OUTPUT: filename = "output"; break;
+ case RC_FILE_ADDRESS: filename = "addr"; break;
+ case RC_FILE_CONSTANT: filename = "const"; break;
+ default: filename = "BAD FILE"; break;
+ }
+ fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : "");
+ }
+}
+
+static void rc_print_mask(FILE * f, unsigned int mask)
+{
+ if (mask & RC_MASK_X) fprintf(f, "x");
+ if (mask & RC_MASK_Y) fprintf(f, "y");
+ if (mask & RC_MASK_Z) fprintf(f, "z");
+ if (mask & RC_MASK_W) fprintf(f, "w");
+}
+
+static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
+{
+ rc_print_register(f, dst.File, dst.Index, dst.RelAddr);
+ if (dst.WriteMask != RC_MASK_XYZW) {
+ fprintf(f, ".");
+ rc_print_mask(f, dst.WriteMask);
+ }
+}
+
+static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
+{
+ unsigned int comp;
+ for(comp = 0; comp < 4; ++comp) {
+ rc_swizzle swz = GET_SWZ(swizzle, comp);
+ if (GET_BIT(negate, comp))
+ fprintf(f, "-");
+ switch(swz) {
+ case RC_SWIZZLE_X: fprintf(f, "x"); break;
+ case RC_SWIZZLE_Y: fprintf(f, "y"); break;
+ case RC_SWIZZLE_Z: fprintf(f, "z"); break;
+ case RC_SWIZZLE_W: fprintf(f, "w"); break;
+ case RC_SWIZZLE_ZERO: fprintf(f, "0"); break;
+ case RC_SWIZZLE_ONE: fprintf(f, "1"); break;
+ case RC_SWIZZLE_HALF: fprintf(f, "H"); break;
+ case RC_SWIZZLE_UNUSED: fprintf(f, "_"); break;
+ }
+ }
+}
+
+static void rc_print_src_register(FILE * f, struct rc_src_register src)
+{
+ int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
+
+ if (src.Negate == RC_MASK_XYZW)
+ fprintf(f, "-");
+ if (src.Abs)
+ fprintf(f, "|");
+
+ rc_print_register(f, src.File, src.Index, src.RelAddr);
+
+ if (src.Abs && !trivial_negate)
+ fprintf(f, "|");
+
+ if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) {
+ fprintf(f, ".");
+ rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate);
+ }
+
+ if (src.Abs && trivial_negate)
+ fprintf(f, "|");
+}
+
+static void rc_print_ref(FILE * f, struct rc_dataflow_ref * ref)
+{
+ fprintf(f, "ref(%p", ref->Vector);
+
+ if (ref->UseMask != RC_MASK_XYZW) {
+ fprintf(f, ".");
+ rc_print_mask(f, ref->UseMask);
+ }
+
+ fprintf(f, ")");
+}
+
+static void rc_print_instruction(FILE * f, unsigned int flags, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
+ unsigned int reg;
+
+ fprintf(f, "%s", opcode->Name);
+
+ switch(inst->I.SaturateMode) {
+ case RC_SATURATE_NONE: break;
+ case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break;
+ case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break;
+ default: fprintf(f, "_BAD_SAT"); break;
+ }
+
+ if (opcode->HasDstReg) {
+ fprintf(f, " ");
+ rc_print_dst_register(f, inst->I.DstReg);
+ if (opcode->NumSrcRegs)
+ fprintf(f, ",");
+ }
+
+ for(reg = 0; reg < opcode->NumSrcRegs; ++reg) {
+ if (reg > 0)
+ fprintf(f, ",");
+ fprintf(f, " ");
+ rc_print_src_register(f, inst->I.SrcReg[reg]);
+ }
+
+ if (opcode->HasTexture) {
+ fprintf(f, ", %s%s[%u]",
+ textarget_to_string(inst->I.TexSrcTarget),
+ inst->I.TexShadow ? "SHADOW" : "",
+ inst->I.TexSrcUnit);
+ }
+
+ fprintf(f, ";\n");
+
+ if (flags & RC_PRINT_DATAFLOW) {
+ print_comment(f);
+
+ fprintf(f, "Dst = %p", inst->Dataflow.DstReg);
+ if (inst->Dataflow.DstRegAliased)
+ fprintf(f, " aliased");
+ if (inst->Dataflow.DstRegPrev) {
+ fprintf(f, " from ");
+ rc_print_ref(f, inst->Dataflow.DstRegPrev);
+ }
+
+ for(reg = 0; reg < opcode->NumSrcRegs; ++reg) {
+ fprintf(f, ", ");
+ if (inst->Dataflow.SrcReg[reg])
+ rc_print_ref(f, inst->Dataflow.SrcReg[reg]);
+ else
+ fprintf(f, "<no ref>");
+ }
+
+ fprintf(f, "\n");
+ }
+}
+
+/**
+ * Print program to stderr; flags selects optional annotations (e.g. RC_PRINT_DATAFLOW).
+ */
+void rc_print_program(const struct rc_program *prog, unsigned int flags)
+{
+ unsigned int linenum = 0;
+ struct rc_instruction *inst;
+
+ fprintf(stderr, "# Radeon Compiler Program%s\n",
+ flags & RC_PRINT_DATAFLOW ? " (with dataflow annotations)" : "");
+
+ for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
+ fprintf(stderr, "%3d: ", linenum);
+
+ rc_print_instruction(stderr, flags, inst);
+
+ linenum++;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h
new file mode 100644
index 00000000000..c81d5f7a5e9
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_SWIZZLE_H
+#define RADEON_SWIZZLE_H
+
+#include "radeon_program.h"
+
+struct rc_swizzle_split {
+ unsigned char NumPhases;
+ unsigned char Phase[4];
+};
+
+/**
+ * Describe the swizzling capability of target hardware.
+ */
+struct rc_swizzle_caps {
+ /**
+ * Check whether the given swizzle, absolute and negate combination
+ * can be implemented natively by the hardware for this opcode.
+ *
+ * \return 1 if the swizzle is native for the given opcode
+ */
+ int (*IsNative)(rc_opcode opcode, struct rc_src_register reg);
+
+ /**
+ * Determine how to split access to the masked channels of the
+ * given source register to obtain ALU-native swizzles.
+ */
+ void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split);
+};
+
+#endif /* RADEON_SWIZZLE_H */
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index be21268ba5e..b7d5429dc57 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -41,7 +41,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "tnl/tnl.h"
#include "compiler/radeon_compiler.h"
-#include "compiler/radeon_nqssadce.h"
#include "radeon_mesa_to_rc.h"
#include "r300_context.h"
#include "r300_state.h"