summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <tstellar@gmail.com>2012-01-14 08:08:33 -0500
committerTom Stellard <thomas.stellard@amd.com>2012-03-04 19:36:27 -0500
commitbefcce264c8bf8fdac233e6a01cadc595a1d11d3 (patch)
treed94c7bab725c2b349bffe9176061b0e99d2f8c52
parent95594bae47d8b3302be188e6f0be2d69c5507bb3 (diff)
r300/compiler: Add support for inline literals
On R500 chips, shader instructions can take 7-bit (3-bit mantissa, 4-bit exponent) floating point values as inputs in place of registers.
-rw-r--r--src/gallium/drivers/r300/Makefile.sources1
-rw-r--r--src/gallium/drivers/r300/compiler/r3xx_fragprog.c1
-rw-r--r--src/gallium/drivers/r300/compiler/r500_fragprog.c2
-rw-r--r--src/gallium/drivers/r300/compiler/r500_fragprog_emit.c2
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_compiler.c16
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_compiler.h1
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_dataflow.h1
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_inline_literals.c140
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_pair_translate.c10
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_program_constants.h7
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_program_pair.h2
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_program_print.c18
12 files changed, 192 insertions, 9 deletions
diff --git a/src/gallium/drivers/r300/Makefile.sources b/src/gallium/drivers/r300/Makefile.sources
index 90105d6e694..e27b14e5702 100644
--- a/src/gallium/drivers/r300/Makefile.sources
+++ b/src/gallium/drivers/r300/Makefile.sources
@@ -28,6 +28,7 @@ C_SOURCES := \
compiler/radeon_compiler_util.c \
compiler/radeon_emulate_branches.c \
compiler/radeon_emulate_loops.c \
+ compiler/radeon_inline_literals.c \
compiler/radeon_program.c \
compiler/radeon_program_print.c \
compiler/radeon_opcodes.c \
diff --git a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c
index 8fea4db94ea..8ef2d24fc99 100644
--- a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c
+++ b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c
@@ -125,6 +125,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{"emulate loops", 1, !is_r500, rc_emulate_loops, NULL},
{"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL},
{"dataflow optimize", 1, opt, rc_optimize, NULL},
+ {"inline literals", 1, is_r500 && opt, rc_inline_literals, NULL},
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
{"pair translate", 1, 1, rc_pair_translate, NULL},
diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.c b/src/gallium/drivers/r300/compiler/r500_fragprog.c
index cf99f5e4538..499aa92423b 100644
--- a/src/gallium/drivers/r300/compiler/r500_fragprog.c
+++ b/src/gallium/drivers/r300/compiler/r500_fragprog.c
@@ -218,6 +218,8 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
return 1;
return 0;
+ } else if (reg.File == RC_FILE_INLINE) {
+ return 1;
} else {
/* ALU instructions support almost everything */
relevant = 0;
diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
index 87b96d15079..f6b6c0f9c06 100644
--- a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
+++ b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
@@ -210,6 +210,8 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r
} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
use_temporary(code, src.Index);
return src.Index;
+ } else if (src.File == RC_FILE_INLINE) {
+ return src.Index | (1 << 7);
}
return 0;
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.c b/src/gallium/drivers/r300/compiler/radeon_compiler.c
index 986e3b77403..4d4eb645841 100644
--- a/src/gallium/drivers/r300/compiler/radeon_compiler.c
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler.c
@@ -357,21 +357,22 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
- int *max_reg = userdata;
+ struct rc_program_stats *s = userdata;
if (file == RC_FILE_TEMPORARY)
- (int)index > *max_reg ? *max_reg = index : 0;
+ (int)index > s->num_temp_regs ? s->num_temp_regs = index : 0;
+ if (file == RC_FILE_INLINE)
+ s->num_inline_literals++;
}
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
{
- int max_reg = -1;
struct rc_instruction * tmp;
memset(s, 0, sizeof(*s));
for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
tmp = tmp->Next){
const struct rc_opcode_info * info;
- rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg);
+ rc_for_all_reads_mask(tmp, reg_count_callback, s);
if (tmp->Type == RC_INSTRUCTION_NORMAL) {
info = rc_get_opcode_info(tmp->U.I.Opcode);
if (info->Opcode == RC_OPCODE_BEGIN_TEX)
@@ -405,7 +406,9 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
s->num_tex_insts++;
s->num_insts++;
}
- s->num_temp_regs = max_reg + 1;
+ /* Increment here because reg_count_callback stores the max
+ * temporary reg index in s->num_temp_regs. */
+ s->num_temp_regs++;
}
static void print_stats(struct radeon_compiler * c)
@@ -437,10 +440,11 @@ static void print_stats(struct radeon_compiler * c)
"~%4u Presub Operations\n"
"~%4u OMOD Operations\n"
"~%4u Temporary Registers\n"
+ "~%4u Inline Literals\n"
"~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
- s.num_omod_ops, s.num_temp_regs);
+ s.num_omod_ops, s.num_temp_regs, s.num_inline_literals);
break;
default:
assert(0);
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h
index ac9691c816f..e7ccbb732d1 100644
--- a/src/gallium/drivers/r300/compiler/radeon_compiler.h
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler.h
@@ -161,6 +161,7 @@ struct rc_program_stats {
unsigned num_presub_ops;
unsigned num_temp_regs;
unsigned num_omod_ops;
+ unsigned num_inline_literals;
};
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.h b/src/gallium/drivers/r300/compiler/radeon_dataflow.h
index d8a627258ea..bb8d48206e2 100644
--- a/src/gallium/drivers/r300/compiler/radeon_dataflow.h
+++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.h
@@ -130,5 +130,6 @@ void rc_dataflow_swizzles(struct radeon_compiler * c, void *user);
/*@}*/
void rc_optimize(struct radeon_compiler * c, void *user);
+void rc_inline_literals(struct radeon_compiler *c, void *user);
#endif /* RADEON_DATAFLOW_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_inline_literals.c b/src/gallium/drivers/r300/compiler/radeon_inline_literals.c
new file mode 100644
index 00000000000..568a3d60e14
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_inline_literals.c
@@ -0,0 +1,140 @@
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_program.h"
+#include "radeon_program_constants.h"
+#include <stdio.h>
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+/* Try to encode a 32-bit IEEE-754 float as a 7-bit R300 inline literal.
+ *
+ * IEEE-754:
+ * 22:0 mantissa
+ * 30:23 exponent (bias 127)
+ * 31 sign
+ *
+ * R300:
+ * 2:0 mantissa
+ * 6:3 exponent (bias 7)
+ *
+ * The 7-bit encoding has no sign bit, so the sign of f is reported through
+ * the return value instead of being stored in the output byte.
+ *
+ * f: value to convert.
+ * r300_float_out: receives mantissa | (exponent << 3) on success; it is
+ * left untouched when the conversion fails.
+ *
+ * Returns 0 when f cannot be represented, i.e. when the unbiased exponent
+ * is outside [-7, 8] (this also rejects 0.0, denormals, infinities and
+ * NaNs, whose biased exponent is 0 or 255) or when any mantissa bit below
+ * the top three is set. Otherwise returns -1 if the sign bit of f is set
+ * and 1 if it is clear.
+ */
+static int ieee_754_to_r300_float(float f, unsigned char *r300_float_out)
+{
+ unsigned float_bits = *((unsigned *)&f);
+ /* XXX: Handle big-endian
+ * NOTE(review): the line above reads a float object through an unsigned
+ * pointer; copying the bytes with memcpy would avoid relying on the
+ * compiler tolerating this kind of type punning. */
+ unsigned mantissa = float_bits & 0x007fffff;
+ unsigned biased_exponent = (float_bits & 0x7f800000) >> 23;
+ unsigned negate = !!(float_bits & 0x80000000);
+ int exponent = biased_exponent - 127;
+ unsigned mantissa_mask = 0xff8fffff; /* every bit except mantissa bits 22:20 */
+ unsigned r300_exponent, r300_mantissa;
+
+ DBG("Converting %f (0x%x) to 7-bit:\n", f, float_bits);
+ DBG("Raw exponent = %d\n", exponent);
+
+ if (exponent < -7 || exponent > 8) {
+ DBG("Failed exponent out of range\n\n");
+ return 0;
+ }
+
+ if (mantissa & mantissa_mask) {
+ DBG("Failed mantisa has too many bits:\n"
+ "manitssa=0x%x mantissa_mask=0x%x, and=0x%x\n\n",
+ mantissa, mantissa_mask,
+ mantissa & mantissa_mask);
+ return 0;
+ }
+
+ r300_exponent = exponent + 7; /* rebias from 127 to 7; result is in 0..15 */
+ r300_mantissa = (mantissa & ~mantissa_mask) >> 20; /* keep the top three mantissa bits */
+ *r300_float_out = r300_mantissa | (r300_exponent << 3);
+
+ DBG("Success! r300_float = 0x%x\n\n", *r300_float_out);
+
+ if (negate)
+ return -1;
+ else
+ return 1;
+}
+/* Compiler pass: rewrite sources that read immediate constants so that they
+ * use a 7-bit inline literal (RC_FILE_INLINE) instead, whenever possible.
+ *
+ * For every source register of every instruction that reads an
+ * RC_CONSTANT_IMMEDIATE constant, each used swizzle channel is converted
+ * with ieee_754_to_r300_float(). The source is rewritten only if every used
+ * channel converts successfully and all of them produce the same 7-bit
+ * encoding. Channels may differ in sign: the sign is folded into the Negate
+ * mask of the source.
+ *
+ * On a rewrite the source gets File = RC_FILE_INLINE, Index = the 7-bit
+ * encoding, a swizzle selecting W on every used channel, and Negate XORed
+ * with the channels whose immediate was negative.
+ *
+ * c: compiler whose program is modified in place.
+ * user: not referenced by this pass.
+ */
+void rc_inline_literals(struct radeon_compiler *c, void *user)
+{
+ struct rc_instruction * inst;
+
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ const struct rc_opcode_info * info =
+ rc_get_opcode_info(inst->U.I.Opcode);
+
+ unsigned src_idx;
+ struct rc_constant * constant;
+ float float_value;
+ unsigned char r300_float; /* encoding shared by all channels accepted so far */
+ int ret;
+
+ /* XXX: Handle presub */
+
+ /* We aren't using rc_for_all_reads_src here, because presub
+ * sources need to be handled differently. */
+ for (src_idx = 0; src_idx < info->NumSrcRegs; src_idx++) {
+ unsigned new_swizzle;
+ unsigned use_literal = 0; /* nonzero while r300_float holds a valid candidate */
+ unsigned negate_mask = 0; /* one bit per channel whose immediate is negative */
+ unsigned swz, chan;
+ struct rc_src_register * src_reg =
+ &inst->U.I.SrcReg[src_idx];
+ swz = RC_SWIZZLE_UNUSED;
+ if (src_reg->File != RC_FILE_CONSTANT) {
+ continue;
+ }
+ constant =
+ &c->Program.Constants.Constants[src_reg->Index];
+ if (constant->Type != RC_CONSTANT_IMMEDIATE) {
+ continue;
+ }
+ new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+ for (chan = 0; chan < 4; chan++) {
+ unsigned char r300_float_tmp;
+ swz = GET_SWZ(src_reg->Swizzle, chan);
+ if (swz == RC_SWIZZLE_UNUSED) {
+ continue;
+ }
+ /* NOTE(review): only RC_SWIZZLE_UNUSED is filtered out above; if a swizzle can hold other non-component selectors, swz would index past the immediate here - confirm. */
+ float_value = constant->u.Immediate[swz];
+ ret = ieee_754_to_r300_float(float_value,
+ &r300_float_tmp);
+ /* Give up if this channel is not representable or needs a different literal than the earlier channels. */
+ if (!ret || (use_literal &&
+ r300_float != r300_float_tmp)) {
+ use_literal = 0;
+ break;
+ }
+
+ /* A negative immediate is expressed by flipping Negate; presumably that cannot reproduce the value once Abs is applied to the source - TODO confirm modifier order. */
+ if (ret == -1 && src_reg->Abs) {
+ use_literal = 0;
+ break;
+ }
+
+ if (!use_literal) {
+ r300_float = r300_float_tmp;
+ use_literal = 1;
+ }
+
+ /* Use RC_SWIZZLE_W for the inline constant, so
+ * it will become one of the alpha sources. */
+ SET_SWZ(new_swizzle, chan, RC_SWIZZLE_W);
+ if (ret == -1) {
+ negate_mask |= (1 << chan);
+ }
+ }
+
+ /* Also reached with use_literal == 0 when no channel of the source is used. */
+ if (!use_literal) {
+ continue;
+ }
+ src_reg->File = RC_FILE_INLINE;
+ src_reg->Index = r300_float;
+ src_reg->Swizzle = new_swizzle;
+ src_reg->Negate = src_reg->Negate ^ negate_mask;
+ }
+ }
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_translate.c b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c
index 7d9c8d1fab6..c6050bdf4dd 100644
--- a/src/gallium/drivers/r300/compiler/radeon_pair_translate.c
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c
@@ -268,7 +268,15 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
pair->Alpha.Arg[i].Source = source;
pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
- pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
+
+ if (istranscendent) {
+ pair->Alpha.Arg[i].Negate =
+ !!(inst->SrcReg[i].Negate &
+ inst->DstReg.WriteMask);
+ } else {
+ pair->Alpha.Arg[i].Negate =
+ !!(inst->SrcReg[i].Negate & RC_MASK_W);
+ }
}
}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_constants.h b/src/gallium/drivers/r300/compiler/radeon_program_constants.h
index 4f59c47815e..c07c492b0c9 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_constants.h
+++ b/src/gallium/drivers/r300/compiler/radeon_program_constants.h
@@ -85,7 +85,12 @@ typedef enum {
* Indicates this register should use the result of the presubtract
* operation.
*/
- RC_FILE_PRESUB
+ RC_FILE_PRESUB,
+
+ /**
+ * Indicates that the source index has been encoded as a 7-bit float.
+ */
+ RC_FILE_INLINE
} rc_register_file;
enum {
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.h b/src/gallium/drivers/r300/compiler/radeon_program_pair.h
index b6eb0ebe47d..085ff994501 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_pair.h
+++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.h
@@ -57,7 +57,7 @@ struct radeon_compiler;
struct rc_pair_instruction_source {
unsigned int Used:1;
- unsigned int File:3;
+ unsigned int File:4;
unsigned int Index:RC_REGISTER_INDEX_BITS;
};
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_print.c b/src/gallium/drivers/r300/compiler/radeon_program_print.c
index dc40d7fd4f2..e3d2104b250 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_print.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_print.c
@@ -109,6 +109,22 @@ static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func fun
}
}
+static void rc_print_inline_float(FILE * f, int index)
+{ /* Print the 7-bit inline literal stored in index to f, first as a decimal
+ * float and then as its raw encoding in hex, using the format
+ * "%f (0x%x)". The encoding is expanded back to IEEE-754 single precision:
+ * bits 6:3 are the exponent (bias 7) and bits 2:0 the top three mantissa
+ * bits. The sign bit of the rebuilt float is never set, so the magnitude
+ * is always printed as non-negative. */
+ int r300_exponent = (index >> 3) & 0xf;
+ unsigned r300_mantissa = index & 0x7;
+ unsigned float_exponent;
+ unsigned real_float; /* IEEE-754 bit pattern assembled below */
+ float * print_float = (float*) &real_float; /* NOTE(review): reads the unsigned through a float pointer (type punning) */
+
+ r300_exponent -= 7; /* remove the R300 bias */
+ float_exponent = r300_exponent + 127; /* apply the IEEE-754 bias */
+ real_float = (r300_mantissa << 20) | (float_exponent << 23);
+
+ fprintf(f, "%f (0x%x)", *print_float, index);
+
+}
+
static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
{
if (file == RC_FILE_NONE) {
@@ -118,6 +134,8 @@ static void rc_print_register(FILE * f, rc_register_file file, int index, unsign
case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break;
default: fprintf(f, "special[%i]", index); break;
}
+ } else if (file == RC_FILE_INLINE) {
+ rc_print_inline_float(f, index);
} else {
const char * filename;
switch(file) {