summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolai Hähnle <nicolai.haehnle@amd.com>2017-01-19 14:38:54 +0100
committerNicolai Hähnle <nicolai.haehnle@amd.com>2017-01-19 14:38:54 +0100
commit1e5a1928a55a551c76b406624a1b52a453a4cb47 (patch)
tree26f41b25b5ee31bb153cd8009371de8302015fc3
parentd7d32b3bfe86bd89d94d59393907bce1cb9dab7c (diff)
r600: double multiply can handle only one multiply at a time
It seems clear that trying to multiply two pairs of doubles would result in the temporary register getting overwritten by the second pair. So make the code more explicit.
-rw-r--r--src/gallium/drivers/r600/r600_shader.c36
1 files changed, 19 insertions, 17 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index ebe2744548..7d1452add3 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4326,25 +4326,27 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
int t1 = ctx->temp_reg;
- for (k = 0; k < 2; k++) {
- if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2))))
- continue;
+ /* t1 would get overwritten below if we actually tried to
+ * multiply two pairs of doubles at a time. */
+ assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
+ inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
- for (i = 0; i < 4; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ctx->inst_info->op;
- for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
- }
- alu.dst.sel = t1;
- alu.dst.chan = i;
- alu.dst.write = 1;
- if (i == 3)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ctx->inst_info->op;
+ for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
}
+ alu.dst.sel = t1;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
}
for (i = 0; i <= lasti; i++) {