diff options
author | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2017-01-19 14:38:54 +0100 |
---|---|---|
committer | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2017-01-19 14:38:54 +0100 |
commit | 1e5a1928a55a551c76b406624a1b52a453a4cb47 (patch) | |
tree | 26f41b25b5ee31bb153cd8009371de8302015fc3 | |
parent | d7d32b3bfe86bd89d94d59393907bce1cb9dab7c (diff) |
r600: double multiply can handle only one multiply at a time
It seems clear that trying to multiply two pairs of doubles would result
in the temporary register getting overwritten by the second pair. So
make the code more explicit.
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 36 |
1 file changed, 19 insertions, 17 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index ebe2744548..7d1452add3 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4326,25 +4326,27 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 	int t1 = ctx->temp_reg;
 
-	for (k = 0; k < 2; k++) {
-		if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2))))
-			continue;
+	/* t1 would get overwritten below if we actually tried to
+	 * multiply two pairs of doubles at a time. */
+	assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
+	       inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
 
-		for (i = 0; i < 4; i++) {
-			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-			alu.op = ctx->inst_info->op;
-			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
-				r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
-			}
-			alu.dst.sel = t1;
-			alu.dst.chan = i;
-			alu.dst.write = 1;
-			if (i == 3)
-				alu.last = 1;
-			r = r600_bytecode_add_alu(ctx->bc, &alu);
-			if (r)
-				return r;
+	k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
+
+	for (i = 0; i < 4; i++) {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ctx->inst_info->op;
+		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+			r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
 		}
+		alu.dst.sel = t1;
+		alu.dst.chan = i;
+		alu.dst.write = 1;
+		if (i == 3)
+			alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
 	}
 
 	for (i = 0; i <= lasti; i++) {