summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Dave Airlie <airlied@redhat.com> 2018-01-22 08:39:46 +1000
committer: Dave Airlie <airlied@redhat.com> 2018-01-29 05:42:17 +1000
commit: a7ec366e503cc2b05d6920fa5027b0f001ae9e58 (patch)
tree: 861e302922a0ed8314ddf53bffdeb50ce93e4ab4
parent: e0e23ea69cab23b9193b1e7c568fd23fc7073071 (diff)
r600/shader: refactor mul hi/lo instruction emission
This just makes it a bit simpler for cayman vs eg.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c 370
1 files changed, 116 insertions, 254 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index cfc3400f925..cf669781202 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5224,6 +5224,31 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
return tgsi_helper_tempx_replicate(ctx);
}
+static int emit_mul_int_op(struct r600_bytecode *bc,
+ struct r600_bytecode_alu *alu_src)
+{
+ struct r600_bytecode_alu alu;
+ int i, r;
+ alu = *alu_src;
+ if (bc->chip_class == CAYMAN) {
+ for (i = 0; i < 4; i++) {
+ alu.dst.chan = i;
+ alu.dst.write = (i == alu_src->dst.chan);
+ alu.last = (i == 3);
+
+ r = r600_bytecode_add_alu(bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ alu.last = 1;
+ r = r600_bytecode_add_alu(bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -5465,50 +5490,25 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
}
/* 2. tmp0.z = lo (tmp0.x * src2) */
- if (ctx->bc->chip_class == CAYMAN) {
- for (j = 0 ; j < 4; j++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MULLO_UINT;
-
- alu.dst.sel = tmp0;
- alu.dst.chan = j;
- alu.dst.write = (j == 2);
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_MULLO_UINT;
- alu.src[0].sel = tmp0;
- alu.src[0].chan = 0;
- if (signed_op) {
- alu.src[1].sel = tmp2;
- alu.src[1].chan = 1;
- } else {
- r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
- }
+ alu.dst.sel = tmp0;
+ alu.dst.chan = 2;
+ alu.dst.write = 1;
- alu.last = (j == 3);
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
- }
+ alu.src[0].sel = tmp0;
+ alu.src[0].chan = 0;
+ if (signed_op) {
+ alu.src[1].sel = tmp2;
+ alu.src[1].chan = 1;
} else {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MULLO_UINT;
-
- alu.dst.sel = tmp0;
- alu.dst.chan = 2;
- alu.dst.write = 1;
-
- alu.src[0].sel = tmp0;
- alu.src[0].chan = 0;
- if (signed_op) {
- alu.src[1].sel = tmp2;
- alu.src[1].chan = 1;
- } else {
- r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
- }
-
- alu.last = 1;
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
+ r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
}
+ if ((r = emit_mul_int_op(ctx->bc, &alu)))
+ return r;
+
/* 3. tmp0.w = -tmp0.z */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP2_SUB_INT;
@@ -5526,51 +5526,26 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
return r;
/* 4. tmp0.y = hi (tmp0.x * src2) */
- if (ctx->bc->chip_class == CAYMAN) {
- for (j = 0 ; j < 4; j++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MULHI_UINT;
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_MULHI_UINT;
- alu.dst.sel = tmp0;
- alu.dst.chan = j;
- alu.dst.write = (j == 1);
+ alu.dst.sel = tmp0;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
- alu.src[0].sel = tmp0;
- alu.src[0].chan = 0;
+ alu.src[0].sel = tmp0;
+ alu.src[0].chan = 0;
- if (signed_op) {
- alu.src[1].sel = tmp2;
- alu.src[1].chan = 1;
- } else {
- r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
- }
- alu.last = (j == 3);
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
- }
+ if (signed_op) {
+ alu.src[1].sel = tmp2;
+ alu.src[1].chan = 1;
} else {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MULHI_UINT;
-
- alu.dst.sel = tmp0;
- alu.dst.chan = 1;
- alu.dst.write = 1;
-
- alu.src[0].sel = tmp0;
- alu.src[0].chan = 0;
-
- if (signed_op) {
- alu.src[1].sel = tmp2;
- alu.src[1].chan = 1;
- } else {
- r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
- }
-
- alu.last = 1;
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
+ r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
}
+ if ((r = emit_mul_int_op(ctx->bc, &alu)))
+ return r;
+
/* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP3_CNDE_INT;
@@ -5592,43 +5567,21 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
return r;
/* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
- if (ctx->bc->chip_class == CAYMAN) {
- for (j = 0 ; j < 4; j++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MULHI_UINT;
-
- alu.dst.sel = tmp0;
- alu.dst.chan = j;
- alu.dst.write = (j == 3);
-
- alu.src[0].sel = tmp0;
- alu.src[0].chan = 2;
-
- alu.src[1].sel = tmp0;
- alu.src[1].chan = 0;
-
- alu.last = (j == 3);
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
- }
- } else {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MULHI_UINT;
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_MULHI_UINT;
- alu.dst.sel = tmp0;
- alu.dst.chan = 3;
- alu.dst.write = 1;
+ alu.dst.sel = tmp0;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
- alu.src[0].sel = tmp0;
- alu.src[0].chan = 2;
+ alu.src[0].sel = tmp0;
+ alu.src[0].chan = 2;
- alu.src[1].sel = tmp0;
- alu.src[1].chan = 0;
+ alu.src[1].sel = tmp0;
+ alu.src[1].chan = 0;
- alu.last = 1;
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ if ((r = emit_mul_int_op(ctx->bc, &alu)))
return r;
- }
/* 7. tmp1.x = tmp0.x - tmp0.w */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
@@ -5685,98 +5638,46 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
return r;
/* 10. tmp0.z = hi(tmp0.x * src1) = q */
- if (ctx->bc->chip_class == CAYMAN) {
- for (j = 0 ; j < 4; j++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MULHI_UINT;
-
- alu.dst.sel = tmp0;
- alu.dst.chan = j;
- alu.dst.write = (j == 2);
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_MULHI_UINT;
- alu.src[0].sel = tmp0;
- alu.src[0].chan = 0;
+ alu.dst.sel = tmp0;
+ alu.dst.chan = 2;
+ alu.dst.write = 1;
- if (signed_op) {
- alu.src[1].sel = tmp2;
- alu.src[1].chan = 0;
- } else {
- r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
- }
+ alu.src[0].sel = tmp0;
+ alu.src[0].chan = 0;
- alu.last = (j == 3);
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
- }
+ if (signed_op) {
+ alu.src[1].sel = tmp2;
+ alu.src[1].chan = 0;
} else {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MULHI_UINT;
-
- alu.dst.sel = tmp0;
- alu.dst.chan = 2;
- alu.dst.write = 1;
-
- alu.src[0].sel = tmp0;
- alu.src[0].chan = 0;
-
- if (signed_op) {
- alu.src[1].sel = tmp2;
- alu.src[1].chan = 0;
- } else {
- r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
- }
-
- alu.last = 1;
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
+ r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
}
- /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
- if (ctx->bc->chip_class == CAYMAN) {
- for (j = 0 ; j < 4; j++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MULLO_UINT;
-
- alu.dst.sel = tmp0;
- alu.dst.chan = j;
- alu.dst.write = (j == 1);
+ if ((r = emit_mul_int_op(ctx->bc, &alu)))
+ return r;
- if (signed_op) {
- alu.src[0].sel = tmp2;
- alu.src[0].chan = 1;
- } else {
- r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
- }
+ /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_MULLO_UINT;
- alu.src[1].sel = tmp0;
- alu.src[1].chan = 2;
+ alu.dst.sel = tmp0;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
- alu.last = (j == 3);
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
- }
+ if (signed_op) {
+ alu.src[0].sel = tmp2;
+ alu.src[0].chan = 1;
} else {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MULLO_UINT;
-
- alu.dst.sel = tmp0;
- alu.dst.chan = 1;
- alu.dst.write = 1;
-
- if (signed_op) {
- alu.src[0].sel = tmp2;
- alu.src[0].chan = 1;
- } else {
- r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
- }
+ r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
+ }
- alu.src[1].sel = tmp0;
- alu.src[1].chan = 2;
+ alu.src[1].sel = tmp0;
+ alu.src[1].chan = 2;
- alu.last = 1;
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
- }
+ if ((r = emit_mul_int_op(ctx->bc, &alu)))
+ return r;
/* 12. tmp0.w = src1 - tmp0.y = r */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
@@ -7548,38 +7449,18 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
return r;
/* temp.x = sample_index*4 */
- if (ctx->bc->chip_class == CAYMAN) {
- for (i = 0 ; i < 4; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MULLO_INT;
- alu.src[0].sel = src_gpr;
- alu.src[0].chan = sample_chan;
- alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
- alu.src[1].value = 4;
- alu.dst.sel = temp;
- alu.dst.chan = i;
- alu.dst.write = i == 0;
- if (i == 3)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- } else {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MULLO_INT;
- alu.src[0].sel = src_gpr;
- alu.src[0].chan = sample_chan;
- alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
- alu.src[1].value = 4;
- alu.dst.sel = temp;
- alu.dst.chan = 0;
- alu.dst.write = 1;
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_MULLO_INT;
+ alu.src[0].sel = src_gpr;
+ alu.src[0].chan = sample_chan;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = 4;
+ alu.dst.sel = temp;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ r = emit_mul_int_op(ctx->bc, &alu);
+ if (r)
+ return r;
/* sample_index = temp.w >> temp.x */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
@@ -9966,7 +9847,7 @@ static int tgsi_umad(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
- int i, j, k, r;
+ int i, j, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
/* src0 * src1 */
@@ -9974,40 +9855,21 @@ static int tgsi_umad(struct r600_shader_ctx *ctx)
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
- if (ctx->bc->chip_class == CAYMAN) {
- for (j = 0 ; j < 4; j++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-
- alu.op = ALU_OP2_MULLO_UINT;
- for (k = 0; k < inst->Instruction.NumSrcRegs; k++) {
- r600_bytecode_src(&alu.src[k], &ctx->src[k], i);
- }
- alu.dst.chan = j;
- alu.dst.sel = ctx->temp_reg;
- alu.dst.write = (j == i);
- if (j == 3)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- } else {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-
- alu.dst.chan = i;
- alu.dst.sel = ctx->temp_reg;
- alu.dst.write = 1;
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MULLO_UINT;
- for (j = 0; j < 2; j++) {
- r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
- }
+ alu.dst.chan = i;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ alu.op = ALU_OP2_MULLO_UINT;
+ for (j = 0; j < 2; j++) {
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
}
+
+ alu.last = 1;
+ r = emit_mul_int_op(ctx->bc, &alu);
+ if (r)
+ return r;
}