From 64c134cedb8845f274b017ff9ca3c3ca4d34372b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 1 Sep 2013 11:35:56 -0400 Subject: freedreno/a3xx/compiler: add LIT Needed by glxgears and etuxracer ;-) Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 145 +++++++++++++++++++++- 1 file changed, 142 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c index 994f71c0d2c..be6ecdc41fb 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -65,10 +65,11 @@ static unsigned regmask_idx(struct ir3_register *reg) static void regmask_set(regmask_t regmask, struct ir3_register *reg, unsigned wrmask) { + unsigned ridx = regmask_idx(reg) & ~0x3; unsigned i; for (i = 0; i < 4; i++) { if (wrmask & (1 << i)) { - unsigned idx = regmask_idx(reg) + i; + unsigned idx = ridx + i; regmask[idx / 8] |= 1 << (idx % 8); } } @@ -179,8 +180,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, ctx->info.file_max[TGSI_FILE_OUTPUT] + 1; so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE]; - ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 + - ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1); + ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1); ret = tgsi_parse_init(&ctx->parser, tokens); if (ret != TGSI_PARSE_OK) @@ -602,6 +602,144 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, * native instructions: */ +/* LIT - Light Coefficients: + * dst.x = 1.0 + * dst.y = max(src.x, 0.0) + * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0.0 + * dst.w = 1.0 + * + * max.f tmp.y, src.y, {0.0} + * nop + * max.f tmp.z, src.w, {-128.0} + * cmps.f.ge tmp.w, (neg)src.x, {0.0} + * max.f dst.y, src.x, {0.0} + * nop + * min.f tmp.x, tmp.z, {128.0} + * add.s tmp.z, tmp.w, -1 + * log2 tmp.y, tmp.y + * (rpt1)nop + * (ss)mul.f tmp.x, tmp.x, tmp.y + * mov.f16f16 dst.x, {1.0} + * mov.f16f16 dst.w, {1.0} + * (rpt3)nop + * exp2 tmp.x, tmp.x + * (ss)sel.f16 dst.z, {0.0}, tmp.z, tmp.x + */ +static void +trans_lit(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct ir3_instruction *instr; + struct ir3_register *r; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register *tmp_src; + struct tgsi_src_register constval0, constval1, constval128; + struct tgsi_src_register *src = &inst->Src[0].Register; + struct tgsi_dst_register *dst = get_dst(ctx, inst); + + tmp_src = get_internal_temp_repl(ctx, &tmp_dst); + + if (is_const(src)) + src = get_unconst(ctx, src); + + get_immediate(ctx, &constval0, fui(0.0)); + get_immediate(ctx, &constval1, fui(1.0)); + get_immediate(ctx, &constval128, fui(128.0)); + + /* max.f tmp.y, src.y, {0.0} */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F); + add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Y); + add_src_reg(ctx, instr, src, TGSI_SWIZZLE_Y); + add_src_reg(ctx, instr, &constval0, constval0.SwizzleX); + + /* nop */ + ir3_instr_create(ctx->ir, 0, OPC_NOP); + + /* max.f tmp.z, src.w, {-128.0} */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F); + add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Z); + add_src_reg(ctx, instr, src, TGSI_SWIZZLE_W); + add_src_reg(ctx, instr, &constval128, + constval128.SwizzleX)->flags |= IR3_REG_NEGATE; + + /* cmps.f.ge tmp.w, (neg)src.x, {0.0} */ + instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); + add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_W); + add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X)->flags |= IR3_REG_NEGATE; + add_src_reg(ctx, instr, &constval0, constval0.SwizzleX); + instr->cat2.condition = IR3_COND_GE; + + /* max.f dst.y, src.x, {0.0} */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F); + add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_Y); + add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X); + add_src_reg(ctx, instr, &constval0, constval0.SwizzleX); + + /* nop */ + ir3_instr_create(ctx->ir, 0, OPC_NOP); + + /* min.f tmp.x, tmp.z, {128.0} */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F); + add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X); + add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Z); + add_src_reg(ctx, instr, &constval128, constval128.SwizzleX); + + /* add.s tmp.z, tmp.w, -1 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); + add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Z); + add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_W); + ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; + + /* log2 tmp.y, tmp.y */ + instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2); + r = add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Y); + add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Y); + regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_Y); + + /* (rpt1)nop */ + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1; + + /* (ss)mul.f tmp.x, tmp.x, tmp.y */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); + add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X); + add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X); + add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Y); + + /* mov.f16f16 dst.x, {1.0} */ + instr = ir3_instr_create(ctx->ir, 1, 0); + instr->cat1.src_type = get_ftype(ctx); + instr->cat1.dst_type = get_ftype(ctx); + add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X); + add_src_reg(ctx, instr, &constval1, constval1.SwizzleX); + + /* mov.f16f16 dst.w, {1.0} */ + instr = ir3_instr_create(ctx->ir, 1, 0); + instr->cat1.src_type = get_ftype(ctx); + instr->cat1.dst_type = get_ftype(ctx); + add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_W); + add_src_reg(ctx, instr, &constval1, constval1.SwizzleX); + + /* (rpt3)nop */ + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 3; + + /* exp2 tmp.x, tmp.x */ + instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2); + r = add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X); + add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X); + regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X); + + /* (ss)sel.f16 dst.z, {0.0}, tmp.z, tmp.x */ + instr = ir3_instr_create(ctx->ir, 3, + ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32); + add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_Z); + add_src_reg(ctx, instr, &constval0, constval0.SwizzleX); + add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Z); + add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X); + + put_dst(ctx, inst, dst); +} + static inline void get_swiz(unsigned *swiz, struct tgsi_src_register *src) { @@ -1266,6 +1404,7 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ } INSTR(MOV, instr_cat1), + INSTR(LIT, trans_lit), INSTR(RCP, instr_cat4, .opc = OPC_RCP), INSTR(RSQ, instr_cat4, .opc = OPC_RSQ), INSTR(SQRT, instr_cat4, .opc = OPC_SQRT), -- cgit v1.2.3