From 6f35ebd8a5435747b2a4ee58bbbfbc9cb29f03b8 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 14 Sep 2020 12:15:16 +0200 Subject: ir3: Support MOVMSK Signed-off-by: Rob Clark Part-of: --- src/freedreno/.gitlab-ci/reference/crash.log | 2 +- src/freedreno/ir3/disasm-a3xx.c | 112 ++++++++++++++------------- src/freedreno/ir3/instr-a3xx.h | 7 +- src/freedreno/ir3/ir3.c | 72 +++++++++++------ src/freedreno/ir3/ir3.h | 11 +++ src/freedreno/ir3/ir3_delay.c | 3 + src/freedreno/ir3/ir3_validate.c | 11 ++- src/freedreno/ir3/tests/disasm.c | 2 + 8 files changed, 136 insertions(+), 84 deletions(-) diff --git a/src/freedreno/.gitlab-ci/reference/crash.log b/src/freedreno/.gitlab-ci/reference/crash.log index db42f99d591..c88f1b9e007 100644 --- a/src/freedreno/.gitlab-ci/reference/crash.log +++ b/src/freedreno/.gitlab-ci/reference/crash.log @@ -3445,7 +3445,7 @@ shader-blocks: :5:0010:0022[a4827242x_46248300x] gather4b.a (s8)(y)hr16.z, hr32.x, s#1, t#35 :4:0011:0023[82342205x_cd064d21x] (rpt2)(ul)unknown(4,17) r1.y, (neg)c :5:0012:0026[a923bf8bx_81f95908x] (jp)samb.3d.a.p (u32)(xyzw)r34.w, hr33.x, hr43.x, s#15, t#64 - :1:0013:0027[3dda8123x_a0d91ccdx] (sy)(jp)(rpt1)cov.u8u16 (even)(pos_infinity)hr, 0xa0d91ccd + :1:0013:0027[3dda8123x_a0d91ccdx] (sy)(jp)(rpt1)unknown(1,2) Assertion `instr->cat6.opc == 0' failed. ----------------------------------------------- diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c index 66699e66880..c4363542588 100644 --- a/src/freedreno/ir3/disasm-a3xx.c +++ b/src/freedreno/ir3/disasm-a3xx.c @@ -440,66 +440,69 @@ static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr) { instr_cat1_t *cat1 = &instr->cat1; - if (cat1->ul) - fprintf(ctx->out, "(ul)"); - - if (cat1->src_type == cat1->dst_type) { - if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) { - /* special case (nmemonic?): */ - fprintf(ctx->out, "mova"); + switch (_OPC(1, cat1->opc)) { + case OPC_MOV: + if (cat1->src_type == cat1->dst_type) { + if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) { + /* special case (nmemonic?): */ + fprintf(ctx->out, "mova"); + } else { + fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]); + } } else { - fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]); + fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]); } - } else { - fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]); - } - fprintf(ctx->out, " "); + fprintf(ctx->out, " "); - if (cat1->even) - fprintf(ctx->out, "(even)"); + if (cat1->even) + fprintf(ctx->out, "(even)"); - if (cat1->pos_inf) - fprintf(ctx->out, "(pos_infinity)"); + if (cat1->pos_inf) + fprintf(ctx->out, "(pos_infinity)"); - print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32, - cat1->dst_rel); + print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32, + cat1->dst_rel); - fprintf(ctx->out, ", "); + fprintf(ctx->out, ", "); - /* ugg, have to special case this.. vs print_reg().. */ - if (cat1->src_im) { - if (type_float(cat1->src_type)) - fprintf(ctx->out, "(%f)", cat1->fim_val); - else if (type_uint(cat1->src_type)) - fprintf(ctx->out, "0x%08x", cat1->uim_val); - else - fprintf(ctx->out, "%d", cat1->iim_val); - } else if (cat1->src_rel && !cat1->src_c) { - /* I would just use %+d but trying to make it diff'able with - * libllvm-a3xx... - */ - char type = cat1->src_rel_c ? 'c' : 'r'; - const char *full = (type_size(cat1->src_type) == 32) ? "" : "h"; - if (cat1->off < 0) - fprintf(ctx->out, "%s%c", full, type, -cat1->off); - else if (cat1->off > 0) - fprintf(ctx->out, "%s%c", full, type, cat1->off); - else - fprintf(ctx->out, "%s%c", full, type); - } else { - struct reginfo src = { - .reg = (reg_t)cat1->src, - .full = type_size(cat1->src_type) == 32, - .r = cat1->src_r, - .c = cat1->src_c, - .im = cat1->src_im, - }; - print_src(ctx, &src); + /* ugg, have to special case this.. vs print_reg().. */ + if (cat1->src_im) { + if (type_float(cat1->src_type)) + fprintf(ctx->out, "(%f)", cat1->fim_val); + else if (type_uint(cat1->src_type)) + fprintf(ctx->out, "0x%08x", cat1->uim_val); + else + fprintf(ctx->out, "%d", cat1->iim_val); + } else if (cat1->src_rel && !cat1->src_c) { + /* I would just use %+d but trying to make it diff'able with + * libllvm-a3xx... + */ + char type = cat1->src_rel_c ? 'c' : 'r'; + const char *full = (type_size(cat1->src_type) == 32) ? "" : "h"; + if (cat1->off < 0) + fprintf(ctx->out, "%s%c", full, type, -cat1->off); + else if (cat1->off > 0) + fprintf(ctx->out, "%s%c", full, type, cat1->off); + else + fprintf(ctx->out, "%s%c", full, type); + } else { + struct reginfo src = { + .reg = (reg_t)cat1->src, + .full = type_size(cat1->src_type) == 32, + .r = cat1->src_r, + .c = cat1->src_c, + .im = cat1->src_im, + }; + print_src(ctx, &src); + } + break; + case OPC_MOVMSK: + fprintf(ctx->out, ".w%u", (cat1->repeat + 1) * 32); + fprintf(ctx->out, " "); + print_reg_dst(ctx, (reg_t)(cat1->dst), true, cat1->dst_rel); + break; } - - if ((debug & PRINT_VERBOSE) && (cat1->must_be_0)) - fprintf(ctx->out, "\t{1: %x}", cat1->must_be_0); } static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr) @@ -1341,7 +1344,8 @@ static const struct opc_info { OPC(0, OPC_SHPE, shpe), /* category 1: */ - OPC(1, OPC_MOV, ), + OPC(1, OPC_MOV, ), + OPC(1, OPC_MOVMSK, movmsk), /* category 2: */ OPC(2, OPC_ADD_F, add.f), @@ -1569,7 +1573,9 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) fprintf(ctx->out, "(eq)"); if (instr_sat(instr)) fprintf(ctx->out, "(sat)"); - if (ctx->repeat) + if (instr->opc_cat == 1 && instr->cat1.ul) + fprintf(ctx->out, "(ul)"); + if (ctx->repeat && opc != OPC_MOVMSK) fprintf(ctx->out, "(rpt%d)", ctx->repeat); else if ((instr->opc_cat == 2) && (instr->cat2.src1_r || instr->cat2.src2_r)) nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r; diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index cb87cc2ccf6..25864302242 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -83,6 +83,7 @@ typedef enum { /* category 1: */ OPC_MOV = _OPC(1, 0), + OPC_MOVMSK = _OPC(1, 3), /* category 2: */ OPC_ADD_F = _OPC(2, 0), @@ -446,7 +447,7 @@ typedef struct PACKED { uint32_t src_im : 1; uint32_t even : 1; uint32_t pos_inf : 1; - uint32_t must_be_0 : 2; + uint32_t opc : 2; uint32_t jmp_tgt : 1; uint32_t sync : 1; uint32_t opc_cat : 3; @@ -472,7 +473,7 @@ typedef struct PACKED { struct PACKED { uint32_t src1 : 12; uint32_t src1_c : 1; /* const */ - uint32_t dummy : 3; + int32_t dummy : 3; } c1; }; @@ -1031,7 +1032,7 @@ static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id) { switch (instr->opc_cat) { case 0: return instr->cat0.opc | instr->cat0.opc_hi << 4; - case 1: return 0; + case 1: return instr->cat1.opc; case 2: return instr->cat2.opc; case 3: return instr->cat3.opc; case 4: return instr->cat4.opc; diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 827f332f421..c99b5382bb7 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -159,41 +159,63 @@ static int emit_cat1(struct ir3_instruction *instr, void *ptr, struct ir3_info *info) { struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src = instr->regs[1]; instr_cat1_t *cat1 = ptr; - iassert(instr->regs_count == 2); - iassert_type(dst, type_size(instr->cat1.dst_type) == 32); - if (!(src->flags & IR3_REG_IMMED)) - iassert_type(src, type_size(instr->cat1.src_type) == 32); - - if (src->flags & IR3_REG_IMMED) { - cat1->iim_val = src->iim_val; - cat1->src_im = 1; - } else if (src->flags & IR3_REG_RELATIV) { - cat1->off = reg(src, info, instr->repeat, - IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV | - IR3_REG_SHARED); - cat1->src_rel = 1; - cat1->src_rel_c = !!(src->flags & IR3_REG_CONST); - } else { - cat1->src = reg(src, info, instr->repeat, - IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_SHARED); - cat1->src_c = !!(src->flags & IR3_REG_CONST); + switch (instr->opc) { + case OPC_MOV: { + struct ir3_register *src = instr->regs[1]; + iassert(instr->regs_count == 2); + iassert_type(dst, type_size(instr->cat1.dst_type) == 32); + if (!(src->flags & IR3_REG_IMMED)) + iassert_type(src, type_size(instr->cat1.src_type) == 32); + + if (src->flags & IR3_REG_IMMED) { + cat1->iim_val = src->iim_val; + cat1->src_im = 1; + } else if (src->flags & IR3_REG_RELATIV) { + cat1->off = reg(src, info, instr->repeat, + IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV | + IR3_REG_SHARED); + cat1->src_rel = 1; + cat1->src_rel_c = !!(src->flags & IR3_REG_CONST); + } else { + cat1->src = reg(src, info, instr->repeat, + IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_SHARED); + cat1->src_c = !!(src->flags & IR3_REG_CONST); + } + cat1->src_r = !!(src->flags & IR3_REG_R); + cat1->dst_type = instr->cat1.dst_type; + cat1->src_type = instr->cat1.src_type; + cat1->even = !!(dst->flags & IR3_REG_EVEN); + cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF); + cat1->repeat = instr->repeat; + break; + } + case OPC_MOVMSK: { + iassert(instr->regs_count == 1); + iassert(!(dst->flags & IR3_REG_HALF)); + iassert(!(dst->flags & IR3_REG_EVEN)); + iassert(!(dst->flags & IR3_REG_POS_INF)); + iassert(instr->repeat == 0); + iassert(util_is_power_of_two_or_zero(dst->wrmask + 1)); + + unsigned components = util_last_bit(dst->wrmask); + cat1->repeat = components - 1; + cat1->src_type = cat1->dst_type = TYPE_U32; + + break; + } + default: + iassert(0); } cat1->dst = reg(dst, info, instr->repeat, IR3_REG_RELATIV | IR3_REG_EVEN | IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF | IR3_REG_SHARED); - cat1->repeat = instr->repeat; - cat1->src_r = !!(src->flags & IR3_REG_R); cat1->ss = !!(instr->flags & IR3_INSTR_SS); cat1->ul = !!(instr->flags & IR3_INSTR_UL); - cat1->dst_type = instr->cat1.dst_type; cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV); - cat1->src_type = instr->cat1.src_type; - cat1->even = !!(dst->flags & IR3_REG_EVEN); - cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF); + cat1->opc = instr->opc; cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); cat1->sync = !!(instr->flags & IR3_INSTR_SY); cat1->opc_cat = 1; diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 58c7d56be5d..6982702bb9a 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1474,6 +1474,17 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src, return instr; } +static inline struct ir3_instruction * +ir3_MOVMSK(struct ir3_block *block, unsigned components) +{ + struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOVMSK); + + struct ir3_register *dst = __ssa_dst(instr); + dst->flags |= IR3_REG_SHARED; + dst->wrmask = (1 << components) - 1; + return instr; +} + static inline struct ir3_instruction * ir3_NOP(struct ir3_block *block) { diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c index 247ff6ee250..37b1a6428e4 100644 --- a/src/freedreno/ir3/ir3_delay.c +++ b/src/freedreno/ir3/ir3_delay.c @@ -102,6 +102,9 @@ ir3_delayslots(struct ir3_instruction *assigner, if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner)) return 0; + if (assigner->opc == OPC_MOVMSK) + return 4; + /* assigner must be alu: */ if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) || is_mem(consumer)) { diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c index 8aa0bf86e9c..810280aae2d 100644 --- a/src/freedreno/ir3/ir3_validate.c +++ b/src/freedreno/ir3/ir3_validate.c @@ -123,8 +123,15 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr) */ switch (opc_cat(instr->opc)) { case 1: /* move instructions */ - validate_reg_size(ctx, instr->regs[0], instr->cat1.dst_type); - validate_reg_size(ctx, instr->regs[1], instr->cat1.src_type); + if (instr->opc == OPC_MOVMSK) { + validate_assert(ctx, instr->regs_count == 1); + validate_assert(ctx, instr->regs[0]->flags & IR3_REG_SHARED); + validate_assert(ctx, !(instr->regs[0]->flags & IR3_REG_HALF)); + validate_assert(ctx, util_is_power_of_two_or_zero(instr->regs[0]->wrmask + 1)); + } else { + validate_reg_size(ctx, instr->regs[0], instr->cat1.dst_type); + validate_reg_size(ctx, instr->regs[1], instr->cat1.src_type); + } break; case 3: /* Validate that cat3 opc matches the src type. We've already checked that all diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c index 1747f05393b..28d2be37b9f 100644 --- a/src/freedreno/ir3/tests/disasm.c +++ b/src/freedreno/ir3/tests/disasm.c @@ -62,6 +62,8 @@ static const struct test { INSTR_6XX(20156004_00000c11, "(ul)mov.s32s32 r1.x, c"), INSTR_6XX(201100f4_00000000, "mova a0.x, hr0.x"), INSTR_6XX(20244905_00000410, "(rpt1)mov.f32f32 r1.y, (r)c260.x"), + /* dEQP-VK.subgroups.ballot.compute.compute */ + INSTR_6XX(260cc3c0_00000000, "movmsk.w128 r48.x"), /* movmsk.w128 sr48.x */ /* cat2 */ INSTR_6XX(40104002_0c210001, "add.f hr0.z, r0.y, c"), -- cgit v1.2.3