From f1a48d54ea8989c3eb0a86510f68f707de523a72 Mon Sep 17 00:00:00 2001
From: Marek Olšák
Date: Thu, 7 Nov 2019 00:28:01 -0500
Subject: nir/serialize: reuse the writemask field for 2 src X swizzles of SSA ALU

Reviewed-by: Connor Abbott
---
 src/compiler/nir/nir_serialize.c | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c
index 85db23beef8..47934eead53 100644
--- a/src/compiler/nir/nir_serialize.c
+++ b/src/compiler/nir/nir_serialize.c
@@ -613,7 +613,8 @@ union packed_instr {
       unsigned no_signed_wrap:1;
       unsigned no_unsigned_wrap:1;
       unsigned saturate:1;
-      unsigned writemask:4;
+      /* Reg: writemask; SSA: swizzles for 2 srcs */
+      unsigned writemask_or_two_swizzles:4;
       unsigned op:9;
       unsigned packed_src_ssa_16bit:1;
       /* Scalarized ALUs always have the same header. */
@@ -787,6 +788,12 @@ is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
       unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
 
       for (unsigned chan = 0; chan < src_components; chan++) {
+         /* The swizzles for src0.x and src1.x are stored
+          * in writemask_or_two_swizzles for SSA ALUs.
+          */
+         if (alu->dest.dest.is_ssa && i < 2 && chan == 0)
+            continue;
+
          if (alu->src[i].swizzle[chan] != chan)
             return false;
       }
@@ -809,10 +816,20 @@ write_alu(write_ctx *ctx, const nir_alu_instr *alu)
    header.alu.no_signed_wrap = alu->no_signed_wrap;
    header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
    header.alu.saturate = alu->dest.saturate;
-   header.alu.writemask = alu->dest.write_mask;
    header.alu.op = alu->op;
    header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);
 
+   if (header.alu.packed_src_ssa_16bit &&
+       alu->dest.dest.is_ssa) {
+      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
+      header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
+      if (num_srcs > 1)
+         header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
+   } else if (!alu->dest.dest.is_ssa) {
+      /* For registers, this field is a writemask. */
+      header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
+   }
+
    write_dest(ctx, &alu->dest.dest, header, alu->instr.type);
 
    if (header.alu.packed_src_ssa_16bit) {
@@ -849,7 +866,6 @@ read_alu(read_ctx *ctx, union packed_instr header)
    alu->no_signed_wrap = header.alu.no_signed_wrap;
    alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
    alu->dest.saturate = header.alu.saturate;
-   alu->dest.write_mask = header.alu.writemask;
 
    read_dest(ctx, &alu->dest.dest, &alu->instr, header);
 
@@ -879,6 +895,20 @@ read_alu(read_ctx *ctx, union packed_instr header)
       }
    }
 
+   if (alu->dest.dest.is_ssa) {
+      alu->dest.write_mask =
+         u_bit_consecutive(0, alu->dest.dest.ssa.num_components);
+   } else {
+      alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
+   }
+
+   if (header.alu.packed_src_ssa_16bit &&
+       alu->dest.dest.is_ssa) {
+      alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
+      if (num_srcs > 1)
+         alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
+   }
+
    return alu;
 }
 
-- 
cgit v1.2.3