diff options
author | Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> | 2021-01-05 15:35:10 -0500 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-04-07 15:48:15 +0000 |
commit | 1286e73c2c0c5aac1bbc5a979230e9fd2c4a0600 (patch) | |
tree | e49de3143993d0f0c91b074226c157a0dc651bfc | |
parent | e91dec13276200cf7c376d94a888b4416dc284ea (diff) |
nir/lower_idiv: Add 8-bit and 16-bit lowering path
Roundtrip to a larger float and divide there. The extra details for
mod/rem are handled directly in integer space to simplify verification
of rounding details. The one issue is that the mantissa of the reciprocal might be
rounded down, which would produce incorrect results; adding 1 to it unconditionally
(proposed by Jonathan Marek) fixes this. The lowerings here were tested
exhaustively on all pairs of 16-bit integers.
v2: Update idiv lowering per Rhys Perry's comment.
v3: Rewrite lowerings.
v4: Remove useless ftrunc, fix 8-bit issue, simplify code.
v5: Remove useless ffloor
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Tested-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Tested-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8339>
-rw-r--r-- | src/compiler/nir/nir_lower_idiv.c | 45 |
1 file changed, 43 insertions, 2 deletions
diff --git a/src/compiler/nir/nir_lower_idiv.c b/src/compiler/nir/nir_lower_idiv.c
index b30500190ea..c2f58df6b8c 100644
--- a/src/compiler/nir/nir_lower_idiv.c
+++ b/src/compiler/nir/nir_lower_idiv.c
@@ -199,6 +199,45 @@ convert_instr_precise(nir_builder *bld, nir_op op,
 }
 
 static nir_ssa_def *
+convert_instr_small(nir_builder *b, nir_op op,
+                    nir_ssa_def *numer, nir_ssa_def *denom)
+{
+   unsigned sz = numer->bit_size;
+   nir_alu_type int_type = nir_op_infos[op].output_type | sz;
+   nir_alu_type float_type = nir_type_float | (sz * 2);
+
+   nir_ssa_def *p = nir_type_convert(b, numer, int_type, float_type);
+   nir_ssa_def *q = nir_type_convert(b, denom, int_type, float_type);
+
+   /* Take 1/q but offset mantissa by 1 to correct for rounding. This is
+    * needed for correct results and has been checked exhaustively for
+    * all pairs of 16-bit integers */
+   nir_ssa_def *rcp = nir_iadd_imm(b, nir_frcp(b, q), 1);
+
+   /* Divide by multiplying by adjusted reciprocal */
+   nir_ssa_def *res = nir_fmul(b, p, rcp);
+
+   /* Convert back to integer space with rounding inferred by type */
+   res = nir_type_convert(b, res, float_type, int_type);
+
+   /* Get remainder given the quotient */
+   if (op == nir_op_umod || op == nir_op_imod || op == nir_op_irem)
+      res = nir_isub(b, numer, nir_imul(b, denom, res));
+
+   /* Adjust for sign, see constant folding definition */
+   if (op == nir_op_imod) {
+      nir_ssa_def *zero = nir_imm_zero(b, 1, sz);
+      nir_ssa_def *diff_sign =
+               nir_ine(b, nir_ige(b, numer, zero), nir_ige(b, denom, zero));
+
+      nir_ssa_def *adjust = nir_iand(b, diff_sign, nir_ine(b, res, zero));
+      res = nir_iadd(b, res, nir_bcsel(b, adjust, denom, zero));
+   }
+
+   return res;
+}
+
+static nir_ssa_def *
 lower_idiv(nir_builder *b, nir_instr *instr, void *_data)
 {
    enum nir_lower_idiv_path *path = _data;
@@ -207,7 +246,9 @@ lower_idiv(nir_builder *b, nir_instr *instr, void *_data)
    nir_ssa_def *numer = nir_ssa_for_alu_src(b, alu, 0);
    nir_ssa_def *denom = nir_ssa_for_alu_src(b, alu, 1);
 
-   if (*path == nir_lower_idiv_precise)
+   if (numer->bit_size < 32)
+      return convert_instr_small(b, alu->op, numer, denom);
+   else if (*path == nir_lower_idiv_precise)
       return convert_instr_precise(b, alu->op, numer, denom);
    else
       return convert_instr(b, alu->op, numer, denom);
@@ -221,7 +262,7 @@ inst_is_idiv(const nir_instr *instr, UNUSED const void *_state)
 
    nir_alu_instr *alu = nir_instr_as_alu(instr);
 
-   if (alu->dest.dest.ssa.bit_size != 32)
+   if (alu->dest.dest.ssa.bit_size > 32)
       return false;
 
    switch (alu->op) {