summaryrefslogtreecommitdiff
path: root/src/compiler
diff options
context:
space:
mode:
authorJason Ekstrand <jason@jlekstrand.net>2021-06-18 09:28:59 -0500
committerJason Ekstrand <jason@jlekstrand.net>2021-06-21 09:04:08 -0500
commit2e08bae9b38a241b505e882d681f6f68d346937e (patch)
tree3bafb4090ed0b356181abffcc216a19747c9a80f /src/compiler
parent0afbfee8da30d465f141d214ad6a307ed54b3b03 (diff)
nir,vc4: Suffix a bunch of unorm 4x8 opcodes _vc4
Reviewed-by: Alyssa Rosenzweig <alyssa@collabora.com> Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11463>
Diffstat (limited to 'src/compiler')
-rw-r--r--src/compiler/nir/nir_opcodes.py92
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py12
2 files changed, 53 insertions, 51 deletions
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index efd7ecce212..4e48567340c 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -883,51 +883,6 @@ binop("fmax", tfloat, _2src_commutative + associative, "fmax(src0, src1)")
binop("imax", tint, _2src_commutative + associative, "src1 > src0 ? src1 : src0")
binop("umax", tuint, _2src_commutative + associative, "src1 > src0 ? src1 : src0")
-# Saturated vector add for 4 8bit ints.
-binop("usadd_4x8", tint32, _2src_commutative + associative, """
-dst = 0;
-for (int i = 0; i < 32; i += 8) {
- dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
-}
-""")
-
-# Saturated vector subtract for 4 8bit ints.
-binop("ussub_4x8", tint32, "", """
-dst = 0;
-for (int i = 0; i < 32; i += 8) {
- int src0_chan = (src0 >> i) & 0xff;
- int src1_chan = (src1 >> i) & 0xff;
- if (src0_chan > src1_chan)
- dst |= (src0_chan - src1_chan) << i;
-}
-""")
-
-# vector min for 4 8bit ints.
-binop("umin_4x8", tint32, _2src_commutative + associative, """
-dst = 0;
-for (int i = 0; i < 32; i += 8) {
- dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
-}
-""")
-
-# vector max for 4 8bit ints.
-binop("umax_4x8", tint32, _2src_commutative + associative, """
-dst = 0;
-for (int i = 0; i < 32; i += 8) {
- dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
-}
-""")
-
-# unorm multiply: (a * b) / 255.
-binop("umul_unorm_4x8", tint32, _2src_commutative + associative, """
-dst = 0;
-for (int i = 0; i < 32; i += 8) {
- int src0_chan = (src0 >> i) & 0xff;
- int src1_chan = (src1 >> i) & 0xff;
- dst |= ((src0_chan * src1_chan) / 255) << i;
-}
-""")
-
binop("fpow", tfloat, "", "bit_size == 64 ? powf(src0, src1) : pow(src0, src1)")
binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32,
@@ -1286,6 +1241,53 @@ binop("umul24_relaxed", tuint32, _2src_commutative + associative, "src0 * src1")
unop_convert("fisnormal", tbool1, tfloat, "isnormal(src0)")
unop_convert("fisfinite", tbool1, tfloat, "isfinite(src0)")
+# vc4-specific opcodes
+
+# Saturated vector add for 4 8bit ints.
+binop("usadd_4x8_vc4", tint32, _2src_commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
+}
+""")
+
+# Saturated vector subtract for 4 8bit ints.
+binop("ussub_4x8_vc4", tint32, "", """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ int src0_chan = (src0 >> i) & 0xff;
+ int src1_chan = (src1 >> i) & 0xff;
+ if (src0_chan > src1_chan)
+ dst |= (src0_chan - src1_chan) << i;
+}
+""")
+
+# vector min for 4 8bit ints.
+binop("umin_4x8_vc4", tint32, _2src_commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+}
+""")
+
+# vector max for 4 8bit ints.
+binop("umax_4x8_vc4", tint32, _2src_commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+}
+""")
+
+# unorm multiply: (a * b) / 255.
+binop("umul_unorm_4x8_vc4", tint32, _2src_commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ int src0_chan = (src0 >> i) & 0xff;
+ int src1_chan = (src1 >> i) & 0xff;
+ dst |= ((src0_chan * src1_chan) / 255) << i;
+}
+""")
+
# Mali-specific opcodes
unop("fsat_signed_mali", tfloat, ("fmin(fmax(src0, -1.0), 1.0)"))
unop("fclamp_pos_mali", tfloat, ("fmax(src0, 0.0)"))
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 8dc20390491..eef8027c7f6 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -133,8 +133,8 @@ optimizations = [
(('fadd(is_only_used_as_float)', 'a@16', 0.0), a, '!'+signed_zero_inf_nan_preserve_16),
(('fadd(is_only_used_as_float)', 'a@32', 0.0), a, '!'+signed_zero_inf_nan_preserve_32),
(('iadd', a, 0), a),
- (('usadd_4x8', a, 0), a),
- (('usadd_4x8', a, ~0), ~0),
+ (('usadd_4x8_vc4', a, 0), a),
+ (('usadd_4x8_vc4', a, ~0), ~0),
(('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
(('iand', ('ior', a, b), ('ior', a, c)), ('ior', a, ('iand', b, c))),
@@ -151,8 +151,8 @@ optimizations = [
(('fmul', 'a@16', 0.0), 0.0, '!'+signed_zero_inf_nan_preserve_16),
(('fmul', 'a@32', 0.0), 0.0, '!'+signed_zero_inf_nan_preserve_32),
(('imul', a, 0), 0),
- (('umul_unorm_4x8', a, 0), 0),
- (('umul_unorm_4x8', a, ~0), a),
+ (('umul_unorm_4x8_vc4', a, 0), 0),
+ (('umul_unorm_4x8_vc4', a, ~0), a),
(('~fmul', a, 1.0), a),
# The only effect a*1.0 can have is flushing denormals. If it's only used by
# a floating point instruction, they should flush any input denormals and
@@ -1333,8 +1333,8 @@ for op in ('extract_u8', 'extract_i8'):
optimizations.extend([
# Subtracts
- (('ussub_4x8', a, 0), a),
- (('ussub_4x8', a, ~0), 0),
+ (('ussub_4x8_vc4', a, 0), a),
+ (('ussub_4x8_vc4', a, ~0), 0),
# Lower all Subtractions first - they can get recombined later
(('fsub', a, b), ('fadd', a, ('fneg', b))),
(('isub', a, b), ('iadd', a, ('ineg', b))),