summary | refs | log | tree | commit | diff
path: root/src/compiler
diff options
context:
space:
mode:
author: Timur Kristóf <timur.kristof@gmail.com>, 2021-05-28 21:53:06 +0200
committer: Marge Bot <eric+marge@anholt.net>, 2021-06-09 16:48:51 +0000
commit: c92dab8e2b6964b6dbd9ea122d7ff819efd45244 (patch)
tree: e8446ea9472a507d3377a146bc40f34e197e651f /src/compiler
parent: 228169c87c62caaa8770482d95b2b79a904075ff (diff)
nir: Add nir_op_sad_u8x4 which corresponds to AMD's v_sad_u8.
NIR currently doesn't have any intrinsics for a horizontal packed add, so this one is modeled after AMD's v_sad_u8.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Tony Wasserka <tony.wasserka@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11072>
Diffstat (limited to 'src/compiler')
-rw-r--r-- src/compiler/nir/nir_opcodes.py | 18
-rw-r--r-- src/compiler/nir/nir_range_analysis.c | 3
2 files changed, 21 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index eda8d11cb3d..b39c7b57498 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -1097,6 +1097,24 @@ if (bits == 0) {
}
""")
+triop_horiz("sad_u8x4", 1, 1, 1, 1, """
+uint8_t s0_b0 = (src0.x & 0x000000ff) >> 0;
+uint8_t s0_b1 = (src0.x & 0x0000ff00) >> 8;
+uint8_t s0_b2 = (src0.x & 0x00ff0000) >> 16;
+uint8_t s0_b3 = (src0.x & 0xff000000) >> 24;
+
+uint8_t s1_b0 = (src1.x & 0x000000ff) >> 0;
+uint8_t s1_b1 = (src1.x & 0x0000ff00) >> 8;
+uint8_t s1_b2 = (src1.x & 0x00ff0000) >> 16;
+uint8_t s1_b3 = (src1.x & 0xff000000) >> 24;
+
+dst.x = src2.x +
+ (s0_b0 > s1_b0 ? (s0_b0 - s1_b0) : (s1_b0 - s0_b0)) +
+ (s0_b1 > s1_b1 ? (s0_b1 - s1_b1) : (s1_b1 - s0_b1)) +
+ (s0_b2 > s1_b2 ? (s0_b2 - s1_b2) : (s1_b2 - s0_b2)) +
+ (s0_b3 > s1_b3 ? (s0_b3 - s1_b3) : (s1_b3 - s0_b3));
+""")
+
# Combines the first component of each input to make a 3-component vector.
triop_horiz("vec3", 3, 1, 1, 1, """
diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c
index 501084f14f5..e18d0446e3e 100644
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -1579,6 +1579,9 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
case nir_op_u2u32:
res = MIN2(src0, max);
break;
+ case nir_op_sad_u8x4:
+ res = src2 + 4 * 255;
+ break;
default:
res = max;
break;