diff options
author | Timur Kristóf <timur.kristof@gmail.com> | 2021-05-28 21:53:06 +0200 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-06-09 16:48:51 +0000 |
commit | c92dab8e2b6964b6dbd9ea122d7ff819efd45244 (patch) | |
tree | e8446ea9472a507d3377a146bc40f34e197e651f /src/compiler | |
parent | 228169c87c62caaa8770482d95b2b79a904075ff (diff) |
nir: Add nir_op_sad_u8x4 which corresponds to AMD's v_sad_u8.
NIR currently doesn't have any intrinsics for a horizontal packed add,
so this one is modeled after AMD's v_sad_u8.
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Tony Wasserka <tony.wasserka@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11072>
Diffstat (limited to 'src/compiler')
-rw-r--r-- | src/compiler/nir/nir_opcodes.py | 18 | ||||
-rw-r--r-- | src/compiler/nir/nir_range_analysis.c | 3 |
2 files changed, 21 insertions, 0 deletions
```diff
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index eda8d11cb3d..b39c7b57498 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -1097,6 +1097,24 @@ if (bits == 0) {
 }
 """)
 
+triop_horiz("sad_u8x4", 1, 1, 1, 1, """
+uint8_t s0_b0 = (src0.x & 0x000000ff) >> 0;
+uint8_t s0_b1 = (src0.x & 0x0000ff00) >> 8;
+uint8_t s0_b2 = (src0.x & 0x00ff0000) >> 16;
+uint8_t s0_b3 = (src0.x & 0xff000000) >> 24;
+
+uint8_t s1_b0 = (src1.x & 0x000000ff) >> 0;
+uint8_t s1_b1 = (src1.x & 0x0000ff00) >> 8;
+uint8_t s1_b2 = (src1.x & 0x00ff0000) >> 16;
+uint8_t s1_b3 = (src1.x & 0xff000000) >> 24;
+
+dst.x = src2.x +
+        (s0_b0 > s1_b0 ? (s0_b0 - s1_b0) : (s1_b0 - s0_b0)) +
+        (s0_b1 > s1_b1 ? (s0_b1 - s1_b1) : (s1_b1 - s0_b1)) +
+        (s0_b2 > s1_b2 ? (s0_b2 - s1_b2) : (s1_b2 - s0_b2)) +
+        (s0_b3 > s1_b3 ? (s0_b3 - s1_b3) : (s1_b3 - s0_b3));
+""")
+
 # Combines the first component of each input to make a 3-component vector.
 
 triop_horiz("vec3", 3, 1, 1, 1, """
diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c
index 501084f14f5..e18d0446e3e 100644
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -1579,6 +1579,9 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
          case nir_op_u2u32:
             res = MIN2(src0, max);
             break;
+         case nir_op_sad_u8x4:
+            res = src2 + 4 * 255;
+            break;
          default:
             res = max;
             break;
```