summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Timur Kristóf <timur.kristof@gmail.com> 2021-05-28 21:56:13 +0200
committer: Marge Bot <eric+marge@anholt.net> 2021-06-09 16:48:51 +0000
commit: fd6605367d00762e6f63dd6fc85b504ee0c1667a (patch)
tree: 308c8f9fdab39900fb78ea70f899e861788ce2b8
parent: c92dab8e2b6964b6dbd9ea122d7ff819efd45244 (diff)
aco: Implement nir_op_sad_u8x4.
Fix up the operand size for v_sad instructions, and implement the new NIR horizontal add.

There is no viable way to do this in SALU, so let's always use a VGPR destination.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Tony Wasserka <tony.wasserka@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11072>
-rw-r--r-- src/amd/compiler/aco_instruction_selection.cpp | 5
-rw-r--r-- src/amd/compiler/aco_instruction_selection_setup.cpp | 1
-rw-r--r-- src/amd/compiler/aco_opcodes.py | 8
3 files changed, 12 insertions, 2 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index b985b5aad06..40a1687c226 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -3031,6 +3031,11 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
}
break;
}
+ case nir_op_sad_u8x4: {
+ assert(dst.regClass() == v1);
+ emit_vop3a_instruction(ctx, instr, aco_opcode::v_sad_u8, dst, false, 3u, false);
+ break;
+ }
case nir_op_fquantize2f16: {
Temp src = get_alu_src(ctx, instr->src[0]);
Temp f16 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v1), src);
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 15f9ce33b84..f7cebe0579f 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -663,6 +663,7 @@ void init_context(isel_context *ctx, nir_shader *shader)
case nir_op_frexp_exp:
case nir_op_cube_face_index:
case nir_op_cube_face_coord:
+ case nir_op_sad_u8x4:
type = RegType::vgpr;
break;
case nir_op_f2i16:
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py
index 7a1099d6909..5267fb19b62 100644
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -238,8 +238,10 @@ class Opcode(object):
self.definition_size = def_dtype_sizes.get(def_dtype, self.operand_size)
# exceptions for operands:
- if 'sad_' in name:
+ if 'qsad_' in name:
self.operand_size = 0
+ elif 'sad_' in name:
+ self.operand_size = 32
elif name in ['v_mad_u64_u32', 'v_mad_i64_i32']:
self.operand_size = 0
elif self.operand_size == 24:
@@ -251,8 +253,10 @@ class Opcode(object):
self.operand_size = 32
# exceptions for definitions:
- if 'sad_' in name:
+ if 'qsad_' in name:
self.definition_size = 0
+ elif 'sad_' in name:
+ self.definition_size = 32
elif '_pk' in name:
self.definition_size = 32