summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Rhys Perry <pendingchaos02@gmail.com> 2020-08-24 20:00:10 +0100
committer: Marge Bot <eric+marge@anholt.net> 2020-09-04 13:03:50 +0000
commit: 8faf85f68770ee3e060bb74c87e857070f336a02 (patch)
tree: 4216edbc971393e9b93da009315375da1b01bd2b
parent: 663c4d53771a1b00ed49acb08768fd0c01cb9b8e (diff)
aco: fix byte_align_scalar for 3 dword vectors
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Fixes: fe08f0ccf94a7315bded5868b4f6a8bae744de79 ('aco: add byte_align_scalar() & trim_subdword_vector() helper functions')
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4710>
-rw-r--r-- src/amd/compiler/aco_instruction_selection.cpp 15
1 file changed, 11 insertions, 4 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 9b566442f51..f7205cd7ef3 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -447,10 +447,17 @@ void byte_align_scalar(isel_context *ctx, Temp vec, Operand offset, Temp dst)
emit_split_vector(ctx, dst, 2);
else
emit_extract_vector(ctx, tmp, 0, dst);
- } else if (vec.size() == 4) {
- Temp lo = bld.tmp(s2), hi = bld.tmp(s2);
- bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), vec);
- hi = bld.pseudo(aco_opcode::p_extract_vector, bld.def(s1), hi, Operand(0u));
+ } else if (vec.size() == 3 || vec.size() == 4) {
+ Temp lo = bld.tmp(s2), hi;
+ if (vec.size() == 3) {
+ /* this can happen if we use VMEM for a uniform load */
+ hi = bld.tmp(s1);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), vec);
+ } else {
+ hi = bld.tmp(s2);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), vec);
+ hi = bld.pseudo(aco_opcode::p_extract_vector, bld.def(s1), hi, Operand(0u));
+ }
if (select != Temp())
hi = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), hi, Operand(0u), bld.scc(select));
lo = bld.sop2(aco_opcode::s_lshr_b64, bld.def(s2), bld.def(s1, scc), lo, shift);