summaryrefslogtreecommitdiff
path: root/src/amd/compiler
diff options
context:
space:
mode:
authorRhys Perry <pendingchaos02@gmail.com>2021-05-31 18:18:24 +0100
committerMarge Bot <eric+marge@anholt.net>2021-06-09 12:06:50 +0000
commit4870d7d829e57a993976d6da497e1202b1df2fa6 (patch)
tree44587e9fd837f98f3cad2d827cacf29cd1c4226b /src/amd/compiler
parent2fb436e92a522d3c620597c7662f7d6a4146a12c (diff)
aco: use v1b/v2b for ds_read_u8/ds_read_u16
The p_extract_vector isn't necessary. For ds_read_u8 and ds_read_u16, we used a 32-bit regclass, but didn't load 32 bits, and used dst_hint for vector loads when we shouldn't have. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4863 Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11113>
Diffstat (limited to 'src/amd/compiler')
-rw-r--r--src/amd/compiler/aco_instruction_selection.cpp5
-rw-r--r--src/amd/compiler/aco_opcodes.py5
2 files changed, 6 insertions, 4 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index bc9c11f96f1..ed87d757e03 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -3708,7 +3708,7 @@ Temp lds_load_callback(Builder& bld, const LoadEmitInfo &info,
const_offset /= const_offset_unit;
- RegClass rc = RegClass(RegType::vgpr, DIV_ROUND_UP(size, 4));
+ RegClass rc = RegClass::get(RegType::vgpr, size);
Temp val = rc == info.dst.regClass() && dst_hint.id() ? dst_hint : bld.tmp(rc);
Instruction *instr;
if (read2)
@@ -3717,9 +3717,6 @@ Temp lds_load_callback(Builder& bld, const LoadEmitInfo &info,
instr = bld.ds(op, Definition(val), offset, m, const_offset);
instr->ds().sync = info.sync;
- if (size < 4)
- val = bld.pseudo(aco_opcode::p_extract_vector, bld.def(RegClass::get(RegType::vgpr, size)), val, Operand(0u));
-
return val;
}
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py
index c399e1d86bb..a4f2688fe89 100644
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -1696,3 +1696,8 @@ for ver in ['gfx9', 'gfx10']:
sys.exit(1)
else:
op_to_name[key] = op.name
+
+# These instructions write the entire 32-bit VGPR, but it's not clear in Opcode's constructor that
+# it should be 32, since it works accidentally.
+assert(opcodes['ds_read_u8'].definition_size == 32)
+assert(opcodes['ds_read_u16'].definition_size == 32)