summaryrefslogtreecommitdiff
path: root/src/amd/compiler/tests/test_to_hw_instr.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/amd/compiler/tests/test_to_hw_instr.cpp')
-rw-r--r--src/amd/compiler/tests/test_to_hw_instr.cpp113
1 files changed, 113 insertions, 0 deletions
diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp
index 3047a3c54d2..0fe8e168aee 100644
--- a/src/amd/compiler/tests/test_to_hw_instr.cpp
+++ b/src/amd/compiler/tests/test_to_hw_instr.cpp
@@ -384,3 +384,116 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
finish_to_hw_instr_test();
}
END_TEST
+
+BEGIN_TEST(to_hw_instr.subdword_constant)
+ PhysReg v0_lo{256};
+ PhysReg v0_hi{256};
+ PhysReg v0_b1{256};
+ PhysReg v1_hi{257};
+ v0_hi.reg_b += 2;
+ v0_b1.reg_b += 1;
+ v1_hi.reg_b += 2;
+
+ for (unsigned i = GFX9; i <= GFX10; i++) {
+ if (!setup_cs(NULL, (chip_class)i))
+ continue;
+
+ /* 16-bit pack */
+ //>> p_unit_test 0
+ //! v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32])
+ bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
+ bld.pseudo(aco_opcode::p_parallelcopy,
+ Definition(v0_lo, v2b), Definition(v0_hi, v2b),
+ Operand((uint16_t)0x3800), Operand(v1_hi, v2b));
+
+ //! p_unit_test 1
+ //~gfx9! v1: %_:v[0] = v_mov_b32 0x4205
+ //~gfx9! v2b: %_:v[0][16:32] = v_mov_b32 %_:v[1][16:32] dst_preserve
+ //~gfx10! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32])
+ bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
+ bld.pseudo(aco_opcode::p_parallelcopy,
+ Definition(v0_lo, v2b), Definition(v0_hi, v2b),
+ Operand((uint16_t)0x4205), Operand(v1_hi, v2b));
+
+ //TODO: optimize this with GFX10. do_pack_2x16() isn't used in this case
+ //! p_unit_test 2
+ //~gfx9! v2b: %_:v[0][16:32] = v_mov_b32 %_:v[0][0:16] dst_preserve
+ //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
+ //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
+ //~gfx10! v2b: %_:v[0][16:32] = v_mov_b32 %_:v[0][0:16] dst_preserve
+ //~gfx10! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32])
+ bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
+ bld.pseudo(aco_opcode::p_parallelcopy,
+ Definition(v0_lo, v2b), Definition(v0_hi, v2b),
+ Operand((uint16_t)0x4205), Operand(v0_lo, v2b));
+
+ //! p_unit_test 3
+ //! v1: %_:v[0] = v_mov_b32 0x3c003800
+ bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
+ bld.pseudo(aco_opcode::p_parallelcopy,
+ Definition(v0_lo, v2b), Definition(v0_hi, v2b),
+ Operand((uint16_t)0x3800), Operand((uint16_t)0x3c00));
+
+ //! p_unit_test 4
+ //! v1: %_:v[0] = v_mov_b32 0x43064205
+ bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
+ bld.pseudo(aco_opcode::p_parallelcopy,
+ Definition(v0_lo, v2b), Definition(v0_hi, v2b),
+ Operand((uint16_t)0x4205), Operand((uint16_t)0x4306));
+
+ //! p_unit_test 5
+ //! v1: %_:v[0] = v_mov_b32 0x38004205
+ bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
+ bld.pseudo(aco_opcode::p_parallelcopy,
+ Definition(v0_lo, v2b), Definition(v0_hi, v2b),
+ Operand((uint16_t)0x4205), Operand((uint16_t)0x3800));
+
+ /* 16-bit copy */
+ //! p_unit_test 6
+ //! v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_preserve
+ bld.pseudo(aco_opcode::p_unit_test, Operand(6u));
+ bld.pseudo(aco_opcode::p_parallelcopy,
+ Definition(v0_lo, v2b), Operand((uint16_t)0x3800));
+
+ //! p_unit_test 7
+ //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
+ //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
+ //~gfx10! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32])
+ bld.pseudo(aco_opcode::p_unit_test, Operand(7u));
+ bld.pseudo(aco_opcode::p_parallelcopy,
+ Definition(v0_lo, v2b), Operand((uint16_t)0x4205));
+
+ //! p_unit_test 8
+ //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
+ //~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0]
+ //~gfx10! v2b: %_:v[0][16:32] = v_pack_b32_f16 %_:v[0][0:16], 0x4205
+ bld.pseudo(aco_opcode::p_unit_test, Operand(8u));
+ bld.pseudo(aco_opcode::p_parallelcopy,
+ Definition(v0_hi, v2b), Operand((uint16_t)0x4205));
+
+ //! p_unit_test 9
+ //! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_preserve
+ //! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_preserve
+ bld.pseudo(aco_opcode::p_unit_test, Operand(9u));
+ bld.pseudo(aco_opcode::p_parallelcopy,
+ Definition(v0_b1, v2b), Operand((uint16_t)0x3800));
+
+ //! p_unit_test 10
+ //! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_preserve
+ //! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_preserve
+ bld.pseudo(aco_opcode::p_unit_test, Operand(10u));
+ bld.pseudo(aco_opcode::p_parallelcopy,
+ Definition(v0_b1, v2b), Operand((uint16_t)0x4205));
+
+ /* 8-bit copy */
+ //! p_unit_test 11
+ //! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_preserve
+ bld.pseudo(aco_opcode::p_unit_test, Operand(11u));
+ bld.pseudo(aco_opcode::p_parallelcopy,
+ Definition(v0_lo, v1b), Operand((uint8_t)0x42));
+
+ //! s_endpgm
+
+ finish_to_hw_instr_test();
+ }
+END_TEST