diff options
Diffstat (limited to 'src/amd/compiler/tests/test_to_hw_instr.cpp')
-rw-r--r-- | src/amd/compiler/tests/test_to_hw_instr.cpp | 113 |
1 files changed, 113 insertions, 0 deletions
diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp index 3047a3c54d2..0fe8e168aee 100644 --- a/src/amd/compiler/tests/test_to_hw_instr.cpp +++ b/src/amd/compiler/tests/test_to_hw_instr.cpp @@ -384,3 +384,116 @@ BEGIN_TEST(to_hw_instr.swap_subdword) finish_to_hw_instr_test(); } END_TEST + +BEGIN_TEST(to_hw_instr.subdword_constant) + PhysReg v0_lo{256}; + PhysReg v0_hi{256}; + PhysReg v0_b1{256}; + PhysReg v1_hi{257}; + v0_hi.reg_b += 2; + v0_b1.reg_b += 1; + v1_hi.reg_b += 2; + + for (unsigned i = GFX9; i <= GFX10; i++) { + if (!setup_cs(NULL, (chip_class)i)) + continue; + + /* 16-bit pack */ + //>> p_unit_test 0 + //! v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32]) + bld.pseudo(aco_opcode::p_unit_test, Operand(0u)); + bld.pseudo(aco_opcode::p_parallelcopy, + Definition(v0_lo, v2b), Definition(v0_hi, v2b), + Operand((uint16_t)0x3800), Operand(v1_hi, v2b)); + + //! p_unit_test 1 + //~gfx9! v1: %_:v[0] = v_mov_b32 0x4205 + //~gfx9! v2b: %_:v[0][16:32] = v_mov_b32 %_:v[1][16:32] dst_preserve + //~gfx10! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32]) + bld.pseudo(aco_opcode::p_unit_test, Operand(1u)); + bld.pseudo(aco_opcode::p_parallelcopy, + Definition(v0_lo, v2b), Definition(v0_hi, v2b), + Operand((uint16_t)0x4205), Operand(v1_hi, v2b)); + + //TODO: optimize this with GFX10. do_pack_2x16() isn't used in this case + //! p_unit_test 2 + //~gfx9! v2b: %_:v[0][16:32] = v_mov_b32 %_:v[0][0:16] dst_preserve + //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0] + //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0] + //~gfx10! v2b: %_:v[0][16:32] = v_mov_b32 %_:v[0][0:16] dst_preserve + //~gfx10! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32]) + bld.pseudo(aco_opcode::p_unit_test, Operand(2u)); + bld.pseudo(aco_opcode::p_parallelcopy, + Definition(v0_lo, v2b), Definition(v0_hi, v2b), + Operand((uint16_t)0x4205), Operand(v0_lo, v2b)); + + //! p_unit_test 3 + //! v1: %_:v[0] = v_mov_b32 0x3c003800 + bld.pseudo(aco_opcode::p_unit_test, Operand(3u)); + bld.pseudo(aco_opcode::p_parallelcopy, + Definition(v0_lo, v2b), Definition(v0_hi, v2b), + Operand((uint16_t)0x3800), Operand((uint16_t)0x3c00)); + + //! p_unit_test 4 + //! v1: %_:v[0] = v_mov_b32 0x43064205 + bld.pseudo(aco_opcode::p_unit_test, Operand(4u)); + bld.pseudo(aco_opcode::p_parallelcopy, + Definition(v0_lo, v2b), Definition(v0_hi, v2b), + Operand((uint16_t)0x4205), Operand((uint16_t)0x4306)); + + //! p_unit_test 5 + //! v1: %_:v[0] = v_mov_b32 0x38004205 + bld.pseudo(aco_opcode::p_unit_test, Operand(5u)); + bld.pseudo(aco_opcode::p_parallelcopy, + Definition(v0_lo, v2b), Definition(v0_hi, v2b), + Operand((uint16_t)0x4205), Operand((uint16_t)0x3800)); + + /* 16-bit copy */ + //! p_unit_test 6 + //! v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_preserve + bld.pseudo(aco_opcode::p_unit_test, Operand(6u)); + bld.pseudo(aco_opcode::p_parallelcopy, + Definition(v0_lo, v2b), Operand((uint16_t)0x3800)); + + //! p_unit_test 7 + //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0] + //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0] + //~gfx10! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32]) + bld.pseudo(aco_opcode::p_unit_test, Operand(7u)); + bld.pseudo(aco_opcode::p_parallelcopy, + Definition(v0_lo, v2b), Operand((uint16_t)0x4205)); + + //! p_unit_test 8 + //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0] + //~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0] + //~gfx10! v2b: %_:v[0][16:32] = v_pack_b32_f16 %_:v[0][0:16], 0x4205 + bld.pseudo(aco_opcode::p_unit_test, Operand(8u)); + bld.pseudo(aco_opcode::p_parallelcopy, + Definition(v0_hi, v2b), Operand((uint16_t)0x4205)); + + //! p_unit_test 9 + //! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_preserve + //! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_preserve + bld.pseudo(aco_opcode::p_unit_test, Operand(9u)); + bld.pseudo(aco_opcode::p_parallelcopy, + Definition(v0_b1, v2b), Operand((uint16_t)0x3800)); + + //! p_unit_test 10 + //! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_preserve + //! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_preserve + bld.pseudo(aco_opcode::p_unit_test, Operand(10u)); + bld.pseudo(aco_opcode::p_parallelcopy, + Definition(v0_b1, v2b), Operand((uint16_t)0x4205)); + + /* 8-bit copy */ + //! p_unit_test 11 + //! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_preserve + bld.pseudo(aco_opcode::p_unit_test, Operand(11u)); + bld.pseudo(aco_opcode::p_parallelcopy, + Definition(v0_lo, v1b), Operand((uint8_t)0x42)); + + //! s_endpgm + + finish_to_hw_instr_test(); + } +END_TEST |