diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2014-12-18 21:39:30 -0500 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2014-12-30 11:14:15 -0500 |
commit | aed643bbba2fd8d76dd78474e1bf067a7a00f053 (patch) | |
tree | 117b31cfc226d1539bb69681c8321ba2c6b1ad60 | |
parent | 3105462cb9dd309af2b3119795ab79dff7cebafb (diff) |
R600/SI: Add assembler support for VOP2 instructions
32-bit and 64-bit encodings are supported for integer ops, and 32-bit
encodings are supported for floating-point ops. Support for instruction
modifiers is required for full floating-point support.
-rw-r--r-- | docs/R600Usage.rst | 20 | ||||
-rw-r--r-- | lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp | 179 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrFormats.td | 6 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrInfo.td | 1 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 35 | ||||
-rw-r--r-- | lib/Target/R600/SIRegisterInfo.td | 4 | ||||
-rw-r--r-- | test/MC/R600/vop2-err.s | 35 | ||||
-rw-r--r-- | test/MC/R600/vop2.s | 241 |
8 files changed, 497 insertions, 24 deletions
diff --git a/docs/R600Usage.rst b/docs/R600Usage.rst index 4e954398e2b..6b50736df9f 100644 --- a/docs/R600Usage.rst +++ b/docs/R600Usage.rst @@ -53,3 +53,23 @@ wait for. // Wait for vmcnt counter to be 1. s_waitcnt vmcnt(1) +VOP2 Instructions +----------------- + +All 32-bit encodings of VOP2 instructions are supported. 64-bit encodings of +integer operations only are also supported. + +The assembler will automatically detect which encoding size to use for +VOP2 instructions based on the operands. If you want to force a specific +encoding size, you can add an _e32 (for 32-bit encoding) or _e64 (for 64-bit +encoding) suffix to the instruction. Most, but not all instructions support +an explicit suffix. These are all valid assembly +strings: + +.. code-block:: nasm + + v_mul_i32_i24 v1, v2, v3 + v_mul_i32_i24_e32 v1, v2, v3 + v_mul_i32_i24_e64 v1, v2, v3 + + diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp index 83494d995c5..5c2679e38c5 100644 --- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" @@ -74,17 +75,29 @@ class AMDGPUOperand : public MCParsedAsmOperand { Register } Kind; + SMLoc StartLoc, EndLoc; public: AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {} + enum TokenTy { + Default, + E32, + E64 + }; + struct TokOp { const char *Data; unsigned Length; + char *DataE32; + char *DataE64; + unsigned ExtendedLength; + enum TokenTy Type; }; struct ImmOp { + bool IsFPImm; int64_t Val; }; @@ -99,8 +112,18 @@ public: RegOp Reg; }; + ~AMDGPUOperand() { + if(isToken()) { + std::free(Tok.DataE32); + std::free(Tok.DataE64); + } + } + void addImmOperands(MCInst &Inst, unsigned N) 
const { - Inst.addOperand(MCOperand::CreateImm(getImm())); + if (Imm.IsFPImm) + Inst.addOperand(MCOperand::CreateFPImm(getImm())); + else + Inst.addOperand(MCOperand::CreateImm(getImm())); } void addRegOperands(MCInst &Inst, unsigned N) const { @@ -114,27 +137,67 @@ public: addImmOperands(Inst, N); } + void addRegAndInputMods(MCInst Inst, unsigned N) const { + //Inst.addOperand(MCOperand::CreateInputMod(getInputMod())); + addRegOperands(Inst, N); } StringRef getToken() const { - return StringRef(Tok.Data, Tok.Length); + switch (Tok.Type) { + case E32: return StringRef(Tok.DataE32, Tok.ExtendedLength); + case E64: return StringRef(Tok.DataE64, Tok.ExtendedLength); + default: return StringRef(Tok.Data, Tok.Length); + + } + } + + bool defaultTokenHasSuffix() const { + StringRef Token(Tok.Data, Tok.Length); + + return Token.endswith("_e32") || Token.endswith("_e64"); } bool isToken() const override { return Kind == Token; } - bool isImm() const override { - return Kind == Immediate; + enum TokenTy getTokenType() { + assert(isToken()); + return Tok.Type; } - bool is64BitInlineImm() const { - return isImm() && Imm.Val <= -1 && Imm.Val >= -16; + void setTokenType(enum TokenTy Type) { + assert(isToken()); + Tok.Type = Type; + switch(Type) { + default: break; + case E32: + if (!Tok.DataE32) { + std::string E32 = std::string(Tok.Data, Tok.Length) + "_e32"; + Tok.DataE32 = (char*)std::malloc(Tok.ExtendedLength); + std::memcpy(Tok.DataE32, E32.data(), Tok.ExtendedLength); + } + break; + case E64: + if (!Tok.DataE64) { + std::string E64 = std::string(Tok.Data, Tok.Length) + "_e64"; + Tok.DataE64 = (char*)std::malloc(Tok.ExtendedLength); + std::memcpy(Tok.DataE64, E64.data(), Tok.ExtendedLength); + } + break; + } } - bool isImm32Bit() const { - return isImm() && isUInt<32>(Imm.Val); + bool isImm() const override { + return Kind == Immediate; + } + + bool isInlineImm() const { + float F = APInt(32, Imm.Val).bitsToFloat(); + return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) || 
+ (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 || + F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0)); } int64_t getImm() const { @@ -158,14 +221,30 @@ public: } bool isSSrc32() const { - return isImm32Bit() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID)); + return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID)); } bool isSSrc64() const { - return isImm32Bit() || is64BitInlineImm() || + return isImm() || isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)); } + bool isVCSrc32() const { + return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); + } + + bool isVCSrc64() const { + return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID)); + } + + bool isVSrc32() const { + return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); + } + + bool isVSrc64() const { + return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID)); + } + bool isMem() const override { return false; } @@ -180,18 +259,26 @@ public: void print(raw_ostream &OS) const override { } - static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc) { + static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc, + bool IsFPImm = false) { auto Op = llvm::make_unique<AMDGPUOperand>(Immediate); Op->Imm.Val = Val; + Op->Imm.IsFPImm = IsFPImm; Op->StartLoc = Loc; Op->EndLoc = Loc; return Op; } - static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) { + static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc, + bool HasExplicitEncodingSize = true) { auto Res = llvm::make_unique<AMDGPUOperand>(Token); Res->Tok.Data = Str.data(); Res->Tok.Length = Str.size(); + Res->Tok.Type = Default; + // Size of the Token plus 4 bytes for _e32 or _e64. 
+ Res->Tok.ExtendedLength = Str.size() + 4; + Res->Tok.DataE32 = nullptr; + Res->Tok.DataE64 = nullptr; Res->StartLoc = Loc; Res->EndLoc = Loc; return Res; @@ -337,10 +424,44 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, Out.EmitInstruction(Inst, STI); return false; case Match_MissingFeature: - return Error(IDLoc, "instruction use requires an option to be enabled"); - case Match_MnemonicFail: + + // Fall-through + // FIXME better error message for Match_MissingFeature + case Match_MnemonicFail: { + // For VOP1, VOP2, and VOC instructions, the mnemonic defined in + // TableGen has either an _e32 or _e64 suffix, but the assembler + // should also accept mnemonics without a suffix. + AMDGPUOperand &Mnemonic = ((AMDGPUOperand&)*Operands[0]); + if (!Mnemonic.defaultTokenHasSuffix() && + Mnemonic.getTokenType() == AMDGPUOperand::Default) { + Mnemonic.setTokenType(AMDGPUOperand::E32); + if (!MatchAndEmitInstruction(IDLoc, Opcode, Operands, Out, + ErrorInfo, MatchingInlineAsm)) + return false; + return true; + } return Error(IDLoc, "unrecognized instruction mnemonic"); + } case Match_InvalidOperand: { + AMDGPUOperand &Mnemonic = ((AMDGPUOperand&)*Operands[0]); + if (!Mnemonic.defaultTokenHasSuffix() && + Mnemonic.getTokenType() != AMDGPUOperand::E64) { + + // We have failed to match this instruction so try with an _e32 or + // _e64 suffix. + if (Mnemonic.getTokenType() == AMDGPUOperand::Default) + Mnemonic.setTokenType(AMDGPUOperand::E32); + else { + assert(Mnemonic.getTokenType() == AMDGPUOperand::E32); + Mnemonic.setTokenType(AMDGPUOperand::E64); + } + // We failed to match this as a 32-bit instruction, try it as a + // 64-bit instruction. 
+ if (!MatchAndEmitInstruction(IDLoc, Opcode, Operands, Out, + ErrorInfo, MatchingInlineAsm)) + return false; + } + SMLoc ErrorLoc = IDLoc; if (ErrorInfo != ~0ULL) { if (ErrorInfo >= Operands.size()) @@ -371,15 +492,45 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail) return ResTy; + bool Negate = false; + if (getLexer().getKind()== AsmToken::Minus) { + Parser.Lex(); + Negate = true; + } + switch(getLexer().getKind()) { case AsmToken::Integer: { SMLoc S = Parser.getTok().getLoc(); int64_t IntVal; if (getParser().parseAbsoluteExpression(IntVal)) return MatchOperand_ParseFail; + APInt IntVal32(32, IntVal); + if (IntVal32.getSExtValue() != IntVal) { + Error(S, "invalid immediate: only 32-bit values are legal"); + return MatchOperand_ParseFail; + } + + IntVal = IntVal32.getSExtValue(); + if (Negate) + IntVal *= -1; Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S)); return MatchOperand_Success; } + case AsmToken::Real: { + // FIXME: We should emit an error if a double precisions floating-point + // value is used. I'm not sure the best way to detect this. 
+ SMLoc S = Parser.getTok().getLoc(); + int64_t IntVal; + if (getParser().parseAbsoluteExpression(IntVal)) + return MatchOperand_ParseFail; + + APFloat F((float)APInt(64, IntVal).bitsToDouble()); + if (Negate) + F.changeSign(); + Operands.push_back( + AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S)); + return MatchOperand_Success; + } case AsmToken::Identifier: { SMLoc S, E; unsigned RegNo; diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 2f61cc18d7e..efce29784d6 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -136,6 +136,8 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> : let VOP3 = 1; let VALU = 1; + let isCodeGenOnly = 0; + int Size = 8; } @@ -575,7 +577,9 @@ class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : VOP1e<op>; class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> : - VOP2Common <outs, ins, asm, pattern>, VOP2e<op>; + VOP2Common <outs, ins, asm, pattern>, VOP2e<op> { + let isCodeGenOnly = 0; +} class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> : VOP3Common <outs, ins, asm, pattern>, VOP3be<op>; diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index bb83cf02e4d..0976c72429f 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -836,6 +836,7 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOP <opName>, SIMCInstr<opName#"_e64", SISubtarget.NONE> { let isPseudo = 1; + let isCodeGenOnly = 1; } class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> : diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 8029686cffb..5c0738d3bf8 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -35,8 +35,8 @@ def isSICI : Predicate< def isCI : Predicate<"Subtarget.getGeneration() " ">= 
AMDGPUSubtarget::SEA_ISLANDS">; def isVI : Predicate < - "Subtarget.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS" ->; + "Subtarget.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, + AssemblerPredicate<"FeatureVolcanicIslands">; def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">; @@ -1401,11 +1401,18 @@ defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32", defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9, 0x6>, "v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24 >; -//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "v_mul_hi_i32_i24", []>; + +defm V_MUL_HI_I32_I24 : VOP2Inst <vop2<0xa,0x7>, "v_mul_hi_i32_i24", + VOP_I32_I32_I32 +>; + defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb, 0x8>, "v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24 >; -//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "v_mul_hi_u32_u24", []>; + +defm V_MUL_HI_U32_U24 : VOP2Inst <vop2<0xc,0x9>, "v_mul_hi_u32_u24", + VOP_I32_I32_I32 +>; defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32, fminnum>; @@ -1543,14 +1550,24 @@ defm V_LDEXP_F32 : VOP2Inst <vop2<0x2b>, "v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp >; -////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>; -////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>; -////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>; +defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <vop2<0x2c>, "v_cvt_pkaccum_u8_f32", + VOP_I32_F32_F32 +>; +defm V_CVT_PKNORM_I16_F32 : VOP2Inst <vop2<0x2d>, "v_cvt_pknorm_i16_f32", + VOP_I32_F32_F32 +>; +defm V_CVT_PKNORM_U16_F32 : VOP2Inst <vop2<0x2e>, "v_cvt_pknorm_u16_f32", + VOP_I32_F32_F32 +>; defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <vop2<0x2f>, "v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, int_SI_packf16 >; -////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>; -////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>; +defm V_CVT_PK_U16_U32 : VOP2Inst <vop2<0x30>, "v_cvt_pk_u16_u32", + 
VOP_I32_F32_F32 +>; +defm V_CVT_PK_I16_I32 : VOP2Inst <vop2<0x31>, "v_cvt_pk_i16_i32", + VOP_I32_F32_F32 +>; } // End let SubtargetPredicate = SICI //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index d4111c24ae0..d849e0844fc 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -245,11 +245,13 @@ def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>; def VSrc_32 : RegisterOperand<VS_32> { let OperandNamespace = "AMDGPU"; let OperandType = "OPERAND_REG_IMM32"; + let ParserMatchClass = RegImmMatcher<"VSrc32">; } def VSrc_64 : RegisterOperand<VS_64> { let OperandNamespace = "AMDGPU"; let OperandType = "OPERAND_REG_IMM32"; + let ParserMatchClass = RegImmMatcher<"VSrc64">; } //===----------------------------------------------------------------------===// @@ -259,11 +261,13 @@ def VSrc_64 : RegisterOperand<VS_64> { def VCSrc_32 : RegisterOperand<VS_32> { let OperandNamespace = "AMDGPU"; let OperandType = "OPERAND_REG_INLINE_C"; + let ParserMatchClass = RegImmMatcher<"VCSrc32">; } def VCSrc_64 : RegisterOperand<VS_64> { let OperandNamespace = "AMDGPU"; let OperandType = "OPERAND_REG_INLINE_C"; + let ParserMatchClass = RegImmMatcher<"VCSrc64">; } //===----------------------------------------------------------------------===// diff --git a/test/MC/R600/vop2-err.s b/test/MC/R600/vop2-err.s new file mode 100644 index 00000000000..1019a0b9b79 --- /dev/null +++ b/test/MC/R600/vop2-err.s @@ -0,0 +1,35 @@ +// RUN: not llvm-mc -arch=r600 -mcpu=SI %s 2>&1 | FileCheck %s + +//===----------------------------------------------------------------------===// +// Generic checks +//===----------------------------------------------------------------------===// + +v_mul_i32_i24 v1, v2, 100 +// CHECK: error: invalid operand for instruction +// CHECK: error: invalid operand for instruction + 
+//===----------------------------------------------------------------------===// +// _e32 checks +//===----------------------------------------------------------------------===// + +// Immediate src1 +v_mul_i32_i24_e32 v1, v2, 100 +// CHECK: error: invalid operand for instruction + +// sgpr src1 +v_mul_i32_i24_e32 v1, v2, s3 +// CHECK: error: invalid operand for instruction + +//===----------------------------------------------------------------------===// +// _e64 checks +//===----------------------------------------------------------------------===// + +// Immediate src0 +v_mul_i32_i24_e64 v1, 100, v3 +// CHECK: error: invalid operand for instruction + +// Immediate src1 +v_mul_i32_i24_e64 v1, v2, 100 +// CHECK: error: invalid operand for instruction + +// TODO: Constant bus restrictions diff --git a/test/MC/R600/vop2.s b/test/MC/R600/vop2.s new file mode 100644 index 00000000000..48666a6b58c --- /dev/null +++ b/test/MC/R600/vop2.s @@ -0,0 +1,241 @@ +// RUN: llvm-mc -arch=r600 -mcpu=SI -show-encoding %s | FileCheck %s + +//===----------------------------------------------------------------------===// +// Generic Checks for floating-point instructions (These have modifiers). 
+//===----------------------------------------------------------------------===// + +// TODO: 64-bit encoding of instructions with modifiers + +// _e32 suffix +// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06] +v_add_f32_e32 v1, v2, v3 + +// src0 inline immediate +// CHECK: v_add_f32_e32 v1, 1.0, v3 ; encoding: [0xf2,0x06,0x02,0x06] +v_add_f32 v1, 1.0, v3 + +// src0 negative inline immediate +// CHECK: v_add_f32_e32 v1, -1.0, v3 ; encoding: [0xf3,0x06,0x02,0x06] +v_add_f32 v1, -1.0, v3 + +// src0 literal +// CHECK: v_add_f32_e32 v1, 0x42c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0x42] +v_add_f32 v1, 100.0, v3 + +// src1 negative literal +// CHECK: v_add_f32_e32 v1, 0xc2c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0xc2] +v_add_f32 v1, -100.0, v3 + +//===----------------------------------------------------------------------===// +// Generic Checks for integer instructions (These don't have modifiers). +//===----------------------------------------------------------------------===// + +// _e32 suffix +// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12] +v_mul_i32_i24_e32 v1, v2, v3 + +// _e64 suffix +// CHECK: v_mul_i32_i24_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x02,0x00] +v_mul_i32_i24_e64 v1, v2, v3 + +// src0 inline +// CHECK: v_mul_i32_i24_e32 v1, 3, v3 ; encoding: [0x83,0x06,0x02,0x12] +v_mul_i32_i24 v1, 3, v3 + +// src0 negative inline +// CHECK: v_mul_i32_i24_e32 v1, -3, v3 ; encoding: [0xc3,0x06,0x02,0x12] +v_mul_i32_i24 v1, -3, v3 + +// src1 inline +// CHECK: v_mul_i32_i24_e64 v1, v2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x01,0x00] +v_mul_i32_i24 v1, v2, 3 + +// src1 negative inline +// CHECK: v_mul_i32_i24_e64 v1, v2, -3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x87,0x01,0x00] +v_mul_i32_i24 v1, v2, -3 + +// src0 literal +// CHECK: v_mul_i32_i24_e32 v1, 0x64, v3 ; encoding: [0xff,0x06,0x02,0x12,0x64,0x00,0x00,0x00] +v_mul_i32_i24 v1, 100, v3 + +// src1 negative 
literal +// CHECK: v_mul_i32_i24_e32 v1, 0xffffff9c, v3 ; encoding: [0xff,0x06,0x02,0x12,0x9c,0xff,0xff,0xff] +v_mul_i32_i24 v1, -100, v3 + +//===----------------------------------------------------------------------===// +// Checks for legal operands +//===----------------------------------------------------------------------===// + +// src0 sgpr +// CHECK: v_mul_i32_i24_e32 v1, s2, v3 ; encoding: [0x02,0x06,0x02,0x12] +v_mul_i32_i24 v1, s2, v3 + +// src1 sgpr +// CHECK: v_mul_i32_i24_e64 v1, v2, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x00,0x00] +v_mul_i32_i24 v1, v2, s3 + +// src0, src1 same sgpr +// CHECK: v_mul_i32_i24_e64 v1, s2, s2 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x04,0x00,0x00] +v_mul_i32_i24 v1, s2, s2 + +// src0 sgpr, src1 inline +// CHECK: v_mul_i32_i24_e64 v1, s2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x06,0x01,0x00] +v_mul_i32_i24 v1, s2, 3 + +// src0 inline src1 sgpr +// CHECK: v_mul_i32_i24_e64 v1, 3, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x83,0x06,0x00,0x00] +v_mul_i32_i24 v1, 3, s3 + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +// CHECK: v_cndmask_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x00] +v_cndmask_b32 v1, v2, v3 + +// CHECK: v_readlane_b32 s1, v2, s3 ; encoding: [0x02,0x07,0x02,0x02] +v_readlane_b32 s1, v2, s3 + +// CHECK: v_writelane_b32 v1, s2, s3 ; encoding: [0x02,0x06,0x02,0x04] +v_writelane_b32 v1, s2, s3 + +// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06] +v_add_f32 v1, v2, v3 + +// CHECK: v_sub_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x08] +v_sub_f32 v1, v2, v3 + +// CHECK: v_subrev_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0a] +v_subrev_f32 v1, v2, v3 + +// CHECK: v_mac_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c] +v_mac_legacy_f32 v1, v2, v3 + +// CHECK: v_mul_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e] 
+v_mul_legacy_f32_e32 v1, v2, v3 + +// CHECK: v_mul_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x10] +v_mul_f32 v1, v2, v3 + +// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12] +v_mul_i32_i24 v1, v2, v3 + +// CHECK: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x14] +v_mul_hi_i32_i24 v1, v2, v3 + +// CHECK: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x16] +v_mul_u32_u24 v1, v2, v3 + +// CHECK: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x18] +v_mul_hi_u32_u24 v1, v2, v3 + +// CHECK: v_min_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1a] +v_min_legacy_f32_e32 v1, v2, v3 + +// CHECK: v_max_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1c] +v_max_legacy_f32 v1, v2, v3 + +// CHECK: v_min_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1e] +v_min_f32_e32 v1, v2, v3 + +// CHECK: v_max_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x20] +v_max_f32 v1, v2 v3 + +// CHECK: v_min_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x22] +v_min_i32 v1, v2, v3 + +// CHECK: v_max_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x24] +v_max_i32 v1, v2, v3 + +// CHECK: v_min_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x26] +v_min_u32 v1, v2, v3 + +// CHECK: v_max_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x28] +v_max_u32 v1, v2, v3 + +// CHECK: v_lshr_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a] +v_lshr_b32 v1, v2, v3 + +// CHECK: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c] +v_lshrrev_b32 v1, v2, v3 + +// CHECK: v_ashr_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2e] +v_ashr_i32 v1, v2, v3 + +// CHECK: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x30] +v_ashrrev_i32 v1, v2, v3 + +// CHECK: v_lshl_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32] +v_lshl_b32_e32 v1, v2, v3 + +// CHECK: v_lshlrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34] +v_lshlrev_b32 v1, v2, v3 + +// CHECK: v_and_b32_e32 v1, v2, v3 ; encoding: 
[0x02,0x07,0x02,0x36] +v_and_b32 v1, v2, v3 + +// CHECK: v_or_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38] +v_or_b32 v1, v2, v3 + +// CHECK: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a] +v_xor_b32 v1, v2, v3 + +// CHECK: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c] +v_bfm_b32 v1, v2, v3 + +// CHECK: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e] +v_mac_f32 v1, v2, v3 + +// CHECK: v_madmk_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42] +v_madmk_f32 v1, v2, v3, 64.0 + +// CHECK: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x42,0x00,0x00,0x80,0x42] +v_madak_f32 v1, v2, v3, 64.0 + +// CHECK: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44] +v_bcnt_u32_b32 v1, v2, v3 + +// CHECK: v_mbcnt_lo_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46] +v_mbcnt_lo_u32_b32 v1, v2, v3 + +// CHECK: v_mbcnt_hi_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x48] +v_mbcnt_hi_u32_b32_e32 v1, v2, v3 + +// CHECK: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a] +v_add_i32 v1, v2, v3 + +// CHECK: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c] +v_sub_i32_e32 v1, v2, v3 + +// CHECK: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e] +v_subrev_i32 v1, v2, v3 + +// CHECK : v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50] +v_addc_u32 v1, v2, v3 + +// CHECK: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52] +v_subb_u32 v1, v2, v3 + +// CHECK: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54] +v_subbrev_u32 v1, v2, v3 + +// CHECK: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56] +v_ldexp_f32 v1, v2, v3 + +// CHECK: v_cvt_pkaccum_u8_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58] +v_cvt_pkaccum_u8_f32 v1, v2, v3 + +// CHECK: v_cvt_pknorm_i16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a] +v_cvt_pknorm_i16_f32 v1, v2, v3 + +// CHECK: v_cvt_pknorm_u16_f32_e32 v1, v2, v3 ; encoding: 
[0x02,0x07,0x02,0x5c] +v_cvt_pknorm_u16_f32 v1, v2, v3 + +// CHECK: v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e] +v_cvt_pkrtz_f16_f32 v1, v2, v3 + +// CHECK: v_cvt_pk_u16_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60] +v_cvt_pk_u16_u32_e32 v1, v2, v3 + +// CHECK: v_cvt_pk_i16_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62] +v_cvt_pk_i16_i32 v1, v2, v3 |