summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2014-12-18 21:39:30 -0500
committerTom Stellard <thomas.stellard@amd.com>2014-12-30 11:14:15 -0500
commitaed643bbba2fd8d76dd78474e1bf067a7a00f053 (patch)
tree117b31cfc226d1539bb69681c8321ba2c6b1ad60
parent3105462cb9dd309af2b3119795ab79dff7cebafb (diff)
R600/SI: Add assembler support for VOP2 instructions
32-bit and 64-bit encodings are supported for integer ops, and 32-bit encodings are supported for floating-point ops. Support for instruction modifiers is required for full floating-point support.
-rw-r--r--docs/R600Usage.rst20
-rw-r--r--lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp179
-rw-r--r--lib/Target/R600/SIInstrFormats.td6
-rw-r--r--lib/Target/R600/SIInstrInfo.td1
-rw-r--r--lib/Target/R600/SIInstructions.td35
-rw-r--r--lib/Target/R600/SIRegisterInfo.td4
-rw-r--r--test/MC/R600/vop2-err.s35
-rw-r--r--test/MC/R600/vop2.s241
8 files changed, 497 insertions, 24 deletions
diff --git a/docs/R600Usage.rst b/docs/R600Usage.rst
index 4e954398e2b..6b50736df9f 100644
--- a/docs/R600Usage.rst
+++ b/docs/R600Usage.rst
@@ -53,3 +53,23 @@ wait for.
// Wait for vmcnt counter to be 1.
s_waitcnt vmcnt(1)
+VOP2 Instructions
+-----------------
+
+All 32-bit encodings of VOP2 instructions are supported. 64-bit encodings are
+also supported, but only for integer operations.
+
+The assembler will automatically detect which encoding size to use for
+VOP2 instructions based on the operands. If you want to force a specific
+encoding size, you can add an _e32 (for 32-bit encoding) or _e64 (for 64-bit
+encoding) suffix to the instruction. Most, but not all, instructions support
+an explicit suffix. These are all valid assembly strings:
+
+.. code-block:: nasm
+
+ v_mul_i32_i24 v1, v2, v3
+ v_mul_i32_i24_e32 v1, v2, v3
+ v_mul_i32_i24_e64 v1, v2, v3
+
+
diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
index 83494d995c5..5c2679e38c5 100644
--- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
@@ -74,17 +75,29 @@ class AMDGPUOperand : public MCParsedAsmOperand {
Register
} Kind;
+
SMLoc StartLoc, EndLoc;
public:
AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ enum TokenTy {
+ Default,
+ E32,
+ E64
+ };
+
struct TokOp {
const char *Data;
unsigned Length;
+ char *DataE32;
+ char *DataE64;
+ unsigned ExtendedLength;
+ enum TokenTy Type;
};
struct ImmOp {
+ bool IsFPImm;
int64_t Val;
};
@@ -99,8 +112,18 @@ public:
RegOp Reg;
};
+ ~AMDGPUOperand() {
+ if(isToken()) {
+ std::free(Tok.DataE32);
+ std::free(Tok.DataE64);
+ }
+ }
+
void addImmOperands(MCInst &Inst, unsigned N) const {
- Inst.addOperand(MCOperand::CreateImm(getImm()));
+ if (Imm.IsFPImm)
+ Inst.addOperand(MCOperand::CreateFPImm(getImm()));
+ else
+ Inst.addOperand(MCOperand::CreateImm(getImm()));
}
void addRegOperands(MCInst &Inst, unsigned N) const {
@@ -114,27 +137,67 @@ public:
addImmOperands(Inst, N);
}
+ void addRegAndInputMods(MCInst Inst, unsigned N) const {
+ //Inst.addOperand(MCOperand::CreateInputMod(getInputMod()));
+ addRegOperands(Inst, N);
}
StringRef getToken() const {
- return StringRef(Tok.Data, Tok.Length);
+ switch (Tok.Type) {
+ case E32: return StringRef(Tok.DataE32, Tok.ExtendedLength);
+ case E64: return StringRef(Tok.DataE64, Tok.ExtendedLength);
+ default: return StringRef(Tok.Data, Tok.Length);
+
+ }
+ }
+
+ bool defaultTokenHasSuffix() const {
+ StringRef Token(Tok.Data, Tok.Length);
+
+ return Token.endswith("_e32") || Token.endswith("_e64");
}
bool isToken() const override {
return Kind == Token;
}
- bool isImm() const override {
- return Kind == Immediate;
+ enum TokenTy getTokenType() {
+ assert(isToken());
+ return Tok.Type;
}
- bool is64BitInlineImm() const {
- return isImm() && Imm.Val <= -1 && Imm.Val >= -16;
+ void setTokenType(enum TokenTy Type) {
+ assert(isToken());
+ Tok.Type = Type;
+ switch(Type) {
+ default: break;
+ case E32:
+ if (!Tok.DataE32) {
+ std::string E32 = std::string(Tok.Data, Tok.Length) + "_e32";
+ Tok.DataE32 = (char*)std::malloc(Tok.ExtendedLength);
+ std::memcpy(Tok.DataE32, E32.data(), Tok.ExtendedLength);
+ }
+ break;
+ case E64:
+ if (!Tok.DataE64) {
+ std::string E64 = std::string(Tok.Data, Tok.Length) + "_e64";
+ Tok.DataE64 = (char*)std::malloc(Tok.ExtendedLength);
+ std::memcpy(Tok.DataE64, E64.data(), Tok.ExtendedLength);
+ }
+ break;
+ }
}
- bool isImm32Bit() const {
- return isImm() && isUInt<32>(Imm.Val);
+ bool isImm() const override {
+ return Kind == Immediate;
+ }
+
+ bool isInlineImm() const {
+ float F = APInt(32, Imm.Val).bitsToFloat();
+ return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) ||
+ (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
+ F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0));
}
int64_t getImm() const {
@@ -158,14 +221,30 @@ public:
}
bool isSSrc32() const {
- return isImm32Bit() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
+ return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
}
bool isSSrc64() const {
- return isImm32Bit() || is64BitInlineImm() ||
+ return isImm() || isInlineImm() ||
(isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
}
+ bool isVCSrc32() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+ }
+
+ bool isVCSrc64() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
+ }
+
+ bool isVSrc32() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+ }
+
+ bool isVSrc64() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
+ }
+
bool isMem() const override {
return false;
}
@@ -180,18 +259,26 @@ public:
void print(raw_ostream &OS) const override { }
- static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc) {
+ static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
+ bool IsFPImm = false) {
auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
Op->Imm.Val = Val;
+ Op->Imm.IsFPImm = IsFPImm;
Op->StartLoc = Loc;
Op->EndLoc = Loc;
return Op;
}
- static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) {
+ static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc,
+ bool HasExplicitEncodingSize = true) {
auto Res = llvm::make_unique<AMDGPUOperand>(Token);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
+ Res->Tok.Type = Default;
+ // Size of the Token plus 4 bytes for _e32 or _e64.
+ Res->Tok.ExtendedLength = Str.size() + 4;
+ Res->Tok.DataE32 = nullptr;
+ Res->Tok.DataE64 = nullptr;
Res->StartLoc = Loc;
Res->EndLoc = Loc;
return Res;
@@ -337,10 +424,44 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
Out.EmitInstruction(Inst, STI);
return false;
case Match_MissingFeature:
- return Error(IDLoc, "instruction use requires an option to be enabled");
- case Match_MnemonicFail:
+
+ // Fall-through
+ // FIXME better error message for Match_MissingFeature
+ case Match_MnemonicFail: {
+ // For VOP1, VOP2, and VOC instructions, the mnemonic defined in
+ // TableGen has either an _e32 or _e64 suffix, but the assembler
+ // should also accept mnemonics without a suffix.
+ AMDGPUOperand &Mnemonic = ((AMDGPUOperand&)*Operands[0]);
+ if (!Mnemonic.defaultTokenHasSuffix() &&
+ Mnemonic.getTokenType() == AMDGPUOperand::Default) {
+ Mnemonic.setTokenType(AMDGPUOperand::E32);
+ if (!MatchAndEmitInstruction(IDLoc, Opcode, Operands, Out,
+ ErrorInfo, MatchingInlineAsm))
+ return false;
+ return true;
+ }
return Error(IDLoc, "unrecognized instruction mnemonic");
+ }
case Match_InvalidOperand: {
+ AMDGPUOperand &Mnemonic = ((AMDGPUOperand&)*Operands[0]);
+ if (!Mnemonic.defaultTokenHasSuffix() &&
+ Mnemonic.getTokenType() != AMDGPUOperand::E64) {
+
+ // We have failed to match this instruction so try with an _e32 or
+ // _e64 suffix.
+ if (Mnemonic.getTokenType() == AMDGPUOperand::Default)
+ Mnemonic.setTokenType(AMDGPUOperand::E32);
+ else {
+ assert(Mnemonic.getTokenType() == AMDGPUOperand::E32);
+ Mnemonic.setTokenType(AMDGPUOperand::E64);
+ }
+ // We failed to match this as a 32-bit instruction, try it as a
+ // 64-bit instruction.
+ if (!MatchAndEmitInstruction(IDLoc, Opcode, Operands, Out,
+ ErrorInfo, MatchingInlineAsm))
+ return false;
+ }
+
SMLoc ErrorLoc = IDLoc;
if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
@@ -371,15 +492,45 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
return ResTy;
+ bool Negate = false;
+ if (getLexer().getKind()== AsmToken::Minus) {
+ Parser.Lex();
+ Negate = true;
+ }
+
switch(getLexer().getKind()) {
case AsmToken::Integer: {
SMLoc S = Parser.getTok().getLoc();
int64_t IntVal;
if (getParser().parseAbsoluteExpression(IntVal))
return MatchOperand_ParseFail;
+ APInt IntVal32(32, IntVal);
+ if (IntVal32.getSExtValue() != IntVal) {
+ Error(S, "invalid immediate: only 32-bit values are legal");
+ return MatchOperand_ParseFail;
+ }
+
+ IntVal = IntVal32.getSExtValue();
+ if (Negate)
+ IntVal *= -1;
Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
return MatchOperand_Success;
}
+ case AsmToken::Real: {
+ // FIXME: We should emit an error if a double precisions floating-point
+ // value is used. I'm not sure the best way to detect this.
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t IntVal;
+ if (getParser().parseAbsoluteExpression(IntVal))
+ return MatchOperand_ParseFail;
+
+ APFloat F((float)APInt(64, IntVal).bitsToDouble());
+ if (Negate)
+ F.changeSign();
+ Operands.push_back(
+ AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S));
+ return MatchOperand_Success;
+ }
case AsmToken::Identifier: {
SMLoc S, E;
unsigned RegNo;
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 2f61cc18d7e..efce29784d6 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -136,6 +136,8 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
let VOP3 = 1;
let VALU = 1;
+ let isCodeGenOnly = 0;
+
int Size = 8;
}
@@ -575,7 +577,9 @@ class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
VOP1e<op>;
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP2Common <outs, ins, asm, pattern>, VOP2e<op>;
+ VOP2Common <outs, ins, asm, pattern>, VOP2e<op> {
+ let isCodeGenOnly = 0;
+}
class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
VOP3Common <outs, ins, asm, pattern>, VOP3be<op>;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index bb83cf02e4d..0976c72429f 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -836,6 +836,7 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP <opName>,
SIMCInstr<opName#"_e64", SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 8029686cffb..5c0738d3bf8 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -35,8 +35,8 @@ def isSICI : Predicate<
def isCI : Predicate<"Subtarget.getGeneration() "
">= AMDGPUSubtarget::SEA_ISLANDS">;
def isVI : Predicate <
- "Subtarget.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS"
->;
+ "Subtarget.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
+ AssemblerPredicate<"FeatureVolcanicIslands">;
def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
@@ -1401,11 +1401,18 @@ defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32",
defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9, 0x6>, "v_mul_i32_i24",
VOP_I32_I32_I32, AMDGPUmul_i24
>;
-//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "v_mul_hi_i32_i24", []>;
+
+defm V_MUL_HI_I32_I24 : VOP2Inst <vop2<0xa,0x7>, "v_mul_hi_i32_i24",
+ VOP_I32_I32_I32
+>;
+
defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb, 0x8>, "v_mul_u32_u24",
VOP_I32_I32_I32, AMDGPUmul_u24
>;
-//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "v_mul_hi_u32_u24", []>;
+
+defm V_MUL_HI_U32_U24 : VOP2Inst <vop2<0xc,0x9>, "v_mul_hi_u32_u24",
+ VOP_I32_I32_I32
+>;
defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32,
fminnum>;
@@ -1543,14 +1550,24 @@ defm V_LDEXP_F32 : VOP2Inst <vop2<0x2b>, "v_ldexp_f32",
VOP_F32_F32_I32, AMDGPUldexp
>;
-////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>;
-////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>;
-////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>;
+defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <vop2<0x2c>, "v_cvt_pkaccum_u8_f32",
+ VOP_I32_F32_F32
+>;
+defm V_CVT_PKNORM_I16_F32 : VOP2Inst <vop2<0x2d>, "v_cvt_pknorm_i16_f32",
+ VOP_I32_F32_F32
+>;
+defm V_CVT_PKNORM_U16_F32 : VOP2Inst <vop2<0x2e>, "v_cvt_pknorm_u16_f32",
+ VOP_I32_F32_F32
+>;
defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <vop2<0x2f>, "v_cvt_pkrtz_f16_f32",
VOP_I32_F32_F32, int_SI_packf16
>;
-////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>;
-////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>;
+defm V_CVT_PK_U16_U32 : VOP2Inst <vop2<0x30>, "v_cvt_pk_u16_u32",
+ VOP_I32_F32_F32
+>;
+defm V_CVT_PK_I16_I32 : VOP2Inst <vop2<0x31>, "v_cvt_pk_i16_i32",
+ VOP_I32_F32_F32
+>;
} // End let SubtargetPredicate = SICI
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index d4111c24ae0..d849e0844fc 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -245,11 +245,13 @@ def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
def VSrc_32 : RegisterOperand<VS_32> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_REG_IMM32";
+ let ParserMatchClass = RegImmMatcher<"VSrc32">;
}
def VSrc_64 : RegisterOperand<VS_64> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_REG_IMM32";
+ let ParserMatchClass = RegImmMatcher<"VSrc64">;
}
//===----------------------------------------------------------------------===//
@@ -259,11 +261,13 @@ def VSrc_64 : RegisterOperand<VS_64> {
def VCSrc_32 : RegisterOperand<VS_32> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"VCSrc32">;
}
def VCSrc_64 : RegisterOperand<VS_64> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"VCSrc64">;
}
//===----------------------------------------------------------------------===//
diff --git a/test/MC/R600/vop2-err.s b/test/MC/R600/vop2-err.s
new file mode 100644
index 00000000000..1019a0b9b79
--- /dev/null
+++ b/test/MC/R600/vop2-err.s
@@ -0,0 +1,35 @@
+// RUN: not llvm-mc -arch=r600 -mcpu=SI %s 2>&1 | FileCheck %s
+
+//===----------------------------------------------------------------------===//
+// Generic checks
+//===----------------------------------------------------------------------===//
+
+v_mul_i32_i24 v1, v2, 100
+// CHECK: error: invalid operand for instruction
+// CHECK: error: invalid operand for instruction
+
+//===----------------------------------------------------------------------===//
+// _e32 checks
+//===----------------------------------------------------------------------===//
+
+// Immediate src1
+v_mul_i32_i24_e32 v1, v2, 100
+// CHECK: error: invalid operand for instruction
+
+// sgpr src1
+v_mul_i32_i24_e32 v1, v2, s3
+// CHECK: error: invalid operand for instruction
+
+//===----------------------------------------------------------------------===//
+// _e64 checks
+//===----------------------------------------------------------------------===//
+
+// Immediate src0
+v_mul_i32_i24_e64 v1, 100, v3
+// CHECK: error: invalid operand for instruction
+
+// Immediate src1
+v_mul_i32_i24_e64 v1, v2, 100
+// CHECK: error: invalid operand for instruction
+
+// TODO: Constant bus restrictions
diff --git a/test/MC/R600/vop2.s b/test/MC/R600/vop2.s
new file mode 100644
index 00000000000..48666a6b58c
--- /dev/null
+++ b/test/MC/R600/vop2.s
@@ -0,0 +1,241 @@
+// RUN: llvm-mc -arch=r600 -mcpu=SI -show-encoding %s | FileCheck %s
+
+//===----------------------------------------------------------------------===//
+// Generic Checks for floating-point instructions (These have modifiers).
+//===----------------------------------------------------------------------===//
+
+// TODO: 64-bit encoding of instructions with modifiers
+
+// _e32 suffix
+// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06]
+v_add_f32_e32 v1, v2, v3
+
+// src0 inline immediate
+// CHECK: v_add_f32_e32 v1, 1.0, v3 ; encoding: [0xf2,0x06,0x02,0x06]
+v_add_f32 v1, 1.0, v3
+
+// src0 negative inline immediate
+// CHECK: v_add_f32_e32 v1, -1.0, v3 ; encoding: [0xf3,0x06,0x02,0x06]
+v_add_f32 v1, -1.0, v3
+
+// src0 literal
+// CHECK: v_add_f32_e32 v1, 0x42c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0x42]
+v_add_f32 v1, 100.0, v3
+
+// src1 negative literal
+// CHECK: v_add_f32_e32 v1, 0xc2c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0xc2]
+v_add_f32 v1, -100.0, v3
+
+//===----------------------------------------------------------------------===//
+// Generic Checks for integer instructions (These don't have modifiers).
+//===----------------------------------------------------------------------===//
+
+// _e32 suffix
+// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12]
+v_mul_i32_i24_e32 v1, v2, v3
+
+// _e64 suffix
+// CHECK: v_mul_i32_i24_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x02,0x00]
+v_mul_i32_i24_e64 v1, v2, v3
+
+// src0 inline
+// CHECK: v_mul_i32_i24_e32 v1, 3, v3 ; encoding: [0x83,0x06,0x02,0x12]
+v_mul_i32_i24 v1, 3, v3
+
+// src0 negative inline
+// CHECK: v_mul_i32_i24_e32 v1, -3, v3 ; encoding: [0xc3,0x06,0x02,0x12]
+v_mul_i32_i24 v1, -3, v3
+
+// src1 inline
+// CHECK: v_mul_i32_i24_e64 v1, v2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x01,0x00]
+v_mul_i32_i24 v1, v2, 3
+
+// src1 negative inline
+// CHECK: v_mul_i32_i24_e64 v1, v2, -3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x87,0x01,0x00]
+v_mul_i32_i24 v1, v2, -3
+
+// src0 literal
+// CHECK: v_mul_i32_i24_e32 v1, 0x64, v3 ; encoding: [0xff,0x06,0x02,0x12,0x64,0x00,0x00,0x00]
+v_mul_i32_i24 v1, 100, v3
+
+// src1 negative literal
+// CHECK: v_mul_i32_i24_e32 v1, 0xffffff9c, v3 ; encoding: [0xff,0x06,0x02,0x12,0x9c,0xff,0xff,0xff]
+v_mul_i32_i24 v1, -100, v3
+
+//===----------------------------------------------------------------------===//
+// Checks for legal operands
+//===----------------------------------------------------------------------===//
+
+// src0 sgpr
+// CHECK: v_mul_i32_i24_e32 v1, s2, v3 ; encoding: [0x02,0x06,0x02,0x12]
+v_mul_i32_i24 v1, s2, v3
+
+// src1 sgpr
+// CHECK: v_mul_i32_i24_e64 v1, v2, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x00,0x00]
+v_mul_i32_i24 v1, v2, s3
+
+// src0, src1 same sgpr
+// CHECK: v_mul_i32_i24_e64 v1, s2, s2 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x04,0x00,0x00]
+v_mul_i32_i24 v1, s2, s2
+
+// src0 sgpr, src1 inline
+// CHECK: v_mul_i32_i24_e64 v1, s2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x06,0x01,0x00]
+v_mul_i32_i24 v1, s2, 3
+
+// src0 inline src1 sgpr
+// CHECK: v_mul_i32_i24_e64 v1, 3, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x83,0x06,0x00,0x00]
+v_mul_i32_i24 v1, 3, s3
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// CHECK: v_cndmask_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x00]
+v_cndmask_b32 v1, v2, v3
+
+// CHECK: v_readlane_b32 s1, v2, s3 ; encoding: [0x02,0x07,0x02,0x02]
+v_readlane_b32 s1, v2, s3
+
+// CHECK: v_writelane_b32 v1, s2, s3 ; encoding: [0x02,0x06,0x02,0x04]
+v_writelane_b32 v1, s2, s3
+
+// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06]
+v_add_f32 v1, v2, v3
+
+// CHECK: v_sub_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x08]
+v_sub_f32 v1, v2, v3
+
+// CHECK: v_subrev_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0a]
+v_subrev_f32 v1, v2, v3
+
+// CHECK: v_mac_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c]
+v_mac_legacy_f32 v1, v2, v3
+
+// CHECK: v_mul_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e]
+v_mul_legacy_f32_e32 v1, v2, v3
+
+// CHECK: v_mul_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x10]
+v_mul_f32 v1, v2, v3
+
+// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12]
+v_mul_i32_i24 v1, v2, v3
+
+// CHECK: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x14]
+v_mul_hi_i32_i24 v1, v2, v3
+
+// CHECK: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x16]
+v_mul_u32_u24 v1, v2, v3
+
+// CHECK: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x18]
+v_mul_hi_u32_u24 v1, v2, v3
+
+// CHECK: v_min_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1a]
+v_min_legacy_f32_e32 v1, v2, v3
+
+// CHECK: v_max_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1c]
+v_max_legacy_f32 v1, v2, v3
+
+// CHECK: v_min_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1e]
+v_min_f32_e32 v1, v2, v3
+
+// CHECK: v_max_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x20]
+v_max_f32 v1, v2, v3
+
+// CHECK: v_min_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x22]
+v_min_i32 v1, v2, v3
+
+// CHECK: v_max_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x24]
+v_max_i32 v1, v2, v3
+
+// CHECK: v_min_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x26]
+v_min_u32 v1, v2, v3
+
+// CHECK: v_max_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x28]
+v_max_u32 v1, v2, v3
+
+// CHECK: v_lshr_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a]
+v_lshr_b32 v1, v2, v3
+
+// CHECK: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c]
+v_lshrrev_b32 v1, v2, v3
+
+// CHECK: v_ashr_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2e]
+v_ashr_i32 v1, v2, v3
+
+// CHECK: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x30]
+v_ashrrev_i32 v1, v2, v3
+
+// CHECK: v_lshl_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+v_lshl_b32_e32 v1, v2, v3
+
+// CHECK: v_lshlrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+v_lshlrev_b32 v1, v2, v3
+
+// CHECK: v_and_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+v_and_b32 v1, v2, v3
+
+// CHECK: v_or_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
+v_or_b32 v1, v2, v3
+
+// CHECK: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
+v_xor_b32 v1, v2, v3
+
+// CHECK: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
+v_bfm_b32 v1, v2, v3
+
+// CHECK: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e]
+v_mac_f32 v1, v2, v3
+
+// CHECK: v_madmk_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42]
+v_madmk_f32 v1, v2, v3, 64.0
+
+// CHECK: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x42,0x00,0x00,0x80,0x42]
+v_madak_f32 v1, v2, v3, 64.0
+
+// CHECK: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44]
+v_bcnt_u32_b32 v1, v2, v3
+
+// CHECK: v_mbcnt_lo_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46]
+v_mbcnt_lo_u32_b32 v1, v2, v3
+
+// CHECK: v_mbcnt_hi_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x48]
+v_mbcnt_hi_u32_b32_e32 v1, v2, v3
+
+// CHECK: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
+v_add_i32 v1, v2, v3
+
+// CHECK: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
+v_sub_i32_e32 v1, v2, v3
+
+// CHECK: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
+v_subrev_i32 v1, v2, v3
+
+// CHECK: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
+v_addc_u32 v1, v2, v3
+
+// CHECK: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
+v_subb_u32 v1, v2, v3
+
+// CHECK: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
+v_subbrev_u32 v1, v2, v3
+
+// CHECK: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
+v_ldexp_f32 v1, v2, v3
+
+// CHECK: v_cvt_pkaccum_u8_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58]
+v_cvt_pkaccum_u8_f32 v1, v2, v3
+
+// CHECK: v_cvt_pknorm_i16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a]
+v_cvt_pknorm_i16_f32 v1, v2, v3
+
+// CHECK: v_cvt_pknorm_u16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c]
+v_cvt_pknorm_u16_f32 v1, v2, v3
+
+// CHECK: v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e]
+v_cvt_pkrtz_f16_f32 v1, v2, v3
+
+// CHECK: v_cvt_pk_u16_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60]
+v_cvt_pk_u16_u32_e32 v1, v2, v3
+
+// CHECK: v_cvt_pk_i16_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62]
+v_cvt_pk_i16_i32 v1, v2, v3