summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2014-12-18 21:39:30 -0500
committerTom Stellard <thomas.stellard@amd.com>2014-12-30 11:14:15 -0500
commitaed643bbba2fd8d76dd78474e1bf067a7a00f053 (patch)
tree117b31cfc226d1539bb69681c8321ba2c6b1ad60
parent3105462cb9dd309af2b3119795ab79dff7cebafb (diff)
R600/SI: Add assembler support for VOP2 instructions
32-bit and 64-bit encodings are supported for integer ops, and 32-bit encodings are supported for floating-point ops. Support for instruction modifiers is required for full floating-point support.
-rw-r--r--docs/R600Usage.rst20
-rw-r--r--lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp179
-rw-r--r--lib/Target/R600/SIInstrFormats.td6
-rw-r--r--lib/Target/R600/SIInstrInfo.td1
-rw-r--r--lib/Target/R600/SIInstructions.td35
-rw-r--r--lib/Target/R600/SIRegisterInfo.td4
-rw-r--r--test/MC/R600/vop2-err.s35
-rw-r--r--test/MC/R600/vop2.s241
8 files changed, 497 insertions, 24 deletions
diff --git a/docs/R600Usage.rst b/docs/R600Usage.rst
index 4e954398e2b..6b50736df9f 100644
--- a/docs/R600Usage.rst
+++ b/docs/R600Usage.rst
@@ -53,3 +53,23 @@ wait for.
// Wait for vmcnt counter to be 1.
s_waitcnt vmcnt(1)
+VOP2 Instructions
+-----------------
+
+All 32-bit encodings of VOP2 instructions are supported. 64-bit encodings are
+also supported, but only for integer operations.
+
+The assembler will automatically detect which encoding size to use for
+VOP2 instructions based on the operands. If you want to force a specific
+encoding size, you can add an _e32 (for 32-bit encoding) or _e64 (for 64-bit
+encoding) suffix to the instruction. Most, but not all, instructions support
+an explicit suffix. These are all valid assembly strings:
+
+.. code-block:: nasm
+
+ v_mul_i32_i24 v1, v2, v3
+ v_mul_i32_i24_e32 v1, v2, v3
+ v_mul_i32_i24_e64 v1, v2, v3
+
+
diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
index 83494d995c5..5c2679e38c5 100644
--- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
@@ -74,17 +75,29 @@ class AMDGPUOperand : public MCParsedAsmOperand {
Register
} Kind;
+
SMLoc StartLoc, EndLoc;
public:
AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ enum TokenTy {
+ Default,
+ E32,
+ E64
+ };
+
struct TokOp {
const char *Data;
unsigned Length;
+ char *DataE32;
+ char *DataE64;
+ unsigned ExtendedLength;
+ enum TokenTy Type;
};
struct ImmOp {
+ bool IsFPImm;
int64_t Val;
};
@@ -99,8 +112,18 @@ public:
RegOp Reg;
};
+ ~AMDGPUOperand() {
+ if(isToken()) {
+ std::free(Tok.DataE32);
+ std::free(Tok.DataE64);
+ }
+ }
+
void addImmOperands(MCInst &Inst, unsigned N) const {
- Inst.addOperand(MCOperand::CreateImm(getImm()));
+ if (Imm.IsFPImm)
+ Inst.addOperand(MCOperand::CreateFPImm(getImm()));
+ else
+ Inst.addOperand(MCOperand::CreateImm(getImm()));
}
void addRegOperands(MCInst &Inst, unsigned N) const {
@@ -114,27 +137,67 @@ public:
addImmOperands(Inst, N);
}
+ void addRegAndInputMods(MCInst Inst, unsigned N) const {
+ //Inst.addOperand(MCOperand::CreateInputMod(getInputMod()));
+ addRegOperands(Inst, N);
}
StringRef getToken() const {
- return StringRef(Tok.Data, Tok.Length);
+ switch (Tok.Type) {
+ case E32: return StringRef(Tok.DataE32, Tok.ExtendedLength);
+ case E64: return StringRef(Tok.DataE64, Tok.ExtendedLength);
+ default: return StringRef(Tok.Data, Tok.Length);
+
+ }
+ }
+
+ bool defaultTokenHasSuffix() const {
+ StringRef Token(Tok.Data, Tok.Length);
+
+ return Token.endswith("_e32") || Token.endswith("_e64");
}
bool isToken() const override {
return Kind == Token;
}
- bool isImm() const override {
- return Kind == Immediate;
+ enum TokenTy getTokenType() {
+ assert(isToken());
+ return Tok.Type;
}
- bool is64BitInlineImm() const {
- return isImm() && Imm.Val <= -1 && Imm.Val >= -16;
+ void setTokenType(enum TokenTy Type) {
+ assert(isToken());
+ Tok.Type = Type;
+ switch(Type) {
+ default: break;
+ case E32:
+ if (!Tok.DataE32) {
+ std::string E32 = std::string(Tok.Data, Tok.Length) + "_e32";
+ Tok.DataE32 = (char*)std::malloc(Tok.ExtendedLength);
+ std::memcpy(Tok.DataE32, E32.data(), Tok.ExtendedLength);
+ }
+ break;
+ case E64:
+ if (!Tok.DataE64) {
+ std::string E64 = std::string(Tok.Data, Tok.Length) + "_e64";
+ Tok.DataE64 = (char*)std::malloc(Tok.ExtendedLength);
+ std::memcpy(Tok.DataE64, E64.data(), Tok.ExtendedLength);
+ }
+ break;
+ }
}
- bool isImm32Bit() const {
- return isImm() && isUInt<32>(Imm.Val);
+ bool isImm() const override {
+ return Kind == Immediate;
+ }
+
+ bool isInlineImm() const {
+ float F = APInt(32, Imm.Val).bitsToFloat();
+ return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) ||
+ (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
+ F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0));
}
int64_t getImm() const {
@@ -158,14 +221,30 @@ public:
}
bool isSSrc32() const {
- return isImm32Bit() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
+ return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
}
bool isSSrc64() const {
- return isImm32Bit() || is64BitInlineImm() ||
+ return isImm() || isInlineImm() ||
(isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
}
+ bool isVCSrc32() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+ }
+
+ bool isVCSrc64() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
+ }
+
+ bool isVSrc32() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+ }
+
+ bool isVSrc64() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
+ }
+
bool isMem() const override {
return false;
}
@@ -180,18 +259,26 @@ public:
void print(raw_ostream &OS) const override { }
- static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc) {
+ static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
+ bool IsFPImm = false) {
auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
Op->Imm.Val = Val;
+ Op->Imm.IsFPImm = IsFPImm;
Op->StartLoc = Loc;
Op->EndLoc = Loc;
return Op;
}
- static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) {
+ static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc,
+ bool HasExplicitEncodingSize = true) {
auto Res = llvm::make_unique<AMDGPUOperand>(Token);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
+ Res->Tok.Type = Default;
+ // Size of the Token plus 4 bytes for _e32 or _e64.
+ Res->Tok.ExtendedLength = Str.size() + 4;
+ Res->Tok.DataE32 = nullptr;
+ Res->Tok.DataE64 = nullptr;
Res->StartLoc = Loc;
Res->EndLoc = Loc;
return Res;
@@ -337,10 +424,44 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
Out.EmitInstruction(Inst, STI);
return false;
case Match_MissingFeature:
- return Error(IDLoc, "instruction use requires an option to be enabled");
- case Match_MnemonicFail:
+
+ // Fall-through
+ // FIXME better error message for Match_MissingFeature
+ case Match_MnemonicFail: {
+ // For VOP1, VOP2, and VOC instructions, the mnemonic defined in
+ // TableGen has either an _e32 or _e64 suffix, but the assembler
+ // should also accept mnemonics without a suffix.
+ AMDGPUOperand &Mnemonic = ((AMDGPUOperand&)*Operands[0]);
+ if (!Mnemonic.defaultTokenHasSuffix() &&
+ Mnemonic.getTokenType() == AMDGPUOperand::Default) {
+ Mnemonic.setTokenType(AMDGPUOperand::E32);
+ if (!MatchAndEmitInstruction(IDLoc, Opcode, Operands, Out,
+ ErrorInfo, MatchingInlineAsm))
+ return false;
+ return true;
+ }
return Error(IDLoc, "unrecognized instruction mnemonic");
+ }
case Match_InvalidOperand: {
+ AMDGPUOperand &Mnemonic = ((AMDGPUOperand&)*Operands[0]);
+ if (!Mnemonic.defaultTokenHasSuffix() &&
+ Mnemonic.getTokenType() != AMDGPUOperand::E64) {
+
+ // We have failed to match this instruction so try with an _e32 or
+ // _e64 suffix.
+ if (Mnemonic.getTokenType() == AMDGPUOperand::Default)
+ Mnemonic.setTokenType(AMDGPUOperand::E32);
+ else {
+ assert(Mnemonic.getTokenType() == AMDGPUOperand::E32);
+ Mnemonic.setTokenType(AMDGPUOperand::E64);
+ }
+ // We failed to match this as a 32-bit instruction, try it as a
+ // 64-bit instruction.
+ if (!MatchAndEmitInstruction(IDLoc, Opcode, Operands, Out,
+ ErrorInfo, MatchingInlineAsm))
+ return false;
+ }
+
SMLoc ErrorLoc = IDLoc;
if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
@@ -371,15 +492,45 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
return ResTy;
+ bool Negate = false;
+ if (getLexer().getKind()== AsmToken::Minus) {
+ Parser.Lex();
+ Negate = true;
+ }
+
switch(getLexer().getKind()) {
case AsmToken::Integer: {
SMLoc S = Parser.getTok().getLoc();
int64_t IntVal;
if (getParser().parseAbsoluteExpression(IntVal))
return MatchOperand_ParseFail;
+ APInt IntVal32(32, IntVal);
+ if (IntVal32.getSExtValue() != IntVal) {
+ Error(S, "invalid immediate: only 32-bit values are legal");
+ return MatchOperand_ParseFail;
+ }
+
+ IntVal = IntVal32.getSExtValue();
+ if (Negate)
+ IntVal *= -1;
Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
return MatchOperand_Success;
}
+ case AsmToken::Real: {
+ // FIXME: We should emit an error if a double precisions floating-point
+ // value is used. I'm not sure the best way to detect this.
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t IntVal;
+ if (getParser().parseAbsoluteExpression(IntVal))
+ return MatchOperand_ParseFail;
+
+ APFloat F((float)APInt(64, IntVal).bitsToDouble());
+ if (Negate)
+ F.changeSign();
+ Operands.push_back(
+ AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S));
+ return MatchOperand_Success;
+ }
case AsmToken::Identifier: {
SMLoc S, E;
unsigned RegNo;
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 2f61cc18d7e..efce29784d6 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -136,6 +136,8 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
let VOP3 = 1;
let VALU = 1;
+ let isCodeGenOnly = 0;
+
int Size = 8;
}
@@ -575,7 +577,9 @@ class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
VOP1e<op>;
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP2Common <outs, ins, asm, pattern>, VOP2e<op>;
+ VOP2Common <outs, ins, asm, pattern>, VOP2e<op> {
+ let isCodeGenOnly = 0;
+}
class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
VOP3Common <outs, ins, asm, pattern>, VOP3be<op>;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index bb83cf02e4d..0976c72429f 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -836,6 +836,7 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP <opName>,
SIMCInstr<opName#"_e64", SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 8029686cffb..5c0738d3bf8 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -35,8 +35,8 @@ def isSICI : Predicate<
def isCI : Predicate<"Subtarget.getGeneration() "
">= AMDGPUSubtarget::SEA_ISLANDS">;
def isVI : Predicate <
- "Subtarget.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS"
->;
+ "Subtarget.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
+ AssemblerPredicate<"FeatureVolcanicIslands">;
def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
@@ -1401,11 +1401,18 @@ defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32",
defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9, 0x6>, "v_mul_i32_i24",
VOP_I32_I32_I32, AMDGPUmul_i24
>;
-//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "v_mul_hi_i32_i24", []>;
+
+defm V_MUL_HI_I32_I24 : VOP2Inst <vop2<0xa,0x7>, "v_mul_hi_i32_i24",
+ VOP_I32_I32_I32
+>;
+
defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb, 0x8>, "v_mul_u32_u24",
VOP_I32_I32_I32, AMDGPUmul_u24
>;
-//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "v_mul_hi_u32_u24", []>;
+
+defm V_MUL_HI_U32_U24 : VOP2Inst <vop2<0xc,0x9>, "v_mul_hi_u32_u24",
+ VOP_I32_I32_I32
+>;
defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32,
fminnum>;
@@ -1543,14 +1550,24 @@ defm V_LDEXP_F32 : VOP2Inst <vop2<0x2b>, "v_ldexp_f32",
VOP_F32_F32_I32, AMDGPUldexp
>;
-////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>;
-////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>;
-////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>;
+defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <vop2<0x2c>, "v_cvt_pkaccum_u8_f32",
+ VOP_I32_F32_F32
+>;
+defm V_CVT_PKNORM_I16_F32 : VOP2Inst <vop2<0x2d>, "v_cvt_pknorm_i16_f32",
+ VOP_I32_F32_F32
+>;
+defm V_CVT_PKNORM_U16_F32 : VOP2Inst <vop2<0x2e>, "v_cvt_pknorm_u16_f32",
+ VOP_I32_F32_F32
+>;
defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <vop2<0x2f>, "v_cvt_pkrtz_f16_f32",
VOP_I32_F32_F32, int_SI_packf16
>;
-////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>;
-////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>;
+defm V_CVT_PK_U16_U32 : VOP2Inst <vop2<0x30>, "v_cvt_pk_u16_u32",
+ VOP_I32_F32_F32
+>;
+defm V_CVT_PK_I16_I32 : VOP2Inst <vop2<0x31>, "v_cvt_pk_i16_i32",
+ VOP_I32_F32_F32
+>;
} // End let SubtargetPredicate = SICI
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index d4111c24ae0..d849e0844fc 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -245,11 +245,13 @@ def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
def VSrc_32 : RegisterOperand<VS_32> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_REG_IMM32";
+ let ParserMatchClass = RegImmMatcher<"VSrc32">;
}
def VSrc_64 : RegisterOperand<VS_64> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_REG_IMM32";
+ let ParserMatchClass = RegImmMatcher<"VSrc64">;
}
//===----------------------------------------------------------------------===//
@@ -259,11 +261,13 @@ def VSrc_64 : RegisterOperand<VS_64> {
def VCSrc_32 : RegisterOperand<VS_32> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"VCSrc32">;
}
def VCSrc_64 : RegisterOperand<VS_64> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"VCSrc64">;
}
//===----------------------------------------------------------------------===//
diff --git a/test/MC/R600/vop2-err.s b/test/MC/R600/vop2-err.s
new file mode 100644
index 00000000000..1019a0b9b79
--- /dev/null
+++ b/test/MC/R600/vop2-err.s
@@ -0,0 +1,35 @@
+// RUN: not llvm-mc -arch=r600 -mcpu=SI %s 2>&1 | FileCheck %s
+
+//===----------------------------------------------------------------------===//
+// Generic checks
+//===----------------------------------------------------------------------===//
+
+v_mul_i32_i24 v1, v2, 100
+// CHECK: error: invalid operand for instruction
+// CHECK: error: invalid operand for instruction
+
+//===----------------------------------------------------------------------===//
+// _e32 checks
+//===----------------------------------------------------------------------===//
+
+// Immediate src1
+v_mul_i32_i24_e32 v1, v2, 100
+// CHECK: error: invalid operand for instruction
+
+// sgpr src1
+v_mul_i32_i24_e32 v1, v2, s3
+// CHECK: error: invalid operand for instruction
+
+//===----------------------------------------------------------------------===//
+// _e64 checks
+//===----------------------------------------------------------------------===//
+
+// Immediate src0
+v_mul_i32_i24_e64 v1, 100, v3
+// CHECK: error: invalid operand for instruction
+
+// Immediate src1
+v_mul_i32_i24_e64 v1, v2, 100
+// CHECK: error: invalid operand for instruction
+
+// TODO: Constant bus restrictions
diff --git a/test/MC/R600/vop2.s b/test/MC/R600/vop2.s
new file mode 100644
index 00000000000..48666a6b58c
--- /dev/null
+++ b/test/MC/R600/vop2.s
@@ -0,0 +1,241 @@
+// RUN: llvm-mc -arch=r600 -mcpu=SI -show-encoding %s | FileCheck %s
+
+//===----------------------------------------------------------------------===//
+// Generic Checks for floating-point instructions (These have modifiers).
+//===----------------------------------------------------------------------===//
+
+// TODO: 64-bit encoding of instructions with modifiers
+
+// _e32 suffix
+// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06]
+v_add_f32_e32 v1, v2, v3
+
+// src0 inline immediate
+// CHECK: v_add_f32_e32 v1, 1.0, v3 ; encoding: [0xf2,0x06,0x02,0x06]
+v_add_f32 v1, 1.0, v3
+
+// src0 negative inline immediate
+// CHECK: v_add_f32_e32 v1, -1.0, v3 ; encoding: [0xf3,0x06,0x02,0x06]
+v_add_f32 v1, -1.0, v3
+
+// src0 literal
+// CHECK: v_add_f32_e32 v1, 0x42c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0x42]
+v_add_f32 v1, 100.0, v3
+
+// src1 negative literal
+// CHECK: v_add_f32_e32 v1, 0xc2c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0xc2]
+v_add_f32 v1, -100.0, v3
+
+//===----------------------------------------------------------------------===//
+// Generic Checks for integer instructions (These don't have modifiers).
+//===----------------------------------------------------------------------===//
+
+// _e32 suffix
+// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12]
+v_mul_i32_i24_e32 v1, v2, v3
+
+// _e64 suffix
+// CHECK: v_mul_i32_i24_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x02,0x00]
+v_mul_i32_i24_e64 v1, v2, v3
+
+// src0 inline
+// CHECK: v_mul_i32_i24_e32 v1, 3, v3 ; encoding: [0x83,0x06,0x02,0x12]
+v_mul_i32_i24 v1, 3, v3
+
+// src0 negative inline
+// CHECK: v_mul_i32_i24_e32 v1, -3, v3 ; encoding: [0xc3,0x06,0x02,0x12]
+v_mul_i32_i24 v1, -3, v3
+
+// src1 inline
+// CHECK: v_mul_i32_i24_e64 v1, v2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x01,0x00]
+v_mul_i32_i24 v1, v2, 3
+
+// src1 negative inline
+// CHECK: v_mul_i32_i24_e64 v1, v2, -3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x87,0x01,0x00]
+v_mul_i32_i24 v1, v2, -3
+
+// src0 literal
+// CHECK: v_mul_i32_i24_e32 v1, 0x64, v3 ; encoding: [0xff,0x06,0x02,0x12,0x64,0x00,0x00,0x00]
+v_mul_i32_i24 v1, 100, v3
+
+// src1 negative literal
+// CHECK: v_mul_i32_i24_e32 v1, 0xffffff9c, v3 ; encoding: [0xff,0x06,0x02,0x12,0x9c,0xff,0xff,0xff]
+v_mul_i32_i24 v1, -100, v3
+
+//===----------------------------------------------------------------------===//
+// Checks for legal operands
+//===----------------------------------------------------------------------===//
+
+// src0 sgpr
+// CHECK: v_mul_i32_i24_e32 v1, s2, v3 ; encoding: [0x02,0x06,0x02,0x12]
+v_mul_i32_i24 v1, s2, v3
+
+// src1 sgpr
+// CHECK: v_mul_i32_i24_e64 v1, v2, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x00,0x00]
+v_mul_i32_i24 v1, v2, s3
+
+// src0, src1 same sgpr
+// CHECK: v_mul_i32_i24_e64 v1, s2, s2 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x04,0x00,0x00]
+v_mul_i32_i24 v1, s2, s2
+
+// src0 sgpr, src1 inline
+// CHECK: v_mul_i32_i24_e64 v1, s2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x06,0x01,0x00]
+v_mul_i32_i24 v1, s2, 3
+
+// src0 inline src1 sgpr
+// CHECK: v_mul_i32_i24_e64 v1, 3, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x83,0x06,0x00,0x00]
+v_mul_i32_i24 v1, 3, s3
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// CHECK: v_cndmask_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x00]
+v_cndmask_b32 v1, v2, v3
+
+// CHECK: v_readlane_b32 s1, v2, s3 ; encoding: [0x02,0x07,0x02,0x02]
+v_readlane_b32 s1, v2, s3
+
+// CHECK: v_writelane_b32 v1, s2, s3 ; encoding: [0x02,0x06,0x02,0x04]
+v_writelane_b32 v1, s2, s3
+
+// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06]
+v_add_f32 v1, v2, v3
+
+// CHECK: v_sub_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x08]
+v_sub_f32 v1, v2, v3
+
+// CHECK: v_subrev_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0a]
+v_subrev_f32 v1, v2, v3
+
+// CHECK: v_mac_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c]
+v_mac_legacy_f32 v1, v2, v3
+
+// CHECK: v_mul_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e]
+v_mul_legacy_f32_e32 v1, v2, v3
+
+// CHECK: v_mul_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x10]
+v_mul_f32 v1, v2, v3
+
+// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12]
+v_mul_i32_i24 v1, v2, v3
+
+// CHECK: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x14]
+v_mul_hi_i32_i24 v1, v2, v3
+
+// CHECK: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x16]
+v_mul_u32_u24 v1, v2, v3
+
+// CHECK: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x18]
+v_mul_hi_u32_u24 v1, v2, v3
+
+// CHECK: v_min_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1a]
+v_min_legacy_f32_e32 v1, v2, v3
+
+// CHECK: v_max_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1c]
+v_max_legacy_f32 v1, v2, v3
+
+// CHECK: v_min_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1e]
+v_min_f32_e32 v1, v2, v3
+
+// CHECK: v_max_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x20]
+v_max_f32 v1, v2, v3
+
+// CHECK: v_min_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x22]
+v_min_i32 v1, v2, v3
+
+// CHECK: v_max_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x24]
+v_max_i32 v1, v2, v3
+
+// CHECK: v_min_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x26]
+v_min_u32 v1, v2, v3
+
+// CHECK: v_max_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x28]
+v_max_u32 v1, v2, v3
+
+// CHECK: v_lshr_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a]
+v_lshr_b32 v1, v2, v3
+
+// CHECK: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c]
+v_lshrrev_b32 v1, v2, v3
+
+// CHECK: v_ashr_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2e]
+v_ashr_i32 v1, v2, v3
+
+// CHECK: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x30]
+v_ashrrev_i32 v1, v2, v3
+
+// CHECK: v_lshl_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+v_lshl_b32_e32 v1, v2, v3
+
+// CHECK: v_lshlrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+v_lshlrev_b32 v1, v2, v3
+
+// CHECK: v_and_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+v_and_b32 v1, v2, v3
+
+// CHECK: v_or_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
+v_or_b32 v1, v2, v3
+
+// CHECK: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
+v_xor_b32 v1, v2, v3
+
+// CHECK: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
+v_bfm_b32 v1, v2, v3
+
+// CHECK: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e]
+v_mac_f32 v1, v2, v3
+
+// CHECK: v_madmk_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42]
+v_madmk_f32 v1, v2, v3, 64.0
+
+// CHECK: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x42,0x00,0x00,0x80,0x42]
+v_madak_f32 v1, v2, v3, 64.0
+
+// CHECK: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44]
+v_bcnt_u32_b32 v1, v2, v3
+
+// CHECK: v_mbcnt_lo_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46]
+v_mbcnt_lo_u32_b32 v1, v2, v3
+
+// CHECK: v_mbcnt_hi_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x48]
+v_mbcnt_hi_u32_b32_e32 v1, v2, v3
+
+// CHECK: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
+v_add_i32 v1, v2, v3
+
+// CHECK: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
+v_sub_i32_e32 v1, v2, v3
+
+// CHECK: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
+v_subrev_i32 v1, v2, v3
+
+// CHECK: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
+v_addc_u32 v1, v2, v3
+
+// CHECK: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
+v_subb_u32 v1, v2, v3
+
+// CHECK: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
+v_subbrev_u32 v1, v2, v3
+
+// CHECK: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
+v_ldexp_f32 v1, v2, v3
+
+// CHECK: v_cvt_pkaccum_u8_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58]
+v_cvt_pkaccum_u8_f32 v1, v2, v3
+
+// CHECK: v_cvt_pknorm_i16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a]
+v_cvt_pknorm_i16_f32 v1, v2, v3
+
+// CHECK: v_cvt_pknorm_u16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c]
+v_cvt_pknorm_u16_f32 v1, v2, v3
+
+// CHECK: v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e]
+v_cvt_pkrtz_f16_f32 v1, v2, v3
+
+// CHECK: v_cvt_pk_u16_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60]
+v_cvt_pk_u16_u32_e32 v1, v2, v3
+
+// CHECK: v_cvt_pk_i16_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62]
+v_cvt_pk_i16_i32 v1, v2, v3