Diffstat (limited to 'open-vm-tools/lib/include/vm_basic_asm_x86_64.h')
-rw-r--r--  open-vm-tools/lib/include/vm_basic_asm_x86_64.h | 178
1 file changed, 109 insertions(+), 69 deletions(-)
diff --git a/open-vm-tools/lib/include/vm_basic_asm_x86_64.h b/open-vm-tools/lib/include/vm_basic_asm_x86_64.h
index 4cd6aebe..ecb4189f 100644
--- a/open-vm-tools/lib/include/vm_basic_asm_x86_64.h
+++ b/open-vm-tools/lib/include/vm_basic_asm_x86_64.h
@@ -1,5 +1,5 @@
/*********************************************************
- * Copyright (C) 1998-2004 VMware, Inc. All rights reserved.
+ * Copyright (C) 1998-2015 VMware, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -53,14 +53,14 @@
#error "This file is x86-64 only!"
#endif
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(BORA_NO_WIN32_INTRINS)
#ifdef __cplusplus
extern "C" {
#endif
-uint64 _umul128(uint64 multiplier, uint64 multiplicand,
+uint64 _umul128(uint64 multiplier, uint64 multiplicand,
uint64 *highProduct);
-int64 _mul128(int64 multiplier, int64 multiplicand,
+int64 _mul128(int64 multiplier, int64 multiplicand,
int64 *highProduct);
uint64 __shiftright128(uint64 lowPart, uint64 highPart, uint8 shift);
#ifdef __cplusplus
@@ -323,55 +323,81 @@ xtest(void)
*
* Mul64x6464 --
*
- * Unsigned integer by fixed point multiplication:
- * result = multiplicand * multiplier >> shift
+ * Unsigned integer by fixed point multiplication, with rounding:
+ * result = floor(multiplicand * multiplier * 2**(-shift) + 0.5)
*
* Unsigned 64-bit integer multiplicand.
* Unsigned 64-bit fixed point multiplier, represented as
- * multiplier >> shift, where shift < 64.
- * Unsigned 64-bit integer product.
- *
- * Implementation:
- * Multiply 64x64 bits to yield a full 128-bit product.
- * Shift result in RDX:RAX right by "shift".
- * Return the low-order 64 bits of the above.
+ * (multiplier, shift), where shift < 64.
*
* Result:
- * Product
+ * Unsigned 64-bit integer product.
*
*-----------------------------------------------------------------------------
*/
-#if defined(__GNUC__)
+#if defined(__GNUC__) && !defined(MUL64_NO_ASM)
static INLINE uint64
Mul64x6464(uint64 multiplicand,
uint64 multiplier,
uint32 shift)
{
+ /*
+ * Implementation:
+ * Multiply 64x64 bits to yield a full 128-bit product.
+ * Clear the carry bit (needed for the shift == 0 case).
+ * Shift result in RDX:RAX right by "shift".
+ * Add the carry bit. (If shift > 0, this is the highest order bit
+ * that was discarded by the shift; else it is 0.)
+ * Return the low-order 64 bits of the above.
+ *
+ */
uint64 result, dummy;
- __asm__("mulq %3 \n\t"
+ __asm__("mulq %3 \n\t"
+ "clc \n\t"
"shrdq %b4, %1, %0 \n\t"
+ "adc $0, %0 \n\t"
: "=a" (result),
"=d" (dummy)
: "0" (multiplier),
"rm" (multiplicand),
- "c" (shift)
+ "c" (shift)
: "cc");
return result;
}
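For reference, the clc/shrdq/adc sequence above is equivalent to the following
portable sketch (a hypothetical helper built on GCC's unsigned __int128, not
part of this header):

#include <stdint.h>

/*
 * Hypothetical portable equivalent of Mul64x6464, for illustration.
 * For shift > 0, floor(a * b / 2**shift + 0.5) equals
 * (a * b + 2**(shift - 1)) >> shift; for shift == 0 the product is exact.
 */
static inline uint64_t
Mul64x6464Portable(uint64_t multiplicand, uint64_t multiplier, uint32_t shift)
{
   unsigned __int128 prod = (unsigned __int128)multiplicand * multiplier;

   if (shift == 0) {
      return (uint64_t)prod;   /* Nothing is discarded, nothing to round. */
   }
   /* Adding half of the smallest discarded place rounds half up, exactly
    * like adding the carry flag after shrdq in the asm version. */
   return (uint64_t)((prod + ((unsigned __int128)1 << (shift - 1))) >> shift);
}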
-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER) && !defined(MUL64_NO_ASM)
static INLINE uint64
-Mul64x6464(uint64 multiplicand, uint64 multiplier, uint32 shift)
+Mul64x6464(uint64 multiplicand,
+ uint64 multiplier,
+ uint32 shift)
{
+ /*
+ * Unfortunately, MSVC intrinsics don't give us access to the carry
+ * flag after a 128-bit shift, so the implementation is more
+ * awkward:
+ * Multiply 64x64 bits to yield a full 128-bit product.
+ * Shift result right by "shift".
+ * If shift != 0, extract and add in highest order bit that was
+ * discarded by the shift.
+ * Return the low-order 64 bits of the above.
+ */
uint64 tmplo, tmphi;
tmplo = _umul128(multiplicand, multiplier, &tmphi);
- return __shiftright128(tmplo, tmphi, (uint8) shift);
+ if (shift == 0) {
+ return tmplo;
+ } else {
+ return __shiftright128(tmplo, tmphi, (uint8) shift) +
+ ((tmplo >> (shift - 1)) & 1);
+ }
}
+#else
+#define MUL64_NO_ASM 1
+#include "mul64.h"
#endif
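A quick way to see the behavioral change in this hunk (a hypothetical test
snippet; assumes vm_basic_asm_x86_64.h and its usual type headers are on the
include path):

#include <assert.h>
#include "vm_basic_asm_x86_64.h"

static void
TestMul64x6464Rounding(void)
{
   assert(Mul64x6464(3, 1, 1) == 2);   /* 1.5 rounds up; old code truncated to 1 */
   assert(Mul64x6464(1, 1, 1) == 1);   /* the exact tie 0.5 also rounds up */
   assert(Mul64x6464(5, 1, 2) == 1);   /* 1.25 still rounds down */
}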
/*
@@ -379,80 +405,100 @@ Mul64x6464(uint64 multiplicand, uint64 multiplier, uint32 shift)
*
* Muls64x64s64 --
*
- * Signed integer by fixed point multiplication:
- * result = multiplicand * multiplier >> shift
+ * Signed integer by fixed point multiplication, with rounding:
+ * result = floor(multiplicand * multiplier * 2**(-shift) + 0.5)
*
* Signed 64-bit integer multiplicand.
* Unsigned 64-bit fixed point multiplier, represented as
- * multiplier >> shift, where shift < 64.
- * Signed 64-bit integer product.
- *
- * Implementation:
- * Multiply 64x64 bits to yield a full 128-bit product.
- * Shift result in RDX:RAX right by "shift".
- * Return the low-order 64 bits of the above.
- *
- * Note: using an unsigned shift instruction is correct because
- * shift < 64 and we return only the low 64 bits of the shifted
- * result.
+ * (multiplier, shift), where shift < 64.
*
* Result:
- * Product
+ * Signed 64-bit integer product.
*
*-----------------------------------------------------------------------------
*/
-#if defined(__GNUC__)
+#if defined(__GNUC__) && !defined(MUL64_NO_ASM)
static INLINE int64
-Muls64x64s64(int64 multiplicand, int64 multiplier, uint32 shift)
+Muls64x64s64(int64 multiplicand,
+ int64 multiplier,
+ uint32 shift)
{
int64 result, dummy;
- __asm__("imulq %3 \n\t"
- "shrdq %b4, %1, %0 \n\t"
- : "=a" (result),
- "=d" (dummy)
- : "0" (multiplier),
- "rm" (multiplicand),
- "c" (shift)
- : "cc");
+ /* Implementation:
+ * Multiply 64x64 bits to yield a full 128-bit product.
+ * Clear the carry bit (needed for the shift == 0 case).
+ * Shift result in RDX:RAX right by "shift".
+ * Add the carry bit. (If shift > 0, this is the highest order bit
+ * that was discarded by the shift; else it is 0.)
+ * Return the low-order 64 bits of the above.
+ *
+ * Note: using the unsigned shrd instruction is correct because
+ * shift < 64 and we return only the low 64 bits of the shifted
+ * result.
+ */
+ __asm__("imulq %3 \n\t"
+ "clc \n\t"
+ "shrdq %b4, %1, %0 \n\t"
+ "adc $0, %0 \n\t"
+ : "=a" (result),
+ "=d" (dummy)
+ : "0" (multiplier),
+ "rm" (multiplicand),
+ "c" (shift)
+ : "cc");
return result;
}
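The signed path is the same trick. A portable sketch (hypothetical; note that
GCC defines right shift of a negative __int128 as an arithmetic shift, which
the C standard leaves implementation-defined):

#include <stdint.h>

/* Hypothetical portable equivalent of Muls64x64s64, for illustration. */
static inline int64_t
Muls64x64s64Portable(int64_t multiplicand, int64_t multiplier, uint32_t shift)
{
   __int128 prod = (__int128)multiplicand * multiplier;

   if (shift == 0) {
      return (int64_t)prod;
   }
   /* An arithmetic shift is floor division by 2**shift, so adding
    * 2**(shift - 1) first yields floor(x + 0.5), matching the asm. */
   return (int64_t)((prod + ((__int128)1 << (shift - 1))) >> shift);
}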
-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER) && !defined(MUL64_NO_ASM)
static INLINE int64
-Muls64x64s64(int64 multiplicand, int64 multiplier, uint32 shift)
+Muls64x64s64(int64 multiplicand,
+ int64 multiplier,
+ uint32 shift)
{
+ /*
+ * Unfortunately, MSVC intrinsics don't give us access to the carry
+ * flag after a 128-bit shift, so the implementation is more
+ * awkward:
+ * Multiply 64x64 bits to yield a full 128-bit product.
+ * Shift result right by "shift".
+ * If shift != 0, extract and add in highest order bit that was
+ * discarded by the shift.
+ * Return the low-order 64 bits of the above.
+ *
+ * Note: using an unsigned shift is correct because shift < 64 and
+ * we return only the low 64 bits of the shifted result.
+ */
int64 tmplo, tmphi;
-
tmplo = _mul128(multiplicand, multiplier, &tmphi);
- return __shiftright128(tmplo, tmphi, (uint8) shift);
+ if (shift == 0) {
+ return tmplo;
+ } else {
+ return __shiftright128(tmplo, tmphi, (uint8) shift) +
+ ((tmplo >> (shift - 1)) & 1);
+ }
}
#endif
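Note that ties round toward positive infinity (round half up), not away from
zero, so -1.5 becomes -1. A hypothetical check:

#include <assert.h>
#include "vm_basic_asm_x86_64.h"

static void
TestMuls64x64s64Rounding(void)
{
   assert(Muls64x64s64(3, 1, 1) == 2);    /* 1.5 -> 2 */
   assert(Muls64x64s64(-3, 1, 1) == -1);  /* -1.5 -> -1, toward +inf */
   assert(Muls64x64s64(-5, 1, 1) == -2);  /* -2.5 -> -2; the old code gave -3 */
}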
+#ifndef MUL64_NO_ASM
/*
*-----------------------------------------------------------------------------
*
* Mul64x3264 --
*
- * Unsigned integer by fixed point multiplication:
- * result = multiplicand * multiplier >> shift
+ * Unsigned integer by fixed point multiplication, with rounding:
+ * result = floor(multiplicand * multiplier * 2**(-shift) + 0.5)
*
* Unsigned 64-bit integer multiplicand.
* Unsigned 32-bit fixed point multiplier, represented as
- * multiplier >> shift, where shift < 64.
- * Unsigned 64-bit integer product.
- *
- * Implementation:
- * Multiply 64x64 bits to yield a full 128-bit product.
- * Shift result in RDX:RAX right by "shift".
- * Return the low-order 64 bits of the above.
+ * (multiplier, shift), where shift < 64.
*
* Result:
- * Return the low-order 64 bits of ((multiplicand * multiplier) >> shift)
+ * Unsigned 64-bit integer product.
*
*-----------------------------------------------------------------------------
*/
@@ -468,21 +514,15 @@ Mul64x3264(uint64 multiplicand, uint32 multiplier, uint32 shift)
*
* Muls64x32s64 --
*
- * Signed integer by fixed point multiplication:
- * result = (multiplicand * multiplier) >> shift
+ * Signed integer by fixed point multiplication, with rounding:
+ * result = floor(multiplicand * multiplier * 2**(-shift) + 0.5)
*
* Signed 64-bit integer multiplicand.
* Unsigned 32-bit fixed point multiplier, represented as
- * multiplier >> shift, where shift < 64.
- * Signed 64-bit integer product.
- *
- * Implementation:
- * Multiply 64x64 bits to yield a full 128-bit product.
- * Shift result in RDX:RAX right by "shift".
- * Return the low-order 64 bits of the above.
+ * (multiplier, shift), where shift < 64.
*
* Result:
- * Return the low-order 64 bits of ((multiplicand * multiplier) >> shift)
+ * Signed 64-bit integer product.
*
*-----------------------------------------------------------------------------
*/
@@ -492,7 +532,7 @@ Muls64x32s64(int64 multiplicand, uint32 multiplier, uint32 shift)
{
return Muls64x64s64(multiplicand, multiplier, shift);
}
-
+#endif
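Typical use of the 32-bit-multiplier wrappers is scaling by a precomputed
fixed-point ratio. A hypothetical example using Mul64x3264, whose signature
appears in the hunk header above:

#include <stdint.h>
#include "vm_basic_asm_x86_64.h"

/*
 * Hypothetical example: scale a 64-bit tick count by 2/3, encoded as a
 * 0.32 fixed-point multiplier, using the unsigned wrapper.
 */
static uint64_t
ScaleTicksTwoThirds(uint64_t ticks)
{
   const uint32_t ratio = (uint32_t)((2ULL << 32) / 3);  /* floor(2/3 * 2**32) */

   return Mul64x3264(ticks, ratio, 32);   /* ~ ticks * 2 / 3, rounded */
}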
#if defined(__GNUC__)