Diffstat (limited to 'open-vm-tools/lib/include/vm_basic_asm_x86_64.h')
-rw-r--r--  open-vm-tools/lib/include/vm_basic_asm_x86_64.h | 178
1 file changed, 109 insertions(+), 69 deletions(-)
diff --git a/open-vm-tools/lib/include/vm_basic_asm_x86_64.h b/open-vm-tools/lib/include/vm_basic_asm_x86_64.h
index 4cd6aebe..ecb4189f 100644
--- a/open-vm-tools/lib/include/vm_basic_asm_x86_64.h
+++ b/open-vm-tools/lib/include/vm_basic_asm_x86_64.h
@@ -1,5 +1,5 @@
/*********************************************************
- * Copyright (C) 1998-2004 VMware, Inc. All rights reserved.
+ * Copyright (C) 1998-2015 VMware, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -53,14 +53,14 @@
#error "This file is x86-64 only!"
#endif
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(BORA_NO_WIN32_INTRINS)
#ifdef __cplusplus
extern "C" {
#endif
-uint64 _umul128(uint64 multiplier, uint64 multiplicand,
+uint64 _umul128(uint64 multiplier, uint64 multiplicand,
uint64 *highProduct);
-int64 _mul128(int64 multiplier, int64 multiplicand,
+int64 _mul128(int64 multiplier, int64 multiplicand,
int64 *highProduct);
uint64 __shiftright128(uint64 lowPart, uint64 highPart, uint8 shift);
#ifdef __cplusplus
@@ -323,55 +323,81 @@ xtest(void)
*
* Mul64x6464 --
*
- * Unsigned integer by fixed point multiplication:
- * result = multiplicand * multiplier >> shift
+ * Unsigned integer by fixed point multiplication, with rounding:
+ * result = floor(multiplicand * multiplier * 2**(-shift) + 0.5)
*
* Unsigned 64-bit integer multiplicand.
* Unsigned 64-bit fixed point multiplier, represented as
- * multiplier >> shift, where shift < 64.
- * Unsigned 64-bit integer product.
- *
- * Implementation:
- * Multiply 64x64 bits to yield a full 128-bit product.
- * Shift result in RDX:RAX right by "shift".
- * Return the low-order 64 bits of the above.
+ * (multiplier, shift), where shift < 64.
*
* Result:
- * Product
+ * Unsigned 64-bit integer product.
*
*-----------------------------------------------------------------------------
*/
-#if defined(__GNUC__)
+#if defined(__GNUC__) && !defined(MUL64_NO_ASM)
static INLINE uint64
Mul64x6464(uint64 multiplicand,
uint64 multiplier,
uint32 shift)
{
+ /*
+ * Implementation:
+ * Multiply 64x64 bits to yield a full 128-bit product.
+ * Clear the carry bit (needed for the shift == 0 case).
+ * Shift result in RDX:RAX right by "shift".
+ * Add the carry bit. (If shift > 0, this is the highest order bit
+ * that was discarded by the shift; else it is 0.)
+ * Return the low-order 64 bits of the above.
+ *
+ */
uint64 result, dummy;
- __asm__("mulq %3 \n\t"
+ __asm__("mulq %3 \n\t"
+ "clc \n\t"
"shrdq %b4, %1, %0 \n\t"
+ "adc $0, %0 \n\t"
: "=a" (result),
"=d" (dummy)
: "0" (multiplier),
"rm" (multiplicand),
- "c" (shift)
+ "c" (shift)
: "cc");
return result;
}
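For reference, the clc/shrdq/adc sequence above is equivalent to the following
portable sketch (a hypothetical helper built on GCC's unsigned __int128, not
part of this header):

#include <stdint.h>

/*
 * Hypothetical portable equivalent of Mul64x6464, for illustration.
 * For shift > 0, floor(a * b / 2**shift + 0.5) equals
 * (a * b + 2**(shift - 1)) >> shift; for shift == 0 the product is exact.
 */
static inline uint64_t
Mul64x6464Portable(uint64_t multiplicand, uint64_t multiplier, uint32_t shift)
{
   unsigned __int128 prod = (unsigned __int128)multiplicand * multiplier;

   if (shift == 0) {
      return (uint64_t)prod;   /* Nothing is discarded, nothing to round. */
   }
   /* Adding half of the smallest discarded place rounds half up, exactly
    * like adding the carry flag after shrdq in the asm version. */
   return (uint64_t)((prod + ((unsigned __int128)1 << (shift - 1))) >> shift);
}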
-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER) && !defined(MUL64_NO_ASM)
static INLINE uint64
-Mul64x6464(uint64 multiplicand, uint64 multiplier, uint32 shift)
+Mul64x6464(uint64 multiplicand,
+ uint64 multiplier,
+ uint32 shift)
{
+ /*
+ * Unfortunately, MSVC intrinsics don't give us access to the carry
+ * flag after a 128-bit shift, so the implementation is more
+ * awkward:
+ * Multiply 64x64 bits to yield a full 128-bit product.
+ * Shift result right by "shift".
+ * If shift != 0, extract and add in highest order bit that was
+ * discarded by the shift.
+ * Return the low-order 64 bits of the above.
+ */
uint64 tmplo, tmphi;
tmplo = _umul128(multiplicand, multiplier, &tmphi);
- return __shiftright128(tmplo, tmphi, (uint8) shift);
+ if (shift == 0) {
+ return tmplo;
+ } else {
+ return __shiftright128(tmplo, tmphi, (uint8) shift) +
+ ((tmplo >> (shift - 1)) & 1);
+ }
}
+#else
+#define MUL64_NO_ASM 1
+#include "mul64.h"
#endif
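A quick way to see the behavioral change in this hunk (a hypothetical test
snippet; assumes vm_basic_asm_x86_64.h and its usual type headers are on the
include path):

#include <assert.h>
#include "vm_basic_asm_x86_64.h"

static void
TestMul64x6464Rounding(void)
{
   assert(Mul64x6464(3, 1, 1) == 2);   /* 1.5 rounds up; old code truncated to 1 */
   assert(Mul64x6464(1, 1, 1) == 1);   /* the exact tie 0.5 also rounds up */
   assert(Mul64x6464(5, 1, 2) == 1);   /* 1.25 still rounds down */
}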
/*
@@ -379,80 +405,100 @@ Mul64x6464(uint64 multiplicand, uint64 multiplier, uint32 shift)
*
* Muls64x64s64 --
*
- * Signed integer by fixed point multiplication:
- * result = multiplicand * multiplier >> shift
+ * Signed integer by fixed point multiplication, with rounding:
+ * result = floor(multiplicand * multiplier * 2**(-shift) + 0.5)
*
* Signed 64-bit integer multiplicand.
* Unsigned 64-bit fixed point multiplier, represented as
- * multiplier >> shift, where shift < 64.
- * Signed 64-bit integer product.
- *
- * Implementation:
- * Multiply 64x64 bits to yield a full 128-bit product.
- * Shift result in RDX:RAX right by "shift".
- * Return the low-order 64 bits of the above.
- *
- * Note: using an unsigned shift instruction is correct because
- * shift < 64 and we return only the low 64 bits of the shifted
- * result.
+ * (multiplier, shift), where shift < 64.
*
* Result:
- * Product
+ * Signed 64-bit integer product.
*
*-----------------------------------------------------------------------------
*/
-#if defined(__GNUC__)
+#if defined(__GNUC__) && !defined(MUL64_NO_ASM)
static INLINE int64
-Muls64x64s64(int64 multiplicand, int64 multiplier, uint32 shift)
+Muls64x64s64(int64 multiplicand,
+ int64 multiplier,
+ uint32 shift)
{
int64 result, dummy;
- __asm__("imulq %3 \n\t"
- "shrdq %b4, %1, %0 \n\t"
- : "=a" (result),
- "=d" (dummy)
- : "0" (multiplier),
- "rm" (multiplicand),
- "c" (shift)
- : "cc");
+ /* Implementation:
+ * Multiply 64x64 bits to yield a full 128-bit product.
+ * Clear the carry bit (needed for the shift == 0 case).
+ * Shift result in RDX:RAX right by "shift".
+ * Add the carry bit. (If shift > 0, this is the highest order bit
+ * that was discarded by the shift; else it is 0.)
+ * Return the low-order 64 bits of the above.
+ *
+ * Note: using the unsigned shrd instruction is correct because
+ * shift < 64 and we return only the low 64 bits of the shifted
+ * result.
+ */
+ __asm__("imulq %3 \n\t"
+ "clc \n\t"
+ "shrdq %b4, %1, %0 \n\t"
+ "adc $0, %0 \n\t"
+ : "=a" (result),
+ "=d" (dummy)
+ : "0" (multiplier),
+ "rm" (multiplicand),
+ "c" (shift)
+ : "cc");
return result;
}
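The signed path is the same trick. A portable sketch (hypothetical; note that
GCC defines right shift of a negative __int128 as an arithmetic shift, which
the C standard leaves implementation-defined):

#include <stdint.h>

/* Hypothetical portable equivalent of Muls64x64s64, for illustration. */
static inline int64_t
Muls64x64s64Portable(int64_t multiplicand, int64_t multiplier, uint32_t shift)
{
   __int128 prod = (__int128)multiplicand * multiplier;

   if (shift == 0) {
      return (int64_t)prod;
   }
   /* An arithmetic shift is floor division by 2**shift, so adding
    * 2**(shift - 1) first yields floor(x + 0.5), matching the asm. */
   return (int64_t)((prod + ((__int128)1 << (shift - 1))) >> shift);
}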
-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER) && !defined(MUL64_NO_ASM)
static INLINE int64
-Muls64x64s64(int64 multiplicand, int64 multiplier, uint32 shift)
+Muls64x64s64(int64 multiplicand,
+ int64 multiplier,
+ uint32 shift)
{
+ /*
+ * Unfortunately, MSVC intrinsics don't give us access to the carry
+ * flag after a 128-bit shift, so the implementation is more
+ * awkward:
+ * Multiply 64x64 bits to yield a full 128-bit product.
+ * Shift result right by "shift".
+ * If shift != 0, extract and add in highest order bit that was
+ * discarded by the shift.
+ * Return the low-order 64 bits of the above.
+ *
+ * Note: using an unsigned shift is correct because shift < 64 and
+ * we return only the low 64 bits of the shifted result.
+ */
int64 tmplo, tmphi;
-
tmplo = _mul128(multiplicand, multiplier, &tmphi);
- return __shiftright128(tmplo, tmphi, (uint8) shift);
+ if (shift == 0) {
+ return tmplo;
+ } else {
+ return __shiftright128(tmplo, tmphi, (uint8) shift) +
+ ((tmplo >> (shift - 1)) & 1);
+ }
}
#endif
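Note that ties round toward positive infinity (round half up), not away from
zero, so -1.5 becomes -1. A hypothetical check:

#include <assert.h>
#include "vm_basic_asm_x86_64.h"

static void
TestMuls64x64s64Rounding(void)
{
   assert(Muls64x64s64(3, 1, 1) == 2);    /* 1.5 -> 2 */
   assert(Muls64x64s64(-3, 1, 1) == -1);  /* -1.5 -> -1, toward +inf */
   assert(Muls64x64s64(-5, 1, 1) == -2);  /* -2.5 -> -2; the old code gave -3 */
}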
+#ifndef MUL64_NO_ASM
/*
*-----------------------------------------------------------------------------
*
* Mul64x3264 --
*
- * Unsigned integer by fixed point multiplication:
- * result = multiplicand * multiplier >> shift
+ * Unsigned integer by fixed point multiplication, with rounding:
+ * result = floor(multiplicand * multiplier * 2**(-shift) + 0.5)
*
* Unsigned 64-bit integer multiplicand.
* Unsigned 32-bit fixed point multiplier, represented as
- * multiplier >> shift, where shift < 64.
- * Unsigned 64-bit integer product.
- *
- * Implementation:
- * Multiply 64x64 bits to yield a full 128-bit product.
- * Shift result in RDX:RAX right by "shift".
- * Return the low-order 64 bits of the above.
+ * (multiplier, shift), where shift < 64.
*
* Result:
- * Return the low-order 64 bits of ((multiplicand * multiplier) >> shift)
+ * Unsigned 64-bit integer product.
*
*-----------------------------------------------------------------------------
*/
@@ -468,21 +514,15 @@ Mul64x3264(uint64 multiplicand, uint32 multiplier, uint32 shift)
*
* Muls64x32s64 --
*
- * Signed integer by fixed point multiplication:
- * result = (multiplicand * multiplier) >> shift
+ * Signed integer by fixed point multiplication, with rounding:
+ * result = floor(multiplicand * multiplier * 2**(-shift) + 0.5)
*
* Signed 64-bit integer multiplicand.
* Unsigned 32-bit fixed point multiplier, represented as
- * multiplier >> shift, where shift < 64.
- * Signed 64-bit integer product.
- *
- * Implementation:
- * Multiply 64x64 bits to yield a full 128-bit product.
- * Shift result in RDX:RAX right by "shift".
- * Return the low-order 64 bits of the above.
+ * (multiplier, shift), where shift < 64.
*
* Result:
- * Return the low-order 64 bits of ((multiplicand * multiplier) >> shift)
+ * Signed 64-bit integer product.
*
*-----------------------------------------------------------------------------
*/
@@ -492,7 +532,7 @@ Muls64x32s64(int64 multiplicand, uint32 multiplier, uint32 shift)
{
return Muls64x64s64(multiplicand, multiplier, shift);
}
-
+#endif
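Typical use of the 32-bit-multiplier wrappers is scaling by a precomputed
fixed-point ratio. A hypothetical example using Mul64x3264, whose signature
appears in the hunk header above:

#include <stdint.h>
#include "vm_basic_asm_x86_64.h"

/*
 * Hypothetical example: scale a 64-bit tick count by 2/3, encoded as a
 * 0.32 fixed-point multiplier, using the unsigned wrapper.
 */
static uint64_t
ScaleTicksTwoThirds(uint64_t ticks)
{
   const uint32_t ratio = (uint32_t)((2ULL << 32) / 3);  /* floor(2/3 * 2**32) */

   return Mul64x3264(ticks, ratio, 32);   /* ~ ticks * 2 / 3, rounded */
}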
#if defined(__GNUC__)