summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matt Turner <mattst88@gmail.com> 2012-02-19 01:32:31 -0500
committer Matt Turner <mattst88@gmail.com> 2012-02-21 12:46:02 -0500
commit 14208344964f341a7b4a704b05cf4804c23792e9 (patch)
tree 665468163524f0f6f6bbf165bc06b73d6420ebdf
parent 69ed71fad11d541f89eee1238c587a03a9cf59cb (diff)
mmx: Use _mm_mulhi_pu16
The pmulhuw x86 instruction is part of Extended 3DNow! and SSE1. The equivalent ARM wmuluh instruction was available from the first iwMMXt instruction set.

This instruction is already used in the SSE2 code.

Reduces code size by ~5%.

amd64
   text    data     bss     dec     hex filename
  31325    2240       0   33565    831d .libs/libpixman_mmx_la-pixman-mmx.o
  29925    2240       0   32165    7da5 .libs/libpixman_mmx_la-pixman-mmx.o

x86
   text    data     bss     dec     hex filename
  29165    1792       0   30957    78ed .libs/libpixman_mmx_la-pixman-mmx.o
  27677    1792       0   29469    731d .libs/libpixman_mmx_la-pixman-mmx.o

arm
   text    data     bss     dec     hex filename
  31632    1792       0   33424    8290 .libs/libpixman_iwmmxt_la-pixman-mmx.o
  30176    1792       0   31968    7ce0 .libs/libpixman_iwmmxt_la-pixman-mmx.o

Signed-off-by: Matt Turner <mattst88@gmail.com>
-rw-r--r-- pixman/pixman-mmx.c 20
1 files changed, 18 insertions, 2 deletions
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index d89c3d6..f8950be 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -56,6 +56,21 @@ _mm_empty (void)
}
#endif
+#ifdef USE_X86_MMX
+/* We have to compile with -msse to use xmmintrin.h, but that causes SSE
+ * instructions to be generated that we don't want. Just duplicate the
+ * functions we want to use. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pu16 (__m64 __A, __m64 __B)
+{
+ asm("pmulhuw %1, %0\n\t"
+ : "+y" (__A)
+ : "y" (__B)
+ );
+ return __A;
+}
+#endif
+
/* Notes about writing mmx code
*
* give memory operands as the second operand. If you give it as the
@@ -128,6 +143,7 @@ typedef struct
mmxdatafield mmx_ffff0000ffff0000;
mmxdatafield mmx_0000ffff00000000;
mmxdatafield mmx_000000000000ffff;
+ mmxdatafield mmx_4x0101;
} mmx_data_t;
#if defined(_MSC_VER)
@@ -155,6 +171,7 @@ static const mmx_data_t c =
MMXDATA_INIT (.mmx_ffff0000ffff0000, 0xffff0000ffff0000),
MMXDATA_INIT (.mmx_0000ffff00000000, 0x0000ffff00000000),
MMXDATA_INIT (.mmx_000000000000ffff, 0x000000000000ffff),
+ MMXDATA_INIT (.mmx_4x0101, 0x0101010101010101),
};
#ifdef USE_CVT_INTRINSICS
@@ -218,8 +235,7 @@ pix_multiply (__m64 a, __m64 b)
res = _mm_mullo_pi16 (a, b);
res = _mm_adds_pu16 (res, MC (4x0080));
- res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8));
- res = _mm_srli_pi16 (res, 8);
+ res = _mm_mulhi_pu16 (res, MC (4x0101));
return res;
}