author     Matt Turner <mattst88@gmail.com>   2011-09-05 00:19:25 -0400
committer  Matt Turner <mattst88@gmail.com>   2011-09-05 00:19:25 -0400
commit     2dc5dfe85f0f54a52a86fff39b2c23a20e6989da (patch)
tree       99ad425b77f3c4c83c28611874c27d4748ecf483
parent     d6a25ed7853479e4e31261c04d349a6d03bb8d99 (diff)

mmx: add ARM/iwmmxt inline assembly blit code  (branch: iwmmxt-optimizations4)

Signed-off-by: Matt Turner <mattst88@gmail.com>

-rw-r--r--  pixman/pixman-mmx.c  49
1 file changed, 38 insertions(+), 11 deletions(-)
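
The diff below adds an ARM/iwmmxt path to the 64-byte inner loop of pixman_blt_mmx: eight wldrd loads pull 64 bytes from the source into WMMX registers, and eight wstrd stores write them back out to the destination, mirroring the existing x86 movq sequence. As a minimal sketch of the same access pattern (plain 64-bit copies, no WMMX registers; the helper name is hypothetical and not part of the patch):

#include <stdint.h>

/* Illustration only: one iteration of the 64-byte blit loop expressed in
 * plain C.  Each load corresponds to a wldrd (or movq load) and each store
 * to a wstrd (or movq store) in the patch below.  Assumes 8-byte-aligned
 * src/dst, as the assembly does. */
static void
blit_64_bytes (uint8_t *d, const uint8_t *s)
{
    uint64_t v0 = *(const uint64_t *)(s +  0);
    uint64_t v1 = *(const uint64_t *)(s +  8);
    uint64_t v2 = *(const uint64_t *)(s + 16);
    uint64_t v3 = *(const uint64_t *)(s + 24);
    uint64_t v4 = *(const uint64_t *)(s + 32);
    uint64_t v5 = *(const uint64_t *)(s + 40);
    uint64_t v6 = *(const uint64_t *)(s + 48);
    uint64_t v7 = *(const uint64_t *)(s + 56);

    *(uint64_t *)(d +  0) = v0;
    *(uint64_t *)(d +  8) = v1;
    *(uint64_t *)(d + 16) = v2;
    *(uint64_t *)(d + 24) = v3;
    *(uint64_t *)(d + 32) = v4;
    *(uint64_t *)(d + 40) = v5;
    *(uint64_t *)(d + 48) = v6;
    *(uint64_t *)(d + 56) = v7;
}
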
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 4778454..2797975 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -2953,8 +2953,10 @@ pixman_blt_mmx (uint32_t *src_bits,
while (w >= 64)
{
-#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX
+ __m64 v0, v1, v2, v3, v4, v5, v6, v7;
+#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)))
__asm__ (
+# ifdef USE_X86_MMX
"movq (%1), %%mm0\n"
"movq 8(%1), %%mm1\n"
"movq 16(%1), %%mm2\n"
@@ -2973,19 +2975,44 @@ pixman_blt_mmx (uint32_t *src_bits,
"movq %%mm6, 48(%0)\n"
"movq %%mm7, 56(%0)\n"
:
+# elif defined USE_ARM_IWMMXT
+ "wldrd %0, [%9]\n"
+ "wldrd %1, [%9, #8]\n"
+ "wldrd %2, [%9, #16]\n"
+ "wldrd %3, [%9, #24]\n"
+ "wldrd %4, [%9, #32]\n"
+ "wldrd %5, [%9, #40]\n"
+ "wldrd %6, [%9, #48]\n"
+ "wldrd %7, [%9, #56]\n"
+
+ "wstrd %0, [%8]\n"
+ "wstrd %1, [%8, #8]\n"
+ "wstrd %2, [%8, #16]\n"
+ "wstrd %3, [%8, #24]\n"
+ "wstrd %4, [%8, #32]\n"
+ "wstrd %5, [%8, #40]\n"
+ "wstrd %6, [%8, #48]\n"
+ "wstrd %7, [%8, #56]\n"
+ : "=&y" (v0), "=&y" (v1), "=&y" (v2), "=&y" (v3),
+ "=&y" (v4), "=&y" (v5), "=&y" (v6), "=&y" (v7)
+# endif
: "r" (d), "r" (s)
- : "memory",
+ : "memory"
+# ifdef USE_X86_MMX
+ ,
"%mm0", "%mm1", "%mm2", "%mm3",
- "%mm4", "%mm5", "%mm6", "%mm7");
+ "%mm4", "%mm5", "%mm6", "%mm7"
+# endif
+ );
#else
- __m64 v0 = *(__m64 *)(s + 0);
- __m64 v1 = *(__m64 *)(s + 8);
- __m64 v2 = *(__m64 *)(s + 16);
- __m64 v3 = *(__m64 *)(s + 24);
- __m64 v4 = *(__m64 *)(s + 32);
- __m64 v5 = *(__m64 *)(s + 40);
- __m64 v6 = *(__m64 *)(s + 48);
- __m64 v7 = *(__m64 *)(s + 56);
+ v0 = *(__m64 *)(s + 0);
+ v1 = *(__m64 *)(s + 8);
+ v2 = *(__m64 *)(s + 16);
+ v3 = *(__m64 *)(s + 24);
+ v4 = *(__m64 *)(s + 32);
+ v5 = *(__m64 *)(s + 40);
+ v6 = *(__m64 *)(s + 48);
+ v7 = *(__m64 *)(s + 56);
*(__m64 *)(d + 0) = v0;
*(__m64 *)(d + 8) = v1;
*(__m64 *)(d + 16) = v2;