author    Matt Turner <mattst88@gmail.com>    2011-09-05 00:19:25 -0400
committer Matt Turner <mattst88@gmail.com>    2011-09-05 00:19:25 -0400
commit    2dc5dfe85f0f54a52a86fff39b2c23a20e6989da
tree      99ad425b77f3c4c83c28611874c27d4748ecf483
parent    d6a25ed7853479e4e31261c04d349a6d03bb8d99
mmx: add ARM/iwmmxt inline assembly blit code (iwmmxt-optimizations4)
Signed-off-by: Matt Turner <mattst88@gmail.com>
-rw-r--r--   pixman/pixman-mmx.c | 49
1 file changed, 38 insertions(+), 11 deletions(-)
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 4778454..2797975 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -2953,8 +2953,10 @@ pixman_blt_mmx (uint32_t *src_bits,
 
     while (w >= 64)
     {
-#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX
+	__m64 v0, v1, v2, v3, v4, v5, v6, v7;
+#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)))
 	__asm__ (
+# ifdef USE_X86_MMX
 	    "movq	(%1),	%%mm0\n"
 	    "movq	8(%1),	%%mm1\n"
 	    "movq	16(%1),	%%mm2\n"
@@ -2973,19 +2975,44 @@ pixman_blt_mmx (uint32_t *src_bits,
 	    "movq	%%mm6,	48(%0)\n"
 	    "movq	%%mm7,	56(%0)\n"
 	    :
+# elif defined USE_ARM_IWMMXT
+	    "wldrd %0, [%9]\n"
+	    "wldrd %1, [%9, #8]\n"
+	    "wldrd %2, [%9, #16]\n"
+	    "wldrd %3, [%9, #24]\n"
+	    "wldrd %4, [%9, #32]\n"
+	    "wldrd %5, [%9, #40]\n"
+	    "wldrd %6, [%9, #48]\n"
+	    "wldrd %7, [%9, #56]\n"
+
+	    "wstrd %0, [%8]\n"
+	    "wstrd %1, [%8, #8]\n"
+	    "wstrd %2, [%8, #16]\n"
+	    "wstrd %3, [%8, #24]\n"
+	    "wstrd %4, [%8, #32]\n"
+	    "wstrd %5, [%8, #40]\n"
+	    "wstrd %6, [%8, #48]\n"
+	    "wstrd %7, [%8, #56]\n"
+	    : "=&y" (v0), "=&y" (v1), "=&y" (v2), "=&y" (v3),
+	      "=&y" (v4), "=&y" (v5), "=&y" (v6), "=&y" (v7)
+# endif
 	    : "r" (d), "r" (s)
-	    : "memory",
+	    : "memory"
+# ifdef USE_X86_MMX
+	    ,
 	      "%mm0", "%mm1", "%mm2", "%mm3",
-	      "%mm4", "%mm5", "%mm6", "%mm7");
+	      "%mm4", "%mm5", "%mm6", "%mm7"
+# endif
+	    );
 #else
-	__m64 v0 = *(__m64 *)(s + 0);
-	__m64 v1 = *(__m64 *)(s + 8);
-	__m64 v2 = *(__m64 *)(s + 16);
-	__m64 v3 = *(__m64 *)(s + 24);
-	__m64 v4 = *(__m64 *)(s + 32);
-	__m64 v5 = *(__m64 *)(s + 40);
-	__m64 v6 = *(__m64 *)(s + 48);
-	__m64 v7 = *(__m64 *)(s + 56);
+	v0 = *(__m64 *)(s + 0);
+	v1 = *(__m64 *)(s + 8);
+	v2 = *(__m64 *)(s + 16);
+	v3 = *(__m64 *)(s + 24);
+	v4 = *(__m64 *)(s + 32);
+	v5 = *(__m64 *)(s + 40);
+	v6 = *(__m64 *)(s + 48);
+	v7 = *(__m64 *)(s + 56);
 	*(__m64 *)(d + 0) = v0;
 	*(__m64 *)(d + 8) = v1;
 	*(__m64 *)(d + 16) = v2;
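Read on its own, the new USE_ARM_IWMMXT branch amounts to the standalone sketch below: eight wldrd instructions load 64 bytes from the source into iWMMXt registers through the "=&y" early-clobber outputs (%0-%7), and eight wstrd instructions store them to the destination, with %8 and %9 bound to the d and s pointers. This is a minimal illustration, not pixman code: copy64 is a hypothetical name, the __m64 typedef stands in for the one GCC's mmintrin.h provides on iWMMXt targets, and volatile is added here because the sketch never reads v0-v7 afterwards.

#include <stdint.h>

/* Stand-in for the __m64 that GCC's arm mmintrin.h defines on
 * iWMMXt targets; assumes GCC building for an iWMMXt-capable core. */
typedef unsigned long long __m64;

static void
copy64 (uint8_t *d, const uint8_t *s)
{
    __m64 v0, v1, v2, v3, v4, v5, v6, v7;

    /* %0-%7 are the eight "=&y" outputs pinned to iWMMXt registers;
     * %8 is the destination pointer, %9 the source pointer.  volatile
     * keeps the copy from being optimized away, since v0-v7 are dead
     * after the statement. */
    __asm__ volatile (
	"wldrd %0, [%9]\n"        /* load 8 x 8 bytes from s... */
	"wldrd %1, [%9, #8]\n"
	"wldrd %2, [%9, #16]\n"
	"wldrd %3, [%9, #24]\n"
	"wldrd %4, [%9, #32]\n"
	"wldrd %5, [%9, #40]\n"
	"wldrd %6, [%9, #48]\n"
	"wldrd %7, [%9, #56]\n"
	"wstrd %0, [%8]\n"        /* ...then store them to d */
	"wstrd %1, [%8, #8]\n"
	"wstrd %2, [%8, #16]\n"
	"wstrd %3, [%8, #24]\n"
	"wstrd %4, [%8, #32]\n"
	"wstrd %5, [%8, #40]\n"
	"wstrd %6, [%8, #48]\n"
	"wstrd %7, [%8, #56]\n"
	: "=&y" (v0), "=&y" (v1), "=&y" (v2), "=&y" (v3),
	  "=&y" (v4), "=&y" (v5), "=&y" (v6), "=&y" (v7)
	: "r" (d), "r" (s)
	: "memory");
}

In pixman_blt_mmx the equivalent statement sits inside the while (w >= 64) loop, and hoisting the v0-v7 declarations lets the same variables serve both as the asm outputs on ARM and as ordinary temporaries in the #else fallback, which performs the copy with plain __m64 loads and stores.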