diff options
author | Matt Turner <mattst88@gmail.com> | 2012-05-20 20:51:08 -0400 |
---|---|---|
committer | Matt Turner <mattst88@gmail.com> | 2012-07-01 16:34:14 -0400 |
commit | 1ad6ae6ee8a350f6fe4f30ba928aacf44d04f86e (patch) | |
tree | 98d9a59ac17f4fe0b31b426623b6125af3d19413 | |
parent | c43de364cbcd195f7d1d6881a6109cbb3d6b73b8 (diff) |
mmx: add scaled bilinear over_8888_8_8888
Loongson:
image firefox-fishtank 1665.163 1670.370 0.17% 3/3
image firefox-fishtank 1037.738 1040.218 0.19% 3/3
ARM/iwMMXt:
image firefox-fishtank 2042.723 2045.308 0.10% 3/3
image firefox-fishtank 1487.282 1492.640 0.17% 3/3
-rw-r--r-- | pixman/pixman-mmx.c | 84 |
1 file changed, 84 insertions, 0 deletions
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index bf66a63..0c79f3a 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -3564,12 +3564,18 @@ do { \
     lo = _mm_srli_pi32 (lo, BILINEAR_INTERPOLATION_BITS * 2); \
     lo = _mm_packs_pi32 (lo, hi); \
     lo = _mm_packs_pu16 (lo, lo); \
     pix = lo; \
 } while (0)
 
+#define BILINEAR_SKIP_ONE_PIXEL() \
+do { \
+    vx += unit_x; \
+    mm_x = _mm_add_pi16 (mm_x, mm_ux); \
+} while(0)
+
 static force_inline void
 scaled_bilinear_scanline_mmx_8888_8888_SRC (uint32_t * dst,
                                             const uint32_t * mask,
                                             const uint32_t * src_top,
                                             const uint32_t * src_bottom,
                                             int32_t w,
@@ -3656,12 +3662,85 @@ FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_OVER,
                                uint32_t, uint32_t, uint32_t,
                                NONE, FLAG_NONE)
 FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_OVER,
                                scaled_bilinear_scanline_mmx_8888_8888_OVER,
                                uint32_t, uint32_t, uint32_t,
                                NORMAL, FLAG_NONE)
+
+static force_inline void
+scaled_bilinear_scanline_mmx_8888_8_8888_OVER (uint32_t * dst,
+                                               const uint8_t * mask,
+                                               const uint32_t * src_top,
+                                               const uint32_t * src_bottom,
+                                               int32_t w,
+                                               int wt,
+                                               int wb,
+                                               pixman_fixed_t vx,
+                                               pixman_fixed_t unit_x,
+                                               pixman_fixed_t max_vx,
+                                               pixman_bool_t zero_src)
+{
+    BILINEAR_DECLARE_VARIABLES;
+    __m64 pix1, pix2;
+    uint32_t m;
+
+    while (w)
+    {
+        m = (uint32_t) *mask++;
+
+        if (m)
+        {
+            BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+
+            if (m == 0xff && is_opaque (pix1))
+            {
+                store (dst, pix1);
+            }
+            else
+            {
+                __m64 ms, md, ma, msa;
+
+                pix2 = load (dst);
+                ma = expand_alpha_rev (to_m64 (m));
+                ms = _mm_unpacklo_pi8 (pix1, _mm_setzero_si64 ());
+                md = _mm_unpacklo_pi8 (pix2, _mm_setzero_si64 ());
+
+                msa = expand_alpha (ms);
+
+                store8888 (dst, (in_over (ms, msa, ma, md)));
+            }
+        }
+        else
+        {
+            BILINEAR_SKIP_ONE_PIXEL ();
+        }
+
+        w--;
+        dst++;
+    }
+
+    _mm_empty ();
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_cover_OVER,
+                               scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
+                               uint32_t, uint8_t, uint32_t,
+                               COVER, FLAG_HAVE_NON_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_pad_OVER,
+                               scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
+                               uint32_t, uint8_t, uint32_t,
+                               PAD, FLAG_HAVE_NON_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_none_OVER,
+                               scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
+                               uint32_t, uint8_t, uint32_t,
+                               NONE, FLAG_HAVE_NON_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_normal_OVER,
+                               scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
+                               uint32_t, uint8_t, uint32_t,
+                               NORMAL, FLAG_HAVE_NON_SOLID_MASK)
+
 static uint32_t *
 mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
 {
     int w = iter->width;
     uint32_t *dst = iter->buffer;
     uint32_t *src = (uint32_t *)iter->bits;
@@ -3923,12 +4002,17 @@ static const pixman_fast_path_t mmx_fast_paths[] =
 
     SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
     SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ),
     SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ),
     SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ),
 
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8_8888 ),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8_8888 ),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8_8888 ),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8_8888 ),
+
     { PIXMAN_OP_NONE },
 };
 
 static pixman_bool_t
 mmx_blt (pixman_implementation_t *imp,
          uint32_t * src_bits,