summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matt Turner <mattst88@gmail.com> 2012-05-20 20:51:08 -0400
committer Matt Turner <mattst88@gmail.com> 2012-07-01 16:34:14 -0400
commit 1ad6ae6ee8a350f6fe4f30ba928aacf44d04f86e (patch)
tree 98d9a59ac17f4fe0b31b426623b6125af3d19413
parent c43de364cbcd195f7d1d6881a6109cbb3d6b73b8 (diff)
mmx: add scaled bilinear over_8888_8_8888
Loongson:
image firefox-fishtank 1665.163 1670.370 0.17% 3/3
image firefox-fishtank 1037.738 1040.218 0.19% 3/3

ARM/iwMMXt:
image firefox-fishtank 2042.723 2045.308 0.10% 3/3
image firefox-fishtank 1487.282 1492.640 0.17% 3/3
-rw-r--r-- pixman/pixman-mmx.c 84
1 files changed, 84 insertions, 0 deletions
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index bf66a63..0c79f3a 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -3564,12 +3564,18 @@ do { \
lo = _mm_srli_pi32 (lo, BILINEAR_INTERPOLATION_BITS * 2); \
lo = _mm_packs_pi32 (lo, hi); \
lo = _mm_packs_pu16 (lo, lo); \
pix = lo; \
} while (0)
+#define BILINEAR_SKIP_ONE_PIXEL() /* advance the horizontal source position by one step without producing a pixel */ \
+do { \
+ vx += unit_x; /* step the enclosing function's vx by unit_x */ \
+ mm_x = _mm_add_pi16 (mm_x, mm_ux); /* packed 16-bit add of mm_ux into mm_x; NOTE(review): presumably the vector counterpart of vx - confirm against BILINEAR_DECLARE_VARIABLES */ \
+} while(0)
+
static force_inline void
scaled_bilinear_scanline_mmx_8888_8888_SRC (uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
@@ -3656,12 +3662,85 @@ FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_OVER,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_OVER,
scaled_bilinear_scanline_mmx_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
+
+static force_inline void /* per-scanline worker: for each of w pixels, interpolate one source pixel and composite it onto dst under an 8-bit mask */
+scaled_bilinear_scanline_mmx_8888_8_8888_OVER (uint32_t * dst, /* destination pixels; read and written, advanced once per iteration */
+ const uint8_t * mask, /* one mask byte is consumed per pixel */
+ const uint32_t * src_top, /* not referenced directly here; NOTE(review): presumably read by the BILINEAR_* macros - confirm */
+ const uint32_t * src_bottom, /* not referenced directly here; NOTE(review): presumably read by the BILINEAR_* macros - confirm */
+ int32_t w, /* number of pixels to process */
+ int wt, /* not referenced directly here; NOTE(review): presumably the top-row weight - confirm */
+ int wb, /* not referenced directly here; NOTE(review): presumably the bottom-row weight - confirm */
+ pixman_fixed_t vx, /* horizontal source position; stepped by unit_x in BILINEAR_SKIP_ONE_PIXEL */
+ pixman_fixed_t unit_x, /* increment applied to vx per skipped pixel */
+ pixman_fixed_t max_vx, /* not referenced in this body */
+ pixman_bool_t zero_src) /* not referenced in this body */
+{
+ BILINEAR_DECLARE_VARIABLES; /* NOTE(review): presumably declares mm_x/mm_ux and other state used by the BILINEAR_* macros - confirm */
+ __m64 pix1, pix2; /* pix1: interpolated source pixel; pix2: current destination pixel */
+ uint32_t m; /* mask value for the current pixel */
+
+ while (w) /* one iteration per output pixel */
+ {
+ m = (uint32_t) *mask++; /* fetch the mask byte and advance the mask pointer */
+
+ if (m) /* non-zero mask: this pixel contributes to dst */
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); /* result lands in pix1; NOTE(review): presumably also advances vx/mm_x like the skip macro - confirm */
+
+ if (m == 0xff && is_opaque (pix1)) /* fast path: full mask value and is_opaque() holds for the source pixel */
+ {
+ store (dst, pix1); /* write the interpolated pixel without reading dst */
+ }
+ else
+ {
+ __m64 ms, md, ma, msa; /* source, destination, mask and source-alpha operands for in_over */
+
+ pix2 = load (dst); /* current destination pixel */
+ ma = expand_alpha_rev (to_m64 (m)); /* NOTE(review): presumably the mask value replicated across the channels - confirm helper */
+ ms = _mm_unpacklo_pi8 (pix1, _mm_setzero_si64 ()); /* widen source bytes to 16-bit lanes by interleaving with zero */
+ md = _mm_unpacklo_pi8 (pix2, _mm_setzero_si64 ()); /* same widening for the destination pixel */
+
+ msa = expand_alpha (ms); /* NOTE(review): presumably the source alpha replicated per channel - confirm helper */
+
+ store8888 (dst, (in_over (ms, msa, ma, md))); /* write the in_over (source, source alpha, mask, destination) result back to dst */
+ }
+ }
+ else
+ {
+ BILINEAR_SKIP_ONE_PIXEL (); /* zero mask: dst is left untouched, only the source position advances */
+ }
+
+ w--;
+ dst++;
+ }
+
+ _mm_empty (); /* end of MMX work; on x86 this intrinsic issues EMMS */
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_cover_OVER, /* COVER instantiation of the a8-masked OVER scanline function */
+ scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
+ uint32_t, uint8_t, uint32_t, /* NOTE(review): presumably the source, mask and destination element types - confirm against the macro definition */
+ COVER, FLAG_HAVE_NON_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_pad_OVER, /* PAD instantiation, same scanline function and flag */
+ scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
+ uint32_t, uint8_t, uint32_t,
+ PAD, FLAG_HAVE_NON_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_none_OVER, /* NONE instantiation, same scanline function and flag */
+ scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
+ uint32_t, uint8_t, uint32_t,
+ NONE, FLAG_HAVE_NON_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_normal_OVER, /* NORMAL instantiation, same scanline function and flag */
+ scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
+ uint32_t, uint8_t, uint32_t,
+ NORMAL, FLAG_HAVE_NON_SOLID_MASK)
+
static uint32_t *
mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
uint32_t *dst = iter->buffer;
uint32_t *src = (uint32_t *)iter->bits;
@@ -3923,12 +4002,17 @@ static const pixman_fast_path_t mmx_fast_paths[] =
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8_8888 ),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8_8888 ),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8_8888 ),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8_8888 ),
+
{ PIXMAN_OP_NONE },
};
static pixman_bool_t
mmx_blt (pixman_implementation_t *imp,
uint32_t * src_bits,