summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matt Turner <mattst88@gmail.com> 2012-06-27 12:57:45 -0400
committer Matt Turner <mattst88@gmail.com> 2012-07-01 16:33:19 -0400
commit c43de364cbcd195f7d1d6881a6109cbb3d6b73b8 (patch)
tree 589b8bed5be82596ae518a606f625d23500342ca
parent 9209cd746b7a81d0536df6dadd6a0b0b983291cb (diff)
mmx: add scaled bilinear over_8888_8888
Loongson:
image firefox-planet-gnome 157.012 158.087 0.30% 6/6
image firefox-planet-gnome 156.617 157.109 0.15% 5/6

ARM/iwMMXt:
image firefox-planet-gnome 148.086 149.339 0.76% 6/6
image firefox-planet-gnome 144.939 146.123 0.61% 6/6
-rw-r--r-- pixman/pixman-mmx.c 79
1 files changed, 76 insertions, 3 deletions
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 018a2ba..bf66a63 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -692,12 +692,30 @@ combine (const uint32_t *src, const uint32_t *mask)
vsrc = pix_multiply (vsrc, m);
}
return vsrc;
}
+static force_inline __m64 /* OVER-combine one source pixel with one destination pixel */
+core_combine_over_u_pixel_mmx (__m64 vsrc, __m64 vdst) /* only the low four bytes of each argument are used */
+{
+ vsrc = _mm_unpacklo_pi8 (vsrc, _mm_setzero_si64 ()); /* interleave with zero: each low byte becomes a 16-bit lane */
+
+ if (is_opaque (vsrc)) /* is_opaque() is defined elsewhere; presumably tests for full alpha */
+ {
+ return vsrc; /* widened source is the result; vdst is never read on this path */
+ }
+ else if (!is_zero (vsrc)) /* is_zero() is defined elsewhere; presumably tests for an all-zero value */
+ {
+ return over (vsrc, expand_alpha (vsrc), /* blend source with its own alpha against the widened destination */
+ _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ()));
+ }
+
+ return _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ()); /* fall-through (vsrc was zero): widened destination, unmodified */
+}
+
static void
mmx_combine_over_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
@@ -3543,13 +3561,13 @@ do { \
_mm_unpackhi_pi16 (mm_lo_hi, mm_hi_hi)); \
/* shift and pack the result */ \
hi = _mm_srli_pi32 (hi, BILINEAR_INTERPOLATION_BITS * 2); \
lo = _mm_srli_pi32 (lo, BILINEAR_INTERPOLATION_BITS * 2); \
lo = _mm_packs_pi32 (lo, hi); \
lo = _mm_packs_pu16 (lo, lo); \
- store (&pix, lo); \
+ pix = lo; \
} while (0)
static force_inline void
scaled_bilinear_scanline_mmx_8888_8888_SRC (uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
@@ -3560,18 +3578,19 @@ scaled_bilinear_scanline_mmx_8888_8888_SRC (uint32_t * dst,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
- uint32_t pix;
+ __m64 pix;
while (w--)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix);
- *dst++ = pix;
+ store (dst, pix);
+ dst++;
}
_mm_empty ();
}
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_SRC,
@@ -3588,12 +3607,61 @@ FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_SRC,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_SRC,
scaled_bilinear_scanline_mmx_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
+static force_inline void /* composite one bilinear-interpolated scanline onto dst with OVER */
+scaled_bilinear_scanline_mmx_8888_8888_OVER (uint32_t * dst, /* destination pixels, read and rewritten in place */
+ const uint32_t * mask, /* not referenced directly in this body */
+ const uint32_t * src_top, /* NOTE(review): src_top..max_vx are presumably consumed by the BILINEAR_* macros - confirm */
+ const uint32_t * src_bottom,
+ int32_t w, /* number of destination pixels to process */
+ int wt,
+ int wb,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t zero_src) /* not referenced directly in this body */
+{
+ BILINEAR_DECLARE_VARIABLES;
+ __m64 pix1, pix2; /* pix1: interpolated source pixel; pix2: destination pixel loaded from dst */
+
+ while (w)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); /* macro defined elsewhere; presumably leaves a packed pixel in pix1 */
+
+ if (!is_zero (pix1)) /* when the source pixel is zero, dst is neither loaded nor stored */
+ {
+ pix2 = load (dst);
+ store8888 (dst, core_combine_over_u_pixel_mmx (pix1, pix2)); /* combine, then write the result back to the same pixel */
+ }
+
+ w--;
+ dst++;
+ }
+
+ _mm_empty (); /* clear MMX state (EMMS) so later x87 floating-point code is safe */
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_OVER, /* four instantiations follow, one per mode token: COVER, PAD, NONE, NORMAL */
+ scaled_bilinear_scanline_mmx_8888_8888_OVER, /* every instantiation uses this same scanline function */
+ uint32_t, uint32_t, uint32_t,
+ COVER, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_OVER,
+ scaled_bilinear_scanline_mmx_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ PAD, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_OVER,
+ scaled_bilinear_scanline_mmx_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ NONE, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_OVER,
+ scaled_bilinear_scanline_mmx_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ NORMAL, FLAG_NONE)
static uint32_t *
mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
uint32_t *dst = iter->buffer;
uint32_t *src = (uint32_t *)iter->bits;
@@ -3850,12 +3918,17 @@ static const pixman_fast_path_t mmx_fast_paths[] =
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, mmx_8888_8888 ),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ),
+
{ PIXMAN_OP_NONE },
};
static pixman_bool_t
mmx_blt (pixman_implementation_t *imp,
uint32_t * src_bits,