From b7ff0749d0016be83f460294957d875f11af5802 Mon Sep 17 00:00:00 2001 From: Søren Sandmann Pedersen Date: Sun, 23 Jan 2011 16:53:26 -0500 Subject: Add SSE2 fetcher for 0565 Before: add_0565_0565 = L1: 61.08 L2: 61.03 M: 60.57 ( 10.95%) HT: 46.85 VT: 45.25 R: 39.99 RT: 20.41 ( 233Kops/s) After: add_0565_0565 = L1: 77.84 L2: 76.25 M: 75.38 ( 13.71%) HT: 55.99 VT: 54.56 R: 45.41 RT: 21.95 ( 255Kops/s) --- pixman/pixman-sse2.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c index e28b9866..91adc056 100644 --- a/pixman/pixman-sse2.c +++ b/pixman/pixman-sse2.c @@ -6081,6 +6081,52 @@ sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) return iter->buffer; } +static uint32_t * +sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + uint32_t *dst = iter->buffer; + uint16_t *src = (uint16_t *)iter->bits; + __m128i ff000000 = mask_ff000000; + + iter->bits += iter->stride; + + while (w && ((unsigned long)dst) & 0x0f) + { + uint16_t s = *src++; + + *dst++ = CONVERT_0565_TO_8888 (s); + w--; + } + + while (w >= 8) + { + __m128i lo, hi, s; + + s = _mm_loadu_si128 ((__m128i *)src); + + lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ())); + hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ())); + + save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000)); + save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000)); + + dst += 8; + src += 8; + w -= 8; + } + + while (w) + { + uint16_t s = *src++; + + *dst++ = CONVERT_0565_TO_8888 (s); + w--; + } + + return iter->buffer; +} + static uint32_t * sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) { @@ -6136,6 +6182,7 @@ typedef struct static const fetcher_info_t fetchers[] = { { PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 }, + { PIXMAN_r5g6b5, sse2_fetch_r5g6b5 }, { PIXMAN_a8, sse2_fetch_a8 }, { PIXMAN_null } }; -- cgit v1.2.3