From 7c7b6f5de75a998deaab5d00baf69a895ceba795 Mon Sep 17 00:00:00 2001
From: Siarhei Siamashka
Date: Wed, 18 Nov 2009 04:26:18 +0200
Subject: ARM: NEON optimized pixman_blt

NEON unit has fast access to L1/L2 caches and even simple copy of memory
buffers using NEON provides more than 1.5x performance improvement on ARM
Cortex-A8.
---
 pixman/pixman-arm-neon.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

/*
 * Review notes (added during review; not part of the original commit).
 * All comments sit either in this free-form area ahead of the first
 * diff header or at the end of an existing added line, so no hunk gains
 * or loses a line and the hunk headers and diffstat still add up.
 *
 * What the patch adds to pixman/pixman-arm-neon.c:
 *
 *   pixman_blt_neon  - computes the address of pixel (x, y) in the source
 *       and destination buffers and hands both, together with width,
 *       height and the row strides, to a NEON assembly routine chosen by
 *       pixel depth (16 or 32 bits per pixel). Returns FALSE, doing
 *       nothing, when the two depths differ or the depth is anything
 *       else; returns TRUE after calling the routine.
 *
 *   arm_neon_blt     - calls pixman_blt_neon and, when that returns FALSE,
 *       forwards the same arguments to _pixman_implementation_blt on
 *       imp->delegate and returns that result.
 *
 *   The last hunk stores arm_neon_blt in imp->blt inside
 *   _pixman_implementation_create_arm_neon.
 *
 * NOTE(review): byte offsets are computed in plain int
 * (y * stride * 4 + x * bytes_per_pixel); confirm that callers keep these
 * products within int range.
 * NOTE(review): nothing in these hunks checks for overlapping source and
 * destination rectangles; whether the assembly routines tolerate overlap
 * is not visible here - confirm before relying on it.
 */

diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 2ed8b4b..9194924 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -292,6 +292,46 @@ pixman_fill_neon (uint32_t *bits,
     }
 }
 
+static pixman_bool_t /* TRUE once a NEON routine has been called, FALSE when the depths are not handled */
+pixman_blt_neon (uint32_t *src_bits, /* pointer that src_x and src_y are offsets from */
+                 uint32_t *dst_bits, /* pointer that dst_x and dst_y are offsets from */
+                 int       src_stride, /* source row pitch; scaled by 4 below to form byte offsets */
+                 int       dst_stride, /* destination row pitch, used the same way as src_stride */
+                 int       src_bpp, /* bits per pixel of the source; selects the routine */
+                 int       dst_bpp, /* must equal src_bpp or nothing is done */
+                 int       src_x, /* column used to compute the source pointer */
+                 int       src_y, /* row used to compute the source pointer */
+                 int       dst_x, /* column used to compute the destination pointer */
+                 int       dst_y, /* row used to compute the destination pointer */
+                 int       width, /* passed to the assembly routine unchanged */
+                 int       height) /* passed to the assembly routine unchanged */
+{
+    if (src_bpp != dst_bpp) /* no depth conversion in this path */
+        return FALSE;
+
+    switch (src_bpp)
+    {
+    case 16: /* 2 bytes per pixel */
+        pixman_composite_src_0565_0565_asm_neon (
+                width, height,
+                (uint16_t *)(((char *) dst_bits) + /* byte address of destination pixel (dst_x, dst_y) */
+                dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2, /* pitch re-expressed in 2-byte units; presumably what the routine expects - confirm */
+                (uint16_t *)(((char *) src_bits) + /* byte address of source pixel (src_x, src_y) */
+                src_y * src_stride * 4 + src_x * 2), src_stride * 2);
+        return TRUE;
+    case 32: /* 4 bytes per pixel */
+        pixman_composite_src_8888_8888_asm_neon (
+                width, height,
+                (uint32_t *)(((char *) dst_bits) + /* byte address of destination pixel (dst_x, dst_y) */
+                dst_y * dst_stride * 4 + dst_x * 4), dst_stride, /* pitch passed through as given */
+                (uint32_t *)(((char *) src_bits) + /* byte address of source pixel (src_x, src_y) */
+                src_y * src_stride * 4 + src_x * 4), src_stride);
+        return TRUE;
+    default: /* any other depth is left to the caller */
+        return FALSE;
+    }
+}
+
 static const pixman_fast_path_t arm_neon_fast_path_array[] =
 {
     { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_0565_0565 },
@@ -360,6 +400,35 @@ arm_neon_composite (pixman_implementation_t *imp,
                                           width, height);
 }
 
+static pixman_bool_t /* TRUE when the NEON path ran, otherwise whatever the delegate returns */
+arm_neon_blt (pixman_implementation_t *imp, /* only imp->delegate is read here */
+              uint32_t *               src_bits,
+              uint32_t *               dst_bits,
+              int                      src_stride,
+              int                      dst_stride,
+              int                      src_bpp,
+              int                      dst_bpp,
+              int                      src_x,
+              int                      src_y,
+              int                      dst_x,
+              int                      dst_y,
+              int                      width,
+              int                      height)
+{
+    if (!pixman_blt_neon ( /* FALSE here means the depths were not handled by the NEON path */
+            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+            src_x, src_y, dst_x, dst_y, width, height))
+
+    {
+        return _pixman_implementation_blt ( /* hand the identical request to the delegate implementation */
+            imp->delegate,
+            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+            src_x, src_y, dst_x, dst_y, width, height);
+    }
+
+    return TRUE;
+}
+
 static pixman_bool_t
 arm_neon_fill (pixman_implementation_t *imp,
                uint32_t *               bits,
@@ -385,6 +454,7 @@ _pixman_implementation_create_arm_neon (void)
     pixman_implementation_t *imp = _pixman_implementation_create (general);
 
     imp->composite = arm_neon_composite;
+    imp->blt = arm_neon_blt; /* install the NEON blt entry point on this implementation */
     imp->fill = arm_neon_fill;
 
     return imp;
-- 
cgit v1.2.3