summary | refs | log | tree | commit | diff
author: Siarhei Siamashka <siarhei.siamashka@nokia.com> 2009-11-18 02:26:18 (GMT)
committer: Siarhei Siamashka <siarhei.siamashka@nokia.com> 2009-11-30 20:21:08 (GMT)
commit: 7c7b6f5de75a998deaab5d00baf69a895ceba795 (patch) (side-by-side diff)
tree: b6b87e3957572e056291adacee730e6b4a74530c
parent: dce6e1bd6840ce0646d8738aaa0927c003dbb361 (diff)
download: pixman-7c7b6f5de75a998deaab5d00baf69a895ceba795.zip
pixman-7c7b6f5de75a998deaab5d00baf69a895ceba795.tar.gz
ARM: NEON optimized pixman_blt
The NEON unit has fast access to the L1/L2 caches, so even a simple copy of memory buffers using NEON provides a more than 1.5x performance improvement on ARM Cortex-A8.
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- pixman/pixman-arm-neon.c 70
1 files changed, 70 insertions, 0 deletions
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 2ed8b4b..9194924 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -292,6 +292,46 @@ pixman_fill_neon (uint32_t *bits,
}
}
+static pixman_bool_t
+pixman_blt_neon (uint32_t *src_bits,
+ uint32_t *dst_bits,
+ int src_stride,
+ int dst_stride,
+ int src_bpp,
+ int dst_bpp,
+ int src_x,
+ int src_y,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height)
+{ /*
+ * Copy a width x height rectangle from (src_x, src_y) in src_bits to
+ * (dst_x, dst_y) in dst_bits by handing it to one of the *_asm_neon
+ * routines.
+ *
+ * The row offset is computed as y * stride * 4 bytes, so src_stride and
+ * dst_stride are treated as counts of 4-byte (uint32_t) units per row.
+ *
+ * Returns TRUE once a copy routine has been called. Returns FALSE,
+ * without calling any copy routine, when the two depths differ or the
+ * depth is neither 16 nor 32 bpp, so that the caller can fall back to
+ * another implementation.
+ */
+ if (src_bpp != dst_bpp)
+ return FALSE; /* depths differ: no copy is attempted here */
+
+ switch (src_bpp)
+ {
+ case 16: /* 2 bytes per pixel, so the x offset is x * 2. The strides are
+ * passed as stride * 2 -- presumably a count of uint16_t units
+ * per row; confirm against the assembly routine. */
+ pixman_composite_src_0565_0565_asm_neon (
+ width, height,
+ (uint16_t *)(((char *) dst_bits) +
+ dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
+ (uint16_t *)(((char *) src_bits) +
+ src_y * src_stride * 4 + src_x * 2), src_stride * 2);
+ return TRUE;
+ case 32: /* 4 bytes per pixel, so the x offset is x * 4 and the strides
+ * are passed through unchanged. */
+ pixman_composite_src_8888_8888_asm_neon (
+ width, height,
+ (uint32_t *)(((char *) dst_bits) +
+ dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
+ (uint32_t *)(((char *) src_bits) +
+ src_y * src_stride * 4 + src_x * 4), src_stride);
+ return TRUE;
+ default: /* any other depth is not handled by this function */
+ return FALSE;
+ }
+}
+
static const pixman_fast_path_t arm_neon_fast_path_array[] =
{
{ PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_0565_0565 },
@@ -361,6 +401,35 @@ arm_neon_composite (pixman_implementation_t *imp,
}
static pixman_bool_t
+arm_neon_blt (pixman_implementation_t *imp,
+ uint32_t * src_bits,
+ uint32_t * dst_bits,
+ int src_stride,
+ int dst_stride,
+ int src_bpp,
+ int dst_bpp,
+ int src_x,
+ int src_y,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height)
+{ /*
+ * blt hook of the NEON implementation (installed as imp->blt in
+ * _pixman_implementation_create_arm_neon). Tries pixman_blt_neon()
+ * first; if that returns FALSE, the same arguments are forwarded
+ * unchanged to imp->delegate through _pixman_implementation_blt() and
+ * that call's result is returned. Returns TRUE when pixman_blt_neon()
+ * handled the request.
+ */
+ if (!pixman_blt_neon (
+ src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+ src_x, src_y, dst_x, dst_y, width, height))
+
+ { /* NEON path declined the request: defer to the delegate */
+ return _pixman_implementation_blt (
+ imp->delegate,
+ src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+ src_x, src_y, dst_x, dst_y, width, height);
+ }
+
+ return TRUE;
+}
+
+static pixman_bool_t
arm_neon_fill (pixman_implementation_t *imp,
uint32_t * bits,
int stride,
@@ -385,6 +454,7 @@ _pixman_implementation_create_arm_neon (void)
pixman_implementation_t *imp = _pixman_implementation_create (general);
imp->composite = arm_neon_composite;
+ imp->blt = arm_neon_blt;
imp->fill = arm_neon_fill;
return imp;