diff options
author | Siarhei Siamashka <siarhei.siamashka@nokia.com> | 2009-12-09 11:02:04 +0200 |
---|---|---|
committer | Siarhei Siamashka <siarhei.siamashka@nokia.com> | 2009-12-09 11:27:57 +0200 |
commit | 96fd17488f0966d2df53623195810dc640bf5ca6 (patch) | |
tree | b59bab442e01c2d6cb18f938b3125178956f562e | |
parent | 2d332c7a569803107e11b41c7b2c020b4050e26e (diff) |
ARM: added 'neon_composite_over_n_0565' fast path
-rw-r--r-- | pixman/pixman-arm-neon-asm.S | 69 | ||||
-rw-r--r-- | pixman/pixman-arm-neon.c | 43 |
2 files changed, 112 insertions, 0 deletions
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S index bffc676..57680bb 100644 --- a/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman-arm-neon-asm.S @@ -344,6 +344,75 @@ generate_composite_function \ /******************************************************************************/ +.macro pixman_composite_over_n_0565_process_pixblock_head + /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format + and put data into d6 - red, d7 - green, d30 - blue */ + vshrn.u16 d6, q2, #8 + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vsri.u8 d6, d6, #5 + vsri.u8 d7, d7, #6 + vshrn.u16 d30, q2, #2 + /* now do alpha blending, storing results in 8-bit planar format + into d16 - red, d19 - green, d18 - blue */ + vmull.u8 q10, d3, d6 + vmull.u8 q11, d3, d7 + vmull.u8 q12, d3, d30 + vrshr.u16 q13, q10, #8 + vrshr.u16 q3, q11, #8 + vrshr.u16 q15, q12, #8 + vraddhn.u16 d20, q10, q13 + vraddhn.u16 d23, q11, q3 + vraddhn.u16 d22, q12, q15 +.endm + +.macro pixman_composite_over_n_0565_process_pixblock_tail + /* ... continue alpha blending */ + vqadd.u8 d16, d2, d20 + vqadd.u8 q9, q0, q11 + /* convert the result to r5g6b5 and store it into {d28, d29} */ + vshll.u8 q14, d16, #8 + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_0565_process_pixblock_tail_head + pixman_composite_over_n_0565_process_pixblock_tail + vld1.16 {d4, d5}, [DST_R, :128]! + vst1.16 {d28, d29}, [DST_W, :128]! + pixman_composite_over_n_0565_process_pixblock_head +.endm + +.macro pixman_composite_over_n_0565_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] + vmvn.8 d3, d3 /* invert source alpha */ +.endm + +generate_composite_function \ + pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_0565_init, \ + default_cleanup, \ + pixman_composite_over_n_0565_process_pixblock_head, \ + pixman_composite_over_n_0565_process_pixblock_tail, \ + pixman_composite_over_n_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + .macro pixman_composite_src_8888_0565_process_pixblock_head vshll.u8 q8, d1, #8 vshll.u8 q14, d2, #8 diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c index 4894285..8ae79ae 100644 --- a/pixman/pixman-arm-neon.c +++ b/pixman/pixman-arm-neon.c @@ -71,6 +71,46 @@ neon_composite_##name (pixman_implementation_t *imp, \ src_line, src_stride); \ } +#define BIND_N_NULL_DST(name, dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_neon (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + uint32_t src); \ + \ +static void \ +neon_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + int32_t dst_stride; \ + uint32_t src; \ + \ + src = _pixman_image_get_solid (src_image, dst_image->bits.format); \ + \ + if (src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + \ + pixman_composite_##name##_asm_neon (width, height, \ + dst_line, dst_stride, \ + src); \ +} + #define BIND_N_MASK_DST(name, mask_type, mask_cnt, dst_type, dst_cnt) \ void \ pixman_composite_##name##_asm_neon (int32_t w, \ @@ -218,6 +258,8 @@ BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1) BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1) BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1) +BIND_N_NULL_DST(over_n_0565, uint16_t, 1) + BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1) BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1) @@ -360,6 +402,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] = { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, neon_composite_over_n_8_8888 }, { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, neon_composite_over_n_8_8888 }, { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_n_0565 }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888 }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888 }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, neon_composite_over_8888_8_8888 }, |