summaryrefslogtreecommitdiff
path: root/pixman/pixman-mips-dspr2-asm.S
diff options
context:
space:
mode:
authorNemanja Lukic <nemanja.lukic@rt-rk.com>2012-09-14 09:31:23 +0200
committerSøren Sandmann Pedersen <ssp@redhat.com>2012-09-24 17:12:56 -0400
commit37e3368e20cee42f1e1039bb112ed9a09d21156f (patch)
treeb13d21306f6ed1144c6e3a1dfd1fc5f2129569ba /pixman/pixman-mips-dspr2-asm.S
parentf580c4c5b2a435ebe2751ce0dace6c42568557f8 (diff)
MIPS: DSPr2: Added fast-paths for OVER operation: - over_8888_n_8888 - over_8888_8_8888
Performance numbers before/after on MIPS-74kc @ 1GHz: lowlevel-blt-bench results Referent (before): over_8888_n_8888 = L1: 9.92 L2: 11.27 M: 8.50 ( 45.23%) HT: 4.70 VT: 4.45 R: 4.49 RT: 1.85 ( 20Kops/s) over_8888_8_8888 = L1: 12.54 L2: 10.86 M: 8.18 ( 54.36%) HT: 6.53 VT: 6.45 R: 6.41 RT: 3.83 ( 33Kops/s) Optimized: over_8888_n_8888 = L1: 28.02 L2: 24.92 M: 14.72 ( 78.15%) HT: 13.03 VT: 12.65 R: 12.00 RT: 7.49 ( 49Kops/s) over_8888_8_8888 = L1: 26.92 L2: 23.93 M: 13.65 ( 90.58%) HT: 11.68 VT: 11.29 R: 10.56 RT: 6.37 ( 45Kops/s)
Diffstat (limited to 'pixman/pixman-mips-dspr2-asm.S')
-rw-r--r--pixman/pixman-mips-dspr2-asm.S102
1 files changed, 102 insertions, 0 deletions
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index a8fccd5..165f177 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -749,6 +749,108 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
END(pixman_composite_over_n_8_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (a8r8g8b8)
+ * a2 - mask (32bit constant)
+ * a3 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, s0
+ li t4, 0x00ff00ff
+ beqz a3, 3f
+ nop
+ addiu t1, a3, -1
+ srl a2, a2, 24
+ beqz t1, 2f
+ nop
+
+1:
+ lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
+ lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
+ /* a2 = mask (32bit constant) */
+ lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+ lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */
+ addiu a1, a1, 8
+
+ OVER_2x8888_2x8_2x8888 t0, t1, a2, a2, t2, t3, \
+ t5, t6, t4, t7, t8, t9, t0, t1, s0
+
+ sw t5, 0(a0)
+ sw t6, 4(a0)
+ addiu a3, a3, -2
+ addiu t1, a3, -1
+ bgtz t1, 1b
+ addiu a0, a0, 8
+2:
+ beqz a3, 3f
+ nop
+ lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
+ /* a2 = mask (32bit constant) */
+ lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */
+
+ OVER_8888_8_8888 t0, a2, t1, t3, t4, t5, t6, t7, t8
+
+ sw t3, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, s0
+ j ra
+ nop
+
+END(pixman_composite_over_8888_n_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (a8r8g8b8)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, s0, s1
+ li t4, 0x00ff00ff
+ beqz a3, 3f
+ nop
+ addiu t1, a3, -1
+ beqz t1, 2f
+ nop
+1:
+ lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
+ lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
+ lbu t2, 0(a2) /* t2 = mask (a8) */
+ lbu t3, 1(a2) /* t3 = mask (a8) */
+ lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */
+ lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */
+ addiu a1, a1, 8
+ addiu a2, a2, 2
+
+ OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, \
+ t7, t8, t4, t9, s0, s1, t0, t1, t2
+
+ sw t7, 0(a0)
+ sw t8, 4(a0)
+ addiu a3, a3, -2
+ addiu t1, a3, -1
+ bgtz t1, 1b
+ addiu a0, a0, 8
+2:
+ beqz a3, 3f
+ nop
+ lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
+ lbu t1, 0(a2) /* t1 = mask (a8) */
+ lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+
+ OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8
+
+ sw t3, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, s0, s1
+ j ra
+ nop
+
+END(pixman_composite_over_8888_8_8888_asm_mips)
+
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
/*
* a0 - *dst