summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Ben Avison <bavison@riscosopen.org> 2014-03-20 10:30:26 +0200
committer: Søren Sandmann <ssp@redhat.com> 2014-03-20 08:33:05 -0400
commit: 38317cbfde734a8d8ef65906229f41d8f248702c (patch)
tree: 5eda98257cdf7634f6b2fe4b8d4704eb5c54dc1e
parent: 763a6d3e6740d1624b557f8e49fd21fd26928e2c (diff)
ARMv6: remove 1 instr per row in generate_composite_function
This knocks off one instruction per row. The effect is probably too small to be measurable, but it might as well be included. The second occurrence of this sequence doesn't actually benefit at all, but is changed for consistency.

The saved instruction comes from combining the "and" inside the .if statement with an earlier "tst". The "and" was normally needed, except in one special case, where bits 4-31 were all shifted off the top of the register later on in preload_leading_step2, so we didn't care about their values.

v4, Pekka Paalanen <pekka.paalanen@collabora.co.uk>: Remove "bits 0-3" from the comments, update patch summary, and augment message with Ben's suggestion.
-rw-r--r-- pixman/pixman-arm-simd-asm.h 11
1 files changed, 4 insertions, 7 deletions
diff --git a/pixman/pixman-arm-simd-asm.h b/pixman/pixman-arm-simd-asm.h
index 74400c1..24b1ad2 100644
--- a/pixman/pixman-arm-simd-asm.h
+++ b/pixman/pixman-arm-simd-asm.h
@@ -741,12 +741,9 @@ fname:
preload_leading_step1 mask_bpp, WK2, MASK
preload_leading_step1 dst_r_bpp, WK3, DST
- tst DST, #15
+ ands WK0, DST, #15
beq 154f
- rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */
- .if (src_bpp != 0 && src_bpp != 2*dst_w_bpp) || (mask_bpp != 0 && mask_bpp != 2*dst_w_bpp)
- PF and, WK0, WK0, #15
- .endif
+ rsb WK0, WK0, #16 /* number of leading bytes until destination aligned */
preload_leading_step2 src_bpp, src_bpp_shift, WK1, SRC
preload_leading_step2 mask_bpp, mask_bpp_shift, WK2, MASK
@@ -787,9 +784,9 @@ fname:
preload_line 0, dst_r_bpp, dst_bpp_shift, DST
sub X, X, #128/dst_w_bpp /* simplifies inner loop termination */
- tst DST, #15
+ ands WK0, DST, #15
beq 164f
- rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */
+ rsb WK0, WK0, #16 /* number of leading bytes until destination aligned */
leading_15bytes process_head, process_tail