summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Ben Avison <bavison@riscosopen.org> 2013-02-06 00:39:12 +0000
committer: Søren Sandmann Pedersen <ssp@redhat.com> 2013-02-13 02:24:34 -0500
commit: 5e207f825bd1ed3142a623bcbceca00508907c5e (patch)
tree: 024e175e6ec9d64429ca0d85d28480d535897971
parent: d26f922dc1a605dae00fa0540198707485ba1f08 (diff)
Fix to lowlevel-blt-bench
The source, mask and destination buffers are initialised to 0xCC just after they are allocated. Between benchmarks, there is a pair of memcpys, from the destination buffer to the source buffer and back again (there are no explanatory comments, but presumably this is an effort to flush the caches). However, this has an unintended consequence: it changes the contents of the buffers on entry to subsequent benchmarks. This means it is not a fair test: for example, with over_n_8888 (featured in the following patches) it reports the L2 and even M tests as being faster than the L1 test, because after the L1 test the source buffer is filled with fully opaque pixels, for which over_n_8888 has a shortcut. The fix here is simply to reverse the order of the memcpys, copying the source buffer to the destination first and then back again, so that the source and destination are both filled with 0xCC on entry to all tests.
-rw-r--r-- test/lowlevel-blt-bench.c 14
1 file changed, 7 insertions, 7 deletions
diff --git a/test/lowlevel-blt-bench.c b/test/lowlevel-blt-bench.c
index 8e80b42..4e16f7b 100644
--- a/test/lowlevel-blt-bench.c
+++ b/test/lowlevel-blt-bench.c
@@ -460,8 +460,8 @@ bench_composite (char * testname,
printf ("%24s %c", testname, func != pixman_image_composite_wrapper ?
'-' : '=');
- memcpy (src, dst, BUFSIZE);
memcpy (dst, src, BUFSIZE);
+ memcpy (src, dst, BUFSIZE);
l1test_width = L1CACHE_SIZE / 8 - 64;
if (l1test_width < 1)
@@ -480,8 +480,8 @@ bench_composite (char * testname,
((t3 - t2) - (t2 - t1)) / 1000000.);
fflush (stdout);
- memcpy (src, dst, BUFSIZE);
memcpy (dst, src, BUFSIZE);
+ memcpy (src, dst, BUFSIZE);
nlines = (L2CACHE_SIZE / l1test_width) /
((PIXMAN_FORMAT_BPP(src_fmt) + PIXMAN_FORMAT_BPP(dst_fmt)) / 8);
@@ -499,8 +499,8 @@ bench_composite (char * testname,
((t3 - t2) - (t2 - t1)) / 1000000.);
fflush (stdout);
- memcpy (src, dst, BUFSIZE);
memcpy (dst, src, BUFSIZE);
+ memcpy (src, dst, BUFSIZE);
n = 1 + npix / (WIDTH * HEIGHT);
t1 = gettime ();
@@ -515,8 +515,8 @@ bench_composite (char * testname,
((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1)) * bytes_per_pix) * (100.0 / bandwidth) );
fflush (stdout);
- memcpy (src, dst, BUFSIZE);
memcpy (dst, src, BUFSIZE);
+ memcpy (src, dst, BUFSIZE);
n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
t1 = gettime ();
@@ -529,8 +529,8 @@ bench_composite (char * testname,
printf (" HT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
fflush (stdout);
- memcpy (src, dst, BUFSIZE);
memcpy (dst, src, BUFSIZE);
+ memcpy (src, dst, BUFSIZE);
n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
t1 = gettime ();
@@ -543,8 +543,8 @@ bench_composite (char * testname,
printf (" VT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
fflush (stdout);
- memcpy (src, dst, BUFSIZE);
memcpy (dst, src, BUFSIZE);
+ memcpy (src, dst, BUFSIZE);
n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
t1 = gettime ();
@@ -557,8 +557,8 @@ bench_composite (char * testname,
printf (" R:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
fflush (stdout);
- memcpy (src, dst, BUFSIZE);
memcpy (dst, src, BUFSIZE);
+ memcpy (src, dst, BUFSIZE);
n = 1 + npix / (16 * TINYWIDTH * TINYWIDTH);
t1 = gettime ();