diff options
author | Jeff Muizelaar <jmuizelaar@mozilla.com> | 2013-01-24 14:49:41 -0500 |
---|---|---|
committer | Jeff Muizelaar <jmuizelaar@mozilla.com> | 2013-01-25 13:14:37 -0500 |
commit | b7f523e3bcbef1f08bf9b374f2704723d5298c1f (patch) | |
tree | 295168800a044d8eab90e15e83c4c018bb49d4cf | |
parent | 24e83cae64eaa238a7bf67488917b0f8cac89114 (diff) |
Add a version of bilinear_interpolation for precision <=4
Having 4 or fewer bits of interpolation precision means we can process
two 8-bit components at a time in a single 32-bit register.
Here are the results for firefox-fishtank on a Pandaboard with
4.6.3 and PIXMAN_DISABLE="arm-neon"
Before:
[ # ] backend test min(s) median(s) stddev. count
[ 0] image t-firefox-fishtank 7.841 7.910 0.70% 6/6
After:
[ # ] backend test min(s) median(s) stddev. count
[ 0] image t-firefox-fishtank 6.951 6.995 1.11% 6/6
-rw-r--r-- | pixman/pixman-inlines.h | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/pixman/pixman-inlines.h b/pixman/pixman-inlines.h
index ab4def0..dd1c2f1 100644
--- a/pixman/pixman-inlines.h
+++ b/pixman/pixman-inlines.h
@@ -88,6 +88,42 @@ pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
            ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
 }
 
+#if BILINEAR_INTERPOLATION_BITS <= 4
+/* Inspired by Filter_32_opaque from Skia. Blends the four pixels tl/tr/bl/br by weights derived from distx/disty, handling two 8-bit components per 32-bit multiply. */
+static force_inline uint32_t
+bilinear_interpolation (uint32_t tl, uint32_t tr,
+                        uint32_t bl, uint32_t br,
+                        int distx, int disty)
+{
+    int distxy, distxiy, distixy, distixiy; /* weights applied to br, tr, bl and tl respectively */
+    uint32_t lo, hi; /* accumulators, each holding two components in separate 16-bit lanes */
+
+    distx <<= (4 - BILINEAR_INTERPOLATION_BITS); /* rescale to 4 bits; assumes the input is below (1 << BILINEAR_INTERPOLATION_BITS) */
+    disty <<= (4 - BILINEAR_INTERPOLATION_BITS); /* same rescaling for the vertical weight */
+
+    distxy = distx * disty; /* weight of br */
+    distxiy = (distx << 4) - distxy; /* distx * (16 - disty): weight of tr */
+    distixy = (disty << 4) - distxy; /* disty * (16 - distx): weight of bl */
+    distixiy =
+        16 * 16 - (disty << 4) -
+        (distx << 4) + distxy; /* (16 - distx) * (16 - disty): weight of tl; the four weights sum to 256 */
+
+    lo = (tl & 0xff00ff) * distixiy; /* bytes 0 and 2 of the pixel, one per 16-bit lane */
+    hi = ((tl >> 8) & 0xff00ff) * distixiy; /* bytes 1 and 3, shifted down into the same lanes */
+
+    lo += (tr & 0xff00ff) * distxiy; /* accumulate the top-right contribution */
+    hi += ((tr >> 8) & 0xff00ff) * distxiy;
+
+    lo += (bl & 0xff00ff) * distixy; /* accumulate the bottom-left contribution */
+    hi += ((bl >> 8) & 0xff00ff) * distixy;
+
+    lo += (br & 0xff00ff) * distxy; /* accumulate the bottom-right contribution */
+    hi += ((br >> 8) & 0xff00ff) * distxy;
+
+    return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff); /* divide by 256: lo lanes shift down, hi lanes already have their result in the upper byte */
+}
+
+#else
 #if SIZEOF_LONG > 4
 
 static force_inline uint32_t
@@ -184,6 +220,7 @@ bilinear_interpolation (uint32_t tl, uint32_t tr,
 }
 
 #endif
+#endif // BILINEAR_INTERPOLATION_BITS <= 4
 
 /*
  * For each scanline fetched from source image with PAD repeat: |