From c3deb8334a71998b986a7b8d5b74bedf26cc23aa Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Fri, 14 Dec 2012 18:43:57 +0200 Subject: Add higher precision "pixman_transform_point_*" functions The following new functions are added: pixman_transform_point_31_16_3d() - Calculates the product of a matrix and a vector multiplication. pixman_transform_point_31_16() - Calculates the product of a matrix and a vector multiplication. Then converts the homogenous resulting vector [x, y, z] to cartesian [x', y', 1] variant, where x' = x / z, and y' = y / z. pixman_transform_point_31_16_affine() - A faster sibling of the other two functions, which assumes affine transformation, where the bottom row of the matrix is [0, 0, 1] and the last element of the input vector is set to 1. These functions transform a point with 31.16 fixed point coordinates from the destination space to a point with 48.16 fixed point coordinates in the source space. The results are accurate and the rounding errors may only show up in the least significant bit. No overflows are possible for the affine transformations as long as the input data is provided in 31.16 format. In the case of projective transformations, some output values may be not representable using 48.16 fixed point format. In this case the results are clamped to return maximum or minimum 48.16 values (so that the caller can at least handle NONE and PAD repeats correctly). --- pixman/pixman-matrix.c | 332 ++++++++++++++++++++++++++++++++++++++++++++++++ pixman/pixman-private.h | 21 +++ 2 files changed, 353 insertions(+) diff --git a/pixman/pixman-matrix.c b/pixman/pixman-matrix.c index cd2f1b5..cbc6fbe 100644 --- a/pixman/pixman-matrix.c +++ b/pixman/pixman-matrix.c @@ -34,6 +34,338 @@ #define F(x) pixman_int_to_fixed (x) +static force_inline int +count_leading_zeros (uint32_t x) +{ +#ifdef __GNUC__ + return __builtin_clz (x); +#else + int n = 0; + while (x) + { + n++; + x >>= 1; + } + return 32 - n; +#endif +} + +/* + * Large signed/unsigned integer division with rounding for the platforms with + * only 64-bit integer data type supported (no 128-bit data type). + * + * Arguments: + * hi, lo - high and low 64-bit parts of the dividend + * div - 48-bit divisor + * + * Returns: lowest 64 bits of the result as a return value and highest 64 + * bits of the result to "result_hi" pointer + */ + +/* grade-school unsigned division (128-bit by 48-bit) with rounding to nearest */ +static force_inline uint64_t +rounded_udiv_128_by_48 (uint64_t hi, + uint64_t lo, + uint64_t div, + uint64_t *result_hi) +{ + uint64_t tmp, remainder, result_lo; + assert(div < ((uint64_t)1 << 48)); + + remainder = hi % div; + *result_hi = hi / div; + + tmp = (remainder << 16) + (lo >> 48); + result_lo = tmp / div; + remainder = tmp % div; + + tmp = (remainder << 16) + ((lo >> 32) & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + tmp = (remainder << 16) + ((lo >> 16) & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + tmp = (remainder << 16) + (lo & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + /* round to nearest */ + if (remainder * 2 >= div && ++result_lo == 0) + *result_hi += 1; + + return result_lo; +} + +/* signed division (128-bit by 49-bit) with rounding to nearest */ +static inline int64_t +rounded_sdiv_128_by_49 (int64_t hi, + uint64_t lo, + int64_t div, + int64_t *signed_result_hi) +{ + uint64_t result_lo, result_hi; + int sign = 0; + if (div < 0) + { + div = -div; + sign ^= 1; + } + if (hi < 0) + { + if (lo != 0) + hi++; + hi = -hi; + lo = -lo; + sign ^= 1; + } + result_lo = rounded_udiv_128_by_48 (hi, lo, div, &result_hi); + if (sign) + { + if (result_lo != 0) + result_hi++; + result_hi = -result_hi; + result_lo = -result_lo; + } + if (signed_result_hi) + { + *signed_result_hi = result_hi; + } + return result_lo; +} + +/* + * Multiply 64.16 fixed point value by (2^scalebits) and convert + * to 128-bit integer. + */ +static force_inline void +fixed_64_16_to_int128 (int64_t hi, + int64_t lo, + int64_t *rhi, + int64_t *rlo, + int scalebits) +{ + /* separate integer and fractional parts */ + hi += lo >> 16; + lo &= 0xFFFF; + + if (scalebits <= 0) + { + *rlo = hi >> (-scalebits); + *rhi = *rlo >> 63; + } + else + { + *rhi = hi >> (64 - scalebits); + *rlo = (uint64_t)hi << scalebits; + if (scalebits < 16) + *rlo += lo >> (16 - scalebits); + else + *rlo += lo << (scalebits - 16); + } +} + +/* + * Convert 112.16 fixed point value to 48.16 with clamping for the out + * of range values. + */ +static force_inline pixman_fixed_48_16_t +fixed_112_16_to_fixed_48_16 (int64_t hi, int64_t lo, pixman_bool_t *clampflag) +{ + if ((lo >> 63) != hi) + { + *clampflag = TRUE; + return hi >= 0 ? INT64_MAX : INT64_MIN; + } + else + { + return lo; + } +} + +/* + * Transform a point with 31.16 fixed point coordinates from the destination + * space to a point with 48.16 fixed point coordinates in the source space. + * No overflows are possible for affine transformations and the results are + * accurate including the least significant bit. Projective transformations + * may overflow, in this case the results are just clamped to return maximum + * or minimum 48.16 values (so that the caller can at least handle the NONE + * and PAD repeats correctly) and the return value is FALSE to indicate that + * such clamping has happened. + */ +PIXMAN_EXPORT pixman_bool_t +pixman_transform_point_31_16 (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + pixman_bool_t clampflag = FALSE; + int i; + int64_t tmp[3][2], divint; + uint16_t divfrac; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + for (i = 0; i < 3; i++) + { + tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); + tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); + } + + /* + * separate 64-bit integer and 16-bit fractional parts for the divisor, + * which is also scaled by 65536 after fixed point multiplication. + */ + divint = tmp[2][0] + (tmp[2][1] >> 16); + divfrac = tmp[2][1] & 0xFFFF; + + if (divint == pixman_fixed_1 && divfrac == 0) + { + /* + * this is a simple affine transformation + */ + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + result->v[2] = pixman_fixed_1; + } + else if (divint == 0 && divfrac == 0) + { + /* + * handle zero divisor (if the values are non-zero, set the + * results to maximum positive or minimum negative) + */ + clampflag = TRUE; + + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + + if (result->v[0] > 0) + result->v[0] = INT64_MAX; + else if (result->v[0] < 0) + result->v[0] = INT64_MIN; + + if (result->v[1] > 0) + result->v[1] = INT64_MAX; + else if (result->v[1] < 0) + result->v[1] = INT64_MIN; + } + else + { + /* + * projective transformation, analyze the top 32 bits of the divisor + */ + int32_t hi32divbits = divint >> 32; + if (hi32divbits < 0) + hi32divbits = ~hi32divbits; + + if (hi32divbits == 0) + { + /* the divisor is small, we can actually keep all the bits */ + int64_t hi, rhi, lo, rlo; + int64_t div = (divint << 16) + divfrac; + + fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + + fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + } + else + { + /* the divisor needs to be reduced to 48 bits */ + int64_t hi, rhi, lo, rlo, div; + int shift = 32 - count_leading_zeros (hi32divbits); + fixed_64_16_to_int128 (divint, divfrac, &hi, &div, 16 - shift); + + fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32 - shift); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + + fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32 - shift); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + } + } + result->v[2] = pixman_fixed_1; + return !clampflag; +} + +PIXMAN_EXPORT void +pixman_transform_point_31_16_affine (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + int64_t hi0, lo0, hi1, lo1; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + hi0 = (int64_t)t->matrix[0][0] * (v->v[0] >> 16); + lo0 = (int64_t)t->matrix[0][0] * (v->v[0] & 0xFFFF); + hi0 += (int64_t)t->matrix[0][1] * (v->v[1] >> 16); + lo0 += (int64_t)t->matrix[0][1] * (v->v[1] & 0xFFFF); + hi0 += (int64_t)t->matrix[0][2]; + + hi1 = (int64_t)t->matrix[1][0] * (v->v[0] >> 16); + lo1 = (int64_t)t->matrix[1][0] * (v->v[0] & 0xFFFF); + hi1 += (int64_t)t->matrix[1][1] * (v->v[1] >> 16); + lo1 += (int64_t)t->matrix[1][1] * (v->v[1] & 0xFFFF); + hi1 += (int64_t)t->matrix[1][2]; + + result->v[0] = hi0 + ((lo0 + 0x8000) >> 16); + result->v[1] = hi1 + ((lo1 + 0x8000) >> 16); + result->v[2] = pixman_fixed_1; +} + +PIXMAN_EXPORT void +pixman_transform_point_31_16_3d (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + int i; + int64_t tmp[3][2]; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + for (i = 0; i < 3; i++) + { + tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); + tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); + } + + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16); +} + PIXMAN_EXPORT void pixman_transform_init_identity (struct pixman_transform *matrix) { diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index 3981873..cb78a2e 100644 --- a/pixman/pixman-private.h +++ b/pixman/pixman-private.h @@ -1077,6 +1077,27 @@ _pixman_log_error (const char *function, const char *message); while (0) #endif +/* + * Matrix + */ + +typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t; + +pixman_bool_t +pixman_transform_point_31_16 (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); + +void +pixman_transform_point_31_16_3d (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); + +void +pixman_transform_point_31_16_affine (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); + /* * Timers */ -- cgit v1.2.1