summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: rander <rander.wang@intel.com> 2017-06-19 13:20:47 +0800
committer: Yang Rong <rong.r.yang@intel.com> 2017-07-04 10:04:59 +0800
commit: 3ece4846013dc213686a372ddf630e34e435ea5c (patch)
tree: 88bfbe6cdf491ce96b38ac606241e1efcabc7e10
parent: 7e1e128818c96736a5953e9ff9a566e680bec98a (diff)
backend: refine math log function
Remove some unnecessary code, which gives a 20% improvement in the worst case. Previously, if x was a NaN, several if-return statements were used to return NaN. This is now changed to adding (x - x) to the result, which yields the same NaN. Passes the conformance tests and utests.
Signed-off-by: rander.wang <rander.wang@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- backend/src/libocl/tmpl/ocl_math_common.tmpl.cl 50
1 files changed, 10 insertions, 40 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index c0ab2514..6026629b 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -201,38 +201,19 @@ OVERLOADABLE float inline __gen_ocl_internal_log_valid(float x) {
k += (i>>23);
f = x - 1.0f;
fsq = f * f;
-
- if((0x007fffff & (15 + ix)) < 16) { /* |f| < 2**-20 */
- R = fsq * (0.5f - 0.33333333333333333f * f);
- return k * ln2_hi + k * ln2_lo + f - R;
- }
-
- s = f / (2.0f + f);
+ s = mad(-2.0f, 1.0f / (2.0f + f), 1.0f);
z = s * s;
- i = ix - (0x6147a << 3);
w = z * z;
- j = (0x6b851 << 3) - ix;
- t1= w * mad(w, Lg4, Lg2);
- t2= z * mad(w, Lg3, Lg1);
- i |= j;
- R = t2 + t1;
- partial = (i > 0) ? -mad(s, 0.5f * fsq, -0.5f * fsq) : (s * f);
-
- return mad(s, R, f) - partial + k * ln2_hi + k * ln2_lo;;
+ t1 = w * mad(w, Lg4, Lg2);
+ R = mad(z, mad(w, Lg3, Lg1), t1);
+ w = 0.5f * fsq;
+ partial = -mad(s, w, -w);
+ return mad(k, ln2_lo, mad(k, ln2_hi, mad(s, R, f) - partial));
}
OVERLOADABLE float __gen_ocl_internal_log(float x)
{
- union { unsigned int i; float f; } u;
- u.f = x;
- int ix = u.i;
-
- if (ix < 0 )
- return NAN; /* log(-#) = NaN */
- if (ix >= 0x7f800000)
- return NAN;
-
- return __gen_ocl_internal_log_valid(x);
+ return __gen_ocl_internal_log_valid(x) + (x - x);
}
OVERLOADABLE float __gen_ocl_internal_log10(float x)
@@ -244,12 +225,10 @@ OVERLOADABLE float __gen_ocl_internal_log10(float x)
log10_2lo = 7.9034151668e-07; /* 0x355427db */
float y, z;
- int i, k, hx;
+ int i, k;
+ unsigned int hx;
u.f = x; hx = u.i;
-
- if (hx<0)
- return NAN; /* log(-#) = NaN */
if (hx >= 0x7f800000)
return NAN;
@@ -267,17 +246,8 @@ OVERLOADABLE float __gen_ocl_internal_log2(float x)
{
const float zero = 0.0,
invln2 = 0x1.715476p+0f;
- int ix;
-
- union { float f; int i; } u;
- u.f = x; ix = u.i;
-
- if (ix < 0)
- return NAN; /** log(-#) = NaN */
- if (ix >= 0x7f800000)
- return NAN;
- return invln2 * __gen_ocl_internal_log_valid(x);
+ return invln2 * __gen_ocl_internal_log_valid(x) + (x - x);
}