summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrander <rander.wang@intel.com>2017-03-30 14:11:34 +0800
committerYang Rong <rong.r.yang@intel.com>2017-04-17 16:08:48 +0800
commit26f541268343c76cb75fa0520a58fb741e8248d8 (patch)
tree093d5cddbafc25e13443fb9488b2d0b9325ef83c
parentfcf6317b0c78d61e2713b08279db52161360dcc0 (diff)
backend: add double version of asin
copy from fdlibm and pass the cft after refined Signed-off-by: rander <rander.wang@intel.com> Tested-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r--backend/src/libocl/tmpl/ocl_math_common.tmpl.cl60
-rw-r--r--backend/src/libocl/tmpl/ocl_math_common.tmpl.h1
2 files changed, 61 insertions, 0 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index 611c2e34..8cccf278 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -146,6 +146,66 @@ OVERLOADABLE double acosh(double x)
}
}
+OVERLOADABLE double asin(double x)
+{
+ double one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
+ huge = 1.000e+300,
+ pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
+ pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
+ pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
+ /* coefficient for R(x^2) */
+ pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
+ pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
+ pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
+ pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
+ pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
+ pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
+ qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
+ qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
+ qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
+ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
+
+ double t,w,p,q,c,r,s;
+ int hx,ix;
+ hx = __HI(x);
+ ix = hx&0x7fffffff;
+ if(ix>= 0x3ff00000) { /* |x|>= 1 */
+ if(((ix-0x3ff00000)|__LO(x))==0)
+ /* asin(1)=+-pi/2 with inexact */
+ return x*pio2_hi+x*pio2_lo;
+ return (x-x)/(x-x); /* asin(|x|>1) is NaN */
+ } else if (ix<0x3fe00000) { /* |x|<0.5 */
+ if(ix<0x3e400000) { /* if |x| < 2**-27 */
+ if(huge+x>one) return x;/* return x with inexact if x!=0*/
+ } else
+ t = x*x;
+ p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
+ q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
+ w = p/q;
+ return x+x*w;
+ }
+ /* 1> |x|>= 0.5 */
+ w = one-fabs(x);
+ t = w*0.5;
+ p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
+ q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
+ s = sqrt(t);
+ if(ix>=0x3FEF3333) { /* if |x| > 0.975 */
+ w = p/q;
+ t = pio2_hi-(2.0*(s+s*w)-pio2_lo);
+ } else {
+ w = s;
+ __setLow(&w, 0);
+ c = (t-w*w)/(s+w);
+ r = p/q;
+ p = 2.0*s*r-(pio2_lo-2.0*c);
+ q = pio4_hi-2.0*w;
+ t = pio4_hi-(p-q);
+ }
+ if(hx>0) return t; else return -t;
+}
+
+
OVERLOADABLE double ceil(double x)
{
double ret;
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
index 5bd5b36c..c2969255 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
@@ -23,6 +23,7 @@
OVERLOADABLE double acos(double x);
OVERLOADABLE double acospi(double x);
OVERLOADABLE double acosh(double x);
+OVERLOADABLE double asin(double x);
OVERLOADABLE double ceil(double x);
OVERLOADABLE double copysign(double x, double y);
OVERLOADABLE double fabs(double x);