diff options
author | rander <rander.wang@intel.com> | 2017-03-30 13:45:20 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-04-17 16:08:48 +0800 |
commit | 7d52c16c64e3b7f91feb3043cd274f11eb10c585 (patch) | |
tree | b5d491c1abfb53867a645020ad124c45e7ae31e1 /backend/src | |
parent | 6ad6aa55083df963ad0648b4ee58a1d5813acc1b (diff) |
backend: add double version of acos
copy it from fdlibm and pass the conformance tests after refined
the copyright has been declared at the beginning of the file
Signed-off-by: rander <rander.wang@intel.com>
Tested-by: Yang Rong <rong.r.yang@intel.com>
Diffstat (limited to 'backend/src')
-rw-r--r-- | backend/src/libocl/include/ocl_float.h | 3 | ||||
-rw-r--r-- | backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 157 | ||||
-rw-r--r-- | backend/src/libocl/tmpl/ocl_math_common.tmpl.h | 2 |
3 files changed, 161 insertions, 1 deletions
diff --git a/backend/src/libocl/include/ocl_float.h b/backend/src/libocl/include/ocl_float.h index e78d38b8..77b78bbb 100644 --- a/backend/src/libocl/include/ocl_float.h +++ b/backend/src/libocl/include/ocl_float.h @@ -107,10 +107,11 @@ INLINE_OVERLOADABLE int __ocl_finitef (float x){ #define DF_EXP_OFFSET 52 #define DF_SIGN_OFFSET 63 #define DF_EXP_BIAS 1023 +#define DF_NEGTIVE_INF 0xFFF0000000000000 #define SF_POSITIVE_INF 0x7F800000 #define SF_NAN 0x7FFFFFFF -#define M_PI 3.1415926535897384626 +#define M_PI 3.141592653589793238462643383279 #endif /* __OCL_FLOAT_H__ */ diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl index aee5769d..c9c76374 100644 --- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl @@ -58,6 +58,63 @@ INLINE void __setLow(double *x, unsigned int val){ *x = as_double(low); }; +OVERLOADABLE double acos(double x) +{ + double one= 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ + pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ + pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */ + pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */ + pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */ + pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */ + pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */ + pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */ + pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */ + pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */ + qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */ + qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */ + qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */ + qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ + + double z,p,q,r,w,s,c,df; + int hx,ix; + hx = __HI(x); + ix = hx&0x7fffffff; + if(ix>=0x3ff00000) { /* |x| >= 1 */ + if(((ix-0x3ff00000)|__LO(x))==0) { /* |x|==1 */ + if(hx>0) return 0.0; /* acos(1) = 0 */ + else return pi+2.0*pio2_lo; /* acos(-1)= pi */ + } + return (x-x)/(x-x); /* acos(|x|>1) is NaN */ + } + if(ix<0x3fe00000) { /* |x| < 0.5 */ + if(ix<=0x3c600000) return pio2_hi+pio2_lo;/*if|x|<2**-57*/ + z = x*x; + p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); + q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); + r = p/q; + return pio2_hi - (x - (pio2_lo-x*r)); + } else if (hx<0) { /* x < -0.5 */ + z = (one+x)*0.5; + p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); + q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); + s = sqrt(z); + r = p/q; + w = r*s-pio2_lo; + return pi - 2.0*(s+w); + } else { /* x > 0.5 */ + z = (one-x)*0.5; + s = sqrt(z); + df = s; + __setLow(&df, 0); + c = (z-df*df)/(s+df); + p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); + q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); + r = p/q; + w = r*s+c; + return 2.0*(df+w); + } +} + OVERLOADABLE double ceil(double x) { double ret; @@ -704,6 +761,106 @@ OVERLOADABLE double round(double x) return dp; } +OVERLOADABLE double sqrt(double x) +{ + double z; + int sign = (int)0x80000000; + unsigned r,t1,s1,ix1,q1; + int ix0,s0,q,m,t,i; + const double one = 1.0, tiny=1.0e-300; + + ix0 = __HI(x); /* high word of x */ + ix1 = __LO(x); /* low word of x */ + + /* take care of Inf and NaN */ + if((ix0&0x7ff00000)==0x7ff00000) { + return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf + sqrt(-inf)=sNaN */ + } + /* take care of zero */ + if(ix0<=0) { + if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */ + else if(ix0<0) + return (x-x)/(x-x); /* sqrt(-ve) = sNaN */ + } + /* normalize x */ + m = (ix0>>20); + if(m==0) { /* subnormal x */ + while(ix0==0) { + m -= 21; + ix0 |= (ix1>>11); ix1 <<= 21; + } + for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1; + m -= i-1; + ix0 |= (ix1>>(32-i)); + ix1 <<= i; + } + m -= 1023; /* unbias exponent */ + ix0 = (ix0&0x000fffff)|0x00100000; + if(m&1){ /* odd m, double x to make it even */ + ix0 += ix0 + ((ix1&sign)>>31); + ix1 += ix1; + } + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix0 += ix0 + ((ix1&sign)>>31); + ix1 += ix1; + q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */ + r = 0x00200000; /* r = moving bit from right to left */ + + while(r!=0) { + t = s0+r; + if(t<=ix0) { + s0 = t+r; + ix0 -= t; + q += r; + } + ix0 += ix0 + ((ix1&sign)>>31); + ix1 += ix1; + r>>=1; + } + + r = sign; + while(r!=0) { + t1 = s1+r; + t = s0; + if((t<ix0)||((t==ix0)&&(t1<=ix1))) { + s1 = t1+r; + if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1; + ix0 -= t; + if (ix1 < t1) ix0 -= 1; + ix1 -= t1; + q1 += r; + } + ix0 += ix0 + ((ix1&sign)>>31); + ix1 += ix1; + r>>=1; + } + + /* use floating add to find out rounding direction */ + if((ix0|ix1)!=0) { + z = one-tiny; /* trigger inexact flag */ + if (z>=one) { + z = one+tiny; + if (q1==(unsigned)0xffffffff) { q1=0; q += 1;} + else if (z>one) { + if (q1==(unsigned)0xfffffffe) q+=1; + q1+=2; + } else + q1 += (q1&1); + } + } + ix0 = (q>>1)+0x3fe00000; + ix1 = q1>>1; + if ((q&1)==1) ix1 |= sign; + ix0 += (m <<20); + __setHigh(&z, ix0); + __setLow(&z, ix1); + return z; +} + + OVERLOADABLE double trunc(double x) { double ret = floor(fabs(x)); diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h index 2f8eb29e..2ffaec1e 100644 --- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h +++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h @@ -20,6 +20,7 @@ #include "ocl_types.h" +OVERLOADABLE double acos(double x); OVERLOADABLE double ceil(double x); OVERLOADABLE double copysign(double x, double y); OVERLOADABLE double fabs(double x); @@ -39,6 +40,7 @@ OVERLOADABLE double nan(ulong code); OVERLOADABLE double nextafter(double x, double y); OVERLOADABLE double rint(double x); OVERLOADABLE double round(double x); +OVERLOADABLE double sqrt(double x); OVERLOADABLE double trunc(double x); |