summaryrefslogtreecommitdiff
path: root/sc/source/core/opencl/opinlinefun_statistical.cxx
diff options
context:
space:
mode:
authormingli <mingli@multicorewareinc.com>2013-12-06 09:36:28 +0800
committerI-Jui (Ray) Sung <ray@multicorewareinc.com>2013-12-19 17:53:33 -0600
commit15a6315d7b21d00a9d7ec716c9d0b23a4ed8ffed (patch)
tree3f091dfd77aa7aee56cdafee23a64fe1c8234491 /sc/source/core/opencl/opinlinefun_statistical.cxx
parent269a72b31c2d1fd76cbaf6ddf13981be4b98b099 (diff)
GPU Calc: Optimized FINV
AMLOEXT-293 Change-Id: I9961bcc33e9ba1dc35e3f74e660946df61f16cd6 Signed-off-by: haochen <haochen@multicorewareinc.com> Signed-off-by: Wei Wei <weiwei@multicorewareinc.com> Signed-off-by: I-Jui (Ray) Sung <ray@multicorewareinc.com>
Diffstat (limited to 'sc/source/core/opencl/opinlinefun_statistical.cxx')
-rw-r--r--sc/source/core/opencl/opinlinefun_statistical.cxx202
1 files changed, 114 insertions, 88 deletions
diff --git a/sc/source/core/opencl/opinlinefun_statistical.cxx b/sc/source/core/opencl/opinlinefun_statistical.cxx
index dce8f6692e07..b75be381469c 100644
--- a/sc/source/core/opencl/opinlinefun_statistical.cxx
+++ b/sc/source/core/opencl/opinlinefun_statistical.cxx
@@ -252,9 +252,9 @@ std::string GetFInvValueDecl = "double GetFInvValue(double fF1,double fF2"
std::string GetFInvValue =
"double GetFInvValue(double fF1,double fF2,double fX1 )\n"
"{\n"
-" double arg = fF2/(fF2+fF1*fX1);\n"
-" double alpha = fF2/2.0;\n"
-" double beta = fF1/2.0;\n"
+" double arg = fF2*pow((fF2+fF1*fX1),-1.0);\n"
+" double alpha = fF2*pow(2.0,-1.0);\n"
+" double beta = fF1*pow(2.0,-1.0);\n"
" double fXin,fAlpha,fBeta;\n"
" fXin=arg;fAlpha=alpha;fBeta=beta;\n"
" if (fXin <= 0.0)\n"
@@ -272,7 +272,7 @@ std::string GetFInvValue =
" double flnX = log(fXin);\n"
" double fA = fAlpha;\n"
" double fB = fBeta;\n"
-" bool bReflect = fXin > fAlpha/(fAlpha+fBeta);\n"
+" bool bReflect = fXin > fAlpha*pow((fAlpha+fBeta),-1.0);\n"
" if (bReflect)\n"
" {\n"
" fA = fBeta;\n"
@@ -283,9 +283,9 @@ std::string GetFInvValue =
" flnY = log(fXin);\n"
" }\n"
" fResult = lcl_GetBetaHelperContFrac(fX,fA,fB);\n"
-" fResult = fResult/fA;\n"
-" double fP = fA/(fA+fB);\n"
-" double fQ = fB/(fA+fB);\n"
+" fResult = fResult*pow(fA,-1.0);\n"
+" double fP = fA*pow((fA+fB),-1.0);\n"
+" double fQ = fB*pow((fA+fB),-1.0);\n"
" double fTemp;\n"
" if (fA > 1.0 && fB > 1.0 && fP < 0.97 && fQ < 0.97)\n"
" fTemp = GetBetaDistPDF(fX,fA,fB)*fX*fY;\n"
@@ -951,30 +951,88 @@ std::string lcl_getLanczosSum =
" int nI;\n"
" if (fZ<=1.0)\n"
" {\n"
-" fSumNum = fNum[12];\n"
-" fSumDenom = fDenom[12];\n"
-" for (nI = 11; nI >= 0; --nI)\n"
-" {\n"
-" fSumNum *= fZ;\n"
-" fSumNum += fNum[nI];\n"
-" fSumDenom *= fZ;\n"
-" fSumDenom += fDenom[nI];\n"
-" }\n"
+" fSumNum = fNum[12];\n"
+" fSumDenom = fDenom[12];\n"
+" nI = 11;\n"
+" fSumNum = fSumNum*fZ+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZ+fDenom[nI];\n"
+" nI = 10;\n"
+" fSumNum = fSumNum*fZ+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZ+fDenom[nI];\n"
+" nI = 9;\n"
+" fSumNum = fSumNum*fZ+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZ+fDenom[nI];\n"
+" nI = 8;\n"
+" fSumNum = fSumNum*fZ+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZ+fDenom[nI];\n"
+" nI = 7;\n"
+" fSumNum = fSumNum*fZ+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZ+fDenom[nI];\n"
+" nI = 6;\n"
+" fSumNum = fSumNum*fZ+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZ+fDenom[nI];\n"
+" nI = 5;\n"
+" fSumNum = fSumNum*fZ+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZ+fDenom[nI];\n"
+" nI = 4;\n"
+" fSumNum = fSumNum*fZ+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZ+fDenom[nI];\n"
+" nI = 3;\n"
+" fSumNum = fSumNum*fZ+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZ+fDenom[nI];\n"
+" nI = 2;\n"
+" fSumNum = fSumNum*fZ+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZ+fDenom[nI];\n"
+" nI = 1;\n"
+" fSumNum = fSumNum*fZ+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZ+fDenom[nI];\n"
+" nI = 0;\n"
+" fSumNum = fSumNum*fZ+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZ+fDenom[nI];\n"
" }\n"
-" else\n"
+" if (fZ>1.0)\n"
" {\n"
-" double fZInv = 1/fZ;\n"
-" fSumNum = fNum[0];\n"
-" fSumDenom = fDenom[0];\n"
-" for (nI = 1; nI <=12; ++nI)\n"
-" {\n"
-" fSumNum *= fZInv;\n"
-" fSumNum += fNum[nI];\n"
-" fSumDenom *= fZInv;\n"
-" fSumDenom += fDenom[nI];\n"
-" }\n"
+" double fZInv = pow(fZ,-1.0);\n"
+" fSumNum = fNum[0];\n"
+" fSumDenom = fDenom[0];\n"
+" nI = 1;\n"
+" fSumNum = fSumNum*fZInv+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZInv+fDenom[nI];\n"
+" nI = 2;\n"
+" fSumNum = fSumNum*fZInv+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZInv+fDenom[nI];\n"
+" nI = 3;\n"
+" fSumNum = fSumNum*fZInv+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZInv+fDenom[nI];\n"
+" nI = 4;\n"
+" fSumNum = fSumNum*fZInv+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZInv+fDenom[nI];\n"
+" nI = 5;\n"
+" fSumNum = fSumNum*fZInv+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZInv+fDenom[nI];\n"
+" nI = 6;\n"
+" fSumNum = fSumNum*fZInv+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZInv+fDenom[nI];\n"
+" nI = 7;\n"
+" fSumNum = fSumNum*fZInv+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZInv+fDenom[nI];\n"
+" nI = 8;\n"
+" fSumNum = fSumNum*fZInv+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZInv+fDenom[nI];\n"
+" nI = 9;\n"
+" fSumNum = fSumNum*fZInv+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZInv+fDenom[nI];\n"
+" nI = 10;\n"
+" fSumNum = fSumNum*fZInv+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZInv+fDenom[nI];\n"
+" nI = 11;\n"
+" fSumNum = fSumNum*fZInv+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZInv+fDenom[nI];\n"
+" nI = 12;\n"
+" fSumNum = fSumNum*fZInv+fNum[nI];\n"
+" fSumDenom = fSumDenom*fZInv+fDenom[nI];\n"
" }\n"
-" return fSumNum/fSumDenom;\n"
+" return fSumNum*pow(fSumDenom,-1.0);\n"
"}\n";
std::string GetUpRegIGammaDecl=
@@ -1011,28 +1069,17 @@ std::string GetBeta =
"{\n"
" double fA;\n"
" double fB;\n"
-" if (fAlpha > fBeta)\n"
-" {\n"
-" fA = fAlpha; fB = fBeta;\n"
-" }\n"
-" else\n"
-" {\n"
-" fA = fBeta; fB = fAlpha;\n"
-" }\n"
-" if (fA+fB < fMaxGammaArgument)\n"
-" return tgamma(fA)/tgamma(fA + fB)*tgamma(fB);\n"
-" double fg = 6.024680040776729583740234375;\n"
-" double fgm = fg - 0.5;\n"
-" double fLanczos = lcl_getLanczosSum(fA);\n"
-" fLanczos /= lcl_getLanczosSum(fA + fB);\n"
-" fLanczos *= lcl_getLanczosSum(fB);\n"
-" double fABgm = fA + fB + fgm;\n"
-" fLanczos *= sqrt((fABgm/(fA + fgm))/(fB + fgm));\n"
-" double fTempA = fB/(fA + fgm);\n"
-" double fTempB = fA/(fB + fgm);\n"
-" double fResult = exp(-fA*log1p(fTempA) - fB*log1p(fTempB) - fgm);\n"
-" fResult *= fLanczos;\n"
-" return fResult;\n"
+" fAlpha>fBeta?(fA = fAlpha,fB = fBeta):(fA = fBeta,fB = fAlpha);\n"
+" double fAB = fA + fB;\n"
+
+" if (fAB < fMaxGammaArgument)\n"
+" return tgamma(fA)*pow(tgamma(fAB),-1.0)*tgamma(fB);\n"
+" double fgm = 5.524680040776729583740234375;\n"
+" double fLanczos = lcl_getLanczosSum(fA)*lcl_getLanczosSum(fB)\n"
+" *pow(lcl_getLanczosSum(fAB),-1.0);\n"
+" fLanczos *= sqrt(((fAB + fgm)*pow(fA + fgm,-1.0))*pow(fB + fgm,-1.0));\n"
+" return fLanczos * exp(-fA*log1p(fB*pow(fA + fgm,-1.0))\n"
+" - fB*log1p(fA*pow(fB + fgm,-1.0)) - fgm);\n"
"}\n";
std::string GetLogBetaDecl=
@@ -1042,27 +1089,15 @@ std::string GetLogBeta =
"{\n"
" double fA;\n"
" double fB;\n"
-" if (fAlpha > fBeta)\n"
-" {\n"
-" fA = fAlpha; fB = fBeta;\n"
-" }\n"
-" else\n"
-" {\n"
-" fA = fBeta; fB = fAlpha;\n"
-" }\n"
-" double fg = 6.024680040776729583740234375;\n"
-" double fgm = fg - 0.5;\n"
-" double fLanczos = lcl_getLanczosSum(fA);\n"
-" fLanczos /= lcl_getLanczosSum(fA + fB);\n"
-" fLanczos *= lcl_getLanczosSum(fB);\n"
-" double fLogLanczos = log(fLanczos);\n"
-" double fABgm = fA + fB + fgm;\n"
-" fLogLanczos += 0.5*(log(fABgm) - log(fA + fgm) - log(fB + fgm));\n"
-" double fTempA = fB/(fA + fgm);\n"
-" double fTempB = fA/(fB + fgm);\n"
-" double fResult = -fA * log1p(fTempA)\n"
-" -fB * log1p(fTempB)-fgm;\n"
-" fResult += fLogLanczos;\n"
+" fAlpha>fBeta?(fA = fAlpha,fB = fBeta):(fA = fBeta,fB = fAlpha);\n"
+" double fgm = 5.524680040776729583740234375;\n"
+
+" double fLanczos = lcl_getLanczosSum(fA)*lcl_getLanczosSum(fB)*\n"
+" pow(lcl_getLanczosSum(fA + fB),-1.0);\n"
+" double fResult= -fA *log1p(fB*pow(fA + fgm,-1.0))"
+"-fB *log1p(fA*pow(fB + fgm,-1.0))-fgm;\n"
+" fResult += log(fLanczos)+0.5*(log(fA + fB + fgm) - log(fA + fgm)\n"
+" - log(fB + fgm));\n"
" return fResult;\n"
"}\n";
@@ -1126,7 +1161,8 @@ std::string GetBetaDistPDF =
" && fLogBeta < fLogDblMax && fLogBeta > fLogDblMin\n"
" && fAm1LogX + fBm1LogY < fLogDblMax && fAm1LogX + fBm1LogY > \n"
" fLogDblMin)\n"
-" return pow(fX,fA-1.0) * pow(0.5-fX+0.5,fB-1.0) / GetBeta(fA,fB);\n"
+" return pow(fX,fA-1.0)*pow(0.5-fX+0.5,fB-1.0)"
+"*pow(GetBeta(fA,fB),-1.0);\n"
" else \n"
" return exp( fAm1LogX + fBm1LogY - fLogBeta);\n"
"}\n";
@@ -1139,19 +1175,9 @@ std::string lcl_GetBetaHelperContFrac =
" double a1, b1, a2, b2, fnorm, apl2m, d2m, d2m1, cfnew, cf;\n"
" a1 = 1.0; b1 = 1.0;\n"
-" b2 = 1.0 - (fA+fB)/(fA+1.0)*fX;\n"
-" if (b2 == 0.0)\n"
-" {\n"
-" a2 = 0.0;\n"
-" fnorm = 1.0;\n"
-" cf = 1.0;\n"
-" }\n"
-" else\n"
-" {\n"
-" a2 = 1.0;\n"
-" fnorm = 1.0/b2;\n"
-" cf = a2*fnorm;\n"
-" }\n"
+" b2 = 1.0 - (fA+fB)*pow(fA+1.0,-1.0)*fX;\n"
+" b2==0.0?(a2 = 0.0,fnorm = 1.0,cf = 1.0):\n"
+" (a2 = 1.0,fnorm = pow(b2,-1.0),cf = a2*fnorm);\n"
" cfnew = 1.0;\n"
" double rm = 1.0;\n"
" double fMaxIter = 50000.0;\n"
@@ -1159,15 +1185,15 @@ std::string lcl_GetBetaHelperContFrac =
" do\n"
" {\n"
" apl2m = fA + 2.0*rm;\n"
-" d2m = rm*(fB-rm)*fX/((apl2m-1.0)*apl2m);\n"
-" d2m1 = -(fA+rm)*(fA+fB+rm)*fX/(apl2m*(apl2m+1.0));\n"
+" d2m = (rm*(fB-rm))*fX*pow(apl2m*(apl2m-1.0),-1.0);\n"
+" d2m1 = -((fA+rm)*(fA+rm+fB))*fX*pow(apl2m*(apl2m+1.0),-1.0);\n"
" a1 = (a2+d2m*a1)*fnorm;\n"
" b1 = (b2+d2m*b1)*fnorm;\n"
" a2 = a1 + d2m1*a2*fnorm;\n"
" b2 = b1 + d2m1*b2*fnorm;\n"
" if (b2 != 0.0) \n"
" {\n"
-" fnorm = 1.0/b2;\n"
+" fnorm = pow(b2,-1.0);\n"
" cfnew = a2*fnorm;\n"
" bfinished = (fabs(cf-cfnew) < fabs(cf)*fMachEps);\n"
" }\n"