LCOV - code coverage report
Current view: top level - sc/source/core/opencl - formulagroupcl.cxx (source / functions) Hit Total Coverage
Test: commit 0e63ca4fde4e446f346e35849c756a30ca294aab Lines: 3 1858 0.2 %
Date: 2014-04-11 Functions: 3 248 1.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #include "formulagroup.hxx"
      11             : #include "clkernelthread.hxx"
      12             : #include "grouptokenconverter.hxx"
      13             : #include "document.hxx"
      14             : #include "formulacell.hxx"
      15             : #include "tokenarray.hxx"
      16             : #include "compiler.hxx"
      17             : #include "interpre.hxx"
      18             : #include "formula/vectortoken.hxx"
      19             : #include "scmatrix.hxx"
      20             : 
      21             : #include "openclwrapper.hxx"
      22             : 
      23             : #include "op_financial.hxx"
      24             : #include "op_database.hxx"
      25             : #include "op_math.hxx"
      26             : #include "op_logical.hxx"
      27             : #include "op_statistical.hxx"
      28             : #include "op_array.hxx"
      29             : #include "op_spreadsheet.hxx"
      30             : #include "op_addin.hxx"
      31             : /// CONFIGURATIONS
      32             : // Comment out this to turn off FMIN and FMAX intrinsics
      33             : #define USE_FMIN_FMAX 1
      34             : #define REDUCE_THRESHOLD 4  // set to 4 for correctness testing. priority 1
      35             : #define UNROLLING_FACTOR 16  // set to 4 for correctness testing (if no reduce)
      36             : #include "formulagroupcl_public.hxx"
      37             : #ifdef WIN32
      38             : #ifndef NAN
      39             : namespace {
      40             : static const unsigned long __nan[2] = {0xffffffff, 0x7fffffff};
      41             : }
      42             : #define NAN (*(const double*) __nan)
      43             : #endif
      44             : #endif
      45             : 
      46             : #include <list>
      47             : #include <map>
      48             : #include <iostream>
      49             : #include <sstream>
      50             : #include <algorithm>
      51             : #define MD5_KERNEL 1
      52             : #ifdef MD5_KERNEL
      53             : #include <rtl/digest.h>
      54             : #endif
      55             : 
      56             : #include <boost/scoped_ptr.hpp>
      57             : #include <boost/scoped_array.hpp>
      58             : 
      59             : #undef NO_FALLBACK_TO_SWINTERP /* undef this for non-TDD runs */
      60             : 
      61             : using namespace formula;
      62             : 
      63             : namespace sc { namespace opencl {
      64             : 
      65             : 
      66             : /// Map the buffer used by an argument and do necessary argument setting.
                       ///
                       /// Looks at the formula token behind this argument: a single vector ref
                       /// supplies GetArray().mpNumericArray, a double vector ref supplies
                       /// GetArrays()[mnIndex].mpNumericArray; any other token type throws Unhandled.
                       /// When a numeric array is present it is wrapped in a read-only cl_mem created
                       /// with CL_MEM_USE_HOST_PTR; otherwise a buffer of the same byte size (at
                       /// least one double) is allocated and filled with NAN. The resulting mpClmem
                       /// is then set as kernel argument number argno.
                       ///
                       /// @param k      kernel whose argument is being set
                       /// @param argno  index of the kernel argument to set
                       /// @return 1; DynamicKernelMixedArgument::Marshal adds this value to the
                       ///         argument index it hands to the next Marshal call
                       /// @throws Unhandled    if the token is neither a single nor a double vector ref
                       /// @throws OpenCLError  if any OpenCL call reports an error
      67           0 : size_t VectorRef::Marshal(cl_kernel k, int argno, int, cl_program)
      68             : {
      69           0 :     FormulaToken *ref = mFormulaTree->GetFormulaToken();
      70           0 :     double *pHostBuffer = NULL;
      71           0 :     size_t szHostBuffer = 0;
      72           0 :     if (ref->GetType() == formula::svSingleVectorRef) {
      73             :         const formula::SingleVectorRefToken* pSVR =
      74           0 :             static_cast< const formula::SingleVectorRefToken* >(ref);
      75           0 :         pHostBuffer = const_cast<double*>(pSVR->GetArray().mpNumericArray);
      76           0 :         szHostBuffer = pSVR->GetArrayLength() * sizeof(double);
      77             : #if 0
      78             :         std::cerr << "Marshal a Single vector of size " << pSVR->GetArrayLength();
      79             :         std::cerr << " at argument "<< argno << "\n";
      80             : #endif
      81           0 :     } else if (ref->GetType() == formula::svDoubleVectorRef) {
      82             :         const formula::DoubleVectorRefToken* pDVR =
      83           0 :             static_cast< const formula::DoubleVectorRefToken* >(ref);
                               // mnIndex selects which of the token's arrays this argument stands for
      84             :         pHostBuffer = const_cast<double*>(
      85           0 :                 pDVR->GetArrays()[mnIndex].mpNumericArray);
      86           0 :         szHostBuffer = pDVR->GetArrayLength() * sizeof(double);
      87             :     } else {
      88           0 :         throw Unhandled();
      89             :     }
      90             :     // Obtain cl context
      91             :     KernelEnv kEnv;
      92           0 :     OpenclDevice::setKernelEnv(&kEnv);
      93             :     cl_int err;
      94           0 :     if (pHostBuffer)
      95             :     {
                               // Numeric data exists: let the cl_mem use the host array directly
      96             :         mpClmem = clCreateBuffer(kEnv.mpkContext,
      97             :                 (cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR,
      98             :                 szHostBuffer,
      99           0 :                 pHostBuffer, &err);
     100           0 :         if (CL_SUCCESS != err)
     101           0 :             throw OpenCLError(err, __FILE__, __LINE__);
     102             :     }
     103             :     else
     104             :     {
     105           0 :         if (szHostBuffer == 0)
     106           0 :             szHostBuffer = sizeof(double); // a dummy small value
     107             :         // Marshal as a buffer of NANs
     108             :         mpClmem = clCreateBuffer(kEnv.mpkContext,
     109             :                 (cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR,
     110           0 :                 szHostBuffer, NULL, &err);
     111           0 :         if (CL_SUCCESS != err)
     112           0 :             throw OpenCLError(err, __FILE__, __LINE__);
                               // Blocking map (CL_TRUE) so the buffer can be filled from the host
     113             :         double *pNanBuffer = (double*)clEnqueueMapBuffer(
     114             :                 kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
     115           0 :                 szHostBuffer, 0, NULL, NULL, &err);
     116           0 :         if (CL_SUCCESS != err)
     117           0 :             throw OpenCLError(err, __FILE__, __LINE__);
     118           0 :         for (size_t i = 0; i < szHostBuffer/sizeof(double); i++)
     119           0 :             pNanBuffer[i] = NAN;
     120             :         err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
     121           0 :                 pNanBuffer, 0, NULL, NULL);
                               // Check the unmap result here: err is overwritten by clSetKernelArg
                               // below, so a failed unmap would otherwise go unnoticed.
                               if (CL_SUCCESS != err)
                                   throw OpenCLError(err, __FILE__, __LINE__);
     122             :     }
     123             : 
     124           0 :     err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem);
     125           0 :     if (CL_SUCCESS != err)
     126           0 :         throw OpenCLError(err, __FILE__, __LINE__);
     127           0 :     return 1;
     128             : }
     129             : 
     130             : /// Arguments that are actually compile-time constant strings.
     131             : /// Currently, only the hash is passed.
     132             : /// TBD(IJSUNG): pass also length and the actual string if there is a
     133             : /// hash function collision
                       ///
                       /// The hash is always taken over the ASCII-upper-cased string, both when it
                       /// is emitted as a literal into the kernel source and when it is bound as a
                       /// kernel argument.
     134           0 : class ConstStringArgument: public DynamicKernelArgument
     135             : {
     136             : public:
                           /// Forwards the symbol name and formula tree node to the base class.
     137           0 :     ConstStringArgument(const std::string &s,
     138             :         FormulaTreeNodeRef ft):
     139           0 :             DynamicKernelArgument(s, ft) {}
     140             :     /// Generate declaration: the kernel parameter is "unsigned <symbol name>"
     141           0 :     virtual void GenDecl(std::stringstream &ss) const SAL_OVERRIDE
     142             :     {
     143           0 :         ss << "unsigned " << mSymName;
     144           0 :     }
                           /// Writes the same hash literal that GenSlidingWindowDeclRef() returns.
     145           0 :     virtual void GenDeclRef(std::stringstream &ss) const SAL_OVERRIDE
     146             :     {
     147           0 :         ss << GenSlidingWindowDeclRef(false);
     148           0 :     }
                           /// The sliding-window declaration is identical to the plain declaration.
     149           0 :     virtual void GenSlidingWindowDecl(std::stringstream &ss) const SAL_OVERRIDE
     150             :     {
     151           0 :         GenDecl(ss);
     152           0 :     }
                           /// Returns the hash of the upper-cased string followed by "U", i.e. the
                           /// value is referenced as a literal rather than through the symbol name.
                           /// Throws Unhandled if the token is not of type svString.
     153           0 :     virtual std::string GenSlidingWindowDeclRef(bool=false) const SAL_OVERRIDE
     154             :     {
     155           0 :         std::stringstream ss;
     156           0 :         if (GetFormulaToken()->GetType() != formula::svString)
     157           0 :             throw Unhandled();
     158           0 :         FormulaToken *Tok = GetFormulaToken();
     159           0 :         ss << Tok->GetString().getString().toAsciiUpperCase().hashCode() << "U";
     160           0 :         return ss.str();
     161             :     }
                           /// A constant string always has a window size of 1.
     162           0 :     virtual size_t GetWindowSize(void) const SAL_OVERRIDE
     163             :     {
     164           0 :         return 1;
     165             :     }
     166             :     /// Pass the 32-bit hash of the string to the kernel.
                           /// Sets kernel argument argno to the hash as a cl_uint and returns 1.
                           /// Throws Unhandled for a non-string token and OpenCLError if
                           /// clSetKernelArg fails.
     167           0 :     virtual size_t Marshal(cl_kernel k, int argno, int, cl_program) SAL_OVERRIDE
     168             :     {
     169           0 :         FormulaToken *ref = mFormulaTree->GetFormulaToken();
     170           0 :         cl_uint hashCode = 0;
     171           0 :         if (ref->GetType() == formula::svString)
     172             :         {
     173           0 :             const rtl::OUString s = ref->GetString().getString().toAsciiUpperCase();
     174           0 :             hashCode = s.hashCode();
     175             :         } else {
     176           0 :             throw Unhandled();
     177             :         }
     178             :         // marshaling
     179             :         // Obtain cl context
                               // NOTE(review): kEnv is filled in here but not read again in this
                               // function.
     180             :         KernelEnv kEnv;
     181           0 :         OpenclDevice::setKernelEnv(&kEnv);
     182             :         // Bind the hash value as the scalar kernel argument
     183           0 :         cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), (void*)&hashCode);
     184           0 :         if (CL_SUCCESS != err)
     185           0 :             throw OpenCLError(err, __FILE__, __LINE__);
     186           0 :         return 1;
     187             :     }
     188             : };
     189             : 
     190             : /// Arguments that are actually compile-time constants.
                       /// The value is read from a token of type svDouble and handed to the kernel
                       /// as a scalar "double" argument.
     191           0 : class DynamicKernelConstantArgument: public DynamicKernelArgument
     192             : {
     193             : public:
                           /// Forwards the symbol name and formula tree node to the base class.
     194           0 :     DynamicKernelConstantArgument(const std::string &s,
     195             :         FormulaTreeNodeRef ft):
     196           0 :             DynamicKernelArgument(s, ft) {}
     197             :     /// Generate declaration: the kernel parameter is "double <symbol name>"
     198           0 :     virtual void GenDecl(std::stringstream &ss) const SAL_OVERRIDE
     199             :     {
     200           0 :         ss << "double " << mSymName;
     201           0 :     }
                           /// The argument is referenced by its symbol name.
     202           0 :     virtual void GenDeclRef(std::stringstream &ss) const SAL_OVERRIDE
     203             :     {
     204           0 :         ss << mSymName;
     205           0 :     }
                           /// The sliding-window declaration is identical to the plain declaration.
     206           0 :     virtual void GenSlidingWindowDecl(std::stringstream &ss) const SAL_OVERRIDE
     207             :     {
     208           0 :         GenDecl(ss);
     209           0 :     }
                           /// Returns the symbol name; throws Unhandled if the token is not svDouble.
     210           0 :     virtual std::string GenSlidingWindowDeclRef(bool=false) const SAL_OVERRIDE
     211             :     {
     212           0 :         if (GetFormulaToken()->GetType() != formula::svDouble)
     213           0 :             throw Unhandled();
     214           0 :         return mSymName;
     215             :     }
                           /// A constant always has a window size of 1.
     216           0 :     virtual size_t GetWindowSize(void) const SAL_OVERRIDE
     217             :     {
     218           0 :         return 1;
     219             :     }
                           /// Returns the token's double value; throws Unhandled if the token is
                           /// not of type svDouble.
     220           0 :     double GetDouble(void) const
     221             :     {
     222           0 :         FormulaToken *Tok = GetFormulaToken();
     223           0 :         if (Tok->GetType() != formula::svDouble)
     224           0 :             throw Unhandled();
     225           0 :         return Tok->GetDouble();
     226             :     }
     227             :     /// Pass the constant to the given kernel as a scalar; no buffer is created.
                           /// Returns 1; throws OpenCLError if clSetKernelArg fails.
     228           0 :     virtual size_t Marshal(cl_kernel k, int argno, int, cl_program) SAL_OVERRIDE
     229             :     {
     230           0 :         double tmp = GetDouble();
     231             :         // Bind the constant value as the scalar kernel argument
     232           0 :         cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
     233           0 :         if (CL_SUCCESS != err)
     234           0 :             throw OpenCLError(err, __FILE__, __LINE__);
     235           0 :         return 1;
     236             :     }
                           /// A scalar constant has no cl_mem behind it, so this returns NULL.
     237           0 :     virtual cl_mem GetCLBuffer(void) const { return NULL; }
     238             : };
     239             : 
                       /// Argument standing for the constant pi.
                       /// GenDeclRef() writes the literal 3.14159265358979, while the kernel
                       /// parameter itself is a "double" that Marshal() sets to 0.0.
                       /// NOTE(review): GenSlidingWindowDeclRef() returns the symbol name, so code
                       /// generated through it would read the 0.0 bound by Marshal() rather than
                       /// pi - confirm against the callers whether that path is used.
     240           0 : class DynamicKernelPiArgument: public DynamicKernelArgument
     241             : {
     242             : public:
                           /// Forwards the symbol name and formula tree node to the base class.
     243           0 :     DynamicKernelPiArgument(const std::string &s,
     244             :         FormulaTreeNodeRef ft):
     245           0 :             DynamicKernelArgument(s, ft) {}
     246             :     /// Generate declaration: the kernel parameter is "double <symbol name>"
     247           0 :     virtual void GenDecl(std::stringstream &ss) const SAL_OVERRIDE
     248             :     {
     249           0 :         ss << "double " << mSymName;
     250           0 :     }
                           /// Writes pi as a numeric literal instead of referring to the parameter.
     251           0 :     virtual void GenDeclRef(std::stringstream &ss) const SAL_OVERRIDE
     252             :     {
     253           0 :         ss << "3.14159265358979";
     254           0 :     }
                           /// The sliding-window declaration is identical to the plain declaration.
     255           0 :     virtual void GenSlidingWindowDecl(std::stringstream &ss) const SAL_OVERRIDE
     256             :     {
     257           0 :         GenDecl(ss);
     258           0 :     }
                           /// Returns the symbol name.
     259           0 :     virtual std::string GenSlidingWindowDeclRef(bool=false) const SAL_OVERRIDE
     260             :     {
     261           0 :         return mSymName;
     262             :     }
                           /// Always 1.
     263           0 :     virtual size_t GetWindowSize(void) const SAL_OVERRIDE
     264             :     {
     265           0 :         return 1;
     266             :     }
     267             :     /// Bind 0.0 to the kernel parameter as a scalar; no buffer is created.
                           /// Returns 1; throws OpenCLError if clSetKernelArg fails.
     268           0 :     virtual size_t Marshal(cl_kernel k, int argno, int, cl_program) SAL_OVERRIDE
     269             :     {
     270           0 :         double tmp = 0.0;
     271             :         // Bind the value as the scalar kernel argument
     272           0 :         cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
     273           0 :         if (CL_SUCCESS != err)
     274           0 :             throw OpenCLError(err, __FILE__, __LINE__);
     275           0 :         return 1;
     276             :     }
     277             : };
     278             : 
                       /// Argument whose value is produced inside the kernel by a generated
                       /// "<symbol name>_Random()" helper function.
                       /// The kernel parameter itself is a "double" that Marshal() sets to 0.0;
                       /// sliding-window references call the helper instead of reading it.
     279           0 : class DynamicKernelRandomArgument: public DynamicKernelArgument
     280             : {
     281             : public:
                           /// Forwards the symbol name and formula tree node to the base class.
     282           0 :     DynamicKernelRandomArgument(const std::string &s,
     283             :         FormulaTreeNodeRef ft):
     284           0 :             DynamicKernelArgument(s, ft) {}
     285             :     /// Generate declaration: the kernel parameter is "double <symbol name>"
     286           0 :     virtual void GenDecl(std::stringstream &ss) const SAL_OVERRIDE
     287             :     {
     288           0 :         ss << "double " << mSymName;
     289           0 :     }
                           /// The argument is referenced by its symbol name.
     290           0 :     virtual void GenDeclRef(std::stringstream &ss) const SAL_OVERRIDE
     291             :     {
     292           0 :         ss << mSymName;
     293           0 :     }
                           /// The sliding-window declaration is identical to the plain declaration.
     294           0 :     virtual void GenSlidingWindowDecl(std::stringstream &ss) const SAL_OVERRIDE
     295             :     {
     296           0 :         GenDecl(ss);
     297           0 :     }
                           /// Returns a call to the helper emitted by GenSlidingWindowFunction().
     298           0 :     virtual std::string GenSlidingWindowDeclRef(bool=false) const SAL_OVERRIDE
     299             :     {
     300           0 :         return mSymName + "_Random()";
     301             :     }
                           /// Emits the OpenCL helper "double <symbol name>_Random ()".
                           /// The emitted code starts from f = gid0 + 1, applies
                           /// f = Lamda * f; f -= M * (int)(f / M) one hundred times with
                           /// M = 2147483647 and Lamda = 32719, and returns f / M. Its only input
                           /// is the work-item id gid0.
                           /// NOTE(review): the emitted declaration line ends in ";;" - the second
                           /// semicolon is an empty statement.
     302           0 :     void GenSlidingWindowFunction(std::stringstream &ss) SAL_OVERRIDE
     303             :     {
     304           0 :         ss << "\ndouble " << mSymName;
     305           0 :         ss << "_Random ()\n{\n";
     306           0 :         ss << "    int i, gid0=get_global_id(0);;\n";
     307           0 :         ss << "    double tmp = 0;\n";
     308           0 :         ss << "    double M = 2147483647;\n";
     309           0 :         ss << "    double Lamda = 32719;\n";
     310           0 :         ss << "    double f;\n";
     311           0 :         ss << "    f = gid0 + 1;\n";
     312           0 :         ss << "    int k;\n";
     313           0 :         ss << "    for(i = 1;i <= 100; ++i){\n";
     314           0 :         ss << "        f = Lamda * f;\n";
     315           0 :         ss << "        k = (int)(f * pow(M,-1.0));\n";
     316           0 :         ss << "        f = f - M * k;\n";
     317           0 :         ss << "    }\n";
     318           0 :         ss << "    tmp = f * pow(M,-1.0);\n";
     319           0 :         ss << "    return tmp;\n";
     320           0 :         ss << "}";
     321           0 :     }
                           /// Always 1.
     322           0 :     virtual size_t GetWindowSize(void) const SAL_OVERRIDE
     323             :     {
     324           0 :         return 1;
     325             :     }
     326             :     /// Bind 0.0 to the kernel parameter as a scalar; no buffer is created.
                           /// Returns 1; throws OpenCLError if clSetKernelArg fails.
     327           0 :     virtual size_t Marshal(cl_kernel k, int argno, int, cl_program) SAL_OVERRIDE
     328             :     {
     329           0 :         double tmp = 0.0;
     330             :         // Bind the value as the scalar kernel argument
     331           0 :         cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
     332           0 :         if (CL_SUCCESS != err)
     333           0 :             throw OpenCLError(err, __FILE__, __LINE__);
     334           0 :         return 1;
     335             :     }
     336             : };
     337             : 
     338             : /// A vector of strings.
                       /// The kernel sees it as a "__global unsigned int *"; Marshal() fills that
                       /// buffer with one hash code per string.
     339           0 : class DynamicKernelStringArgument: public VectorRef
     340             : {
     341             : public:
                           /// Forwards the symbol name, formula tree node and array index to
                           /// VectorRef.
     342           0 :     DynamicKernelStringArgument(const std::string &s,
     343             :         FormulaTreeNodeRef ft, int index = 0):
     344           0 :         VectorRef(s, ft, index) {}
     345             : 
                           /// Emits nothing.
     346           0 :     virtual void GenSlidingWindowFunction(std::stringstream &) SAL_OVERRIDE {}
     347             :     /// Generate declaration: "__global unsigned int *<symbol name>"
     348           0 :     virtual void GenDecl(std::stringstream &ss) const SAL_OVERRIDE
     349             :     {
     350           0 :         ss << "__global unsigned int *"<<mSymName;
     351           0 :     }
                           /// Same text as GenDecl(); the call is qualified, so it is not
                           /// dispatched virtually.
     352           0 :     virtual void GenSlidingWindowDecl(std::stringstream& ss) const SAL_OVERRIDE
     353             :     {
     354           0 :         DynamicKernelStringArgument::GenDecl(ss);
     355           0 :     }
                           /// Defined out of line below the class.
     356             :     virtual size_t Marshal(cl_kernel, int, int, cl_program) SAL_OVERRIDE;
     357             : };
     358             : 
     359             : /// Marshal a string vector reference.
                       ///
                       /// Takes the string array of a single vector ref, or array number mnIndex of
                       /// a double vector ref, allocates a read-only cl_mem with one cl_int-sized
                       /// slot per string, writes each string's hashCode() into it (0 for a null
                       /// entry or when the token carries no string array), and sets the buffer as
                       /// kernel argument number argno.
                       ///
                       /// NOTE(review): ConstStringArgument hashes the ASCII-upper-cased string,
                       /// whereas the entries here are hashed as they are; presumably the array
                       /// entries are already case-normalised - confirm.
                       ///
                       /// @param k      kernel whose argument is being set
                       /// @param argno  index of the kernel argument to set
                       /// @return 1
                       /// @throws Unhandled    if the token is neither a single nor a double vector ref
                       /// @throws OpenCLError  if any OpenCL call reports an error
     360           0 : size_t DynamicKernelStringArgument::Marshal(cl_kernel k, int argno, int, cl_program)
     361             : {
     362           0 :     FormulaToken *ref = mFormulaTree->GetFormulaToken();
     363             :     // Obtain cl context
     364             :     KernelEnv kEnv;
     365           0 :     OpenclDevice::setKernelEnv(&kEnv);
     366             :     cl_int err;
     367           0 :     formula::VectorRefArray vRef;
     368           0 :     size_t nStrings = 0;
     369           0 :     if (ref->GetType() == formula::svSingleVectorRef) {
     370             :         const formula::SingleVectorRefToken* pSVR =
     371           0 :             static_cast< const formula::SingleVectorRefToken* >(ref);
     372           0 :         nStrings = pSVR->GetArrayLength();
     373           0 :         vRef = pSVR->GetArray();
     374           0 :     } else if (ref->GetType() == formula::svDoubleVectorRef) {
     375             :         const formula::DoubleVectorRefToken* pDVR =
     376           0 :             static_cast< const formula::DoubleVectorRefToken* >(ref);
     377           0 :         nStrings = pDVR->GetArrayLength();
     378           0 :         vRef = pDVR->GetArrays()[mnIndex];
                           } else {
                               // Reject other token types explicitly, as VectorRef::Marshal does,
                               // instead of falling through with nStrings == 0 and asking OpenCL
                               // for a zero-byte buffer.
                               throw Unhandled();
     379             :     }
     380           0 :     size_t szHostBuffer = nStrings * sizeof(cl_int);
     381             :     // Marshal strings. Right now we pass hashes of these string
     382             :     mpClmem = clCreateBuffer(kEnv.mpkContext,
     383             :             (cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR,
     384           0 :             szHostBuffer, NULL, &err);
     385           0 :     if (CL_SUCCESS != err)
     386           0 :         throw OpenCLError(err, __FILE__, __LINE__);
                           // Blocking map (CL_TRUE) so the hashes can be written from the host
     387             :     cl_uint *pHashBuffer = (cl_uint*)clEnqueueMapBuffer(
     388             :             kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
     389           0 :             szHostBuffer, 0, NULL, NULL, &err);
     390           0 :     if (CL_SUCCESS != err)
     391           0 :         throw OpenCLError(err, __FILE__, __LINE__);
     392           0 :     for (size_t i = 0; i < nStrings; i++)
     393             :     {
                               // Guard the array pointer itself as well as the entry, so a token
                               // without a string array yields all-zero hashes instead of a null
                               // dereference.
     394           0 :         if (vRef.mpStringArray && vRef.mpStringArray[i])
     395             :         {
     396           0 :             const OUString tmp = OUString(vRef.mpStringArray[i]);
     397           0 :             pHashBuffer[i] = tmp.hashCode();
     398             :         }
     399             :         else
     400             :         {
     401           0 :             pHashBuffer[i] = 0;
     402             :         }
     403             :     }
     404             :     err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
     405           0 :             pHashBuffer, 0, NULL, NULL);
     406           0 :     if (CL_SUCCESS != err)
     407           0 :         throw OpenCLError(err, __FILE__, __LINE__);
     408             : 
     409           0 :     err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem);
     410           0 :     if (CL_SUCCESS != err)
     411           0 :         throw OpenCLError(err, __FILE__, __LINE__);
     412           0 :     return 1;
     413             : }
     414             : 
     415             : /// A mixed string/numeric vector.
                       /// Combines the numeric VectorRef base with a DynamicKernelStringArgument
                       /// member built over the same tree node, whose symbol name is the base
                       /// name with "s" appended. It therefore occupies two kernel parameters:
                       /// the numeric buffer followed by the string-hash buffer.
     416           0 : class DynamicKernelMixedArgument: public VectorRef
     417             : {
     418             : public:
     419           0 :     DynamicKernelMixedArgument(const std::string &s,
     420             :         FormulaTreeNodeRef ft):
     421           0 :         VectorRef(s, ft), mStringArgument(s+"s", ft) {}
                           /// Emits the numeric declaration, ", ", then the string declaration.
     422           0 :     virtual void GenSlidingWindowDecl(std::stringstream& ss) const SAL_OVERRIDE
     423             :     {
     424           0 :         VectorRef::GenSlidingWindowDecl(ss);
     425           0 :         ss << ", ";
     426           0 :         mStringArgument.GenSlidingWindowDecl(ss);
     427           0 :     }
                           /// Emits nothing.
     428           0 :     virtual void GenSlidingWindowFunction(std::stringstream &) SAL_OVERRIDE {}
     429             :     /// Generate declaration: numeric declaration, ", ", string declaration
     430           0 :     virtual void GenDecl(std::stringstream &ss) const SAL_OVERRIDE
     431             :     {
     432           0 :         VectorRef::GenDecl(ss);
     433           0 :         ss << ", ";
     434           0 :         mStringArgument.GenDecl(ss);
     435           0 :     }
                           /// Emits the numeric reference, ",", then the string reference.
     436           0 :     virtual void GenDeclRef(std::stringstream &ss) const SAL_OVERRIDE
     437             :     {
     438           0 :         VectorRef::GenDeclRef(ss);
     439           0 :         ss << ",";
     440           0 :         mStringArgument.GenDeclRef(ss);
     441           0 :     }
                           /// Returns the expression "(!isNan(<num>)?<num>:<str>)": the numeric
                           /// element when it is not NaN, otherwise the string element.
                           /// The bool parameter is ignored.
     442           0 :     virtual std::string GenSlidingWindowDeclRef(bool) const SAL_OVERRIDE
     443             :     {
     444           0 :         std::stringstream ss;
     445           0 :         ss << "(!isNan(" << VectorRef::GenSlidingWindowDeclRef();
     446           0 :         ss << ")?" << VectorRef::GenSlidingWindowDeclRef();
     447           0 :         ss << ":" << mStringArgument.GenSlidingWindowDeclRef();
     448           0 :         ss << ")";
     449           0 :         return ss.str();
     450             :     }
                           /// Returns only the numeric element reference.
     451           0 :     virtual std::string GenDoubleSlidingWindowDeclRef(bool=false) const SAL_OVERRIDE
     452             :     {
     453           0 :         std::stringstream ss;
     454           0 :         ss << VectorRef::GenSlidingWindowDeclRef();
     455           0 :         return ss.str();
     456             :     }
                           /// Returns only the string element reference.
     457           0 :     virtual std::string GenStringSlidingWindowDeclRef(bool=false) const SAL_OVERRIDE
     458             :     {
     459           0 :         std::stringstream ss;
     460           0 :         ss << mStringArgument.GenSlidingWindowDeclRef();
     461           0 :         return ss.str();
     462             :     }
                           /// Marshals the numeric buffer at argno, then the string buffer at the
                           /// following argument index, and returns the combined count reported
                           /// by the two Marshal calls.
     463           0 :     virtual size_t Marshal(cl_kernel k, int argno, int vw, cl_program p) SAL_OVERRIDE
     464             :     {
     465           0 :         int i = VectorRef::Marshal(k, argno, vw, p);
     466           0 :         i += mStringArgument.Marshal(k, argno+i, vw, p);
     467           0 :         return i;
     468             :     }
     469             : protected:
                           /// String-hash half of the mixed argument.
     470             :     DynamicKernelStringArgument mStringArgument;
     471             : };
     472             : 
     473             : /// Handling a Double Vector that is used as a sliding window input
     474             : /// to either a sliding window average or sum-of-products
     475             : /// Generate a sequential loop for reductions
     476             : class OpSum; // Forward Declaration
     477             : class OpAverage; // Forward Declaration
     478             : class OpMin; // Forward Declaration
     479             : class OpMax; // Forward Declaration
     480             : class OpCount; // Forward Declaration
     481             : template<class Base>
     482             : class DynamicKernelSlidingArgument: public Base
     483             : {
     484             : public:
     485           0 :     DynamicKernelSlidingArgument(const std::string &s,
     486             :         FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen,
     487             :         int index=0):
     488           0 :         Base(s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
     489             :     {
                               // Only double vector refs are accepted; any other token type
                               // throws Unhandled. The token is kept in mpDVR and its two
                               // "fixed" flags are cached in bIsStartFixed / bIsEndFixed.
     490           0 :         FormulaToken *t = ft->GetFormulaToken();
     491           0 :         if (t->GetType() != formula::svDoubleVectorRef)
     492           0 :             throw Unhandled();
     493           0 :         mpDVR = static_cast<const formula::DoubleVectorRefToken *>(t);
     494           0 :         bIsStartFixed = mpDVR->IsStartFixed();
     495           0 :         bIsEndFixed = mpDVR->IsEndFixed();
     496           0 :     }
     497             :     // Should only be called by SumIfs. Yikes!
                           // Returns true when the window size exceeds 100 and the start and end
                           // are either both fixed or both not fixed.
     498           0 :     virtual bool NeedParallelReduction(void) const
     499             :     {
                               // Checks that the code generator really is an OpSumIfs
     500             :         assert(dynamic_cast<OpSumIfs*>(mpCodeGen.get()));
     501           0 :         return GetWindowSize()> 100 &&
     502           0 :             ( (GetStartFixed() && GetEndFixed()) ||
     503           0 :               (!GetStartFixed() && !GetEndFixed())  ) ;
     504             :     }
     505           0 :     virtual void GenSlidingWindowFunction(std::stringstream &) {} // emits nothing
     506             : 
     507           0 :     virtual std::string GenSlidingWindowDeclRef(bool nested=false) const
     508             :     {
                               // Builds the expression that reads one window element inside a
                               // generated loop over i:
                               //   start and end both not fixed -> <name>[i + gid0]
                               //   otherwise                    -> <name>[i]
                               // With nested == true the access is wrapped in a bounds test against
                               // the array length and evaluates to NAN when the index is out of
                               // range.
     509           0 :         size_t nArrayLength = mpDVR->GetArrayLength();
     510           0 :         std::stringstream ss;
     511           0 :         if (!bIsStartFixed && !bIsEndFixed)
     512             :         {
     513           0 :             if (nested)
     514           0 :                 ss << "((i+gid0) <" << nArrayLength <<"?";
     515           0 :             ss << Base::GetName() << "[i + gid0]";
     516           0 :             if (nested)
     517           0 :                 ss << ":NAN)";
     518             :         }
     519             :         else
     520             :         {
     521           0 :             if (nested)
     522           0 :                 ss << "(i <" << nArrayLength <<"?";
     523           0 :             ss << Base::GetName() << "[i]";
     524           0 :             if (nested)
     525           0 :                 ss << ":NAN)";
     526             :         }
     527           0 :         return ss.str();
     528             :     }
     529             :     /// Controls how the elements in the DoubleVectorRef are traversed
                           ///
                           /// Writes into @p ss the OpenCL text that opens the traversal of the
                           /// sliding window and reports through @p needBody whether the caller
                           /// still has to emit the loop body.
                           ///
                           /// Without UNROLLING_FACTOR: needBody is always set to true and either an
                           /// `if (gid0 < arrayLength)` guard (window of one row) or a `for (int i = ...`
                           /// header is emitted; the loop bounds depend on bIsStartFixed / bIsEndFixed.
                           ///
                           /// With UNROLLING_FACTOR: the two "one end fixed" cases emit a for-header
                           /// (needBody = true); the other two cases emit the unrolled statements
                           /// themselves, including the `tmp = legalize(...)` updates built from
                           /// mpCodeGen->Gen2(), and set needBody = false.
                           ///
                           /// @param ss        stream receiving the generated kernel source
                           /// @param needBody  out: true if only a header was written
                           /// @return the window size, i.e. mpDVR->GetRefRowSize()
     530           0 :     virtual size_t GenReductionLoopHeader(
     531             :         std::stringstream &ss, bool &needBody)
     532             :     {
     533             :         assert(mpDVR);
     534           0 :         size_t nCurWindowSize = mpDVR->GetRefRowSize();
     535             :         // original for loop
     536             : #ifndef UNROLLING_FACTOR
     537             :         needBody = true;
     538             :         // No need to generate a for-loop for degenerated cases
     539             :         if (nCurWindowSize == 1)
     540             :         {
                               // Emits a bounds guard plus `int i = 0;` so the body can still index with i.
     541             :             ss << "if (gid0 <" << mpDVR->GetArrayLength();
     542             :             ss << ")\n\t{\tint i = 0;\n\t\t";
     543             :             return nCurWindowSize;
     544             :         }
     545             : 
     546             :         ss << "for (int i = ";
                               // Start moves with gid0, end is fixed: i runs from gid0 up to the window size.
                               // NOTE(review): in each non-ISNAN variant below the emitted header has no
                               // opening '{', whereas the ISNAN variants emit one -- confirm the caller copes.
     547             :         if (!bIsStartFixed && bIsEndFixed)
     548             :         {
     549             : #ifdef  ISNAN
     550             :             ss << "gid0; i < " << mpDVR->GetArrayLength();
     551             :             ss << " && i < " << nCurWindowSize  << "; i++){\n\t\t";
     552             : #else
     553             :             ss << "gid0; i < "<< nCurWindowSize << "; i++)\n\t\t";
     554             : #endif
     555             :         }
                               // Start is fixed, end moves with gid0: i runs from 0 up to gid0 + window size.
     556             :         else if (bIsStartFixed && !bIsEndFixed)
     557             :         {
     558             : #ifdef  ISNAN
     559             :             ss << "0; i < " << mpDVR->GetArrayLength();
     560             :             ss << " && i < gid0+"<< nCurWindowSize << "; i++){\n\t\t";
     561             : #else
     562             :             ss << "0; i < gid0+"<< nCurWindowSize << "; i++)\n\t\t";
     563             : #endif
     564             :         }
                               // Neither end is fixed: i is window-relative; the ISNAN variant bounds i + gid0
                               // by the array length.
     565             :         else if (!bIsStartFixed && !bIsEndFixed)
     566             :         {
     567             : #ifdef  ISNAN
     568             :             ss << "0; i + gid0 < " << mpDVR->GetArrayLength();
     569             :             ss << " &&  i < "<< nCurWindowSize << "; i++){\n\t\t";
     570             : #else
     571             :             ss << "0; i < "<< nCurWindowSize << "; i++)\n\t\t";
     572             : #endif
     573             :         }
                               // Both ends fixed: the bound is clamped on the host to
                               // min(array length, window size) and emitted as a literal.
     574             :         else
     575             :         {
     576             :             unsigned limit =
     577             :                 std::min(mpDVR->GetArrayLength(), nCurWindowSize);
     578             :             ss << "0; i < "<< limit << "; i++){\n\t\t";
     579             :         }
     580             : return nCurWindowSize;
     581             : #endif
     582             : 
     583             : #ifdef UNROLLING_FACTOR
     584             :         {
                                   // Start moves, end fixed: plain for-header, body supplied by the caller.
     585           0 :             if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) {
     586           0 :                 ss << "for (int i = ";
     587           0 :                 ss << "gid0; i < " << mpDVR->GetArrayLength();
     588           0 :                 ss << " && i < " << nCurWindowSize  << "; i++){\n\t\t";
     589           0 :                 needBody = true;
     590           0 :                 return nCurWindowSize;
                                   // Start fixed, end moves: plain for-header, body supplied by the caller.
     591           0 :             } else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) {
     592           0 :                 ss << "for (int i = ";
     593           0 :                 ss << "0; i < " << mpDVR->GetArrayLength();
     594           0 :                 ss << " && i < gid0+"<< nCurWindowSize << "; i++){\n\t\t";
     595           0 :                 needBody = true;
     596           0 :                 return nCurWindowSize;
                                   // Neither end fixed: emit an unrolled loop with the reduction statement
                                   // inlined; every access is guarded by `i + gid0 < arrayLength`.
     597           0 :             } else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()){
     598           0 :                 ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
     599           0 :                 ss << "{int i;\n\t";
     600           0 :                 std::stringstream temp1,temp2;
     601           0 :                 int outLoopSize = UNROLLING_FACTOR;
                                       // Outer loop is only emitted when at least one full group of
                                       // outLoopSize elements fits into the window.
     602           0 :                 if ( nCurWindowSize/outLoopSize != 0){
     603           0 :                     ss << "for(int outLoop=0; outLoop<" << nCurWindowSize/outLoopSize<< "; outLoop++){\n\t";
     604           0 :                     for(int count=0; count < outLoopSize; count++){
     605           0 :                         ss << "i = outLoop*"<<outLoopSize<<"+"<<count<<";\n\t";
                                               // The statement text is built once (count == 0) and then
                                               // re-emitted unchanged for every unrolled step.
     606           0 :                         if(count==0){
     607           0 :                             temp1 << "if(i + gid0 < " <<mpDVR->GetArrayLength();
     608           0 :                             temp1 << "){\n\t\t";
     609           0 :                             temp1 << "tmp = legalize(";
     610           0 :                             temp1 <<  mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
     611           0 :                             temp1 << ", tmp);\n\t\t\t";
     612           0 :                             temp1 << "}\n\t";
     613             :                         }
     614           0 :                         ss << temp1.str();
     615             :                     }
     616           0 :                     ss << "}\n\t";
     617             :                 }
     618             :                 // The residual of mod outLoopSize
     619           0 :                 for(unsigned int count=nCurWindowSize/outLoopSize*outLoopSize; count < nCurWindowSize; count++){
     620           0 :                     ss << "i = "<<count<<";\n\t";
                                           // Same build-once / emit-many scheme as above, keyed on the
                                           // first residual index.
     621           0 :                     if(count==nCurWindowSize/outLoopSize*outLoopSize){
     622           0 :                         temp2 << "if(i + gid0 < " << mpDVR->GetArrayLength();
     623           0 :                         temp2 << "){\n\t\t";
     624           0 :                         temp2 << "tmp = legalize(";
     625           0 :                         temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
     626           0 :                         temp2 << ", tmp);\n\t\t\t";
     627           0 :                         temp2 << "}\n\t";
     628             :                     }
     629           0 :                     ss << temp2.str();
     630             :                 }
     631           0 :                 ss << "} // to scope the int i declaration\n";
     632           0 :                 needBody = false;
     633           0 :                 return nCurWindowSize;
     634             :             }
     635             :             // (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
                                   // Same unrolled layout as the previous case, but the emitted statements
                                   // carry no array-length guard.
     636             :             else {
     637           0 :                 ss << "//else situation \n\t";
     638           0 :                 ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
     639           0 :                 ss << "{int i;\n\t";
     640           0 :                 std::stringstream temp1,temp2;
     641           0 :                 int outLoopSize = UNROLLING_FACTOR;
     642           0 :                 if (nCurWindowSize/outLoopSize != 0){
     643           0 :                     ss << "for(int outLoop=0; outLoop<" << nCurWindowSize/outLoopSize<< "; outLoop++){\n\t";
     644           0 :                     for(int count=0; count < outLoopSize; count++){
     645           0 :                         ss << "i = outLoop*"<<outLoopSize<<"+"<<count<<";\n\t";
     646           0 :                         if(count==0){
     647           0 :                             temp1 << "tmp = legalize(";
     648           0 :                             temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
     649           0 :                             temp1 << ", tmp);\n\t\t\t";
     650             :                         }
     651           0 :                         ss << temp1.str();
     652             :                     }
     653           0 :                     ss << "}\n\t";
     654             :                 }
     655             :                 // The residual of mod outLoopSize
     656           0 :                 for(unsigned int count=nCurWindowSize/outLoopSize*outLoopSize; count < nCurWindowSize; count++){
     657           0 :                     ss << "i = "<<count<<";\n\t";
     658           0 :                     if(count==nCurWindowSize/outLoopSize*outLoopSize){
     659           0 :                         temp2 << "tmp = legalize(";
     660           0 :                         temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
     661           0 :                         temp2 << ", tmp);\n\t\t\t";
     662             :                     }
     663           0 :                     ss << temp2.str();
     664             :                 }
     665           0 :                 ss << "} // to scope the int i declaration\n";
     666           0 :                 needBody = false;
     667           0 :                 return nCurWindowSize;
     668             :             }
     669             :         }
     670             : #endif
     671             : }
     672           0 :     ~DynamicKernelSlidingArgument()
     673             :     {
                               // Releases the secondary OpenCL buffer if one is held and clears the
                               // handle afterwards.
     674           0 :         if (mpClmem2)
     675             :         {
     676           0 :             clReleaseMemObject(mpClmem2);
     677           0 :             mpClmem2 = NULL;
     678             :         }
     679           0 :     }
     680             : 
     681           0 :     size_t GetArrayLength(void) const {return mpDVR->GetArrayLength(); } // Array length as reported by the referenced DoubleVectorRef token.
     682             : 
     683           0 :     size_t GetWindowSize(void) const {return mpDVR->GetRefRowSize(); } // Window size, i.e. the token's reference row count.
     684             : 
     685           0 :     size_t GetStartFixed(void) const {return bIsStartFixed; } // Returns the bool flag bIsStartFixed converted to size_t (0 or 1).
     686             : 
     687           0 :     size_t GetEndFixed(void) const {return bIsEndFixed; } // Returns the bool flag bIsEndFixed converted to size_t (0 or 1).
     688             : 
     689             : protected:
     690             :     bool bIsStartFixed, bIsEndFixed; // Select the loop shape in GenReductionLoopHeader; presumably mirror mpDVR->IsStartFixed()/IsEndFixed() -- set outside this view, confirm.
     691             :     const formula::DoubleVectorRefToken *mpDVR; // Token describing the referenced range; queried for array length and window size.
     692             :     // from parent nodes
     693             :     boost::shared_ptr<SlidingFunctionBase> mpCodeGen; // Supplies GetBottom() and Gen2() for the generated reduction statements.
     694             :     // controls whether to invoke the reduction kernel during marshaling or not
     695             :     cl_mem mpClmem2; // Secondary OpenCL buffer; released in the destructor when non-null.
     696             : };
     697             : 
     698             : /// A mixed string/numeric vector
                       ///
                       /// Wraps two sliding-window arguments built from the same formula tree node:
                       /// a numeric one named @c s and a string one named @c s + "s". Every
                       /// generator method forwards to both, numeric first.
     699           0 : class DynamicKernelMixedSlidingArgument : public VectorRef
     700             : {
     701             : public:
                           /// Builds both sub-arguments from the same node, code generator and index.
     702           0 :     DynamicKernelMixedSlidingArgument(const std::string &s,
     703             :         FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen,
     704             :         int index = 0):
     705             :         VectorRef(s, ft),
     706             :         mDoubleArgument(s, ft, CodeGen, index),
     707           0 :         mStringArgument(s+"s", ft, CodeGen, index) {}
                           /// Emits the numeric declaration, ", ", then the string declaration.
     708           0 :     virtual void GenSlidingWindowDecl(std::stringstream& ss) const SAL_OVERRIDE
     709             :     {
     710           0 :         mDoubleArgument.GenSlidingWindowDecl(ss);
     711           0 :         ss << ", ";
     712           0 :         mStringArgument.GenSlidingWindowDecl(ss);
     713           0 :     }
                           /// Intentionally emits nothing for this argument type.
     714           0 :     virtual void GenSlidingWindowFunction(std::stringstream &) SAL_OVERRIDE {}
     715             :     /// Generate declaration
     716           0 :     virtual void GenDecl(std::stringstream &ss) const SAL_OVERRIDE
     717             :     {
     718           0 :         mDoubleArgument.GenDecl(ss);
     719           0 :         ss << ", ";
     720           0 :         mStringArgument.GenDecl(ss);
     721           0 :     }
                           /// Emits both declaration references, separated by "," (no space).
     722           0 :     virtual void GenDeclRef(std::stringstream &ss) const SAL_OVERRIDE
     723             :     {
     724           0 :         mDoubleArgument.GenDeclRef(ss);
     725           0 :         ss << ",";
     726           0 :         mStringArgument.GenDeclRef(ss);
     727           0 :     }
                           /// Returns the expression `(!isNan(D)?D:S)`, where D and S are the numeric
                           /// and string element references; the bool parameter is ignored.
     728           0 :     virtual std::string GenSlidingWindowDeclRef(bool) const SAL_OVERRIDE
     729             :     {
     730           0 :         std::stringstream ss;
     731           0 :         ss << "(!isNan(" << mDoubleArgument.GenSlidingWindowDeclRef();
     732           0 :         ss << ")?" << mDoubleArgument.GenSlidingWindowDeclRef();
     733           0 :         ss << ":" << mStringArgument.GenSlidingWindowDeclRef();
     734           0 :         ss << ")";
     735           0 :         return ss.str();
     736             :     }
                           /// Returns only the numeric element reference.
     737           0 :     virtual std::string GenDoubleSlidingWindowDeclRef(bool=false) const SAL_OVERRIDE
     738             :     {
     739           0 :         std::stringstream ss;
     740           0 :         ss << mDoubleArgument.GenSlidingWindowDeclRef();
     741           0 :         return ss.str();
     742             :     }
                           /// Returns only the string element reference.
     743           0 :     virtual std::string GenStringSlidingWindowDeclRef(bool=false) const SAL_OVERRIDE
     744             :     {
     745           0 :         std::stringstream ss;
     746           0 :         ss << mStringArgument.GenSlidingWindowDeclRef();
     747           0 :         return ss.str();
     748             :     }
                           /// Marshals the numeric argument at @p argno, then the string argument at
                           /// the following free slot; returns the sum of both Marshal() results.
     749           0 :     virtual size_t Marshal(cl_kernel k, int argno, int vw, cl_program p) SAL_OVERRIDE
     750             :     {
     751           0 :         int i = mDoubleArgument.Marshal(k, argno, vw, p);
     752           0 :         i += mStringArgument.Marshal(k, argno + i, vw, p);
     753           0 :         return i;
     754             :     }
     755             : protected:
     756             :     DynamicKernelSlidingArgument<VectorRef> mDoubleArgument;
     757             :     DynamicKernelSlidingArgument<DynamicKernelStringArgument> mStringArgument;
     758             : };
     759             : /// Holds the symbol table for a given dynamic kernel
                       ///
                       /// Arguments are kept both in a map keyed by formula token pointer
                       /// (mSymbols) and in a list (mParams); the list is what gets iterated
                       /// when generating helper functions and when marshaling.
     760           0 : class SymbolTable {
     761             : public:
     762             :     typedef std::map<const formula::FormulaToken *,
     763             :         boost::shared_ptr<DynamicKernelArgument> > ArgumentMap;
     764             :     // This avoids instability caused by using pointer as the key type
     765             :     typedef std::list< boost::shared_ptr<DynamicKernelArgument> > ArgumentList;
                           /// Starts with the id counter at zero and empty containers.
     766           0 :     SymbolTable(void):mCurId(0) {}
                           /// Declared here, defined elsewhere in the file.
     767             :     template <class T>
     768             :     const DynamicKernelArgument *DeclRefArg(FormulaTreeNodeRef, SlidingFunctionBase* pCodeGen);
     769             :     /// Used to generate sliding window helpers
                           /// Calls GenSlidingWindowFunction() on every entry of mParams in list
                           /// order, writing a newline after each one.
     770           0 :     void DumpSlidingWindowFunctions(std::stringstream &ss)
     771             :     {
     772           0 :         for(ArgumentList::iterator it = mParams.begin(), e= mParams.end(); it!=e;
     773             :             ++it) {
     774           0 :             (*it)->GenSlidingWindowFunction(ss);
     775           0 :             ss << "\n";
     776             :         }
     777           0 :     }
     778             :     /// Memory mapping from host to device and pass buffers to the given kernel as
     779             :     /// arguments
     780             :     void Marshal(cl_kernel, int, cl_program);
     781             :     // number of result items.
     782             :     static int nR;
     783             : private:
     784             :     unsigned int mCurId;
     785             :     ArgumentMap mSymbols;
     786             :     ArgumentList mParams;
     787             : };
     788             : int SymbolTable::nR = 0; // Out-of-class definition of the static result-item counter; starts at 0.
     789             : 
     790           0 : void SymbolTable::Marshal(cl_kernel k, int nVectorWidth, cl_program pProgram)
     791             : {
                           // Walks mParams in list order and lets each argument bind itself to the
                           // kernel. Each Marshal() call returns how many kernel argument slots it
                           // used, which advances the running index for the next argument.
     792           0 :     int i = 1; //The first argument is reserved for results
     793           0 :     for(ArgumentList::iterator it = mParams.begin(), e= mParams.end(); it!=e;
     794             :             ++it) {
     795           0 :         i+=(*it)->Marshal(k, i, nVectorWidth, pProgram);
     796             :     }
     797           0 : }
     798             : 
     799             : /// Handling a Double Vector that is used as a sliding window input
     800             : /// Performs parallel reduction based on given operator
     801             : template<class Base>
     802             : class ParallelReductionVectorRef: public Base
     803             : {
     804             : public:
     805           0 :     ParallelReductionVectorRef(const std::string &s,
     806             :         FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen,
     807             :         int index=0):
     808           0 :         Base(s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
     809             :     {
                               // Only double-vector references are supported; any other token type
                               // is rejected by throwing Unhandled.
     810           0 :         FormulaToken *t = ft->GetFormulaToken();
     811           0 :         if (t->GetType() != formula::svDoubleVectorRef)
     812           0 :             throw Unhandled();
     813           0 :         mpDVR = static_cast<const formula::DoubleVectorRefToken *>(t);
                               // Cache the fixed-start / fixed-end flags of the reference.
     814           0 :         bIsStartFixed = mpDVR->IsStartFixed();
     815           0 :         bIsEndFixed = mpDVR->IsEndFixed();
     816           0 :     }
     817             :     /// Emit the definition for the auxiliary reduction kernel
                           ///
                           /// Writes OpenCL kernel source into @p ss. When the code generator is not
                           /// an OpAverage, one kernel named "<name>_reduction" is emitted that
                           /// combines elements with mpCodeGen->Gen2(). For OpAverage two kernels are
                           /// emitted instead: "<name>_sum_reduction" (adds elements) and
                           /// "<name>_count_reduction" (adds 1.0 per element that is not NaN).
                           ///
                           /// All emitted kernels share one layout: the work-group index
                           /// get_group_id(1) selects the output slot, the window bounds are derived
                           /// from the fixed-start / fixed-end flags and clamped to arrayLength, the
                           /// input is consumed in passes of 512 elements (each work item reads
                           /// positions lidx and lidx + 256), partial values are combined through the
                           /// 256-entry __local buffer shm_buf, and work item 0 writes the result.
     818           0 :     virtual void GenSlidingWindowFunction(std::stringstream &ss) {
     819           0 :       if ( !dynamic_cast<OpAverage*>(mpCodeGen.get()))
     820             :       {
     821           0 :         std::string name = Base::GetName();
     822           0 :         ss << "__kernel void "<<name;
     823           0 :         ss << "_reduction(__global double* A, "
     824             :             "__global double *result,int arrayLength,int windowSize){\n";
     825           0 :         ss << "    double tmp, current_result =" <<
     826           0 :             mpCodeGen->GetBottom();
     827           0 :         ss << ";\n";
     828           0 :         ss << "    int writePos = get_group_id(1);\n";
     829           0 :         ss << "    int lidx = get_local_id(0);\n";
     830           0 :         ss << "    __local double shm_buf[256];\n";
                               // Window start: 0 for a fixed start, otherwise the group index.
     831           0 :         if (mpDVR->IsStartFixed())
     832           0 :             ss << "    int offset = 0;\n";
     833             :         else // if (!mpDVR->IsStartFixed())
     834           0 :             ss << "    int offset = get_group_id(1);\n";
                               // Window end (exclusive) for each start/end combination.
     835           0 :         if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
     836           0 :             ss << "    int end = windowSize;\n";
     837           0 :         else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
     838           0 :             ss << "    int end = offset + windowSize;\n";
     839           0 :         else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
     840           0 :             ss << "    int end = windowSize + get_group_id(1);\n";
     841           0 :         else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
     842           0 :             ss << "    int end = windowSize;\n";
     843           0 :         ss << "    end = min(end, arrayLength);\n";
     844             : 
     845           0 :         ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
                               // One pass per 512 input elements; tmp restarts from the bottom value.
     846           0 :         ss << "    int loop = arrayLength/512 + 1;\n";
     847           0 :         ss << "    for (int l=0; l<loop; l++){\n";
     848           0 :         ss << "    tmp = "<< mpCodeGen->GetBottom() << ";\n";
     849           0 :         ss << "    int loopOffset = l*512;\n";
                               // Each work item folds in up to two elements, 256 positions apart.
     850           0 :         ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
     851           0 :         ss << "        tmp = legalize(" << mpCodeGen->Gen2(
     852           0 :                 "A[loopOffset + lidx + offset]", "tmp") <<", tmp);\n";
     853           0 :         ss << "        tmp = legalize(" << mpCodeGen->Gen2(
     854           0 :                 "A[loopOffset + lidx + offset + 256]", "tmp") <<", tmp);\n";
     855           0 :         ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
     856           0 :         ss << "        tmp = legalize(" << mpCodeGen->Gen2(
     857           0 :                 "A[loopOffset + lidx + offset]", "tmp") <<", tmp);\n";
     858           0 :         ss << "    shm_buf[lidx] = tmp;\n";
     859           0 :         ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
                               // Halving loop over shm_buf, starting at a stride of 128.
     860           0 :         ss << "    for (int i = 128; i >0; i/=2) {\n";
     861           0 :         ss << "        if (lidx < i)\n";
     862           0 :         ss << "            shm_buf[lidx] = ";
     863             :         // Special case count
     864           0 :         if (dynamic_cast<OpCount*>(mpCodeGen.get()))
     865           0 :             ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
     866             :         else
     867           0 :             ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]")<<";\n";
     868           0 :         ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
     869           0 :         ss << "    }\n";
                               // Work item 0 folds this pass's value into the running result.
     870           0 :         ss << "        if (lidx == 0)\n";
     871           0 :         ss << "            current_result =";
     872           0 :         if (dynamic_cast<OpCount*>(mpCodeGen.get()))
     873           0 :             ss << "current_result + shm_buf[0]";
     874             :         else
     875           0 :             ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
     876           0 :         ss << ";\n";
     877           0 :         ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
     878           0 :         ss << "    }\n";
     879           0 :         ss << "    if (lidx == 0)\n";
     880           0 :         ss << "        result[writePos] = current_result;\n";
     881           0 :         ss << "}\n";
     882             :       }
     883             :       else{
                               // OpAverage: emit a sum kernel followed by a count kernel. Both repeat
                               // the layout above with the combine step hard-coded.
     884           0 :         std::string name = Base::GetName();
     885             :         /*sum reduction*/
     886           0 :         ss << "__kernel void "<<name<<"_sum";
     887           0 :         ss << "_reduction(__global double* A, "
     888             :             "__global double *result,int arrayLength,int windowSize){\n";
     889           0 :         ss << "    double tmp, current_result =" <<
     890           0 :             mpCodeGen->GetBottom();
     891           0 :         ss << ";\n";
     892           0 :         ss << "    int writePos = get_group_id(1);\n";
     893           0 :         ss << "    int lidx = get_local_id(0);\n";
     894           0 :         ss << "    __local double shm_buf[256];\n";
     895           0 :         if (mpDVR->IsStartFixed())
     896           0 :             ss << "    int offset = 0;\n";
     897             :         else // if (!mpDVR->IsStartFixed())
     898           0 :             ss << "    int offset = get_group_id(1);\n";
     899           0 :         if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
     900           0 :             ss << "    int end = windowSize;\n";
     901           0 :         else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
     902           0 :             ss << "    int end = offset + windowSize;\n";
     903           0 :         else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
     904           0 :             ss << "    int end = windowSize + get_group_id(1);\n";
     905           0 :         else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
     906           0 :             ss << "    int end = windowSize;\n";
     907           0 :         ss << "    end = min(end, arrayLength);\n";
     908           0 :         ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
     909           0 :         ss << "    int loop = arrayLength/512 + 1;\n";
     910           0 :         ss << "    for (int l=0; l<loop; l++){\n";
     911           0 :         ss << "    tmp = "<< mpCodeGen->GetBottom() << ";\n";
     912           0 :         ss << "    int loopOffset = l*512;\n";
                               // Sum kernel: elements are added to tmp directly instead of via Gen2().
     913           0 :         ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
     914           0 :         ss << "        tmp = legalize(";
     915           0 :         ss << "(A[loopOffset + lidx + offset]+ tmp)";
     916           0 :         ss << ", tmp);\n";
     917           0 :         ss << "        tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
     918           0 :         ss << ", tmp);\n";
     919           0 :         ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
     920           0 :         ss << "        tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
     921           0 :         ss << ", tmp);\n";
     922           0 :         ss << "    shm_buf[lidx] = tmp;\n";
     923           0 :         ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
     924           0 :         ss << "    for (int i = 128; i >0; i/=2) {\n";
     925           0 :         ss << "        if (lidx < i)\n";
     926           0 :         ss << "            shm_buf[lidx] = ";
     927           0 :         ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
     928           0 :         ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
     929           0 :         ss << "    }\n";
     930           0 :         ss << "        if (lidx == 0)\n";
     931           0 :         ss << "            current_result =";
     932           0 :         ss << "current_result + shm_buf[0]";
     933           0 :         ss << ";\n";
     934           0 :         ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
     935           0 :         ss << "    }\n";
     936           0 :         ss << "    if (lidx == 0)\n";
     937           0 :         ss << "        result[writePos] = current_result;\n";
     938           0 :         ss << "}\n";
     939             :         /*count reduction*/
     940           0 :         ss << "__kernel void "<<name<<"_count";
     941           0 :         ss << "_reduction(__global double* A, "
     942             :             "__global double *result,int arrayLength,int windowSize){\n";
     943           0 :         ss << "    double tmp, current_result =" <<
     944           0 :             mpCodeGen->GetBottom();
     945           0 :         ss << ";\n";
     946           0 :         ss << "    int writePos = get_group_id(1);\n";
     947           0 :         ss << "    int lidx = get_local_id(0);\n";
     948           0 :         ss << "    __local double shm_buf[256];\n";
     949           0 :         if (mpDVR->IsStartFixed())
     950           0 :             ss << "    int offset = 0;\n";
     951             :         else // if (!mpDVR->IsStartFixed())
     952           0 :             ss << "    int offset = get_group_id(1);\n";
     953           0 :         if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
     954           0 :             ss << "    int end = windowSize;\n";
     955           0 :         else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
     956           0 :             ss << "    int end = offset + windowSize;\n";
     957           0 :         else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
     958           0 :             ss << "    int end = windowSize + get_group_id(1);\n";
     959           0 :         else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
     960           0 :             ss << "    int end = windowSize;\n";
     961           0 :         ss << "    end = min(end, arrayLength);\n";
     962           0 :         ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
     963           0 :         ss << "    int loop = arrayLength/512 + 1;\n";
     964           0 :         ss << "    for (int l=0; l<loop; l++){\n";
     965           0 :         ss << "    tmp = "<< mpCodeGen->GetBottom() << ";\n";
     966           0 :         ss << "    int loopOffset = l*512;\n";
                               // Count kernel: tmp grows by 1.0 for every element that is not NaN.
     967           0 :         ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
     968           0 :         ss << "        tmp = legalize((isNan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
     969           0 :         ss << ", tmp);\n";
     970           0 :         ss << "        tmp = legalize((isNan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
     971           0 :         ss << ", tmp);\n";
     972           0 :         ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
     973           0 :         ss << "        tmp = legalize((isNan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
     974           0 :         ss << ", tmp);\n";
     975           0 :         ss << "    shm_buf[lidx] = tmp;\n";
     976           0 :         ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
     977           0 :         ss << "    for (int i = 128; i >0; i/=2) {\n";
     978           0 :         ss << "        if (lidx < i)\n";
     979           0 :         ss << "            shm_buf[lidx] = ";
     980           0 :         ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
     981           0 :         ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
     982           0 :         ss << "    }\n";
     983           0 :         ss << "        if (lidx == 0)\n";
     984           0 :         ss << "            current_result =";
                               // NOTE(review): the next two writes produce ";;" in the kernel text, i.e.
                               // one extra empty statement after the assignment (the sum kernel emits
                               // a single ';' at the same spot).
     985           0 :         ss << "current_result + shm_buf[0];";
     986           0 :         ss << ";\n";
     987           0 :         ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
     988           0 :         ss << "    }\n";
     989           0 :         ss << "    if (lidx == 0)\n";
     990           0 :         ss << "        result[writePos] = current_result;\n";
     991           0 :         ss << "}\n";
     992             :      }
     993             : 
     994           0 :     }
     995           0 :     virtual std::string GenSlidingWindowDeclRef(bool=false) const
     996             :     {
                               // Returns the element-access expression used in generated code:
                               // "<name>[i + gid0]" when neither window end is fixed, otherwise
                               // "<name>[i]". The bool parameter is unused.
     997           0 :         std::stringstream ss;
     998           0 :         if (!bIsStartFixed && !bIsEndFixed)
     999           0 :             ss << Base::GetName() << "[i + gid0]";
    1000             :         else
    1001           0 :             ss << Base::GetName() << "[i]";
    1002           0 :         return ss.str();
    1003             :     }
    1004             :     /// Controls how the elements in the DoubleVectorRef are traversed
                           ///
                           /// Unlike a loop-based traversal, this variant emits no loop at all: it
                           /// writes a single `tmp = ...` statement that combines `tmp` with
                           /// "<name>[gid0]" and sets @p needBody to false. Presumably "<name>[gid0]"
                           /// holds the value precomputed by the reduction kernel -- the buffer
                           /// binding is not visible in this function, confirm against Marshal().
                           ///
                           /// @param ss        stream receiving the generated kernel source
                           /// @param needBody  out: always set to false
                           /// @return the window size, i.e. mpDVR->GetRefRowSize()
    1005           0 :     virtual size_t GenReductionLoopHeader(
    1006             :         std::stringstream &ss, bool &needBody)
    1007             :     {
    1008             :         assert(mpDVR);
    1009           0 :         size_t nCurWindowSize = mpDVR->GetRefRowSize();
    1010           0 :         std::string temp = Base::GetName() + "[gid0]";
    1011           0 :         ss << "tmp = ";
    1012             :         // Special case count
                               // OpAverage: besides the combine, nCount is decremented by one and then
                               // increased by "<name>[gid0 + nR]", where nR is SymbolTable::nR at
                               // generation time.
                               // NOTE(review): this branch already ends its output with ";\n", so the
                               // unconditional ";\n\t" below adds an empty statement to the kernel text.
    1013           0 :         if ( dynamic_cast<OpAverage*>(mpCodeGen.get()))
    1014             :         {
    1015           0 :             ss << mpCodeGen->Gen2(temp, "tmp")<<";\n";
    1016           0 :             ss <<"nCount = nCount-1;\n";
    1017           0 :             ss <<"nCount = nCount +";/*re-assign nCount from count reduction*/
    1018           0 :             ss << Base::GetName()<<"[gid0+"<<SymbolTable::nR<<"]"<<";\n";
    1019             :         }
                               // OpCount: plain addition instead of Gen2().
    1020           0 :         else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
    1021           0 :             ss << temp << "+ tmp";
    1022             :         else
    1023           0 :             ss << mpCodeGen->Gen2(temp, "tmp");
    1024           0 :         ss << ";\n\t";
    1025           0 :         needBody = false;
    1026           0 :         return nCurWindowSize;
    1027             :     }
    1028             : 
    1029           0 :     virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram)
                           // Runs the reduction kernel(s) for this range and binds the buffer that
                           // holds their output (mpClmem2) to argument `argno` of kernel `k`.
                           //   k         - kernel whose argument `argno` receives mpClmem2
                           //   argno     - index of that argument in `k`
                           //   w         - element count used to size the result buffer; also the
                           //               second dimension of the NDRange launched below
                           //   mpProgram - program the "<name>_*reduction" kernels are created from
                           // Returns 1. Throws Unhandled when the numeric array is NULL and
                           // OpenCLError when one of the checked OpenCL calls fails.
    1030             :     {
    1031             :         assert(Base::mpClmem == NULL);
    1032             :         // Obtain cl context
    1033             :         KernelEnv kEnv;
    1034           0 :         OpenclDevice::setKernelEnv(&kEnv);
    1035             :         cl_int err;
    1036           0 :         size_t nInput = mpDVR->GetArrayLength();
    1037           0 :         size_t nCurWindowSize = mpDVR->GetRefRowSize();
    1038             :         // create clmem buffer
    1039           0 :         if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == NULL)
    1040           0 :             throw Unhandled();
                               // The input buffer wraps the host array directly (CL_MEM_USE_HOST_PTR);
                               // the const_cast is only there to satisfy the clCreateBuffer signature.
    1041             :         double *pHostBuffer = const_cast<double*>(
    1042           0 :                 mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
    1043           0 :         size_t szHostBuffer = nInput * sizeof(double);
    1044           0 :         Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
    1045             :                 (cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR,
    1046             :                 szHostBuffer,
    1047           0 :                 pHostBuffer, &err);
                               // Result buffer: w doubles, written by the reduction kernel.
                               // NOTE(review): this call passes NULL for the error-code pointer, so the
                               // check that follows only reflects the first clCreateBuffer; a failure
                               // to allocate mpClmem2 is not detected here.
    1048           0 :         mpClmem2 = clCreateBuffer(kEnv.mpkContext,
    1049             :         CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR,
    1050           0 :                 sizeof(double)*w, NULL, NULL);
    1051           0 :         if (CL_SUCCESS != err)
    1052           0 :             throw OpenCLError(err, __FILE__, __LINE__);
    1053             :         // reproduce the reduction function name
                               // For OpAverage the first pass uses "<name>_sum_reduction"; every other
                               // code generator uses "<name>_reduction".
    1054           0 :         std::string kernelName;
    1055           0 :         if ( !dynamic_cast<OpAverage*>(mpCodeGen.get()))
    1056           0 :            kernelName = Base::GetName() + "_reduction";
    1057             :         else
    1058           0 :            kernelName = Base::GetName() + "_sum_reduction";
                               // NOTE(review): no clReleaseKernel call for redKernel appears in this
                               // method; confirm whether the kernel object is released elsewhere.
    1059           0 :         cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
    1060           0 :         if (err != CL_SUCCESS)
    1061           0 :             throw OpenCLError(err, __FILE__, __LINE__);
    1062             :         // set kernel arg of reduction kernel
    1063             :         // TODO(Wei Wei): use unique name for kernel
                               // Argument 0: input buffer, 1: result buffer, 2: nInput, 3: nCurWindowSize.
    1064           0 :         cl_mem buf = Base::GetCLBuffer();
    1065           0 :         err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
    1066           0 :                 (void *)&buf);
    1067           0 :         if (CL_SUCCESS != err)
    1068           0 :             throw OpenCLError(err, __FILE__, __LINE__);
    1069             : 
    1070           0 :         err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), (void *)&mpClmem2);
    1071           0 :         if (CL_SUCCESS != err)
    1072           0 :             throw OpenCLError(err, __FILE__, __LINE__);
    1073             : 
                               // NOTE(review): nInput and nCurWindowSize are size_t, but only
                               // sizeof(cl_int) bytes are handed to clSetKernelArg; presumably this
                               // relies on a little-endian host and values that fit in a cl_int - confirm.
    1074           0 :         err = clSetKernelArg(redKernel, 2, sizeof(cl_int), (void*)&nInput);
    1075           0 :         if (CL_SUCCESS != err)
    1076           0 :             throw OpenCLError(err, __FILE__, __LINE__);
    1077             : 
    1078           0 :         err = clSetKernelArg(redKernel, 3, sizeof(cl_int), (void*)&nCurWindowSize);
    1079           0 :         if (CL_SUCCESS != err)
    1080           0 :             throw OpenCLError(err, __FILE__, __LINE__);
    1081             : 
    1082             :         // set work group size and execute
                               // 2-D launch: 256 work-items in dimension 0 (one work-group of 256),
                               // w in dimension 1; clFinish blocks until the kernel has completed.
    1083           0 :         size_t global_work_size[] = {256, (size_t)w };
    1084           0 :         size_t local_work_size[] = {256, 1};
    1085           0 :         err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
    1086           0 :                 global_work_size, local_work_size, 0, NULL, NULL);
    1087           0 :         if (CL_SUCCESS != err)
    1088           0 :             throw OpenCLError(err, __FILE__, __LINE__);
    1089           0 :         err = clFinish(kEnv.mpkCmdQueue);
    1090           0 :         if (CL_SUCCESS != err)
    1091           0 :             throw OpenCLError(err, __FILE__, __LINE__);
    1092           0 :         if ( dynamic_cast<OpAverage*>(mpCodeGen.get()))
    1093             :         {
    1094             :              /*average need more reduction kernel for count computing*/
                                   // Host staging area of 2*w doubles: the first w receive the output of
                                   // the "_sum_reduction" pass, the second w the output of the
                                   // "_count_reduction" pass run below.
    1095           0 :             boost::scoped_array<double> pAllBuffer(new double[2*w]);
    1096             :             double *resbuf = (double*)clEnqueueMapBuffer(kEnv.mpkCmdQueue,
    1097             :                     mpClmem2,
    1098             :                     CL_TRUE, CL_MAP_READ, 0,
    1099             :                     sizeof(double)*w, 0, NULL, NULL,
    1100           0 :                     &err);
    1101           0 :             if (err != CL_SUCCESS)
    1102           0 :                 throw OpenCLError(err, __FILE__, __LINE__);
    1103             : 
    1104           0 :             for (int i=0 ; i < w; i++)
    1105           0 :                 pAllBuffer[i] = resbuf[i];
    1106           0 :             err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, NULL, NULL);
    1107           0 :             if (err != CL_SUCCESS)
    1108           0 :                 throw OpenCLError(err, __FILE__, __LINE__);
    1109             : 
                                   // Second pass: same arguments and launch geometry as the first,
                                   // but with the "<name>_count_reduction" kernel. redKernel is
                                   // overwritten; the first kernel handle is not released before that.
    1110           0 :             kernelName = Base::GetName() + "_count_reduction";
    1111           0 :             redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
    1112           0 :             if (err != CL_SUCCESS)
    1113           0 :                 throw OpenCLError(err, __FILE__, __LINE__);
    1114             :             // set kernel arg of reduction kernel
    1115           0 :             buf = Base::GetCLBuffer();
    1116           0 :             err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
    1117           0 :                     (void *)&buf);
    1118           0 :             if (CL_SUCCESS != err)
    1119           0 :                 throw OpenCLError(err, __FILE__, __LINE__);
    1120             : 
    1121           0 :             err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), (void *)&mpClmem2);
    1122           0 :             if (CL_SUCCESS != err)
    1123           0 :                 throw OpenCLError(err, __FILE__, __LINE__);
    1124             : 
    1125           0 :             err = clSetKernelArg(redKernel, 2, sizeof(cl_int), (void*)&nInput);
    1126           0 :             if (CL_SUCCESS != err)
    1127           0 :                 throw OpenCLError(err, __FILE__, __LINE__);
    1128             : 
    1129           0 :             err = clSetKernelArg(redKernel, 3, sizeof(cl_int), (void*)&nCurWindowSize);
    1130           0 :             if (CL_SUCCESS != err)
    1131           0 :                 throw OpenCLError(err, __FILE__, __LINE__);
    1132             : 
    1133             :             // set work group size and execute
    1134           0 :             size_t global_work_size1[] = {256, (size_t)w };
    1135           0 :             size_t local_work_size1[] = {256, 1};
    1136           0 :             err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
    1137           0 :                     global_work_size1, local_work_size1, 0, NULL, NULL);
    1138           0 :             if (CL_SUCCESS != err)
    1139           0 :                 throw OpenCLError(err, __FILE__, __LINE__);
    1140           0 :             err = clFinish(kEnv.mpkCmdQueue);
    1141           0 :             if (CL_SUCCESS != err)
    1142           0 :                 throw OpenCLError(err, __FILE__, __LINE__);
    1143           0 :             resbuf = (double*)clEnqueueMapBuffer(kEnv.mpkCmdQueue,
    1144             :                 mpClmem2,
    1145             :                 CL_TRUE, CL_MAP_READ, 0,
    1146             :                 sizeof(double)*w, 0, NULL, NULL,
    1147           0 :                 &err);
    1148           0 :             if (err != CL_SUCCESS)
    1149           0 :                 throw OpenCLError(err, __FILE__, __LINE__);
    1150           0 :             for (int i=0 ; i < w; i++)
    1151           0 :                 pAllBuffer[i+w] = resbuf[i];
                                   // NOTE(review): the result of this unmap is stored in err but never
                                   // tested; err is overwritten by the clCreateBuffer call below.
    1152           0 :             err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, NULL, NULL);
                                   // Replace the w-element result buffer with a 2*w-element one that is
                                   // initialised from pAllBuffer (CL_MEM_COPY_HOST_PTR copies the data at
                                   // creation time, so pAllBuffer may go out of scope afterwards).
    1153           0 :             if (mpClmem2)
    1154             :             {
    1155           0 :                 clReleaseMemObject(mpClmem2);
    1156           0 :                 mpClmem2 = NULL;
    1157             :             }
    1158           0 :             mpClmem2 = clCreateBuffer(kEnv.mpkContext,
    1159             :                 (cl_mem_flags) CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
    1160           0 :                 w*sizeof(double)*2, pAllBuffer.get(), &err);
    1161           0 :             if (CL_SUCCESS != err)
    1162           0 :                 throw OpenCLError(err, __FILE__, __LINE__);
    1163             :         }
    1164             :         // set kernel arg
                               // The caller's kernel receives the reduced result buffer, not the raw input.
    1165           0 :         err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&(mpClmem2));
    1166           0 :         if (CL_SUCCESS != err)
    1167           0 :             throw OpenCLError(err, __FILE__, __LINE__);
    1168           0 :         return 1;
    1169             :     }
    1170           0 :     ~ParallelReductionVectorRef()
                           // Releases the reduction result buffer (mpClmem2) if one is still held
                           // and clears the handle.
    1171             :     {
    1172           0 :         if (mpClmem2)
    1173             :         {
    1174           0 :             clReleaseMemObject(mpClmem2);
    1175           0 :             mpClmem2 = NULL;
    1176             :         }
    1177           0 :     }
    1178             : 
    1179             :     size_t GetArrayLength(void) const {return mpDVR->GetArrayLength(); } // forwards to the DoubleVectorRef token's GetArrayLength()
    1180             : 
    1181           0 :     size_t GetWindowSize(void) const {return mpDVR->GetRefRowSize(); } // window size is the token's GetRefRowSize()
    1182             : 
    1183             :     size_t GetStartFixed(void) const {return bIsStartFixed; } // returns the bool flag bIsStartFixed converted to size_t (0 or 1)
    1184             : 
    1185             :     size_t GetEndFixed(void) const {return bIsEndFixed; } // returns the bool flag bIsEndFixed converted to size_t (0 or 1)
    1186             : 
    1187             : protected:
    1188             :     bool bIsStartFixed, bIsEndFixed; // flags returned by GetStartFixed() / GetEndFixed()
    1189             :     const formula::DoubleVectorRefToken *mpDVR; // token queried for array length, ref row size and the numeric arrays
    1190             :     // from parent nodes
    1191             :     boost::shared_ptr<SlidingFunctionBase> mpCodeGen; // Marshal() tests this for OpAverage to choose the reduction kernel names
    1192             :     // controls whether to invoke the reduction kernel during marshaling or not
                           // NOTE(review): the comment above does not describe a cl_mem. In Marshal()
                           // mpClmem2 is the buffer the reduction kernels write to and that is bound
                           // to the caller's kernel; the destructor releases it.
    1193             :     cl_mem mpClmem2;
    1194             : };
    1195             : 
    1196           0 : class Reduction: public SlidingFunctionBase
                       // Code generator for functions that fold all of their arguments into a single
                       // double with a binary operation (Gen2) starting from a neutral value
                       // (GetBottom). Subclasses that report isAverage() additionally get the result
                       // divided by an element count.
    1197             : {
    1198             : public:
    1199             :     typedef DynamicKernelSlidingArgument<VectorRef> NumericRange;
    1200             :     typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange;
    1201             :     typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange;
    1202             : 
                           // Writes to `ss` the source text of a function
                           //     double <sSymName>_<BinFuncName()>(<argument declarations>) { ... }
                           // that accumulates every entry of vSubArguments into `tmp` and returns it.
                           //   ss            - stream receiving the generated source
                           //   sSymName      - prefix of the generated function's name
                           //   vSubArguments - arguments to fold; each contributes its own declaration
                           //                   (GenSlidingWindowDecl) and reference expression
                           //                   (GenSlidingWindowDeclRef)
    1203           0 :     virtual void GenSlidingWindowFunction(std::stringstream &ss,
    1204             :             const std::string &sSymName, SubArguments &vSubArguments) SAL_OVERRIDE
    1205             :     {
    1206           0 :         ss << "\ndouble " << sSymName;
    1207           0 :         ss << "_"<< BinFuncName() <<"(";
    1208           0 :         for (unsigned i = 0; i < vSubArguments.size(); i++)
    1209             :         {
    1210           0 :             if (i)
    1211           0 :                 ss << ", ";
    1212           0 :             vSubArguments[i]->GenSlidingWindowDecl(ss);
    1213             :         }
                               // Prologue of the generated function: accumulator seeded with the
                               // neutral element, the work-item id, and (for averages) a counter.
    1214           0 :         ss << ") {\n\t";
    1215           0 :         ss << "double tmp = " << GetBottom() <<";\n\t";
    1216           0 :         ss << "int gid0 = get_global_id(0);\n\t";
    1217           0 :         if (isAverage())
    1218           0 :             ss << "int nCount = 0;\n\t";
    1219           0 :         ss << "double tmpBottom;\n\t";
                               // Arguments are visited from last to first. nItems sums what the range
                               // headers return plus one per scalar argument; it is only used by the
                               // non-ISNAN average division at the end.
    1220           0 :         unsigned i = vSubArguments.size();
    1221           0 :         size_t nItems = 0;
    1222           0 :         while (i--)
    1223             :         {
                                   // Range arguments emit their own loop header; when the header reports
                                   // needBody == false nothing more is generated for that argument.
    1224           0 :             if (NumericRange *NR =
    1225           0 :                     dynamic_cast<NumericRange *> (vSubArguments[i].get()))
    1226             :             {
    1227             :                 bool needBody;
    1228           0 :                 nItems += NR->GenReductionLoopHeader(ss, needBody);
    1229           0 :                 if (needBody == false) continue;
    1230             :             }
    1231           0 :             else if (ParallelNumericRange *PNR =
    1232           0 :                     dynamic_cast<ParallelNumericRange *> (vSubArguments[i].get()))
    1233             :             {
    1234             :                 //did not handle yet
    1235             :                 bool needBody;
    1236           0 :                 nItems += PNR->GenReductionLoopHeader(ss, needBody);
    1237           0 :                 if (needBody == false) continue;
    1238             :             }
    1239           0 :             else if (StringRange *SR =
    1240           0 :                     dynamic_cast<StringRange *> (vSubArguments[i].get()))
    1241             :             {
    1242             :                 //did not handle yet
    1243             :                 bool needBody;
    1244           0 :                 nItems += SR->GenReductionLoopHeader(ss, needBody);
    1245           0 :                 if (needBody == false) continue;
    1246             :             }
    1247             :             else
    1248             :             {
                                       // Anything that is not one of the range wrappers above must not
                                       // be a double vector ref (asserted below).
    1249           0 :                 FormulaToken *pCur = vSubArguments[i]->GetFormulaToken();
    1250             :                 assert(pCur);
    1251             :                 assert(pCur->GetType() != formula::svDoubleVectorRef);
    1252             : 
    1253           0 :                 if (pCur->GetType() == formula::svSingleVectorRef)
    1254             :                 {
                                           // With ISNAN: guard the access so work-items past the end of
                                           // the array skip it; nItems is not incremented on this path.
    1255             : #ifdef  ISNAN
    1256             :                     const formula::SingleVectorRefToken* pSVR =
    1257           0 :                         static_cast< const formula::SingleVectorRefToken* >(pCur);
    1258           0 :                     ss << "if (gid0 < " << pSVR->GetArrayLength() << "){\n\t\t";
    1259             : #else
    1260             :                     nItems += 1;
    1261             : #endif
    1262             :                 }
    1263           0 :                 else if (pCur->GetType() == formula::svDouble)
    1264             :                 {
    1265             : #ifdef  ISNAN
    1266           0 :                     ss << "{\n\t\t";
    1267             : #endif
    1268           0 :                     nItems += 1;
    1269             :                 }
    1270             :                 else
    1271             :                 {
    1272           0 :                     nItems += 1;
    1273             :                 }
    1274             :             }
    1275             : #ifdef  ISNAN
                                   // ocPush arguments get a NaN check: a NaN value is replaced by the
                                   // neutral element (tmpBottom) before being folded into tmp.
                                   // NOTE(review): the two closing braces emitted at the end of this
                                   // branch assume a block was opened earlier (range loop header, the
                                   // "if (gid0 < ...){" guard, or the bare "{"); the last else branch
                                   // above opens none - confirm ocPush tokens never take that path.
    1276           0 :             if(ocPush==vSubArguments[i]->GetFormulaToken()->GetOpCode())
    1277             :             {
    1278           0 :                 ss << "tmpBottom = " << GetBottom() << ";\n\t\t";
    1279           0 :                 ss << "if (isNan(";
    1280           0 :                 ss << vSubArguments[i]->GenSlidingWindowDeclRef();
    1281           0 :                 ss << "))\n\t\t\t";
    1282           0 :                 ss << "tmp = ";
    1283           0 :                 ss << Gen2("tmpBottom", "tmp") << ";\n\t\t";
    1284           0 :                 ss << "else{\n\t\t\t";
    1285           0 :                 ss << "tmp = ";
    1286           0 :                 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
    1287           0 :                 ss << ";\n\t\t\t";
    1288           0 :                 ss << "}\n\t";
    1289           0 :                 ss << "}\n\t";
    1290             :             }
    1291             :             else
    1292             :             {
    1293           0 :                 ss << "tmp = ";
    1294           0 :                 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
    1295           0 :                 ss << ";\n\t";
    1296             :             }
    1297             : #else
    1298             :             ss << "tmp = ";
    1299             :             // Generate the operation in binary form
    1300             :             ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
    1301             :             ss << ";\n\t";
    1302             : #endif
    1303             :         }
                               // Averages divide by the generated function's own nCount variable when
                               // ISNAN is defined, otherwise by the nItems total computed above.
    1304           0 :         ss << "return tmp";
    1305             : #ifdef  ISNAN
    1306           0 :         if (isAverage())
    1307           0 :             ss << "/(double)nCount";
    1308             : #else
    1309             :         if (isAverage())
    1310             :             ss << "/(double)"<<nItems;
    1311             : #endif
    1312           0 :         ss << ";\n}";
    1313           0 :     }
                           // Defaults: not an average; accepts numeric arguments but not strings.
    1314           0 :     virtual bool isAverage() const { return false; }
    1315           0 :     virtual bool takeString() const SAL_OVERRIDE { return false; }
    1316           0 :     virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
    1317             : };
    1318             : 
    1319             : // Strictly binary operators
                       // Code generator for operators that take exactly two arguments (asserted in
                       // GenSlidingWindowFunction) and combine them with Gen2.
    1320           0 : class Binary: public SlidingFunctionBase
    1321             : {
    1322             : public:
                           // Writes to `ss` the source text of a function
                           //     double <sSymName>_<BinFuncName()>(<arg0 decl>, <arg1 decl>) { ... }
                           // whose body evaluates Gen2(arg0, arg1) into `tmp` and returns it.
                           //   ss            - stream receiving the generated source
                           //   sSymName      - prefix of the generated function's name
                           //   vSubArguments - the two operands, in order
    1323           0 :     virtual void GenSlidingWindowFunction(std::stringstream &ss,
    1324             :             const std::string &sSymName, SubArguments &vSubArguments) SAL_OVERRIDE
    1325             :     {
    1326           0 :         ss << "\ndouble " << sSymName;
    1327           0 :         ss << "_"<< BinFuncName() <<"(";
    1328             :         assert(vSubArguments.size() == 2);
    1329           0 :         for (unsigned i = 0; i < vSubArguments.size(); i++)
    1330             :         {
    1331           0 :             if (i)
    1332           0 :                 ss << ", ";
    1333           0 :             vSubArguments[i]->GenSlidingWindowDecl(ss);
    1334             :         }
                               // The generated body declares gid0 and i before the operand expressions
                               // are emitted; presumably those expressions may refer to them - confirm
                               // against GenSlidingWindowDeclRef.
    1335           0 :         ss << ") {\n\t";
    1336           0 :         ss << "int gid0 = get_global_id(0), i = 0;\n\t";
    1337           0 :         ss << "double tmp = ";
    1338           0 :         ss << Gen2(vSubArguments[0]->GenSlidingWindowDeclRef(false),
    1339           0 :                 vSubArguments[1]->GenSlidingWindowDeclRef(false)) << ";\n\t";
    1340           0 :         ss << "return tmp;\n}";
    1341           0 :     }
                           // Binary operators accept both string and numeric operands.
    1342           0 :     virtual bool takeString() const SAL_OVERRIDE { return true; }
    1343           0 :     virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
    1344             : };
    1345             : 
    1346           0 : class SumOfProduct: public SlidingFunctionBase
    1347             : {
    1348             : public:
    1349           0 :     virtual void GenSlidingWindowFunction(std::stringstream &ss,
    1350             :             const std::string &sSymName, SubArguments &vSubArguments) SAL_OVERRIDE
    1351             :     {
    1352           0 :         size_t nCurWindowSize = 0;
    1353           0 :         FormulaToken *tmpCur = NULL;
    1354           0 :         const formula::DoubleVectorRefToken *pCurDVR = NULL;
    1355           0 :         ss << "\ndouble " << sSymName;
    1356           0 :         ss << "_"<< BinFuncName() <<"(";
    1357           0 :         for (unsigned i = 0; i < vSubArguments.size(); i++)
    1358             :         {
    1359           0 :             if (i)
    1360           0 :                 ss << ",";
    1361           0 :             vSubArguments[i]->GenSlidingWindowDecl(ss);
    1362           0 :             size_t nCurChildWindowSize = vSubArguments[i]->GetWindowSize();
    1363             :             nCurWindowSize = (nCurWindowSize < nCurChildWindowSize)?
    1364           0 :                 nCurChildWindowSize:nCurWindowSize;
    1365           0 :             tmpCur = vSubArguments[i]->GetFormulaToken();
    1366           0 :             if (  ocPush==tmpCur->GetOpCode() )
    1367             :             {
    1368             : 
    1369             :                 pCurDVR = static_cast<
    1370           0 :                     const formula::DoubleVectorRefToken*>(tmpCur);
    1371           0 :                 if ( !
    1372           0 :                     ( (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
    1373           0 :                     || (pCurDVR->IsStartFixed() && pCurDVR->IsEndFixed()) )
    1374             :                     )
    1375           0 :                     throw Unhandled();
    1376             :             }
    1377             :         }
    1378           0 :         ss << ") {\n";
    1379           0 :         ss << "    double tmp = 0.0;\n";
    1380           0 :         ss << "    int gid0 = get_global_id(0);\n";
    1381             : #ifndef UNROLLING_FACTOR
    1382             :         ss << "    int i ;\n";
    1383             :         ss << "    for (i = 0; i < "<< nCurWindowSize <<"; i++)\n";
    1384             :         ss << "    {\n";
    1385             :         for (unsigned i = 0; i < vSubArguments.size(); i++)
    1386             :         {
    1387             :             tmpCur = vSubArguments[i]->GetFormulaToken();
    1388             :             if(ocPush==tmpCur->GetOpCode())
    1389             :             {
    1390             :                 pCurDVR= static_cast<
    1391             :                     const formula::DoubleVectorRefToken *>(tmpCur);
    1392             :                 if(!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
    1393             :                 {
    1394             :                    ss << "        int currentCount";
    1395             :                    ss << i;
    1396             :                    ss <<" =i+gid0+1;\n";
    1397             :                 }
    1398             :                 else
    1399             :                 {
    1400             :                     ss << "        int currentCount";
    1401             :                     ss << i;
    1402             :                     ss << " =i+1;\n";
    1403             :                 }
    1404             :              }
    1405             :         }
    1406             :         ss << "        tmp += fsum(";
    1407             :         for (unsigned i = 0; i < vSubArguments.size(); i++)
    1408             :         {
    1409             :             if (i)
    1410             :                 ss << "*";
    1411             : #ifdef  ISNAN
    1412             :             if(ocPush==vSubArguments[i]->GetFormulaToken()->GetOpCode())
    1413             :             {
    1414             :                 ss <<"(";
    1415             :                 ss <<"(currentCount";
    1416             :                 ss << i;
    1417             :                 ss<< ">";
    1418             :                 if(vSubArguments[i]->GetFormulaToken()->GetType() ==
    1419             :                      formula::svSingleVectorRef)
    1420             :                 {
    1421             :                     const formula::SingleVectorRefToken* pSVR =
    1422             :                     static_cast< const formula::SingleVectorRefToken*>
    1423             :                          (vSubArguments[i]->GetFormulaToken());
    1424             :                     ss<<pSVR->GetArrayLength();
    1425             :                 }
    1426             :                 else if(vSubArguments[i]->GetFormulaToken()->GetType() ==
    1427             :                           formula::svDoubleVectorRef)
    1428             :                 {
    1429             :                     const formula::DoubleVectorRefToken* pSVR =
    1430             :                     static_cast< const formula::DoubleVectorRefToken*>
    1431             :                           (vSubArguments[i]->GetFormulaToken());
    1432             :                     ss<<pSVR->GetArrayLength();
    1433             :                 }
    1434             :                 ss << ")||isNan("<<vSubArguments[i]
    1435             :                     ->GenSlidingWindowDeclRef(true);
    1436             :                 ss << ")?0:";
    1437             :                 ss << vSubArguments[i]->GenSlidingWindowDeclRef(true);
    1438             :                 ss << ")";
    1439             :             }
    1440             :             else
    1441             :                 ss << vSubArguments[i]->GenSlidingWindowDeclRef(true);
    1442             : #else
    1443             :             ss << vSubArguments[i]->GenSlidingWindowDeclRef(true);
    1444             : #endif
    1445             :         }
    1446             :         ss << ", 0.0);\n\t}\n\t";
    1447             :         ss << "return tmp;\n";
    1448             :         ss << "}";
    1449             : #endif
    1450             : 
    1451             : #ifdef UNROLLING_FACTOR
    1452           0 :         ss << "\tint i;\n\t";
    1453           0 :         ss << "int currentCount0, currentCount1;\n\t";
    1454           0 :         std::stringstream temp3,temp4;
    1455           0 :         int outLoopSize = UNROLLING_FACTOR;
    1456           0 :         if (nCurWindowSize/outLoopSize != 0){
    1457           0 :             ss << "for(int outLoop=0; outLoop<" <<
    1458           0 :             nCurWindowSize/outLoopSize<< "; outLoop++){\n\t";
    1459           0 :             for(int count=0; count < outLoopSize; count++){
    1460           0 :                 ss << "i = outLoop*"<<outLoopSize<<"+"<<count<<";\n";
    1461           0 :                 if(count==0){
    1462           0 :                     for (unsigned i = 0; i < vSubArguments.size(); i++)
    1463             :                     {
    1464           0 :                         tmpCur = vSubArguments[i]->GetFormulaToken();
    1465           0 :                         if(ocPush==tmpCur->GetOpCode())
    1466             :                         {
    1467             :                             pCurDVR= static_cast<
    1468           0 :                                 const formula::DoubleVectorRefToken *>(tmpCur);
    1469           0 :                             if(!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
    1470             :                             {
    1471           0 :                                 temp3 << "        currentCount";
    1472           0 :                                 temp3 << i;
    1473           0 :                                 temp3 <<" =i+gid0+1;\n";
    1474             :                             }
    1475             :                             else
    1476             :                             {
    1477           0 :                                 temp3 << "        currentCount";
    1478           0 :                                 temp3 << i;
    1479           0 :                                 temp3 << " =i+1;\n";
    1480             :                             }
    1481             :                         }
    1482             :                     }
    1483             : 
    1484           0 :                     temp3 << "tmp = fsum(";
    1485           0 :                     for (unsigned i = 0; i < vSubArguments.size(); i++){
    1486           0 :                         if (i)
    1487           0 :                             temp3 << "*";
    1488           0 :                         if(ocPush==vSubArguments[i]->GetFormulaToken()->GetOpCode()){
    1489           0 :                             temp3 <<"(";
    1490           0 :                             temp3 <<"(currentCount";
    1491           0 :                             temp3 << i;
    1492           0 :                             temp3 << ">";
    1493           0 :                             if(vSubArguments[i]->GetFormulaToken()->GetType() ==
    1494             :                                     formula::svSingleVectorRef){
    1495             :                                 const formula::SingleVectorRefToken* pSVR =
    1496             :                                     static_cast< const formula::SingleVectorRefToken*>
    1497           0 :                                     (vSubArguments[i]->GetFormulaToken());
    1498           0 :                                 temp3<<pSVR->GetArrayLength();
    1499             :                             }
    1500           0 :                             else if(vSubArguments[i]->GetFormulaToken()->GetType() ==
    1501             :                                     formula::svDoubleVectorRef){
    1502             :                                 const formula::DoubleVectorRefToken* pSVR =
    1503             :                                     static_cast< const formula::DoubleVectorRefToken*>
    1504           0 :                                     (vSubArguments[i]->GetFormulaToken());
    1505           0 :                                 temp3<<pSVR->GetArrayLength();
    1506             :                             }
    1507           0 :                             temp3 << ")||isNan("<<vSubArguments[i]
    1508           0 :                                 ->GenSlidingWindowDeclRef(true);
    1509           0 :                             temp3 << ")?0:";
    1510           0 :                             temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
    1511           0 :                             temp3  << ")";
    1512             :                         }
    1513             :                         else
    1514           0 :                             temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
    1515             :                     }
    1516           0 :                     temp3 << ", tmp);\n\t";
    1517             :                 }
    1518           0 :                 ss << temp3.str();
    1519             :             }
    1520           0 :             ss << "}\n\t";
    1521             :         }
    1522             :         //The residual of mod outLoopSize
    1523           0 :         for(unsigned int count=nCurWindowSize/outLoopSize*outLoopSize;
    1524             :         count < nCurWindowSize; count++)
    1525             :         {
    1526           0 :             ss << "i =" <<count<<";\n";
    1527           0 :             if(count==nCurWindowSize/outLoopSize*outLoopSize){
    1528           0 :                 for (unsigned i = 0; i < vSubArguments.size(); i++)
    1529             :                 {
    1530           0 :                     tmpCur = vSubArguments[i]->GetFormulaToken();
    1531           0 :                     if(ocPush==tmpCur->GetOpCode())
    1532             :                     {
    1533             :                         pCurDVR= static_cast<
    1534           0 :                             const formula::DoubleVectorRefToken *>(tmpCur);
    1535           0 :                         if(!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
    1536             :                         {
    1537           0 :                             temp4 << "        currentCount";
    1538           0 :                             temp4 << i;
    1539           0 :                             temp4 <<" =i+gid0+1;\n";
    1540             :                         }
    1541             :                         else
    1542             :                         {
    1543           0 :                             temp4 << "        currentCount";
    1544           0 :                             temp4 << i;
    1545           0 :                             temp4 << " =i+1;\n";
    1546             :                         }
    1547             :                     }
    1548             :                 }
    1549             : 
    1550           0 :                 temp4 << "tmp = fsum(";
    1551           0 :                 for (unsigned i = 0; i < vSubArguments.size(); i++)
    1552             :                 {
    1553           0 :                     if (i)
    1554           0 :                         temp4 << "*";
    1555           0 :                     if(ocPush==vSubArguments[i]->GetFormulaToken()->GetOpCode())
    1556             :                     {
    1557           0 :                         temp4 <<"(";
    1558           0 :                         temp4 <<"(currentCount";
    1559           0 :                         temp4 << i;
    1560           0 :                         temp4 << ">";
    1561           0 :                        if(vSubArguments[i]->GetFormulaToken()->GetType() ==
    1562             :                                 formula::svSingleVectorRef)
    1563             :                         {
    1564             :                             const formula::SingleVectorRefToken* pSVR =
    1565             :                                 static_cast< const formula::SingleVectorRefToken*>
    1566           0 :                                 (vSubArguments[i]->GetFormulaToken());
    1567           0 :                             temp4<<pSVR->GetArrayLength();
    1568             :                         }
    1569           0 :                         else if(vSubArguments[i]->GetFormulaToken()->GetType() ==
    1570             :                                 formula::svDoubleVectorRef)
    1571             :                         {
    1572             :                             const formula::DoubleVectorRefToken* pSVR =
    1573             :                                 static_cast< const formula::DoubleVectorRefToken*>
    1574           0 :                                 (vSubArguments[i]->GetFormulaToken());
    1575           0 :                             temp4<<pSVR->GetArrayLength();
    1576             :                         }
    1577           0 :                         temp4 << ")||isNan("<<vSubArguments[i]
    1578           0 :                             ->GenSlidingWindowDeclRef(true);
    1579           0 :                         temp4 << ")?0:";
    1580           0 :                         temp4 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
    1581           0 :                         temp4  << ")";
    1582             :                     }
    1583             :                     else
    1584             :                     {
    1585           0 :                         temp4 << vSubArguments[i]
    1586           0 :                             ->GenSlidingWindowDeclRef(true);
    1587             :                     }
    1588             :                 }
    1589           0 :                 temp4 << ", tmp);\n\t";
    1590             :             }
    1591           0 :             ss << temp4.str();
    1592             :         }
    1593           0 :         ss << "return tmp;\n";
    1594           0 :         ss << "}";
    1595             : #endif
    1596             : 
    1597           0 :     }
    1598           0 :     virtual bool takeString() const SAL_OVERRIDE { return false; }
    1599           0 :     virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
    1600             : };
    1601             : 
    1602             : /// operator traits
    1603           0 : class OpNop: public Reduction {
    1604             : public:
    1605           0 :     virtual std::string GetBottom(void) SAL_OVERRIDE { return "0"; }
    1606           0 :     virtual std::string Gen2(const std::string &lhs, const std::string &) const SAL_OVERRIDE
    1607             :     {
    1608           0 :         return lhs;
    1609             :     }
    1610           0 :     virtual std::string BinFuncName(void) const SAL_OVERRIDE { return "nop"; }
    1611             : };
    1612             : 
    1613           0 : class OpCount: public Reduction {
    1614             : public:
    1615           0 :     virtual std::string GetBottom(void) SAL_OVERRIDE { return "0"; }
    1616           0 :     virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const SAL_OVERRIDE
    1617             :     {
    1618           0 :         std::stringstream ss;
    1619           0 :         ss << "(isNan(" << lhs << ")?"<<rhs<<":"<<rhs<<"+1.0)";
    1620           0 :         return ss.str();
    1621             :     }
    1622           0 :     virtual std::string BinFuncName(void) const SAL_OVERRIDE { return "fcount"; }
    1623             : };
    1624             : 
    1625           0 : class OpEqual: public Binary {
    1626             : public:
    1627           0 :     virtual std::string GetBottom(void) SAL_OVERRIDE { return "0"; }
    1628           0 :     virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const SAL_OVERRIDE
    1629             :     {
    1630           0 :         std::stringstream ss;
    1631           0 :         ss << "strequal("<< lhs << "," << rhs <<")";
    1632           0 :         return ss.str();
    1633             :     }
    1634           0 :     virtual std::string BinFuncName(void) const SAL_OVERRIDE { return "eq"; }
    1635             : };
    1636             : 
    1637           0 : class OpLessEqual: public Binary {
    1638             : public:
    1639           0 :     virtual std::string GetBottom(void) SAL_OVERRIDE { return "0"; }
    1640           0 :     virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const SAL_OVERRIDE
    1641             :     {
    1642           0 :         std::stringstream ss;
    1643           0 :         ss << "("<< lhs << "<=" << rhs <<")";
    1644           0 :         return ss.str();
    1645             :     }
    1646           0 :     virtual std::string BinFuncName(void) const SAL_OVERRIDE { return "leq"; }
    1647             : };
    1648             : 
    1649           0 : class OpGreater: public Binary {
    1650             : public:
    1651           0 :     virtual std::string GetBottom(void) SAL_OVERRIDE { return "0"; }
    1652           0 :     virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const SAL_OVERRIDE
    1653             :     {
    1654           0 :         std::stringstream ss;
    1655           0 :         ss << "("<< lhs << ">" << rhs <<")";
    1656           0 :         return ss.str();
    1657             :     }
    1658           0 :     virtual std::string BinFuncName(void) const SAL_OVERRIDE { return "gt"; }
    1659             : };
    1660             : 
    1661           0 : class OpSum: public Reduction {
    1662             : public:
    1663           0 :     virtual std::string GetBottom(void) SAL_OVERRIDE { return "0"; }
    1664           0 :     virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const SAL_OVERRIDE
    1665             :     {
    1666           0 :         std::stringstream ss;
    1667           0 :         ss << "((" << lhs <<")+("<< rhs<<"))";
    1668           0 :         return ss.str();
    1669             :     }
    1670           0 :     virtual std::string BinFuncName(void) const SAL_OVERRIDE { return "fsum"; }
    1671             : };
    1672             : 
    1673           0 : class OpAverage: public Reduction {
    1674             : public:
    1675           0 :     virtual std::string GetBottom(void) SAL_OVERRIDE { return "0"; }
    1676           0 :     virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const SAL_OVERRIDE
    1677             :     {
    1678           0 :         std::stringstream ss;
    1679           0 :         ss << "fsum_count(" << lhs <<","<< rhs<<", &nCount)";
    1680           0 :         return ss.str();
    1681             :     }
    1682           0 :     virtual std::string BinFuncName(void) const SAL_OVERRIDE { return "fsum"; }
    1683           0 :     virtual bool isAverage() const SAL_OVERRIDE { return true; }
    1684             : };
    1685             : 
    1686           0 : class OpSub: public Reduction {
    1687             : public:
    1688           0 :     virtual std::string GetBottom(void) SAL_OVERRIDE { return "0"; }
    1689           0 :     virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const SAL_OVERRIDE
    1690             :     {
    1691           0 :         return lhs + "-" + rhs;
    1692             :     }
    1693           0 :     virtual std::string BinFuncName(void) const SAL_OVERRIDE { return "fsub"; }
    1694             : };
    1695             : 
    1696           0 : class OpMul: public Reduction {
    1697             : public:
    1698           0 :     virtual std::string GetBottom(void) SAL_OVERRIDE { return "1"; }
    1699           0 :     virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const SAL_OVERRIDE
    1700             :     {
    1701           0 :         return lhs + "*" + rhs;
    1702             :     }
    1703           0 :     virtual std::string BinFuncName(void) const SAL_OVERRIDE { return "fmul"; }
    1704             : };
    1705             : 
    1706             : /// Technically not a reduction, but fits the framework.
    1707           0 : class OpDiv: public Reduction {
    1708             : public:
    1709           0 :     virtual std::string GetBottom(void) SAL_OVERRIDE { return "1.0"; }
    1710           0 :     virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const SAL_OVERRIDE
    1711             :     {
    1712           0 :         return "(" + lhs + "/" + rhs + ")";
    1713             :     }
    1714           0 :     virtual std::string BinFuncName(void) const SAL_OVERRIDE { return "fdiv"; }
    1715             : };
    1716             : 
    1717           0 : class OpMin: public Reduction {
    1718             : public:
    1719           0 :     virtual std::string GetBottom(void) SAL_OVERRIDE { return "MAXFLOAT"; }
    1720           0 :     virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const SAL_OVERRIDE
    1721             :     {
    1722           0 :         return "mcw_fmin("+lhs + "," + rhs +")";
    1723             :     }
    1724           0 :     virtual std::string BinFuncName(void) const SAL_OVERRIDE { return "min"; }
    1725             : };
    1726             : 
    1727           0 : class OpMax: public Reduction {
    1728             : public:
    1729           0 :     virtual std::string GetBottom(void) SAL_OVERRIDE { return "-MAXFLOAT"; }
    1730           0 :     virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const SAL_OVERRIDE
    1731             :     {
    1732           0 :         return "mcw_fmax("+lhs + "," + rhs +")";
    1733             :     }
    1734           0 :     virtual std::string BinFuncName(void) const SAL_OVERRIDE { return "max"; }
    1735             : };
    1736           0 : class OpSumProduct: public SumOfProduct {
    1737             : public:
    1738           0 :     virtual std::string GetBottom(void) SAL_OVERRIDE { return "0"; }
    1739           0 :     virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const SAL_OVERRIDE
    1740             :     {
    1741           0 :         return lhs + "*" + rhs;
    1742             :     }
    1743           0 :     virtual std::string BinFuncName(void) const SAL_OVERRIDE { return "fsop"; }
    1744             : };
    1745             : namespace {
    1746             : struct SumIfsArgs {
    1747           0 :     SumIfsArgs(cl_mem x): mCLMem(x), mConst(0.0) {}
    1748           0 :     SumIfsArgs(double x): mCLMem(NULL), mConst(x) {}
    1749             :     cl_mem mCLMem;
    1750             :     double mConst;
    1751             : };
    1752             : }
    1753             : /// Helper functions that have multiple buffers
    1754             : class DynamicKernelSoPArguments: public DynamicKernelArgument
    1755             : {
    1756             : public:
    1757             :     typedef boost::shared_ptr<DynamicKernelArgument> SubArgument;
    1758             :     typedef std::vector<SubArgument> SubArgumentsType;
    1759             : 
    1760             :     DynamicKernelSoPArguments(
    1761             :         const std::string &s, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen);
    1762             : 
    1763             :     /// Create buffer and pass the buffer to a given kernel
    1764           0 :     virtual size_t Marshal(cl_kernel k, int argno, int nVectorWidth, cl_program pProgram) SAL_OVERRIDE
    1765             :     {
    1766           0 :         unsigned i = 0;
    1767           0 :         for (SubArgumentsType::iterator it = mvSubArguments.begin(), e= mvSubArguments.end(); it!=e;
    1768             :                 ++it)
    1769             :         {
    1770           0 :             i += (*it)->Marshal(k, argno + i, nVectorWidth, pProgram);
    1771             :         }
    1772           0 :         if (OpGeoMean *OpSumCodeGen = dynamic_cast<OpGeoMean*>(mpCodeGen.get()))
    1773             :         {
    1774             :             // Obtain cl context
    1775             :             KernelEnv kEnv;
    1776           0 :             OpenclDevice::setKernelEnv(&kEnv);
    1777             :             cl_int err;
    1778             :             cl_mem pClmem2;
    1779             : 
    1780           0 :             if (OpSumCodeGen->NeedReductionKernel())
    1781             :             {
    1782           0 :                 std::vector<cl_mem> vclmem;
    1783           0 :                 for (SubArgumentsType::iterator it = mvSubArguments.begin(),
    1784           0 :                         e= mvSubArguments.end(); it!=e; ++it)
    1785             :                 {
    1786           0 :                     if (VectorRef *VR = dynamic_cast<VectorRef *>(it->get()))
    1787           0 :                         vclmem.push_back(VR->GetCLBuffer());
    1788             :                     else
    1789           0 :                         vclmem.push_back(NULL);
    1790             :                 }
    1791             :                 pClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
    1792           0 :                         sizeof(double)*nVectorWidth, NULL, &err);
    1793           0 :                 if (CL_SUCCESS != err)
    1794           0 :                     throw OpenCLError(err, __FILE__, __LINE__);
    1795             : 
    1796           0 :                 std::string kernelName = "GeoMean_reduction";
    1797           0 :                 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
    1798           0 :                 if (err != CL_SUCCESS)
    1799           0 :                     throw OpenCLError(err, __FILE__, __LINE__);
    1800             :                     // set kernel arg of reduction kernel
    1801           0 :                 for (size_t j=0; j< vclmem.size(); j++){
    1802             :                     err = clSetKernelArg(redKernel, j,
    1803           0 :                             vclmem[j]?sizeof(cl_mem):sizeof(double),
    1804           0 :                             (void *)&vclmem[j]);
    1805           0 :                     if (CL_SUCCESS != err)
    1806           0 :                         throw OpenCLError(err, __FILE__, __LINE__);
    1807             :                 }
    1808           0 :                 err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), (void *)&pClmem2);
    1809           0 :                 if (CL_SUCCESS != err)
    1810           0 :                     throw OpenCLError(err, __FILE__, __LINE__);
    1811             : 
    1812             :                 // set work group size and execute
    1813           0 :                 size_t global_work_size[] = {256, (size_t)nVectorWidth };
    1814           0 :                 size_t local_work_size[] = {256, 1};
    1815             :                 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
    1816           0 :                         global_work_size, local_work_size, 0, NULL, NULL);
    1817           0 :                 if (CL_SUCCESS != err)
    1818           0 :                     throw OpenCLError(err, __FILE__, __LINE__);
    1819           0 :                 err = clFinish(kEnv.mpkCmdQueue);
    1820           0 :                 if (CL_SUCCESS != err)
    1821           0 :                     throw OpenCLError(err, __FILE__, __LINE__);
    1822             : 
    1823             :                  // Pass pClmem2 to the "real" kernel
    1824           0 :                 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void *)&pClmem2);
    1825           0 :                 if (CL_SUCCESS != err)
    1826           0 :                     throw OpenCLError(err, __FILE__, __LINE__);
    1827             :             }
    1828             :          }
    1829           0 :         if (OpSumIfs *OpSumCodeGen = dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
    1830             :         {
    1831             :             // Obtain cl context
    1832             :             KernelEnv kEnv;
    1833           0 :             OpenclDevice::setKernelEnv(&kEnv);
    1834             :             cl_int err;
    1835           0 :             DynamicKernelArgument *Arg = mvSubArguments[0].get();
    1836             :             DynamicKernelSlidingArgument<VectorRef> *slidingArgPtr =
    1837           0 :                 static_cast< DynamicKernelSlidingArgument<VectorRef> *> (Arg);
    1838           0 :             mpClmem2 = NULL;
    1839             : 
    1840           0 :             if (OpSumCodeGen->NeedReductionKernel())
    1841             :             {
    1842           0 :                 size_t nInput = slidingArgPtr -> GetArrayLength();
    1843           0 :                 size_t nCurWindowSize = slidingArgPtr -> GetWindowSize();
    1844           0 :                 std::vector<SumIfsArgs> vclmem;
    1845             : 
    1846           0 :                 for (SubArgumentsType::iterator it = mvSubArguments.begin(),
    1847           0 :                         e= mvSubArguments.end(); it!=e; ++it)
    1848             :                 {
    1849           0 :                     if (VectorRef *VR = dynamic_cast<VectorRef *>(it->get()))
    1850           0 :                         vclmem.push_back(SumIfsArgs(VR->GetCLBuffer()));
    1851           0 :                     else if (DynamicKernelConstantArgument *CA =
    1852             :                             dynamic_cast<
    1853           0 :                             DynamicKernelConstantArgument *>(it->get()))
    1854           0 :                         vclmem.push_back(SumIfsArgs(CA->GetDouble()));
    1855             :                     else
    1856           0 :                         vclmem.push_back(SumIfsArgs((cl_mem)NULL));
    1857             :                 }
    1858             :                 mpClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
    1859           0 :                         sizeof(double)*nVectorWidth, NULL, &err);
    1860           0 :                 if (CL_SUCCESS != err)
    1861           0 :                     throw OpenCLError(err, __FILE__, __LINE__);
    1862             : 
    1863           0 :                 std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction";
    1864           0 :                 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
    1865           0 :                 if (err != CL_SUCCESS)
    1866           0 :                     throw OpenCLError(err, __FILE__, __LINE__);
    1867             : 
    1868             :                     // set kernel arg of reduction kernel
    1869           0 :                 for (size_t j=0; j< vclmem.size(); j++){
    1870             :                     err = clSetKernelArg(redKernel, j,
    1871           0 :                             vclmem[j].mCLMem?sizeof(cl_mem):sizeof(double),
    1872           0 :                             vclmem[j].mCLMem?(void *)&vclmem[j].mCLMem:
    1873           0 :                             (void*)&vclmem[j].mConst);
    1874           0 :                     if (CL_SUCCESS != err)
    1875           0 :                         throw OpenCLError(err, __FILE__, __LINE__);
    1876             :                 }
    1877           0 :                 err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), (void *)&mpClmem2);
    1878           0 :                 if (CL_SUCCESS != err)
    1879           0 :                     throw OpenCLError(err, __FILE__, __LINE__);
    1880             : 
    1881           0 :                 err = clSetKernelArg(redKernel, vclmem.size()+1, sizeof(cl_int), (void*)&nInput);
    1882           0 :                 if (CL_SUCCESS != err)
    1883           0 :                     throw OpenCLError(err, __FILE__, __LINE__);
    1884             : 
    1885           0 :                 err = clSetKernelArg(redKernel, vclmem.size()+2, sizeof(cl_int), (void*)&nCurWindowSize);
    1886           0 :                 if (CL_SUCCESS != err)
    1887           0 :                     throw OpenCLError(err, __FILE__, __LINE__);
    1888             :                 // set work group size and execute
    1889           0 :                 size_t global_work_size[] = {256, (size_t)nVectorWidth };
    1890           0 :                 size_t local_work_size[] = {256, 1};
    1891             :                 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
    1892           0 :                         global_work_size, local_work_size, 0, NULL, NULL);
    1893           0 :                 if (CL_SUCCESS != err)
    1894           0 :                     throw OpenCLError(err, __FILE__, __LINE__);
    1895           0 :                 err = clFinish(kEnv.mpkCmdQueue);
    1896           0 :                 if (CL_SUCCESS != err)
    1897           0 :                     throw OpenCLError(err, __FILE__, __LINE__);
    1898           0 :                 clReleaseKernel(redKernel);
    1899             :                  // Pass mpClmem2 to the "real" kernel
    1900           0 :                 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void *)&mpClmem2);
    1901           0 :                 if (CL_SUCCESS != err)
    1902           0 :                     throw OpenCLError(err, __FILE__, __LINE__);
    1903             :             }
    1904             :         }
    1905           0 :         return i;
    1906             :     }
    1907             : 
    1908           0 :     virtual void GenSlidingWindowFunction(std::stringstream &ss) SAL_OVERRIDE {
    1909           0 :         for (unsigned i = 0; i < mvSubArguments.size(); i++)
    1910           0 :             mvSubArguments[i]->GenSlidingWindowFunction(ss);
    1911           0 :         mpCodeGen->GenSlidingWindowFunction(ss, mSymName, mvSubArguments);
    1912           0 :     }
    1913           0 :     virtual void GenDeclRef(std::stringstream &ss) const SAL_OVERRIDE
    1914             :     {
    1915           0 :         for (unsigned i = 0; i < mvSubArguments.size(); i++)
    1916             :         {
    1917           0 :             if (i)
    1918           0 :                 ss << ",";
    1919           0 :             mvSubArguments[i]->GenDeclRef(ss);
    1920             :         }
    1921           0 :     }
    1922           0 :     virtual void GenDecl(std::stringstream &ss) const SAL_OVERRIDE
    1923             :     {
    1924           0 :         for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e= mvSubArguments.end(); it!=e;
    1925             :             ++it) {
    1926           0 :             if (it != mvSubArguments.begin())
    1927           0 :                 ss << ", ";
    1928           0 :             (*it)->GenDecl(ss);
    1929             :         }
    1930           0 :     }
    1931             : 
    1932           0 :     virtual size_t GetWindowSize(void) const SAL_OVERRIDE
    1933             :     {
    1934           0 :         size_t nCurWindowSize = 0;
    1935           0 :         for (unsigned i = 0; i < mvSubArguments.size(); i++)
    1936             :         {
    1937           0 :             size_t nCurChildWindowSize = mvSubArguments[i]->GetWindowSize();
    1938             :             nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
    1939           0 :                 nCurChildWindowSize:nCurWindowSize;
    1940             :         }
    1941           0 :         return nCurWindowSize;
    1942             :     }
    1943             : 
    1944             :     /// When declared as input to a sliding window function
    1945           0 :     virtual void GenSlidingWindowDecl(std::stringstream &ss) const SAL_OVERRIDE
    1946             :     {
    1947           0 :         for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e= mvSubArguments.end(); it!=e;
    1948             :             ++it)
    1949             :         {
    1950           0 :             if (it != mvSubArguments.begin())
    1951           0 :                 ss << ", ";
    1952           0 :             (*it)->GenSlidingWindowDecl(ss);
    1953             :         }
    1954           0 :     }
    1955             :     /// Generate either a function call to each children
    1956             :     /// or direclty inline it if we are already inside a loop
    1957           0 :     virtual std::string GenSlidingWindowDeclRef(bool nested=false) const SAL_OVERRIDE
    1958             :     {
    1959           0 :         std::stringstream ss;
    1960           0 :         if (!nested)
    1961             :         {
    1962           0 :             ss << mSymName << "_" << mpCodeGen->BinFuncName() <<"(";
    1963           0 :             for (unsigned i = 0; i < mvSubArguments.size(); i++)
    1964             :             {
    1965           0 :                 if (i)
    1966           0 :                     ss << ", ";
    1967           0 :                 mvSubArguments[i]->GenDeclRef(ss);
    1968             :             }
    1969           0 :             ss << ")";
    1970             :         } else {
    1971           0 :             if (mvSubArguments.size() != 2)
    1972           0 :                 throw Unhandled();
    1973           0 :             ss << "(" << mpCodeGen->Gen2(mvSubArguments[0]->GenSlidingWindowDeclRef(true),
    1974           0 :                          mvSubArguments[1]->GenSlidingWindowDeclRef(true)) << ")";
    1975             :         }
    1976           0 :         return ss.str();
    1977             :     }
    1978           0 :     virtual std::string DumpOpName(void) const SAL_OVERRIDE
    1979             :     {
    1980           0 :         std::string t = "_" + mpCodeGen->BinFuncName();
    1981           0 :         for (unsigned i = 0; i < mvSubArguments.size(); i++)
    1982           0 :             t = t + mvSubArguments[i]->DumpOpName();
    1983           0 :         return t;
    1984             :     }
    1985           0 :     virtual void DumpInlineFun(std::set<std::string>& decls,
    1986             :                                 std::set<std::string>& funs) const SAL_OVERRIDE
    1987             :     {
    1988           0 :         mpCodeGen->BinInlineFun(decls,funs);
    1989           0 :         for (unsigned i = 0; i < mvSubArguments.size(); i++)
    1990           0 :             mvSubArguments[i]->DumpInlineFun(decls,funs);
    1991           0 :     }
    1992           0 :    virtual ~DynamicKernelSoPArguments()
    1993           0 :     {
    1994           0 :         if (mpClmem2)
    1995             :         {
    1996           0 :             clReleaseMemObject(mpClmem2);
    1997           0 :             mpClmem2 = NULL;
    1998             :         }
    1999           0 :     }
    2000             : private:
    2001             :     SubArgumentsType mvSubArguments;
    2002             :     boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
    2003             :     cl_mem mpClmem2;
    2004             : };
    2005             : 
    2006           0 : boost::shared_ptr<DynamicKernelArgument> SoPHelper(
    2007             :     const std::string &ts, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen)
    2008             : {
    2009           0 :     return boost::shared_ptr<DynamicKernelArgument>(new DynamicKernelSoPArguments(ts, ft, pCodeGen));
    2010             : }
    2011             : 
    2012             : template<class Base>
    2013           0 : DynamicKernelArgument *VectorRefFactory(const std::string &s,
    2014             :         const FormulaTreeNodeRef& ft,
    2015             :         boost::shared_ptr<SlidingFunctionBase> &pCodeGen,
    2016             :         int index)
    2017             : {
    2018             :     //Black lists ineligible classes here ..
    2019             :     // SUMIFS does not perform parallel reduction at DoubleVectorRef level
    2020           0 :     if (dynamic_cast<OpSumIfs*>(pCodeGen.get())) {
    2021           0 :         if (index == 0) // the first argument of OpSumIfs cannot be strings anyway
    2022           0 :             return new DynamicKernelSlidingArgument<VectorRef>(s, ft, pCodeGen, index);
    2023           0 :         return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
    2024             :     }
    2025             :     // AVERAGE is not supported yet
    2026             :     //Average has been supported by reduction kernel
    2027             :     /*else if (dynamic_cast<OpAverage*>(pCodeGen.get()))
    2028             :     {
    2029             :         return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
    2030             :     }*/
    2031             :     // MUL is not supported yet
    2032           0 :     else if (dynamic_cast<OpMul*>(pCodeGen.get()))
    2033             :     {
    2034           0 :         return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
    2035             :     }
    2036             :     // Sub is not a reduction per se
    2037           0 :     else if (dynamic_cast<OpSub*>(pCodeGen.get()))
    2038             :     {
    2039           0 :         return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
    2040             :     }
    2041             :     // Only child class of Reduction is supported
    2042           0 :     else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
    2043             :     {
    2044           0 :         return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
    2045             :     }
    2046             : 
    2047             :     const formula::DoubleVectorRefToken* pDVR =
    2048             :         static_cast< const formula::DoubleVectorRefToken* >(
    2049           0 :                 ft->GetFormulaToken());
    2050             :     // Window being too small to justify a parallel reduction
    2051           0 :     if (pDVR->GetRefRowSize() < REDUCE_THRESHOLD)
    2052           0 :         return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
    2053           0 :     if ((pDVR->IsStartFixed() && pDVR->IsEndFixed()) ||
    2054           0 :             (!pDVR->IsStartFixed() && !pDVR->IsEndFixed()))
    2055           0 :         return new ParallelReductionVectorRef<Base>(s, ft, pCodeGen, index);
    2056             :     else // Other cases are not supported as well
    2057           0 :         return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
    2058             : }
    2059             : 
    2060           0 : DynamicKernelSoPArguments::DynamicKernelSoPArguments(
    2061             :     const std::string &s, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen) :
    2062           0 :     DynamicKernelArgument(s, ft), mpCodeGen(pCodeGen), mpClmem2(NULL)
    2063             : {
    2064           0 :     size_t nChildren = ft->Children.size();
    2065             : 
    2066           0 :     for (unsigned i = 0; i < nChildren; i++)
    2067             :     {
    2068           0 :         FormulaToken *pChild = ft->Children[i]->GetFormulaToken();
    2069           0 :         if (!pChild)
    2070           0 :             throw Unhandled();
    2071           0 :         OpCode opc = pChild->GetOpCode();
    2072           0 :         std::stringstream tmpname;
    2073           0 :         tmpname << s << "_" << i;
    2074           0 :         std::string ts = tmpname.str();
    2075           0 :         switch(opc) {
    2076             :             case ocPush:
    2077           0 :                 if (pChild->GetType() == formula::svDoubleVectorRef)
    2078             :                 {
    2079             :                     const formula::DoubleVectorRefToken* pDVR =
    2080           0 :                         static_cast< const formula::DoubleVectorRefToken* >(pChild);
    2081           0 :                     for (size_t j = 0; j < pDVR->GetArrays().size(); ++j)
    2082             :                     {
    2083           0 :                         if (pDVR->GetArrays()[j].mpNumericArray ||
    2084           0 :                             (pDVR->GetArrays()[j].mpNumericArray == NULL &&
    2085           0 :                             pDVR->GetArrays()[j].mpStringArray == NULL ))
    2086             :                         {
    2087           0 :                             if(pDVR->GetArrays()[j].mpNumericArray &&
    2088           0 :                                 pCodeGen->takeNumeric() &&
    2089           0 :                                 pDVR->GetArrays()[j].mpStringArray &&
    2090           0 :                                 pCodeGen->takeString())
    2091             :                             {
    2092             :                                 mvSubArguments.push_back(
    2093             :                                     SubArgument(
    2094             :                                     new DynamicKernelMixedSlidingArgument(
    2095           0 :                                     ts, ft->Children[i], mpCodeGen, j)));
    2096             :                             }
    2097             :                             else
    2098             :                             {
    2099             :                                 mvSubArguments.push_back(
    2100             :                                     SubArgument(VectorRefFactory<VectorRef>(
    2101           0 :                                     ts, ft->Children[i], mpCodeGen, j)));
    2102             :                             }
    2103             :                         }
    2104             :                         else
    2105             :                             mvSubArguments.push_back(
    2106             :                                     SubArgument(VectorRefFactory
    2107             :                                         <DynamicKernelStringArgument>(
    2108           0 :                                             ts, ft->Children[i], mpCodeGen, j)));
    2109             :                     }
    2110           0 :                 } else if (pChild->GetType() == formula::svSingleVectorRef) {
    2111             :                     const formula::SingleVectorRefToken* pSVR =
    2112           0 :                         static_cast< const formula::SingleVectorRefToken* >(pChild);
    2113           0 :                     if (pSVR->GetArray().mpNumericArray &&
    2114           0 :                         pCodeGen->takeNumeric() &&
    2115           0 :                         pSVR->GetArray().mpStringArray &&
    2116           0 :                         pCodeGen->takeString())
    2117             :                     {
    2118             :                         mvSubArguments.push_back(
    2119             :                                 SubArgument(new DynamicKernelMixedArgument(
    2120           0 :                                         ts, ft->Children[i])));
    2121             :                     }
    2122           0 :                     else if (pSVR->GetArray().mpNumericArray &&
    2123           0 :                             pCodeGen->takeNumeric())
    2124             :                     {
    2125             :                         mvSubArguments.push_back(
    2126             :                                 SubArgument(new VectorRef(ts,
    2127           0 :                                         ft->Children[i])));
    2128             :                     }
    2129           0 :                     else if (pSVR->GetArray().mpStringArray &&
    2130           0 :                             pCodeGen->takeString())
    2131             :                     {
    2132             :                         mvSubArguments.push_back(
    2133             :                                 SubArgument(new DynamicKernelStringArgument(
    2134           0 :                                         ts, ft->Children[i])));
    2135             :                     }
    2136           0 :                     else if (pSVR->GetArray().mpStringArray == NULL &&
    2137           0 :                         pSVR->GetArray().mpNumericArray == NULL)
    2138             :                     {
    2139             :                         // Push as an array of NANs
    2140             :                         mvSubArguments.push_back(
    2141             :                                 SubArgument(new VectorRef(ts,
    2142           0 :                                         ft->Children[i])));
    2143             :                     }
    2144             :                     else
    2145             :                         throw UnhandledToken(pChild,
    2146           0 :                                 "Got unhandled case here", __FILE__, __LINE__);
    2147           0 :                 } else if (pChild->GetType() == formula::svDouble) {
    2148             :                     mvSubArguments.push_back(
    2149             :                             SubArgument(new DynamicKernelConstantArgument(ts,
    2150           0 :                                     ft->Children[i])));
    2151           0 :                 } else if (pChild->GetType() == formula::svString
    2152           0 :                 && pCodeGen->takeString()) {
    2153             :                     mvSubArguments.push_back(
    2154             :                             SubArgument(new ConstStringArgument(ts,
    2155           0 :                                     ft->Children[i])));
    2156             :                 } else {
    2157           0 :                     throw UnhandledToken(pChild, "unknown operand for ocPush");
    2158             :                 }
    2159           0 :                 break;
    2160             :             case ocDiv:
    2161           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDiv));
    2162           0 :                 break;
    2163             :             case ocMul:
    2164           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMul));
    2165           0 :                 break;
    2166             :             case ocSub:
    2167           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSub));
    2168           0 :                 break;
    2169             :             case ocAdd:
    2170             :             case ocSum:
    2171           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSum));
    2172           0 :                 break;
    2173             :             case ocAverage:
    2174           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpAverage));
    2175           0 :                 break;
    2176             :             case ocMin:
    2177           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMin));
    2178           0 :                 break;
    2179             :             case ocMax:
    2180           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMax));
    2181           0 :                 break;
    2182             :             case ocCount:
    2183           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCount));
    2184           0 :                 break;
    2185             :             case ocSumProduct:
    2186           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSumProduct));
    2187           0 :                 break;
    2188             :             case ocIRR:
    2189           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpIRR));
    2190           0 :                 break;
    2191             :             case ocMIRR:
    2192           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMIRR));
    2193           0 :                 break;
    2194             :             case ocRMZ:
    2195           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPMT));
    2196           0 :                 break;
    2197             :             case ocZins:
    2198           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpIntrate));
    2199           0 :                 break;
    2200             :             case ocZGZ:
    2201           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpRRI));
    2202           0 :                 break;
    2203             :             case ocKapz:
    2204           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPPMT));
    2205           0 :                 break;
    2206             :             case ocFisher:
    2207           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpFisher));
    2208           0 :                 break;
    2209             :             case ocFisherInv:
    2210           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpFisherInv));
    2211           0 :                 break;
    2212             :             case ocGamma:
    2213           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGamma));
    2214           0 :                 break;
    2215             :             case ocLIA:
    2216           0 :                  mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSLN));
    2217           0 :                 break;
    2218             :             case ocGammaLn:
    2219           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGammaLn));
    2220           0 :                 break;
    2221             :             case ocGauss:
    2222           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGauss));
    2223           0 :                 break;
    2224             :             /*case ocGeoMean:
    2225             :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGeoMean));
    2226             :                 break;*/
    2227             :             case ocHarMean:
    2228           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpHarMean));
    2229           0 :                 break;
    2230             :             case ocLessEqual:
    2231           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpLessEqual));
    2232           0 :                 break;
    2233             :             case ocEqual:
    2234           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpEqual));
    2235           0 :                 break;
    2236             :             case ocGreater:
    2237           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGreater));
    2238           0 :                 break;
    2239             :             case ocDIA:
    2240           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSYD));
    2241           0 :                 break;
    2242             :             case ocCorrel:
    2243           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCorrel));
    2244           0 :                 break;
    2245             :             case ocCos:
    2246           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCos));
    2247           0 :                 break;
    2248             :             case ocNegBinomVert :
    2249           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpNegbinomdist));
    2250           0 :                 break;
    2251             :             case ocPearson:
    2252           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPearson));
    2253           0 :                 break;
    2254             :             case ocRSQ:
    2255           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpRsq));
    2256           0 :                 break;
    2257             :             case ocCosecant:
    2258           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCsc));
    2259           0 :                 break;
    2260             :             case ocISPMT:
    2261           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpISPMT));
    2262           0 :                 break;
    2263             :             case ocLaufz:
    2264             :                 mvSubArguments.push_back(SoPHelper(ts,
    2265           0 :                          ft->Children[i], new OpDuration));
    2266           0 :                 break;
    2267             :             case ocSinHyp:
    2268             :                 mvSubArguments.push_back(SoPHelper(ts,
    2269           0 :                          ft->Children[i],new OpSinh));
    2270           0 :                 break;
    2271             :             case ocAbs:
    2272             :                 mvSubArguments.push_back(SoPHelper(ts,
    2273           0 :                          ft->Children[i], new OpAbs));
    2274           0 :                 break;
    2275             :             case ocBW:
    2276             :                 mvSubArguments.push_back(SoPHelper(ts,
    2277           0 :                          ft->Children[i], new OpPV));
    2278           0 :                 break;
    2279             :             case ocSin:
    2280             :                 mvSubArguments.push_back(SoPHelper(ts,
    2281           0 :                          ft->Children[i], new OpSin));
    2282           0 :                 break;
    2283             :             case ocTan:
    2284             :                 mvSubArguments.push_back(SoPHelper(ts,
    2285           0 :                          ft->Children[i], new OpTan));
    2286           0 :                 break;
    2287             :             case ocTanHyp:
    2288             :                 mvSubArguments.push_back(SoPHelper(ts,
    2289           0 :                          ft->Children[i], new OpTanH));
    2290           0 :                 break;
    2291             :             case ocStandard:
    2292             :                 mvSubArguments.push_back(SoPHelper(ts,
    2293           0 :                          ft->Children[i], new OpStandard));
    2294           0 :                 break;
    2295             :             case ocWeibull:
    2296             :                 mvSubArguments.push_back(SoPHelper(ts,
    2297           0 :                          ft->Children[i], new OpWeibull));
    2298           0 :                 break;
    2299             :             /*case ocMedian:
    2300             :                 mvSubArguments.push_back(SoPHelper(ts,
    2301             :                          ft->Children[i],new OpMedian));
    2302             :                 break;*/
    2303             :             case ocGDA:
    2304             :                 mvSubArguments.push_back(SoPHelper(ts,
    2305           0 :                          ft->Children[i],new OpDDB));
    2306           0 :                 break;
    2307             :             case ocZW:
    2308             :                 mvSubArguments.push_back(SoPHelper(ts,
    2309           0 :                          ft->Children[i],new OpFV));
    2310           0 :                 break;
    2311             :             case ocSumIfs:
    2312             :                 mvSubArguments.push_back(SoPHelper(ts,
    2313           0 :                          ft->Children[i],new OpSumIfs));
    2314           0 :                  break;
    2315             :             /*case ocVBD:
    2316             :                 mvSubArguments.push_back(SoPHelper(ts,
    2317             :                          ft->Children[i],new OpVDB));
    2318             :                  break;*/
    2319             :             case ocKurt:
    2320             :                 mvSubArguments.push_back(SoPHelper(ts,
    2321           0 :                         ft->Children[i], new OpKurt));
    2322           0 :                  break;
    2323             :             /*case ocZZR:
    2324             :                 mvSubArguments.push_back(SoPHelper(ts,
    2325             :                         ft->Children[i], new OpNper));
    2326             :                  break;*/
    2327             :             case ocNormDist:
    2328             :                 mvSubArguments.push_back(SoPHelper(ts,
    2329           0 :                          ft->Children[i],new OpNormdist));
    2330           0 :                  break;
    2331             :             case ocArcCos:
    2332             :                 mvSubArguments.push_back(SoPHelper(ts,
    2333           0 :                          ft->Children[i], new OpArcCos));
    2334           0 :                  break;
    2335             :             case ocSqrt:
    2336             :                 mvSubArguments.push_back(SoPHelper(ts,
    2337           0 :                          ft->Children[i],new OpSqrt));
    2338           0 :                 break;
    2339             :             case ocArcCosHyp:
    2340             :                 mvSubArguments.push_back(SoPHelper(ts,
    2341           0 :                          ft->Children[i], new OpArcCosHyp));
    2342           0 :                 break;
    2343             :             case ocNPV:
    2344             :                 mvSubArguments.push_back(SoPHelper(ts,
    2345           0 :                     ft->Children[i], new OpNPV));
    2346           0 :                 break;
    2347             :             case ocStdNormDist:
    2348             :                 mvSubArguments.push_back(SoPHelper(ts,
    2349           0 :                          ft->Children[i],new OpNormsdist));
    2350           0 :                 break;
    2351             :             case ocNormInv:
    2352             :                 mvSubArguments.push_back(SoPHelper(ts,
    2353           0 :                          ft->Children[i],new OpNorminv));
    2354           0 :                 break;
    2355             :             case ocSNormInv:
    2356             :                 mvSubArguments.push_back(SoPHelper(ts,
    2357           0 :                          ft->Children[i],new OpNormsinv));
    2358           0 :                 break;
    2359             :             case ocVariationen:
    2360             :                 mvSubArguments.push_back(SoPHelper(ts,
    2361           0 :                          ft->Children[i],new OpVariationen));
    2362           0 :                 break;
    2363             :             case ocVariationen2:
    2364             :                 mvSubArguments.push_back(SoPHelper(ts,
    2365           0 :                          ft->Children[i],new OpVariationen2));
    2366           0 :                 break;
    2367             :             case ocPhi:
    2368             :                 mvSubArguments.push_back(SoPHelper(ts,
    2369           0 :                          ft->Children[i],new OpPhi));
    2370           0 :                 break;
    2371             :             case ocZinsZ:
    2372             :                 mvSubArguments.push_back(SoPHelper(ts,
    2373           0 :                          ft->Children[i],new OpIPMT));
    2374           0 :                  break;
    2375             :             case ocConfidence:
    2376             :                 mvSubArguments.push_back(SoPHelper(ts,
    2377           0 :                          ft->Children[i], new OpConfidence));
    2378           0 :                 break;
    2379             :             case ocIntercept:
    2380             :                 mvSubArguments.push_back(SoPHelper(ts,
    2381           0 :                          ft->Children[i], new OpIntercept));
    2382           0 :                 break;
    2383             :             case ocGDA2:
    2384           0 :                 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    2385           0 :                     new OpDB));
    2386           0 :                 break;
    2387             :             case ocLogInv:
    2388             :                 mvSubArguments.push_back(SoPHelper(ts,
    2389           0 :                          ft->Children[i], new OpLogInv));
    2390           0 :                 break;
    2391             :             case ocArcCot:
    2392             :                 mvSubArguments.push_back(SoPHelper(ts,
    2393           0 :                     ft->Children[i], new OpArcCot));
    2394           0 :                 break;
    2395             :             case ocCosHyp:
    2396             :                 mvSubArguments.push_back(SoPHelper(ts,
    2397           0 :                     ft->Children[i], new OpCosh));
    2398           0 :                 break;
    2399             :             case ocKritBinom:
    2400             :                 mvSubArguments.push_back(SoPHelper(ts,
    2401           0 :                          ft->Children[i], new OpCritBinom));
    2402           0 :                 break;
    2403             :             case ocArcCotHyp:
    2404             :                 mvSubArguments.push_back(SoPHelper(ts,
    2405           0 :                     ft->Children[i], new OpArcCotHyp));
    2406           0 :                 break;
    2407             :             case ocArcSin:
    2408             :                 mvSubArguments.push_back(SoPHelper(ts,
    2409           0 :                         ft->Children[i], new OpArcSin));
    2410           0 :                 break;
    2411             :             case ocArcSinHyp:
    2412             :                 mvSubArguments.push_back(SoPHelper(ts,
    2413           0 :                     ft->Children[i], new OpArcSinHyp));
    2414           0 :                 break;
    2415             :             case ocArcTan:
    2416             :                 mvSubArguments.push_back(SoPHelper(ts,
    2417           0 :                     ft->Children[i], new OpArcTan));
    2418           0 :                 break;
    2419             :             case ocArcTanHyp:
    2420             :                 mvSubArguments.push_back(SoPHelper(ts,
    2421           0 :                     ft->Children[i], new OpArcTanH));
    2422           0 :                 break;
    2423             :             case ocBitAnd:
    2424             :                 mvSubArguments.push_back(SoPHelper(ts,
    2425           0 :                     ft->Children[i], new OpBitAnd));
    2426           0 :                 break;
    2427             :             case ocForecast:
    2428             :                 mvSubArguments.push_back(SoPHelper(ts,
    2429           0 :                          ft->Children[i], new OpForecast));
    2430           0 :                 break;
    2431             :             case ocLogNormDist:
    2432             :                 mvSubArguments.push_back(SoPHelper(ts,
    2433           0 :                          ft->Children[i], new OpLogNormDist));
    2434           0 :                 break;
    2435             :             /*case ocGammaDist:
    2436             :                 mvSubArguments.push_back(SoPHelper(ts,
    2437             :                          ft->Children[i], new OpGammaDist));
    2438             :                 break;*/
    2439             :             case ocLn:
    2440             :                 mvSubArguments.push_back(SoPHelper(ts,
    2441           0 :                          ft->Children[i],new OpLn));
    2442           0 :                 break;
    2443             :             case ocRound:
    2444             :                 mvSubArguments.push_back(SoPHelper(ts,
    2445           0 :                          ft->Children[i],new OpRound));
    2446           0 :                 break;
    2447             :             case ocCot:
    2448             :                 mvSubArguments.push_back(SoPHelper(ts,
    2449           0 :                          ft->Children[i], new OpCot));
    2450           0 :                 break;
    2451             :             case ocCotHyp:
    2452             :                 mvSubArguments.push_back(SoPHelper(ts,
    2453           0 :                         ft->Children[i], new OpCoth));
    2454           0 :                 break;
    2455             :             case ocFDist:
    2456             :                 mvSubArguments.push_back(SoPHelper(ts,
    2457           0 :                     ft->Children[i], new OpFdist));
    2458           0 :                 break;
    2459             :             case ocVar:
    2460             :                 mvSubArguments.push_back(SoPHelper(ts,
    2461           0 :                          ft->Children[i], new OpVar));
    2462           0 :                 break;
    2463             :             /*case ocChiDist:
    2464             :                 mvSubArguments.push_back(SoPHelper(ts,
    2465             :                          ft->Children[i],new OpChiDist));
    2466             :                 break;*/
    2467             :             case ocPow:
    2468             :             case ocPower:
    2469             :                 mvSubArguments.push_back(SoPHelper(ts,
    2470           0 :                          ft->Children[i], new OpPower));
    2471           0 :                 break;
    2472             :             case ocOdd:
    2473             :                 mvSubArguments.push_back(SoPHelper(ts,
    2474           0 :                          ft->Children[i], new OpOdd));
    2475           0 :                 break;
    2476             :             /*case ocChiSqDist:
    2477             :                 mvSubArguments.push_back(SoPHelper(ts,
    2478             :                          ft->Children[i],new OpChiSqDist));
    2479             :                 break;
    2480             :             case ocChiSqInv:
    2481             :                 mvSubArguments.push_back(SoPHelper(ts,
    2482             :                          ft->Children[i],new OpChiSqInv));
    2483             :                 break;
    2484             :             case ocGammaInv:
    2485             :                 mvSubArguments.push_back(SoPHelper(ts,
    2486             :                          ft->Children[i], new OpGammaInv));
    2487             :                 break;*/
    2488             :             case ocFloor:
    2489             :                 mvSubArguments.push_back(SoPHelper(ts,
    2490           0 :                          ft->Children[i], new OpFloor));
    2491           0 :                 break;
    2492             :             /*case ocFInv:
    2493             :                 mvSubArguments.push_back(SoPHelper(ts,
    2494             :                          ft->Children[i], new OpFInv));
    2495             :                 break;*/
    2496             :             case ocFTest:
    2497             :                 mvSubArguments.push_back(SoPHelper(ts,
    2498           0 :                          ft->Children[i], new OpFTest));
    2499           0 :                 break;
    2500             :             case ocB:
    2501             :                 mvSubArguments.push_back(SoPHelper(ts,
    2502           0 :                          ft->Children[i], new OpB));
    2503           0 :                 break;
    2504             :             case ocBetaDist:
    2505             :                 mvSubArguments.push_back(SoPHelper(ts,
    2506           0 :                          ft->Children[i], new OpBetaDist));
    2507           0 :                 break;
    2508             :              case ocCosecantHyp:
    2509             :                 mvSubArguments.push_back(SoPHelper(ts,
    2510           0 :                           ft->Children[i], new OpCscH));
    2511           0 :                 break;
    2512             :             case ocExp:
    2513             :                 mvSubArguments.push_back(SoPHelper(ts,
    2514           0 :                            ft->Children[i], new OpExp));
    2515           0 :                 break;
    2516             :             case ocLog10:
    2517             :                 mvSubArguments.push_back(SoPHelper(ts,
    2518           0 :                            ft->Children[i], new OpLog10));
    2519           0 :                 break;
    2520             :             case ocExpDist:
    2521             :                 mvSubArguments.push_back(SoPHelper(ts,
    2522           0 :                     ft->Children[i], new OpExponDist));
    2523           0 :                 break;
    2524             :             case ocAverageIfs:
    2525             :                 mvSubArguments.push_back(SoPHelper(ts,
    2526           0 :                          ft->Children[i],new OpAverageIfs));
    2527           0 :                 break;
    2528             :             case ocCountIfs:
    2529             :                 mvSubArguments.push_back(SoPHelper(ts,
    2530           0 :                          ft->Children[i],new OpCountIfs));
    2531           0 :                 break;
    2532             :             case ocKombin2:
    2533             :                 mvSubArguments.push_back(SoPHelper(ts,
    2534           0 :                          ft->Children[i], new OpCombina));
    2535           0 :                 break;
    2536             :             case ocEven:
    2537             :                 mvSubArguments.push_back(SoPHelper(ts,
    2538           0 :                         ft->Children[i], new OpEven));
    2539           0 :                 break;
    2540             :             case ocLog:
    2541             :                 mvSubArguments.push_back(SoPHelper(ts,
    2542           0 :                         ft->Children[i], new OpLog));
    2543           0 :                 break;
    2544             :             case ocMod:
    2545             :                 mvSubArguments.push_back(SoPHelper(ts,
    2546           0 :                        ft->Children[i], new OpMod));
    2547           0 :                 break;
    2548             :             case ocTrunc:
    2549             :                 mvSubArguments.push_back(SoPHelper(ts,
    2550           0 :                          ft->Children[i], new OpTrunc));
    2551           0 :                 break;
    2552             :             case ocSchiefe:
    2553             :                 mvSubArguments.push_back(SoPHelper(ts,
    2554           0 :                          ft->Children[i], new OpSkew));
    2555           0 :                 break;
    2556             :             case ocArcTan2:
    2557             :                 mvSubArguments.push_back(SoPHelper(ts,
    2558           0 :                     ft->Children[i], new OpArcTan2));
    2559           0 :                 break;
    2560             :             case ocBitOr:
    2561             :                 mvSubArguments.push_back(SoPHelper(ts,
    2562           0 :                     ft->Children[i], new OpBitOr));
    2563           0 :                 break;
    2564             :             case ocBitLshift:
    2565             :                 mvSubArguments.push_back(SoPHelper(ts,
    2566           0 :                     ft->Children[i], new OpBitLshift));
    2567           0 :                 break;
    2568             :             case ocBitRshift:
    2569             :                 mvSubArguments.push_back(SoPHelper(ts,
    2570           0 :                     ft->Children[i], new OpBitRshift));
    2571           0 :                 break;
    2572             :             case ocBitXor:
    2573             :                 mvSubArguments.push_back(SoPHelper(ts,
    2574           0 :                     ft->Children[i], new OpBitXor));
    2575           0 :                 break;
    2576             :             /*case ocChiInv:
    2577             :                 mvSubArguments.push_back(SoPHelper(ts,
    2578             :                          ft->Children[i],new OpChiInv));
    2579             :                 break;*/
    2580             :             case ocPoissonDist:
    2581             :                 mvSubArguments.push_back(SoPHelper(ts,
    2582           0 :                          ft->Children[i],new OpPoisson));
    2583           0 :                 break;
    2584             :             case ocSumSQ:
    2585             :                 mvSubArguments.push_back(SoPHelper(ts,
    2586           0 :                          ft->Children[i], new OpSumSQ));
    2587           0 :                 break;
    2588             :             case ocSkewp:
    2589             :                 mvSubArguments.push_back(SoPHelper(ts,
    2590           0 :                          ft->Children[i], new OpSkewp));
    2591           0 :                 break;
    2592             :             case ocBinomDist:
    2593             :                 mvSubArguments.push_back(SoPHelper(ts,
    2594           0 :                          ft->Children[i],new OpBinomdist));
    2595           0 :                 break;
    2596             :             case ocVarP:
    2597             :                 mvSubArguments.push_back(SoPHelper(ts,
    2598           0 :                          ft->Children[i], new OpVarP));
    2599           0 :                 break;
    2600             :             case ocCeil:
    2601             :                 mvSubArguments.push_back(SoPHelper(ts,
    2602           0 :                     ft->Children[i], new OpCeil));
    2603           0 :                 break;
    2604             :             case ocKombin:
    2605             :                 mvSubArguments.push_back(SoPHelper(ts,
    2606           0 :                     ft->Children[i], new OpKombin));
    2607           0 :                 break;
    2608             :             case ocDevSq:
    2609             :                 mvSubArguments.push_back(SoPHelper(ts,
    2610           0 :                          ft->Children[i], new OpDevSq));
    2611           0 :                 break;
    2612             :             case ocStDev:
    2613             :                 mvSubArguments.push_back(SoPHelper(ts,
    2614           0 :                          ft->Children[i], new OpStDev));
    2615           0 :                 break;
    2616             :             case ocSlope:
    2617             :                 mvSubArguments.push_back(SoPHelper(ts,
    2618           0 :                          ft->Children[i], new OpSlope));
    2619           0 :                 break;
    2620             :             case ocSTEYX:
    2621             :                 mvSubArguments.push_back(SoPHelper(ts,
    2622           0 :                          ft->Children[i], new OpSTEYX));
    2623           0 :                 break;
    2624             :             case ocZTest:
    2625             :                 mvSubArguments.push_back(SoPHelper(ts,
    2626           0 :                          ft->Children[i], new OpZTest));
    2627           0 :                 break;
    2628             :             case ocPi:
    2629             :                 mvSubArguments.push_back(
    2630             :                 SubArgument(new DynamicKernelPiArgument(ts,
    2631           0 :                 ft->Children[i])));
    2632           0 :                 break;
    2633             :             case ocRandom:
    2634             :                 mvSubArguments.push_back(
    2635             :                 SubArgument(new DynamicKernelRandomArgument(ts,
    2636           0 :                 ft->Children[i])));
    2637           0 :                 break;
    2638             :             case ocProduct:
    2639             :                 mvSubArguments.push_back(SoPHelper(ts,
    2640           0 :                          ft->Children[i], new OpProduct));
    2641           0 :                 break;
    2642             :             /*case ocHypGeomDist:
    2643             :                 mvSubArguments.push_back(SoPHelper(ts,
    2644             :                          ft->Children[i],new OpHypGeomDist));
    2645             :                 break;*/
    2646             :             case ocSumX2MY2:
    2647             :                 mvSubArguments.push_back(SoPHelper(ts,
    2648           0 :                          ft->Children[i],new OpSumX2MY2));
    2649           0 :                 break;
    2650             :             case ocSumX2DY2:
    2651             :                 mvSubArguments.push_back(SoPHelper(ts,
    2652           0 :                          ft->Children[i],new OpSumX2PY2));
    2653           0 :                  break;
    2654             :             /*case ocBetaInv:
    2655             :                 mvSubArguments.push_back(SoPHelper(ts,
    2656             :                          ft->Children[i],new OpBetainv));
    2657             :                  break;*/
    2658             :             case ocTTest:
    2659             :                 mvSubArguments.push_back(SoPHelper(ts,
    2660           0 :                          ft->Children[i], new OpTTest));
    2661           0 :                  break;
    2662             :             case ocTDist:
    2663             :                 mvSubArguments.push_back(SoPHelper(ts,
    2664           0 :                          ft->Children[i], new OpTDist));
    2665           0 :                  break;
    2666             :             /*case ocTInv:
    2667             :                 mvSubArguments.push_back(SoPHelper(ts,
    2668             :                          ft->Children[i], new OpTInv));
    2669             :                  break;*/
    2670             :             case ocSumXMY2:
    2671             :                 mvSubArguments.push_back(SoPHelper(ts,
    2672           0 :                          ft->Children[i],new OpSumXMY2));
    2673           0 :                  break;
    2674             :             case ocStDevP:
    2675             :                 mvSubArguments.push_back(SoPHelper(ts,
    2676           0 :                          ft->Children[i], new OpStDevP));
    2677           0 :                  break;
    2678             :             case ocCovar:
    2679             :                 mvSubArguments.push_back(SoPHelper(ts,
    2680           0 :                          ft->Children[i], new OpCovar));
    2681           0 :                  break;
    2682             :             case ocAnd:
    2683             :                 mvSubArguments.push_back(SoPHelper(ts,
    2684           0 :                          ft->Children[i], new OpAnd));
    2685           0 :                  break;
    2686             :             case ocVLookup:
    2687             :                 mvSubArguments.push_back(SoPHelper(ts,
    2688           0 :                          ft->Children[i], new OpVLookup));
    2689           0 :                  break;
    2690             :             case ocOr:
    2691             :                 mvSubArguments.push_back(SoPHelper(ts,
    2692           0 :                          ft->Children[i], new OpOr));
    2693           0 :                  break;
    2694             :             case ocNot:
    2695             :                 mvSubArguments.push_back(SoPHelper(ts,
    2696           0 :                          ft->Children[i], new OpNot));
    2697           0 :                  break;
    2698             :             case ocXor:
    2699             :                 mvSubArguments.push_back(SoPHelper(ts,
    2700           0 :                          ft->Children[i], new OpXor));
    2701           0 :                  break;
    2702             :             case ocDBMax:
    2703             :                 mvSubArguments.push_back(SoPHelper(ts,
    2704           0 :                          ft->Children[i], new OpDmax));
    2705           0 :                  break;
    2706             :             case ocDBMin:
    2707             :                 mvSubArguments.push_back(SoPHelper(ts,
    2708           0 :                          ft->Children[i], new OpDmin));
    2709           0 :                  break;
    2710             :             case ocDBProduct:
    2711             :                 mvSubArguments.push_back(SoPHelper(ts,
    2712           0 :                          ft->Children[i], new OpDproduct));
    2713           0 :                  break;
    2714             :             case ocDBAverage:
    2715             :                 mvSubArguments.push_back(SoPHelper(ts,
    2716           0 :                          ft->Children[i], new OpDaverage));
    2717           0 :                  break;
    2718             :             case ocDBStdDev:
    2719             :                 mvSubArguments.push_back(SoPHelper(ts,
    2720           0 :                          ft->Children[i], new OpDstdev));
    2721           0 :                  break;
    2722             :             case ocDBStdDevP:
    2723             :                 mvSubArguments.push_back(SoPHelper(ts,
    2724           0 :                          ft->Children[i], new OpDstdevp));
    2725           0 :                  break;
    2726             :             case ocDBSum:
    2727             :                 mvSubArguments.push_back(SoPHelper(ts,
    2728           0 :                          ft->Children[i], new OpDsum));
    2729           0 :                  break;
    2730             :             case ocDBVar:
    2731             :                 mvSubArguments.push_back(SoPHelper(ts,
    2732           0 :                          ft->Children[i], new OpDvar));
    2733           0 :                  break;
    2734             :             case ocDBVarP:
    2735             :                 mvSubArguments.push_back(SoPHelper(ts,
    2736           0 :                          ft->Children[i], new OpDvarp));
    2737           0 :                  break;
    2738             :             case ocAverageIf:
    2739             :                 mvSubArguments.push_back(SoPHelper(ts,
    2740           0 :                     ft->Children[i], new OpAverageIf));
    2741           0 :                 break;
    2742             :             case ocDBCount:
    2743             :                 mvSubArguments.push_back(SoPHelper(ts,
    2744           0 :                          ft->Children[i], new OpDcount));
    2745           0 :                  break;
    2746             :             case ocDBCount2:
    2747             :                 mvSubArguments.push_back(SoPHelper(ts,
    2748           0 :                          ft->Children[i], new OpDcount2));
    2749           0 :                  break;
    2750             :             case ocDeg:
    2751             :                 mvSubArguments.push_back(SoPHelper(ts,
    2752           0 :                          ft->Children[i], new OpDeg));
    2753           0 :                  break;
    2754             :             case ocRoundUp:
    2755             :                 mvSubArguments.push_back(SoPHelper(ts,
    2756           0 :                          ft->Children[i],new OpRoundUp));
    2757           0 :                 break;
    2758             :             case ocRoundDown:
    2759             :                 mvSubArguments.push_back(SoPHelper(ts,
    2760           0 :                          ft->Children[i],new OpRoundDown));
    2761           0 :                 break;
    2762             :             case ocInt:
    2763             :                 mvSubArguments.push_back(SoPHelper(ts,
    2764           0 :                          ft->Children[i],new OpInt));
    2765           0 :                 break;
    2766             :             case ocRad:
    2767             :                 mvSubArguments.push_back(SoPHelper(ts,
    2768           0 :                          ft->Children[i],new OpRadians));
    2769           0 :                 break;
    2770             :             case ocCountIf:
    2771             :                 mvSubArguments.push_back(SoPHelper(ts,
    2772           0 :                          ft->Children[i], new OpCountIf));
    2773           0 :                  break;
    2774             :             case ocIsEven:
    2775             :                 mvSubArguments.push_back(SoPHelper(ts,
    2776           0 :                          ft->Children[i],new OpIsEven));
    2777           0 :                 break;
    2778             :             case ocIsOdd:
    2779             :                 mvSubArguments.push_back(SoPHelper(ts,
    2780           0 :                          ft->Children[i],new OpIsOdd));
    2781           0 :                 break;
    2782             :             case ocFact:
    2783             :                 mvSubArguments.push_back(SoPHelper(ts,
    2784           0 :                          ft->Children[i], new OpFact));
    2785           0 :                  break;
    2786             :             case ocMinA:
    2787             :                 mvSubArguments.push_back(SoPHelper(ts,
    2788           0 :                          ft->Children[i], new OpMinA));
    2789           0 :                  break;
    2790             :             case ocCount2:
    2791             :                 mvSubArguments.push_back(SoPHelper(ts,
    2792           0 :                          ft->Children[i], new OpCountA));
    2793           0 :                  break;
    2794             :             case ocMaxA:
    2795             :                 mvSubArguments.push_back(SoPHelper(ts,
    2796           0 :                          ft->Children[i], new OpMaxA));
    2797           0 :                  break;
    2798             :             case ocAverageA:
    2799             :                 mvSubArguments.push_back(SoPHelper(ts,
    2800           0 :                          ft->Children[i], new OpAverageA));
    2801           0 :                  break;
    2802             :             case ocVarA:
    2803             :                 mvSubArguments.push_back(SoPHelper(ts,
    2804           0 :                          ft->Children[i], new OpVarA));
    2805           0 :                  break;
    2806             :             case ocVarPA:
    2807             :                 mvSubArguments.push_back(SoPHelper(ts,
    2808           0 :                          ft->Children[i], new OpVarPA));
    2809           0 :                  break;
    2810             :             case ocStDevA:
    2811             :                 mvSubArguments.push_back(SoPHelper(ts,
    2812           0 :                          ft->Children[i], new OpStDevA));
    2813           0 :                  break;
    2814             :             case ocStDevPA:
    2815             :                 mvSubArguments.push_back(SoPHelper(ts,
    2816           0 :                          ft->Children[i], new OpStDevPA));
    2817           0 :                  break;
    2818             :             case ocSecant:
    2819             :                 mvSubArguments.push_back(SoPHelper(ts,
    2820           0 :                          ft->Children[i], new OpSec));
    2821           0 :                 break;
    2822             :             case ocSecantHyp:
    2823             :                 mvSubArguments.push_back(SoPHelper(ts,
    2824           0 :                          ft->Children[i], new OpSecH));
    2825           0 :                 break;
    2826             :             case ocSumIf:
    2827             :                 mvSubArguments.push_back(SoPHelper(ts,
    2828           0 :                          ft->Children[i], new OpSumIf));
    2829           0 :                  break;
    2830             :             /*case ocNegSub:
    2831             :                 mvSubArguments.push_back(SoPHelper(ts,
    2832             :                          ft->Children[i], new OpNegSub));
    2833             :                  break;*/
    2834             :             case ocAveDev:
    2835             :                 mvSubArguments.push_back(SoPHelper(ts,
    2836           0 :                          ft->Children[i], new OpAveDev));
    2837           0 :                  break;
    2838             :             case ocExternal:
    2839           0 :                 if ( !(pChild->GetExternal().compareTo(OUString(
    2840           0 :                     "com.sun.star.sheet.addin.Analysis.getEffect"))))
    2841             :                 {
    2842           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpEffective));
    2843             :                 }
    2844           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2845           0 :                     "com.sun.star.sheet.addin.Analysis.getCumipmt"))))
    2846             :                 {
    2847           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCumipmt));
    2848             :                 }
    2849           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2850           0 :                     "com.sun.star.sheet.addin.Analysis.getNominal"))))
    2851             :                 {
    2852           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpNominal));
    2853             :                 }
    2854           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2855           0 :                     "com.sun.star.sheet.addin.Analysis.getCumprinc"))))
    2856             :                 {
    2857           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCumprinc));
    2858             :                 }
    2859           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2860           0 :                     "com.sun.star.sheet.addin.Analysis.getXnpv"))))
    2861             :                 {
    2862           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpXNPV));
    2863             :                 }
    2864           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2865           0 :                     "com.sun.star.sheet.addin.Analysis.getPricemat"))))
    2866             :                 {
    2867           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPriceMat));
    2868             :                 }
    2869           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2870           0 :                     "com.sun.star.sheet.addin.Analysis.getReceived"))))
    2871             :                 {
    2872           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpReceived));
    2873             :                 }
    2874           0 :                 else if( !(pChild->GetExternal().compareTo(OUString(
    2875           0 :                     "com.sun.star.sheet.addin.Analysis.getTbilleq"))))
    2876             :                 {
    2877           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpTbilleq));
    2878             :                 }
    2879           0 :                 else if( !(pChild->GetExternal().compareTo(OUString(
    2880           0 :                     "com.sun.star.sheet.addin.Analysis.getTbillprice"))))
    2881             :                 {
    2882           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpTbillprice));
    2883             :                 }
    2884           0 :                 else if( !(pChild->GetExternal().compareTo(OUString(
    2885           0 :                     "com.sun.star.sheet.addin.Analysis.getTbillyield"))))
    2886             :                 {
    2887           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpTbillyield));
    2888             :                 }
    2889           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    2890           0 :                     "com.sun.star.sheet.addin.Analysis.getFvschedule"))))
    2891             :                 {
    2892           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpFvschedule));
    2893             :                 }
    2894             :                 /*else if ( !(pChild->GetExternal().compareTo(OUString(
    2895             :                     "com.sun.star.sheet.addin.Analysis.getYield"))))
    2896             :                 {
    2897             :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpYield));
    2898             :                 }*/
    2899           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2900           0 :                    "com.sun.star.sheet.addin.Analysis.getYielddisc"))))
    2901             :                 {
    2902           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpYielddisc));
    2903             :                 }
    2904           0 :                 else    if ( !(pChild->GetExternal().compareTo(OUString(
    2905           0 :                      "com.sun.star.sheet.addin.Analysis.getYieldmat"))))
    2906             :                 {
    2907           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpYieldmat));
    2908             :                 }
    2909           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2910           0 :                      "com.sun.star.sheet.addin.Analysis.getAccrintm"))))
    2911             :                 {
    2912           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpAccrintm));
    2913             :                 }
    2914           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2915           0 :                      "com.sun.star.sheet.addin.Analysis.getCoupdaybs"))))
    2916             :                 {
    2917           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCoupdaybs));
    2918             :                 }
    2919           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2920           0 :                      "com.sun.star.sheet.addin.Analysis.getDollarde"))))
    2921             :                 {
    2922           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDollarde));
    2923             :                 }
    2924           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2925           0 :                      "com.sun.star.sheet.addin.Analysis.getDollarfr"))))
    2926             :                 {
    2927           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDollarfr));
    2928             :                 }
    2929           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2930           0 :                      "com.sun.star.sheet.addin.Analysis.getCoupdays"))))
    2931             :                 {
    2932           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCoupdays));
    2933             :                 }
    2934           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2935           0 :                      "com.sun.star.sheet.addin.Analysis.getCoupdaysnc"))))
    2936             :                 {
    2937           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCoupdaysnc));
    2938             :                 }
    2939           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2940           0 :                    "com.sun.star.sheet.addin.Analysis.getDisc"))))
    2941             :                 {
    2942           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDISC));
    2943             :                 }
    2944           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2945           0 :                    "com.sun.star.sheet.addin.Analysis.getIntrate"))))
    2946             :                 {
    2947           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpINTRATE));
    2948             :                 }
    2949           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2950           0 :                    "com.sun.star.sheet.addin.Analysis.getPrice"))))
    2951             :                 {
    2952             :                     mvSubArguments.push_back(SoPHelper(ts,
    2953           0 :                                       ft->Children[i], new OpPrice));
    2954             :                 }
    2955           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2956           0 :                      "com.sun.star.sheet.addin.Analysis.getCoupnum"))))
    2957             :                 {
    2958           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    2959           0 :                         new OpCoupnum));
    2960             :                 }
    2961             :                 /*else if ( !(pChild->GetExternal().compareTo(OUString(
    2962             :                    "com.sun.star.sheet.addin.Analysis.getDuration"))))
    2963             :                 {
    2964             :                     mvSubArguments.push_back(
    2965             :                         SoPHelper(ts, ft->Children[i], new OpDuration_ADD));
    2966             :                 }*/
    2967           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2968           0 :                    "com.sun.star.sheet.addin.Analysis.getAmordegrc"))))
    2969             :                 {
    2970           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    2971           0 :                         new OpAmordegrc));
    2972             :                 }
    2973           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2974           0 :                    "com.sun.star.sheet.addin.Analysis.getAmorlinc"))))
    2975             :                 {
    2976           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    2977           0 :                         new OpAmorlinc));
    2978             :                 }
    2979           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2980           0 :                      "com.sun.star.sheet.addin.Analysis.getMduration"))))
    2981             :                 {
    2982           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    2983           0 :                         new OpMDuration));
    2984             :                 }
    2985           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2986           0 :                      "com.sun.star.sheet.addin.Analysis.getXirr"))))
    2987             :                 {
    2988           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    2989           0 :                         new OpXirr));
    2990             :                 }
    2991           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2992           0 :                     "com.sun.star.sheet.addin.Analysis.getOddlprice"))))
    2993             :                 {
    2994             :                     mvSubArguments.push_back(SoPHelper(ts,
    2995           0 :                         ft->Children[i], new OpOddlprice));
    2996             :                 }
    2997           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    2998           0 :                      "com.sun.star.sheet.addin.Analysis.getOddlyield"))))
    2999             :                 {
    3000           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    3001           0 :                         new OpOddlyield));
    3002             :                 }
    3003           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    3004           0 :                     "com.sun.star.sheet.addin.Analysis.getPricedisc"))))
    3005             :                 {
    3006             :                     mvSubArguments.push_back(SoPHelper(ts,
    3007           0 :                         ft->Children[i], new OpPriceDisc));
    3008             :                 }
    3009           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    3010           0 :                      "com.sun.star.sheet.addin.Analysis.getCouppcd"))))
    3011             :                 {
    3012           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    3013           0 :                         new OpCouppcd));
    3014             :                 }
    3015           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    3016           0 :                      "com.sun.star.sheet.addin.Analysis.getCoupncd"))))
    3017             :                 {
    3018           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    3019           0 :                         new OpCoupncd));
    3020             :                 }
    3021           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    3022           0 :                      "com.sun.star.sheet.addin.Analysis.getAccrint"))))
    3023             :                 {
    3024           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    3025           0 :                         new OpAccrint));
    3026             :                 }
    3027           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    3028           0 :                      "com.sun.star.sheet.addin.Analysis.getSqrtpi"))))
    3029             :                 {
    3030           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    3031           0 :                         new OpSqrtPi));
    3032             :                 }
    3033           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    3034           0 :                      "com.sun.star.sheet.addin.Analysis.getConvert"))))
    3035             :                 {
    3036           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    3037           0 :                         new OpConvert));
    3038           0 :                 }else if ( !(pChild->GetExternal().compareTo(OUString(
    3039           0 :                      "com.sun.star.sheet.addin.Analysis.getIseven"))))
    3040             :                 {
    3041           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    3042           0 :                         new OpIsEven));
    3043             :                 }
    3044           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    3045           0 :                      "com.sun.star.sheet.addin.Analysis.getIsodd"))))
    3046             :                 {
    3047           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    3048           0 :                         new OpIsOdd));
    3049             :                 }
    3050           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    3051           0 :                      "com.sun.star.sheet.addin.Analysis.getMround"))))
    3052             :                 {
    3053           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    3054           0 :                         new OpMROUND));
    3055             :                 }
    3056           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    3057           0 :                     "com.sun.star.sheet.addin.Analysis.getQuotient"))))
    3058             :                 {
    3059           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    3060           0 :                         new OpQuotient));
    3061             :                 }
    3062           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    3063           0 :                     "com.sun.star.sheet.addin.Analysis.getSeriessum"))))
    3064             :                 {
    3065           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    3066           0 :                         new OpSeriesSum));
    3067             :                 }
    3068           0 :                 else if ( !(pChild->GetExternal().compareTo(OUString(
    3069           0 :                     "com.sun.star.sheet.addin.Analysis.getBesselj"))))
    3070             :                 {
    3071           0 :                     mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
    3072           0 :                         new OpBesselj));
    3073             :                 }
    3074             :                 else
    3075           0 :                     throw UnhandledToken(pChild, "unhandled opcode");
    3076           0 :                 break;
    3077             : 
    3078             :             default:
    3079           0 :                 throw UnhandledToken(pChild, "unhandled opcode");
    3080             :         };
    3081           0 :     }
    3082           0 : }
    3083             : 
    3084             : class DynamicKernel : public CompiledFormula
    3085             : {
    3086             : public:
    3087           0 :     DynamicKernel(FormulaTreeNodeRef r):mpRoot(r),
    3088           0 :         mpProgram(NULL), mpKernel(NULL), mpResClmem(NULL) {}
    3089             :     static DynamicKernel *create(ScDocument& rDoc,
    3090             :                                  const ScAddress& rTopPos,
    3091             :                                  ScTokenArray& rCode);
    3092             :     /// OpenCL code generation
    3093           0 :     void CodeGen() {
    3094             :         // Travese the tree of expression and declare symbols used
    3095             :         const DynamicKernelArgument *DK= mSyms.DeclRefArg<
    3096           0 :             DynamicKernelSoPArguments>(mpRoot, new OpNop);
    3097             : 
    3098           0 :         std::stringstream decl;
    3099           0 :         if (OpenclDevice::gpuEnv.mnKhrFp64Flag) {
    3100           0 :             decl << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
    3101           0 :         } else if (OpenclDevice::gpuEnv.mnAmdFp64Flag) {
    3102           0 :             decl << "#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
    3103             :         }
    3104             :         // preambles
    3105           0 :         decl << publicFunc;
    3106           0 :         DK->DumpInlineFun(inlineDecl,inlineFun);
    3107           0 :         for(std::set<std::string>::iterator set_iter=inlineDecl.begin();
    3108           0 :                                          set_iter!=inlineDecl.end();++set_iter)
    3109             :         {
    3110           0 :             decl<<*set_iter;
    3111             :         }
    3112             : 
    3113           0 :         for(std::set<std::string>::iterator set_iter=inlineFun.begin();
    3114           0 :                                          set_iter!=inlineFun.end();++set_iter)
    3115             :         {
    3116           0 :             decl<<*set_iter;
    3117             :         }
    3118           0 :         mSyms.DumpSlidingWindowFunctions(decl);
    3119           0 :         mKernelSignature = DK->DumpOpName();
    3120           0 :         decl << "__kernel void DynamicKernel" << mKernelSignature;
    3121           0 :         decl << "(__global double *result, ";
    3122           0 :         DK->GenSlidingWindowDecl(decl);
    3123           0 :         decl << ") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
    3124           0 :             DK->GenSlidingWindowDeclRef(false) << ";\n}\n";
    3125           0 :         mFullProgramSrc = decl.str();
    3126           0 :         SAL_INFO("sc.opencl.source", "Program to be compiled:\n" << mFullProgramSrc);
    3127           0 :     }
    3128             :     /// Produce kernel hash
    3129           0 :     std::string GetMD5(void)
    3130             :     {
    3131             : #ifdef MD5_KERNEL
    3132           0 :         if (mKernelHash.empty()) {
    3133           0 :             std::stringstream md5s;
    3134             :             // Compute MD5SUM of kernel body to obtain the name
    3135             :             sal_uInt8 result[RTL_DIGEST_LENGTH_MD5];
    3136             :             rtl_digest_MD5(
    3137           0 :                 mFullProgramSrc.c_str(),
    3138           0 :                 mFullProgramSrc.length(), result,
    3139           0 :                 RTL_DIGEST_LENGTH_MD5);
    3140           0 :             for(int i=0; i < RTL_DIGEST_LENGTH_MD5; i++) {
    3141           0 :                 md5s << std::hex << (int)result[i];
    3142             :             }
    3143           0 :             mKernelHash = md5s.str();
    3144             :         }
    3145           0 :         return mKernelHash;
    3146             : #else
    3147             :         return "";
    3148             : #endif
    3149             :     }
    3150             :     /// Create program, build, and create kerenl
    3151             :     /// TODO cache results based on kernel body hash
    3152             :     /// TODO: abstract OpenCL part out into OpenCL wrapper.
    3153             :     void CreateKernel(void);
    3154             :     /// Prepare buffers, marshal them to GPU, and launch the kernel
    3155             :     /// TODO: abstract OpenCL part out into OpenCL wrapper.
    3156           0 :     void Launch(size_t nr)
    3157             :     {
    3158             :         // Obtain cl context
    3159             :         KernelEnv kEnv;
    3160           0 :         OpenclDevice::setKernelEnv(&kEnv);
    3161             :         cl_int err;
    3162             :         // The results
    3163             :         mpResClmem = clCreateBuffer(kEnv.mpkContext,
    3164             :                 (cl_mem_flags) CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR,
    3165           0 :                 nr*sizeof(double), NULL, &err);
    3166           0 :         if (CL_SUCCESS != err)
    3167           0 :             throw OpenCLError(err, __FILE__, __LINE__);
    3168           0 :         err = clSetKernelArg(mpKernel, 0, sizeof(cl_mem), (void*)&mpResClmem);
    3169           0 :         if (CL_SUCCESS != err)
    3170           0 :             throw OpenCLError(err, __FILE__, __LINE__);
    3171             :         // The rest of buffers
    3172           0 :         mSyms.Marshal(mpKernel, nr, mpProgram);
    3173           0 :         size_t global_work_size[] = {nr};
    3174             :         err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, mpKernel, 1, NULL,
    3175           0 :             global_work_size, NULL, 0, NULL, NULL);
    3176           0 :         if (CL_SUCCESS != err)
    3177           0 :             throw OpenCLError(err, __FILE__, __LINE__);
    3178           0 :     }
    3179             :     virtual ~DynamicKernel();
    3180           0 :     cl_mem GetResultBuffer(void) const { return mpResClmem; }
    3181             : 
    3182             : private:
    3183             : 
    3184             :     FormulaTreeNodeRef mpRoot;
    3185             :     SymbolTable mSyms;
    3186             :     std::string mKernelSignature, mKernelHash;
    3187             :     std::string mFullProgramSrc;
    3188             :     cl_program mpProgram;
    3189             :     cl_kernel mpKernel;
    3190             :     cl_mem mpResClmem; // Results
    3191             :     std::set<std::string> inlineDecl;
    3192             :     std::set<std::string> inlineFun;
    3193             : };
    3194             : 
    3195           0 : DynamicKernel::~DynamicKernel()
    3196             : {
    3197           0 :     if (mpResClmem) {
    3198           0 :         clReleaseMemObject(mpResClmem);
    3199             :     }
    3200           0 :     if (mpKernel) {
    3201           0 :         clReleaseKernel(mpKernel);
    3202             :     }
    3203             :     // mpProgram is not going to be released here -- it's cached.
    3204           0 : }
    3205             : /// Build code
    3206           0 : void DynamicKernel::CreateKernel(void)
    3207             : {
    3208           0 :     if (mpKernel)
    3209             :         // already created.
    3210           0 :         return;
    3211             : 
    3212             :     cl_int err;
    3213           0 :     std::string kname = "DynamicKernel"+mKernelSignature;
    3214             :     // Compile kernel here!!!
    3215             :     // Obtain cl context
    3216             :     KernelEnv kEnv;
    3217           0 :     OpenclDevice::setKernelEnv(&kEnv);
    3218           0 :     const char *src = mFullProgramSrc.c_str();
    3219           0 :     static std::string lastOneKernelHash = "";
    3220           0 :     static std::string lastSecondKernelHash = "";
    3221             :     static cl_program lastOneProgram = NULL;
    3222             :     static cl_program lastSecondProgram = NULL;
    3223           0 :     std::string KernelHash = mKernelSignature+GetMD5();
    3224           0 :     if (lastOneKernelHash == KernelHash && lastOneProgram)
    3225             :     {
    3226           0 :         mpProgram = lastOneProgram;
    3227             :     }
    3228           0 :     else if(lastSecondKernelHash == KernelHash && lastSecondProgram)
    3229             :     {
    3230           0 :         mpProgram = lastSecondProgram;
    3231             :     }
    3232             :     else
    3233             :     {   // doesn't match the last compiled formula.
    3234             : 
    3235           0 :         if (lastSecondProgram) {
    3236           0 :             clReleaseProgram(lastSecondProgram);
    3237             :         }
    3238           0 :         if (OpenclDevice::buildProgramFromBinary("",
    3239           0 :                     &OpenclDevice::gpuEnv, KernelHash.c_str(), 0)) {
    3240           0 :             mpProgram = OpenclDevice::gpuEnv.mpArryPrograms[0];
    3241           0 :             OpenclDevice::gpuEnv.mpArryPrograms[0] = NULL;
    3242             :         } else {
    3243             :             mpProgram = clCreateProgramWithSource(kEnv.mpkContext, 1,
    3244           0 :                     &src, NULL, &err);
    3245           0 :             if (err != CL_SUCCESS)
    3246           0 :                 throw OpenCLError(err, __FILE__, __LINE__);
    3247             :             err = clBuildProgram(mpProgram, 1,
    3248           0 :                     OpenclDevice::gpuEnv.mpArryDevsID, "", NULL, NULL);
    3249           0 :             if (err != CL_SUCCESS)
    3250             :             {
    3251             : #if OSL_DEBUG_LEVEL > 0
    3252             :                 if (err == CL_BUILD_PROGRAM_FAILURE)
    3253             :                 {
    3254             :                     cl_build_status stat;
    3255             :                     cl_int e = clGetProgramBuildInfo(
    3256             :                         mpProgram, OpenclDevice::gpuEnv.mpArryDevsID[0],
    3257             :                         CL_PROGRAM_BUILD_STATUS, sizeof (cl_build_status),
    3258             :                         &stat, 0);
    3259             :                     SAL_WARN_IF(
    3260             :                         e != CL_SUCCESS, "sc.opencl",
    3261             :                         "after CL_BUILD_PROGRAM_FAILURE,"
    3262             :                             " clGetProgramBuildInfo(CL_PROGRAM_BUILD_STATUS)"
    3263             :                             " fails with " << e);
    3264             :                     if (e == CL_SUCCESS)
    3265             :                     {
    3266             :                         size_t n;
    3267             :                         e = clGetProgramBuildInfo(
    3268             :                             mpProgram, OpenclDevice::gpuEnv.mpArryDevsID[0],
    3269             :                             CL_PROGRAM_BUILD_LOG, 0, 0, &n);
    3270             :                         SAL_WARN_IF(
    3271             :                             e != CL_SUCCESS || n == 0, "sc.opencl",
    3272             :                             "after CL_BUILD_PROGRAM_FAILURE,"
    3273             :                                 " clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG)"
    3274             :                                 " fails with " << e << ", n=" << n);
    3275             :                         if (e == CL_SUCCESS && n != 0)
    3276             :                         {
    3277             :                             std::vector<char> log(n);
    3278             :                             e = clGetProgramBuildInfo(
    3279             :                                 mpProgram, OpenclDevice::gpuEnv.mpArryDevsID[0],
    3280             :                                 CL_PROGRAM_BUILD_LOG, n, &log[0], 0);
    3281             :                             SAL_WARN_IF(
    3282             :                                 e != CL_SUCCESS || n == 0, "sc.opencl",
    3283             :                                 "after CL_BUILD_PROGRAM_FAILURE,"
    3284             :                                     " clGetProgramBuildInfo("
    3285             :                                     "CL_PROGRAM_BUILD_LOG) fails with " << e);
    3286             :                             if (e == CL_SUCCESS)
    3287             :                                 SAL_WARN(
    3288             :                                     "sc.opencl",
    3289             :                                     "CL_BUILD_PROGRAM_FAILURE, status " << stat
    3290             :                                         << ", log \"" << &log[0] << "\"");
    3291             :                         }
    3292             :                     }
    3293             :                 }
    3294             : #endif
    3295           0 :                 throw OpenCLError(err, __FILE__, __LINE__);
    3296             :             }
    3297             :             // Generate binary out of compiled kernel.
    3298             :             OpenclDevice::generatBinFromKernelSource(mpProgram,
    3299           0 :                     (mKernelSignature+GetMD5()).c_str());
    3300             :         }
    3301           0 :         lastSecondKernelHash = lastOneKernelHash;
    3302           0 :         lastSecondProgram = lastOneProgram;
    3303           0 :         lastOneKernelHash = KernelHash;
    3304           0 :         lastOneProgram = mpProgram;
    3305             :     }
    3306           0 :     mpKernel = clCreateKernel(mpProgram, kname.c_str(), &err);
    3307           0 :     if (err != CL_SUCCESS)
    3308           0 :         throw OpenCLError(err, __FILE__, __LINE__);
    3309             : }
    3310             : // Symbol lookup. If there is no such symbol created, allocate one
    3311             : // kernel with argument with unique name and return so.
    3312             : // The template argument T must be a subclass of DynamicKernelArgument
    3313             : template <typename T>
    3314           0 : const DynamicKernelArgument *SymbolTable::DeclRefArg(
    3315             :                   FormulaTreeNodeRef t, SlidingFunctionBase* pCodeGen)
    3316             : {
    3317           0 :     FormulaToken *ref = t->GetFormulaToken();
    3318           0 :     ArgumentMap::iterator it = mSymbols.find(ref);
    3319           0 :     if (it == mSymbols.end()) {
    3320             :         // Allocate new symbols
    3321           0 :         std::stringstream ss;
    3322           0 :         ss << "tmp"<< mCurId++;
    3323           0 :         boost::shared_ptr<DynamicKernelArgument> new_arg(new T(ss.str(), t, pCodeGen));
    3324           0 :         mSymbols[ref] = new_arg;
    3325           0 :         mParams.push_back(new_arg);
    3326           0 :         return new_arg.get();
    3327             :     } else {
    3328           0 :         return it->second.get();
    3329             :     }
    3330             : }
    3331             : 
    3332             : class FormulaGroupInterpreterOpenCL : public FormulaGroupInterpreter
    3333             : {
    3334             : public:
    3335           0 :     FormulaGroupInterpreterOpenCL() :
    3336           0 :         FormulaGroupInterpreter()
    3337             :     {
    3338           0 :     }
    3339           0 :     virtual ~FormulaGroupInterpreterOpenCL()
    3340           0 :     {
    3341           0 :     }
    3342             : 
    3343             :     virtual ScMatrixRef inverseMatrix( const ScMatrix& rMat ) SAL_OVERRIDE;
    3344             :     virtual CompiledFormula* createCompiledFormula(ScDocument& rDoc,
    3345             :                                                    const ScAddress& rTopPos,
    3346             :                                                    ScFormulaCellGroup& rGroup,
    3347             :                                                    ScTokenArray& rCode) SAL_OVERRIDE;
    3348             :     virtual bool interpret( ScDocument& rDoc, const ScAddress& rTopPos,
    3349             :                             ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode ) SAL_OVERRIDE;
    3350             : };
    3351             : 
    3352           0 : ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix( const ScMatrix& )
    3353             : {
    3354           0 :     return NULL;
    3355             : }
    3356             : 
    3357           0 : DynamicKernel* DynamicKernel::create(ScDocument& /* rDoc */,
    3358             :                                      const ScAddress& /* rTopPos */,
    3359             :                                      ScTokenArray& rCode)
    3360             : {
    3361             :     // Constructing "AST"
    3362           0 :     FormulaTokenIterator aCode(rCode);
    3363           0 :     std::list<FormulaToken*> aTokenList;
    3364           0 :     std::map<FormulaToken*, FormulaTreeNodeRef> aHashMap;
    3365             :     FormulaToken*  pCur;
    3366           0 :     while( (pCur = (FormulaToken*)(aCode.Next()) ) != NULL)
    3367             :     {
    3368           0 :         OpCode eOp = pCur->GetOpCode();
    3369           0 :         if ( eOp != ocPush )
    3370             :         {
    3371           0 :             FormulaTreeNodeRef pCurNode(new FormulaTreeNode(pCur));
    3372           0 :             sal_uInt8 nParamCount =  pCur->GetParamCount();
    3373           0 :             for (sal_uInt8 i = 0; i < nParamCount; i++)
    3374             :             {
    3375           0 :                 FormulaToken* pTempFormula = aTokenList.back();
    3376           0 :                 aTokenList.pop_back();
    3377           0 :                 if (pTempFormula->GetOpCode() != ocPush)
    3378             :                 {
    3379           0 :                     if (aHashMap.find(pTempFormula)==aHashMap.end())
    3380           0 :                         return NULL;
    3381           0 :                     pCurNode->Children.push_back(aHashMap[pTempFormula]);
    3382             :                 }
    3383             :                 else
    3384             :                 {
    3385             :                     FormulaTreeNodeRef pChildTreeNode =
    3386             :                       FormulaTreeNodeRef(
    3387           0 :                                new FormulaTreeNode(pTempFormula));
    3388           0 :                     pCurNode->Children.push_back(pChildTreeNode);
    3389             :                 }
    3390             :             }
    3391           0 :             std::reverse(pCurNode->Children.begin(), pCurNode->Children.end());
    3392           0 :             aHashMap[pCur] = pCurNode;
    3393             :         }
    3394           0 :         aTokenList.push_back(pCur);
    3395             :     }
    3396             : 
    3397           0 :     FormulaTreeNodeRef Root = FormulaTreeNodeRef(new FormulaTreeNode(NULL));
    3398           0 :     Root->Children.push_back(aHashMap[aTokenList.back()]);
    3399             : 
    3400           0 :     DynamicKernel* pDynamicKernel = new DynamicKernel(Root);
    3401             : 
    3402           0 :     if (!pDynamicKernel)
    3403           0 :         return NULL;
    3404             : 
    3405             :     // OpenCL source code generation and kernel compilation
    3406             :     try
    3407             :     {
    3408           0 :         pDynamicKernel->CodeGen();
    3409           0 :         pDynamicKernel->CreateKernel();
    3410             :     }
    3411           0 :     catch (const UnhandledToken &ut)
    3412             :     {
    3413           0 :         std::cerr << "\nDynamic formual compiler: unhandled token: ";
    3414           0 :         std::cerr << ut.mMessage << " at ";
    3415           0 :         std::cerr << ut.mFile << ":" << ut.mLineNumber << "\n";
    3416             : #ifdef NO_FALLBACK_TO_SWINTERP
    3417             :         assert(false);
    3418             : #else
    3419           0 :         free(pDynamicKernel);
    3420           0 :         return NULL;
    3421             : #endif
    3422             :     }
    3423           0 :     catch (...)
    3424             :     {
    3425           0 :         std::cerr << "Dynamic formula compiler: unhandled compiler error\n";
    3426           0 :         return NULL;
    3427             :     }
    3428           0 :     return pDynamicKernel;
    3429             : }
    3430             : 
    3431           0 : CompiledFormula* FormulaGroupInterpreterOpenCL::createCompiledFormula(ScDocument& rDoc,
    3432             :                                                                       const ScAddress& rTopPos,
    3433             :                                                                       ScFormulaCellGroup& rGroup,
    3434             :                                                                       ScTokenArray& rCode)
    3435             : {
    3436           0 :     ScTokenArray aConvertedCode;
    3437           0 :     ScGroupTokenConverter aConverter(aConvertedCode, rDoc, *rGroup.mpTopCell, rTopPos);
    3438           0 :     if (!aConverter.convert(rCode) || aConvertedCode.GetLen() == 0)
    3439           0 :         return NULL;
    3440             : 
    3441           0 :     SymbolTable::nR = rGroup.mnLength;
    3442             : 
    3443           0 :     return DynamicKernel::create(rDoc, rTopPos, aConvertedCode);
    3444             : }
    3445             : 
    3446           0 : bool FormulaGroupInterpreterOpenCL::interpret( ScDocument& rDoc,
    3447             :     const ScAddress& rTopPos, ScFormulaCellGroupRef& xGroup,
    3448             :     ScTokenArray& rCode )
    3449             : {
    3450           0 :     DynamicKernel *pKernel = NULL;
    3451           0 :     boost::scoped_ptr<DynamicKernel> pLocalKernel;
    3452             : 
    3453             : #if ENABLE_THREADED_OPENCL_KERNEL_COMPILATION
    3454             :     if (xGroup->meKernelState == sc::OpenCLKernelCompilationScheduled ||
    3455             :         xGroup->meKernelState == sc::OpenCLKernelBinaryCreated)
    3456             :     {
    3457             :         if (xGroup->meKernelState == sc::OpenCLKernelCompilationScheduled)
    3458             :         {
    3459             :             ScFormulaCellGroup::sxCompilationThread->maCompilationDoneCondition.wait();
    3460             :             ScFormulaCellGroup::sxCompilationThread->maCompilationDoneCondition.reset();
    3461             :         }
    3462             : 
    3463             :         pKernel = static_cast<DynamicKernel*>(xGroup->mpCompiledFormula);
    3464             :     }
    3465             :     else
    3466             :     {
    3467             :         assert(xGroup->meCalcState == sc::GroupCalcRunning);
    3468             :         pKernel = static_cast<DynamicKernel*>(createCompiledFormula(rDoc, rTopPos, *xGroup, rCode));
    3469             :         pLocalKernel.reset(pKernel); // to be deleted when done.
    3470             :     }
    3471             : #else
    3472           0 :     pKernel = static_cast<DynamicKernel*>(createCompiledFormula(rDoc, rTopPos, *xGroup, rCode));
    3473           0 :     pLocalKernel.reset(pKernel); // to be deleted when done.
    3474             : #endif
    3475             : 
    3476           0 :     if (!pKernel)
    3477           0 :         return false;
    3478             : 
    3479             :     try {
    3480             :         // Obtain cl context
    3481             :         KernelEnv kEnv;
    3482           0 :         OpenclDevice::setKernelEnv(&kEnv);
    3483             :         // Run the kernel.
    3484           0 :         pKernel->Launch(xGroup->mnLength);
    3485             :         // Map results back
    3486           0 :         cl_mem res = pKernel->GetResultBuffer();
    3487             :         cl_int err;
    3488             :         double *resbuf = (double*)clEnqueueMapBuffer(kEnv.mpkCmdQueue,
    3489             :                 res,
    3490             :                 CL_TRUE, CL_MAP_READ, 0,
    3491           0 :                 xGroup->mnLength*sizeof(double), 0, NULL, NULL,
    3492           0 :                 &err);
    3493           0 :         if (err != CL_SUCCESS)
    3494           0 :             throw OpenCLError(err, __FILE__, __LINE__);
    3495           0 :         rDoc.SetFormulaResults(rTopPos, resbuf, xGroup->mnLength);
    3496           0 :         err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, res, resbuf, 0, NULL, NULL);
    3497           0 :         if (err != CL_SUCCESS)
    3498           0 :             throw OpenCLError(err, __FILE__, __LINE__);
    3499             :     }
    3500           0 :     catch (const UnhandledToken &ut) {
    3501           0 :         std::cerr << "\nDynamic formual compiler: unhandled token: ";
    3502           0 :         std::cerr << ut.mMessage << "\n";
    3503             : #ifdef NO_FALLBACK_TO_SWINTERP
    3504             :         assert(false);
    3505             :         return true;
    3506             : #else
    3507           0 :         return false;
    3508             : #endif
    3509             :     }
    3510           0 :     catch (const OpenCLError &oce) {
    3511           0 :         std::cerr << "Dynamic formula compiler: OpenCL error: ";
    3512           0 :         std::cerr << oce.mError;
    3513           0 :         std::cerr <<" at ";
    3514           0 :         std::cerr << oce.mFile << ":" << oce.mLineNumber << "\n";
    3515             : #ifdef NO_FALLBACK_TO_SWINTERP
    3516             :         assert(false);
    3517             :         return true;
    3518             : #else
    3519           0 :         return false;
    3520             : #endif
    3521             :     }
    3522           0 :     catch (const Unhandled &uh) {
    3523           0 :         std::cerr << "Dynamic formula compiler: unhandled case:";
    3524           0 :         std::cerr <<" at ";
    3525           0 :         std::cerr << uh.mFile << ":" << uh.mLineNumber << "\n";
    3526             : #ifdef NO_FALLBACK_TO_SWINTERP
    3527             :         assert(false);
    3528             :         return true;
    3529             : #else
    3530           0 :         return false;
    3531             : #endif
    3532             :     }
    3533           0 :     catch (...) {
    3534           0 :         std::cerr << "Dynamic formula compiler: unhandled compiler error\n";
    3535             : #ifdef NO_FALLBACK_TO_SWINTERP
    3536             :         assert(false);
    3537             :         return true;
    3538             : #else
    3539           0 :         return false;
    3540             : #endif
    3541             :     }
    3542           0 :     return true;
    3543             : }
    3544             : 
    3545             : }} // namespace sc::opencl
    3546             : 
    3547             : extern "C" {
    3548             : 
    3549             : SAL_DLLPUBLIC_EXPORT sc::FormulaGroupInterpreter* SAL_CALL
    3550           0 :                    createFormulaGroupOpenCLInterpreter()
    3551             : {
    3552           0 :     return new sc::opencl::FormulaGroupInterpreterOpenCL();
    3553             : }
    3554             : 
    3555           0 : SAL_DLLPUBLIC_EXPORT size_t getOpenCLPlatformCount()
    3556             : {
    3557           0 :     return sc::opencl::getOpenCLPlatformCount();
    3558             : }
    3559             : 
    3560           0 : SAL_DLLPUBLIC_EXPORT void SAL_CALL fillOpenCLInfo(
    3561             :                sc::OpenclPlatformInfo* pInfos, size_t nInfoSize)
    3562             : {
    3563             :     const std::vector<sc::OpenclPlatformInfo>& rPlatforms =
    3564           0 :                  sc::opencl::fillOpenCLInfo();
    3565           0 :     size_t n = std::min(rPlatforms.size(), nInfoSize);
    3566           0 :     for (size_t i = 0; i < n; ++i)
    3567           0 :         pInfos[i] = rPlatforms[i];
    3568           0 : }
    3569             : 
    3570         222 : SAL_DLLPUBLIC_EXPORT bool SAL_CALL switchOpenClDevice(
    3571             :                        const OUString* pDeviceId, bool bAutoSelect,
    3572             :                        bool bForceEvaluation)
    3573             : {
    3574         222 :     return sc::opencl::switchOpenclDevice(pDeviceId, bAutoSelect, bForceEvaluation);
    3575             : }
    3576             : 
    3577           0 : SAL_DLLPUBLIC_EXPORT void SAL_CALL getOpenCLDeviceInfo(size_t* pDeviceId, size_t* pPlatformId)
    3578             : {
    3579           0 :     sc::opencl::getOpenCLDeviceInfo(*pDeviceId, *pPlatformId);
    3580           0 : }
    3581             : 
    3582           3 : } // extern "C"
    3583             : 
    3584             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10