LCOV - code coverage report
Current view: top level - sc/source/core/opencl - formulagroupcl.cxx (source / functions) Hit Total Coverage
Test: commit c8344322a7af75b84dd3ca8f78b05543a976dfd5 Lines: 1 2011 0.1 %
Date: 2015-06-13 12:38:46 Functions: 2 272 0.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #include "formulagroup.hxx"
      11             : #include "formulagroupcl.hxx"
      12             : #include "clkernelthread.hxx"
      13             : #include "grouptokenconverter.hxx"
      14             : #include "document.hxx"
      15             : #include "formulacell.hxx"
      16             : #include "tokenarray.hxx"
      17             : #include "compiler.hxx"
      18             : #include "interpre.hxx"
      19             : #include <formula/random.hxx>
      20             : #include <formula/vectortoken.hxx>
      21             : #include "scmatrix.hxx"
      22             : 
      23             : #include <opencl/openclwrapper.hxx>
      24             : 
      25             : #include "op_financial.hxx"
      26             : #include "op_database.hxx"
      27             : #include "op_math.hxx"
      28             : #include "op_logical.hxx"
      29             : #include "op_statistical.hxx"
      30             : #include "op_array.hxx"
      31             : #include "op_spreadsheet.hxx"
      32             : #include "op_addin.hxx"
      33             : 
      34             : /// CONFIGURATIONS
      35             : #define REDUCE_THRESHOLD 201  // set to 4 for correctness testing. priority 1
      36             : #define UNROLLING_FACTOR 16  // set to 4 for correctness testing (if no reduce)
      37             : 
      38             : static const char* publicFunc =
      39             :  "\n"
      40             :  "#define errIllegalFPOperation 503 // #NUM!\n"
      41             :  "#define errNoValue 519 // #VALUE!\n"
      42             :  "#define errDivisionByZero 532 // #DIV/0!\n"
      43             :  "\n"
      44             :  "double CreateDoubleError(ulong nErr)\n"
      45             :  "{\n"
      46             :  "    return nan(nErr);\n"
      47             :  "}\n"
      48             :  "\n"
      49             :  "uint GetDoubleErrorValue(double fVal)\n"
      50             :  "{\n"
      51             :  "    if (isfinite(fVal))\n"
      52             :  "        return 0;\n"
      53             :  "    if (isinf(fVal))\n"
      54             :  "        return errIllegalFPOperation; // normal INF\n"
      55             :  "    if (as_ulong(fVal) & 0XFFFF0000u)\n"
      56             :  "        return errNoValue;            // just a normal NAN\n"
      57             :  "    return (as_ulong(fVal) & 0XFFFF); // any other error\n"
      58             :  "}\n"
      59             :  "\n"
      60             :  "int isNan(double a) { return isnan(a); }\n"
      61             :  "double fsum_count(double a, double b, __private int *p) {\n"
      62             :  "    bool t = isNan(a);\n"
      63             :  "    (*p) += t?0:1;\n"
      64             :  "    return t?b:a+b;\n"
      65             :  "}\n"
      66             :  "double fsum(double a, double b) { return isNan(a)?b:a+b; }\n"
      67             :  "double legalize(double a, double b) { return isNan(a)?b:a;}\n"
      68             :  "double fsub(double a, double b) { return a-b; }\n"
      69             :  "double fdiv(double a, double b) { return a/b; }\n"
      70             :  "double strequal(unsigned a, unsigned b) { return (a==b)?1.0:0; }\n"
      71             :  ;
      72             : 
      73             : #ifdef WIN32
      74             : #ifndef NAN
      75             : namespace {
      76             : 
      77             : const unsigned long __nan[2] = {0xffffffff, 0x7fffffff};
      78             : 
      79             : }
      80             : #define NAN (*(const double*) __nan)
      81             : #endif
      82             : #endif
      83             : 
      84             : #include <list>
      85             : #include <map>
      86             : #include <iostream>
      87             : #include <sstream>
      88             : #include <algorithm>
      89             : 
      90             : #include <rtl/digest.h>
      91             : 
      92             : #include <boost/scoped_ptr.hpp>
      93             : #include <boost/scoped_array.hpp>
      94             : 
      95             : using namespace formula;
      96             : 
      97             : namespace sc { namespace opencl {
      98             : 
      99             : namespace {
     100             : 
     101           0 : std::string StackVarEnumToString(StackVar const e)
     102             : {
     103           0 :     switch (e)
     104             :     {
     105             : #define CASE(x) case sv##x: return #x
     106           0 :         CASE(Byte);
     107           0 :         CASE(Double);
     108           0 :         CASE(String);
     109           0 :         CASE(SingleRef);
     110           0 :         CASE(DoubleRef);
     111           0 :         CASE(Matrix);
     112           0 :         CASE(Index);
     113           0 :         CASE(Jump);
     114           0 :         CASE(External);
     115           0 :         CASE(FAP);
     116           0 :         CASE(JumpMatrix);
     117           0 :         CASE(RefList);
     118           0 :         CASE(EmptyCell);
     119           0 :         CASE(MatrixCell);
     120           0 :         CASE(HybridCell);
     121           0 :         CASE(HybridValueCell);
     122           0 :         CASE(ExternalSingleRef);
     123           0 :         CASE(ExternalDoubleRef);
     124           0 :         CASE(ExternalName);
     125           0 :         CASE(SingleVectorRef);
     126           0 :         CASE(DoubleVectorRef);
     127           0 :         CASE(Subroutine);
     128           0 :         CASE(Error);
     129           0 :         CASE(Missing);
     130           0 :         CASE(Sep);
     131           0 :         CASE(Unknown);
     132             : #undef CASE
     133             :     }
     134           0 :     return std::to_string(static_cast<int>(e));
     135             : }
     136             : 
     137             : #ifdef SAL_DETAIL_ENABLE_LOG_INFO
     138           0 : std::string linenumberify(const std::string& s)
     139             : {
     140           0 :     std::stringstream ss;
     141           0 :     int linenumber = 1;
     142           0 :     size_t start = 0;
     143             :     size_t newline;
     144           0 :     while ((newline = s.find('\n', start)) != std::string::npos)
     145             :     {
     146           0 :         ss << "/*" << std::setw(4) << linenumber++ << "*/ " << s.substr(start, newline-start+1);
     147           0 :         start = newline + 1;
     148             :     }
     149           0 :     if (start < s.size())
     150           0 :         ss << "/*" << std::setw(4) << linenumber++ << "*/ " << s.substr(start, std::string::npos);
     151           0 :     return ss.str();
     152             : }
     153             : #endif
     154             : 
     155             : } // anonymous namespace
     156             : 
     157             : /// Map the buffer used by an argument and do necessary argument setting
     158           0 : size_t VectorRef::Marshal( cl_kernel k, int argno, int, cl_program )
     159             : {
     160           0 :     FormulaToken* ref = mFormulaTree->GetFormulaToken();
     161           0 :     double* pHostBuffer = NULL;
     162           0 :     size_t szHostBuffer = 0;
     163           0 :     if (ref->GetType() == formula::svSingleVectorRef)
     164             :     {
     165             :         const formula::SingleVectorRefToken* pSVR =
     166           0 :             static_cast<const formula::SingleVectorRefToken*>(ref);
     167             : 
     168             :         SAL_INFO("sc.opencl", "SingleVectorRef len=" << pSVR->GetArrayLength() << " mpNumericArray=" << pSVR->GetArray().mpNumericArray << " (mpStringArray=" << pSVR->GetArray().mpStringArray << ")");
     169             : 
     170           0 :         pHostBuffer = const_cast<double*>(pSVR->GetArray().mpNumericArray);
     171           0 :         szHostBuffer = pSVR->GetArrayLength() * sizeof(double);
     172             :     }
     173           0 :     else if (ref->GetType() == formula::svDoubleVectorRef)
     174             :     {
     175             :         const formula::DoubleVectorRefToken* pDVR =
     176           0 :             static_cast<const formula::DoubleVectorRefToken*>(ref);
     177             : 
     178             :         SAL_INFO("sc.opencl", "DoubleVectorRef index=" << mnIndex << " len=" << pDVR->GetArrayLength() << " mpNumericArray=" << pDVR->GetArrays()[mnIndex].mpNumericArray << " (mpStringArray=" << pDVR->GetArrays()[mnIndex].mpStringArray << ")");
     179             : 
     180             :         pHostBuffer = const_cast<double*>(
     181           0 :             pDVR->GetArrays()[mnIndex].mpNumericArray);
     182           0 :         szHostBuffer = pDVR->GetArrayLength() * sizeof(double);
     183             :     }
     184             :     else
     185             :     {
     186           0 :         throw Unhandled();
     187             :     }
     188             :     // Obtain cl context
     189             :     ::opencl::KernelEnv kEnv;
     190           0 :     ::opencl::setKernelEnv(&kEnv);
     191             :     cl_int err;
     192           0 :     if (pHostBuffer)
     193             :     {
     194             :         mpClmem = clCreateBuffer(kEnv.mpkContext,
     195             :             (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
     196             :             szHostBuffer,
     197           0 :             pHostBuffer, &err);
     198           0 :         if (CL_SUCCESS != err)
     199           0 :             throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
     200             :         SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer << " using host buffer " << pHostBuffer);
     201             :     }
     202             :     else
     203             :     {
     204           0 :         if (szHostBuffer == 0)
     205           0 :             szHostBuffer = sizeof(double); // a dummy small value
     206             :                                            // Marshal as a buffer of NANs
     207             :         mpClmem = clCreateBuffer(kEnv.mpkContext,
     208             :             (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
     209           0 :             szHostBuffer, NULL, &err);
     210           0 :         if (CL_SUCCESS != err)
     211           0 :             throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
     212             :         SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
     213             : 
     214             :         double* pNanBuffer = static_cast<double*>(clEnqueueMapBuffer(
     215             :             kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
     216           0 :             szHostBuffer, 0, NULL, NULL, &err));
     217           0 :         if (CL_SUCCESS != err)
     218           0 :             throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
     219             : 
     220           0 :         for (size_t i = 0; i < szHostBuffer / sizeof(double); i++)
     221           0 :             pNanBuffer[i] = NAN;
     222             :         err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
     223           0 :             pNanBuffer, 0, NULL, NULL);
     224             :         // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
     225           0 :         if (CL_SUCCESS != err)
     226             :             SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << ::opencl::errorString(err));
     227             :     }
     228             : 
     229             :     SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem);
     230           0 :     err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem));
     231           0 :     if (CL_SUCCESS != err)
     232           0 :         throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
     233           0 :     return 1;
     234             : }
     235             : 
     236             : /// Arguments that are actually compile-time constant string
     237             : /// Currently, only the hash is passed.
     238             : /// TBD(IJSUNG): pass also length and the actual string if there is a
     239             : /// hash function collision
     240           0 : class ConstStringArgument : public DynamicKernelArgument
     241             : {
     242             : public:
     243           0 :     ConstStringArgument( const ScCalcConfig& config, const std::string& s,
     244             :         FormulaTreeNodeRef ft ) :
     245           0 :         DynamicKernelArgument(config, s, ft) { }
     246             :     /// Generate declaration
     247           0 :     virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
     248             :     {
     249           0 :         ss << "unsigned " << mSymName;
     250           0 :     }
     251           0 :     virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
     252             :     {
     253           0 :         ss << GenSlidingWindowDeclRef(false);
     254           0 :     }
     255           0 :     virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
     256             :     {
     257           0 :         GenDecl(ss);
     258           0 :     }
     259           0 :     virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
     260             :     {
     261           0 :         std::stringstream ss;
     262           0 :         if (GetFormulaToken()->GetType() != formula::svString)
     263           0 :             throw Unhandled();
     264           0 :         FormulaToken* Tok = GetFormulaToken();
     265           0 :         ss << Tok->GetString().getString().toAsciiUpperCase().hashCode() << "U";
     266           0 :         return ss.str();
     267             :     }
     268           0 :     virtual size_t GetWindowSize() const SAL_OVERRIDE
     269             :     {
     270           0 :         return 1;
     271             :     }
     272             :     /// Pass the 32-bit hash of the string to the kernel
     273           0 :     virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
     274             :     {
     275           0 :         FormulaToken* ref = mFormulaTree->GetFormulaToken();
     276           0 :         cl_uint hashCode = 0;
     277           0 :         if (ref->GetType() == formula::svString)
     278             :         {
     279           0 :             const rtl::OUString s = ref->GetString().getString().toAsciiUpperCase();
     280           0 :             hashCode = s.hashCode();
     281             :         }
     282             :         else
     283             :         {
     284           0 :             throw Unhandled();
     285             :         }
     286             : 
     287             :         // Pass the scalar result back to the rest of the formula kernel
     288             :         SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_uint: " << hashCode);
     289           0 :         cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), static_cast<void*>(&hashCode));
     290           0 :         if (CL_SUCCESS != err)
     291           0 :             throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
     292           0 :         return 1;
     293             :     }
     294             : };
     295             : 
     296             : /// Arguments that are actually compile-time constants
     297           0 : class DynamicKernelConstantArgument : public DynamicKernelArgument
     298             : {
     299             : public:
     300           0 :     DynamicKernelConstantArgument( const ScCalcConfig& config, const std::string& s,
     301             :         FormulaTreeNodeRef ft ) :
     302           0 :         DynamicKernelArgument(config, s, ft) { }
     303             :     /// Generate declaration
     304           0 :     virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
     305             :     {
     306           0 :         ss << "double " << mSymName;
     307           0 :     }
     308           0 :     virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
     309             :     {
     310           0 :         ss << mSymName;
     311           0 :     }
     312           0 :     virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
     313             :     {
     314           0 :         GenDecl(ss);
     315           0 :     }
     316           0 :     virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
     317             :     {
     318           0 :         if (GetFormulaToken()->GetType() != formula::svDouble)
     319           0 :             throw Unhandled();
     320           0 :         return mSymName;
     321             :     }
     322           0 :     virtual size_t GetWindowSize() const SAL_OVERRIDE
     323             :     {
     324           0 :         return 1;
     325             :     }
     326           0 :     double GetDouble() const
     327             :     {
     328           0 :         FormulaToken* Tok = GetFormulaToken();
     329           0 :         if (Tok->GetType() != formula::svDouble)
     330           0 :             throw Unhandled();
     331           0 :         return Tok->GetDouble();
     332             :     }
     333             :     /// Create buffer and pass the buffer to a given kernel
     334           0 :     virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
     335             :     {
     336           0 :         double tmp = GetDouble();
     337             :         // Pass the scalar result back to the rest of the formula kernel
     338             :         SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp);
     339           0 :         cl_int err = clSetKernelArg(k, argno, sizeof(double), static_cast<void*>(&tmp));
     340           0 :         if (CL_SUCCESS != err)
     341           0 :             throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
     342           0 :         return 1;
     343             :     }
     344             : };
     345             : 
     346           0 : class DynamicKernelPiArgument : public DynamicKernelArgument
     347             : {
     348             : public:
     349           0 :     DynamicKernelPiArgument( const ScCalcConfig& config, const std::string& s,
     350             :         FormulaTreeNodeRef ft ) :
     351           0 :         DynamicKernelArgument(config, s, ft) { }
     352             :     /// Generate declaration
     353           0 :     virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
     354             :     {
     355           0 :         ss << "double " << mSymName;
     356           0 :     }
     357           0 :     virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
     358             :     {
     359           0 :         ss << "3.14159265358979";
     360           0 :     }
     361           0 :     virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
     362             :     {
     363           0 :         GenDecl(ss);
     364           0 :     }
     365           0 :     virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
     366             :     {
     367           0 :         return mSymName;
     368             :     }
     369           0 :     virtual size_t GetWindowSize() const SAL_OVERRIDE
     370             :     {
     371           0 :         return 1;
     372             :     }
     373             :     /// Create buffer and pass the buffer to a given kernel
     374           0 :     virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
     375             :     {
     376           0 :         double tmp = 0.0;
     377             :         // Pass the scalar result back to the rest of the formula kernel
     378             :         SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp);
     379           0 :         cl_int err = clSetKernelArg(k, argno, sizeof(double), static_cast<void*>(&tmp));
     380           0 :         if (CL_SUCCESS != err)
     381           0 :             throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
     382           0 :         return 1;
     383             :     }
     384             : };
     385             : 
     386           0 : class DynamicKernelRandomArgument : public DynamicKernelArgument
     387             : {
     388             : public:
     389           0 :     DynamicKernelRandomArgument( const ScCalcConfig& config, const std::string& s,
     390             :         FormulaTreeNodeRef ft ) :
     391           0 :         DynamicKernelArgument(config, s, ft) { }
     392             :     /// Generate declaration
     393           0 :     virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
     394             :     {
     395           0 :         ss << "double " << mSymName;
     396           0 :     }
     397           0 :     virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
     398             :     {
     399           0 :         ss << mSymName;
     400           0 :     }
     401           0 :     virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
     402             :     {
     403           0 :         ss << "int " << mSymName;
     404           0 :     }
     405           0 :     virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
     406             :     {
     407           0 :         return mSymName + "_Random(" + mSymName + ")";
     408             :     }
     409           0 :     virtual void GenSlidingWindowFunction( std::stringstream& ss ) SAL_OVERRIDE
     410             :     {
     411             :         // This string is from the pi_opencl_kernel.i file as
     412             :         // generated when building the Random123 examples. Unused
     413             :         // stuff has been removed, and the actual kernel is not the
     414             :         // same as in the totally different use case of that example,
     415             :         // of course. Only the code that calculates the counter-based
     416             :         // random number and what it needs is left.
     417           0 :         ss << "\
     418             : \n\
     419             : #ifndef DEFINED_RANDOM123_STUFF\n\
     420             : #define DEFINED_RANDOM123_STUFF\n\
     421             : \n\
     422             : /*\n\
     423             : Copyright 2010-2011, D. E. Shaw Research.\n\
     424             : All rights reserved.\n\
     425             : \n\
     426             : Redistribution and use in source and binary forms, with or without\n\
     427             : modification, are permitted provided that the following conditions are\n\
     428             : met:\n\
     429             : \n\
     430             : * Redistributions of source code must retain the above copyright\n\
     431             :   notice, this list of conditions, and the following disclaimer.\n\
     432             : \n\
     433             : * Redistributions in binary form must reproduce the above copyright\n\
     434             :   notice, this list of conditions, and the following disclaimer in the\n\
     435             :   documentation and/or other materials provided with the distribution.\n\
     436             : \n\
     437             : * Neither the name of D. E. Shaw Research nor the names of its\n\
     438             :   contributors may be used to endorse or promote products derived from\n\
     439             :   this software without specific prior written permission.\n\
     440             : \n\
     441             : THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\
     442             : \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n\
     443             : LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n\
     444             : A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n\
     445             : OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n\
     446             : SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n\
     447             : LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n\
     448             : DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n\
     449             : THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n\
     450             : (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
     451             : OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
     452             : */\n\
     453             : \n\
     454             : typedef uint uint32_t;\n\
     455             : struct r123array2x32\n\
     456             : {\n\
     457             :   uint32_t v[2];\n\
     458             : };\n\
     459             : enum r123_enum_threefry32x2\n\
     460             : {\n\
     461             :   R_32x2_0_0 = 13,\n\
     462             :   R_32x2_1_0 = 15,\n\
     463             :   R_32x2_2_0 = 26,\n\
     464             :   R_32x2_3_0 = 6,\n\
     465             :   R_32x2_4_0 = 17,\n\
     466             :   R_32x2_5_0 = 29,\n\
     467             :   R_32x2_6_0 = 16,\n\
     468             :   R_32x2_7_0 = 24\n\
     469             : };\n\
     470             : inline uint32_t RotL_32 (uint32_t x, unsigned int N)\n\
     471             :   __attribute__ ((always_inline));\n\
     472             : inline uint32_t\n\
     473             : RotL_32 (uint32_t x, unsigned int N)\n\
     474             : {\n\
     475             :   return (x << (N & 31)) | (x >> ((32 - N) & 31));\n\
     476             : }\n\
     477             : \n\
     478             : typedef struct r123array2x32 threefry2x32_ctr_t;\n\
     479             : typedef struct r123array2x32 threefry2x32_key_t;\n\
     480             : typedef struct r123array2x32 threefry2x32_ukey_t;\n\
     481             : inline threefry2x32_key_t\n\
     482             : threefry2x32keyinit (threefry2x32_ukey_t uk)\n\
     483             : {\n\
     484             :   return uk;\n\
     485             : }\n\
     486             : \n\
     487             : inline threefry2x32_ctr_t threefry2x32_R (unsigned int Nrounds,\n\
     488             :                       threefry2x32_ctr_t in,\n\
     489             :                       threefry2x32_key_t k)\n\
     490             :   __attribute__ ((always_inline));\n\
     491             : inline threefry2x32_ctr_t\n\
     492             : threefry2x32_R (unsigned int Nrounds, threefry2x32_ctr_t in,\n\
     493             :         threefry2x32_key_t k)\n\
     494             : {\n\
     495             :   threefry2x32_ctr_t X;\n\
     496             :   uint32_t ks[2 + 1];\n\
     497             :   int i;\n\
     498             :   ks[2] = 0x1BD11BDA;\n\
     499             :   for (i = 0; i < 2; i++) {\n\
     500             :     ks[i] = k.v[i];\n\
     501             :     X.v[i] = in.v[i];\n\
     502             :     ks[2] ^= k.v[i];\n\
     503             :   }\n\
     504             :   X.v[0] += ks[0];\n\
     505             :   X.v[1] += ks[1];\n\
     506             :   if (Nrounds > 0) {\n\
     507             :     X.v[0] += X.v[1];\n\
     508             :     X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
     509             :     X.v[1] ^= X.v[0];\n\
     510             :   }\n\
     511             :   if (Nrounds > 1) {\n\
     512             :     X.v[0] += X.v[1];\n\
     513             :     X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
     514             :     X.v[1] ^= X.v[0];\n\
     515             :   }\n\
     516             :   if (Nrounds > 2) {\n\
     517             :     X.v[0] += X.v[1];\n\
     518             :     X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
     519             :     X.v[1] ^= X.v[0];\n\
     520             :   }\n\
     521             :   if (Nrounds > 3) {\n\
     522             :     X.v[0] += X.v[1];\n\
     523             :     X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
     524             :     X.v[1] ^= X.v[0];\n\
     525             :   }\n\
     526             :   if (Nrounds > 3) {\n\
     527             :     X.v[0] += ks[1];\n\
     528             :     X.v[1] += ks[2];\n\
     529             :     X.v[1] += 1;\n\
     530             :   }\n\
     531             :   if (Nrounds > 4) {\n\
     532             :     X.v[0] += X.v[1];\n\
     533             :     X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
     534             :     X.v[1] ^= X.v[0];\n\
     535             :   }\n\
     536             :   if (Nrounds > 5) {\n\
     537             :     X.v[0] += X.v[1];\n\
     538             :     X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
     539             :     X.v[1] ^= X.v[0];\n\
     540             :   }\n\
     541             :   if (Nrounds > 6) {\n\
     542             :     X.v[0] += X.v[1];\n\
     543             :     X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
     544             :     X.v[1] ^= X.v[0];\n\
     545             :   }\n\
     546             :   if (Nrounds > 7) {\n\
     547             :     X.v[0] += X.v[1];\n\
     548             :     X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
     549             :     X.v[1] ^= X.v[0];\n\
     550             :   }\n\
     551             :   if (Nrounds > 7) {\n\
     552             :     X.v[0] += ks[2];\n\
     553             :     X.v[1] += ks[0];\n\
     554             :     X.v[1] += 2;\n\
     555             :   }\n\
     556             :   if (Nrounds > 8) {\n\
     557             :     X.v[0] += X.v[1];\n\
     558             :     X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
     559             :     X.v[1] ^= X.v[0];\n\
     560             :   }\n\
     561             :   if (Nrounds > 9) {\n\
     562             :     X.v[0] += X.v[1];\n\
     563             :     X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
     564             :     X.v[1] ^= X.v[0];\n\
     565             :   }\n\
     566             :   if (Nrounds > 10) {\n\
     567             :     X.v[0] += X.v[1];\n\
     568             :     X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
     569             :     X.v[1] ^= X.v[0];\n\
     570             :   }\n\
     571             :   if (Nrounds > 11) {\n\
     572             :     X.v[0] += X.v[1];\n\
     573             :     X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
     574             :     X.v[1] ^= X.v[0];\n\
     575             :   }\n\
     576             :   if (Nrounds > 11) {\n\
     577             :     X.v[0] += ks[0];\n\
     578             :     X.v[1] += ks[1];\n\
     579             :     X.v[1] += 3;\n\
     580             :   }\n\
     581             :   if (Nrounds > 12) {\n\
     582             :     X.v[0] += X.v[1];\n\
     583             :     X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
     584             :     X.v[1] ^= X.v[0];\n\
     585             :   }\n\
     586             :   if (Nrounds > 13) {\n\
     587             :     X.v[0] += X.v[1];\n\
     588             :     X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
     589             :     X.v[1] ^= X.v[0];\n\
     590             :   }\n\
     591             :   if (Nrounds > 14) {\n\
     592             :     X.v[0] += X.v[1];\n\
     593             :     X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
     594             :     X.v[1] ^= X.v[0];\n\
     595             :   }\n\
     596             :   if (Nrounds > 15) {\n\
     597             :     X.v[0] += X.v[1];\n\
     598             :     X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
     599             :     X.v[1] ^= X.v[0];\n\
     600             :   }\n\
     601             :   if (Nrounds > 15) {\n\
     602             :     X.v[0] += ks[1];\n\
     603             :     X.v[1] += ks[2];\n\
     604             :     X.v[1] += 4;\n\
     605             :   }\n\
     606             :   if (Nrounds > 16) {\n\
     607             :     X.v[0] += X.v[1];\n\
     608             :     X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
     609             :     X.v[1] ^= X.v[0];\n\
     610             :   }\n\
     611             :   if (Nrounds > 17) {\n\
     612             :     X.v[0] += X.v[1];\n\
     613             :     X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
     614             :     X.v[1] ^= X.v[0];\n\
     615             :   }\n\
     616             :   if (Nrounds > 18) {\n\
     617             :     X.v[0] += X.v[1];\n\
     618             :     X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
     619             :     X.v[1] ^= X.v[0];\n\
     620             :   }\n\
     621             :   if (Nrounds > 19) {\n\
     622             :     X.v[0] += X.v[1];\n\
     623             :     X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
     624             :     X.v[1] ^= X.v[0];\n\
     625             :   }\n\
     626             :   if (Nrounds > 19) {\n\
     627             :     X.v[0] += ks[2];\n\
     628             :     X.v[1] += ks[0];\n\
     629             :     X.v[1] += 5;\n\
     630             :   }\n\
     631             :   if (Nrounds > 20) {\n\
     632             :     X.v[0] += X.v[1];\n\
     633             :     X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
     634             :     X.v[1] ^= X.v[0];\n\
     635             :   }\n\
     636             :   if (Nrounds > 21) {\n\
     637             :     X.v[0] += X.v[1];\n\
     638             :     X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
     639             :     X.v[1] ^= X.v[0];\n\
     640             :   }\n\
     641             :   if (Nrounds > 22) {\n\
     642             :     X.v[0] += X.v[1];\n\
     643             :     X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
     644             :     X.v[1] ^= X.v[0];\n\
     645             :   }\n\
     646             :   if (Nrounds > 23) {\n\
     647             :     X.v[0] += X.v[1];\n\
     648             :     X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
     649             :     X.v[1] ^= X.v[0];\n\
     650             :   }\n\
     651             :   if (Nrounds > 23) {\n\
     652             :     X.v[0] += ks[0];\n\
     653             :     X.v[1] += ks[1];\n\
     654             :     X.v[1] += 6;\n\
     655             :   }\n\
     656             :   if (Nrounds > 24) {\n\
     657             :     X.v[0] += X.v[1];\n\
     658             :     X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
     659             :     X.v[1] ^= X.v[0];\n\
     660             :   }\n\
     661             :   if (Nrounds > 25) {\n\
     662             :     X.v[0] += X.v[1];\n\
     663             :     X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
     664             :     X.v[1] ^= X.v[0];\n\
     665             :   }\n\
     666             :   if (Nrounds > 26) {\n\
     667             :     X.v[0] += X.v[1];\n\
     668             :     X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
     669             :     X.v[1] ^= X.v[0];\n\
     670             :   }\n\
     671             :   if (Nrounds > 27) {\n\
     672             :     X.v[0] += X.v[1];\n\
     673             :     X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
     674             :     X.v[1] ^= X.v[0];\n\
     675             :   }\n\
     676             :   if (Nrounds > 27) {\n\
     677             :     X.v[0] += ks[1];\n\
     678             :     X.v[1] += ks[2];\n\
     679             :     X.v[1] += 7;\n\
     680             :   }\n\
     681             :   if (Nrounds > 28) {\n\
     682             :     X.v[0] += X.v[1];\n\
     683             :     X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
     684             :     X.v[1] ^= X.v[0];\n\
     685             :   }\n\
     686             :   if (Nrounds > 29) {\n\
     687             :     X.v[0] += X.v[1];\n\
     688             :     X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
     689             :     X.v[1] ^= X.v[0];\n\
     690             :   }\n\
     691             :   if (Nrounds > 30) {\n\
     692             :     X.v[0] += X.v[1];\n\
     693             :     X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
     694             :     X.v[1] ^= X.v[0];\n\
     695             :   }\n\
     696             :   if (Nrounds > 31) {\n\
     697             :     X.v[0] += X.v[1];\n\
     698             :     X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
     699             :     X.v[1] ^= X.v[0];\n\
     700             :   }\n\
     701             :   if (Nrounds > 31) {\n\
     702             :     X.v[0] += ks[2];\n\
     703             :     X.v[1] += ks[0];\n\
     704             :     X.v[1] += 8;\n\
     705             :   }\n\
     706             :   return X;\n\
     707             : }\n\
     708             : \n\
     709             : enum r123_enum_threefry2x32\n\
     710             : { threefry2x32_rounds = 20 };\n\
     711             : inline threefry2x32_ctr_t threefry2x32 (threefry2x32_ctr_t in,\n\
     712             :                     threefry2x32_key_t k)\n\
     713             :   __attribute__ ((always_inline));\n\
     714             : inline threefry2x32_ctr_t\n\
     715             : threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\
     716             : {\n\
     717             :   return threefry2x32_R (threefry2x32_rounds, in, k);\n\
     718             : }\n\
     719             : #endif\n\
     720             : \n\
     721           0 : ";
     722           0 :         ss << "double " << mSymName << "_Random (int seed)\n\
     723             : {\n\
     724             :   unsigned tid = get_global_id(0);\n\
     725             :   threefry2x32_key_t k = { {tid, 0xdecafbad} };\n\
     726             :   threefry2x32_ctr_t c = { {seed, 0xf00dcafe} };\n\
     727             :   c = threefry2x32_R(threefry2x32_rounds, c, k);\n\
     728           0 :   const double factor = 1./(" << SAL_MAX_UINT32 << ".0 + 1.0);\n\
     729             :   const double halffactor = 0.5*factor;\n\
     730             :   return c.v[0] * factor + halffactor;\n\
     731             : }\n\
     732           0 : ";
     733           0 :     }
     734           0 :     virtual size_t GetWindowSize() const SAL_OVERRIDE
     735             :     {
     736           0 :         return 1;
     737             :     }
     738             :     /// Create buffer and pass the buffer to a given kernel
     739           0 :     virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
     740             :     {
     741           0 :         cl_int seed = formula::rng::nRandom(0, SAL_MAX_INT32);
     742             :         // Pass the scalar result back to the rest of the formula kernel
     743             :         SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_int: " << seed);
     744           0 :         cl_int err = clSetKernelArg(k, argno, sizeof(cl_int), static_cast<void*>(&seed));
     745           0 :         if (CL_SUCCESS != err)
     746           0 :             throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
     747           0 :         return 1;
     748             :     }
     749             : };
     750             : 
     751             : /// A vector of strings
     752           0 : class DynamicKernelStringArgument : public VectorRef
     753             : {
     754             : public:
     755           0 :     DynamicKernelStringArgument( const ScCalcConfig& config, const std::string& s,
     756             :         FormulaTreeNodeRef ft, int index = 0 ) :
     757           0 :         VectorRef(config, s, ft, index) { }
     758             : 
     759           0 :     virtual void GenSlidingWindowFunction( std::stringstream& ) SAL_OVERRIDE { }
     760             :     /// Generate declaration
     761           0 :     virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
     762             :     {
     763           0 :         ss << "__global unsigned int *" << mSymName;
     764           0 :     }
     765           0 :     virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
     766             :     {
     767           0 :         DynamicKernelStringArgument::GenDecl(ss);
     768           0 :     }
     769             :     virtual size_t Marshal( cl_kernel, int, int, cl_program ) SAL_OVERRIDE;
     770             : };
     771             : 
     772             : /// Marshal a string vector reference
     773           0 : size_t DynamicKernelStringArgument::Marshal( cl_kernel k, int argno, int, cl_program )
     774             : {
     775           0 :     FormulaToken* ref = mFormulaTree->GetFormulaToken();
     776             :     // Obtain cl context
     777             :     ::opencl::KernelEnv kEnv;
     778           0 :     ::opencl::setKernelEnv(&kEnv);
     779             :     cl_int err;
     780           0 :     formula::VectorRefArray vRef;
     781           0 :     size_t nStrings = 0;
     782           0 :     if (ref->GetType() == formula::svSingleVectorRef)
     783             :     {
     784             :         const formula::SingleVectorRefToken* pSVR =
     785           0 :             static_cast<const formula::SingleVectorRefToken*>(ref);
     786           0 :         nStrings = pSVR->GetArrayLength();
     787           0 :         vRef = pSVR->GetArray();
     788             :     }
     789           0 :     else if (ref->GetType() == formula::svDoubleVectorRef)
     790             :     {
     791             :         const formula::DoubleVectorRefToken* pDVR =
     792           0 :             static_cast<const formula::DoubleVectorRefToken*>(ref);
     793           0 :         nStrings = pDVR->GetArrayLength();
     794           0 :         vRef = pDVR->GetArrays()[mnIndex];
     795             :     }
     796           0 :     size_t szHostBuffer = nStrings * sizeof(cl_int);
     797           0 :     cl_uint* pHashBuffer = NULL;
     798             : 
     799           0 :     if (vRef.mpStringArray != NULL)
     800             :     {
     801             :         // Marshal strings. Right now we pass hashes of these string
     802             :         mpClmem = clCreateBuffer(kEnv.mpkContext,
     803             :             (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
     804           0 :             szHostBuffer, NULL, &err);
     805           0 :         if (CL_SUCCESS != err)
     806           0 :             throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
     807             :         SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
     808             : 
     809             :         pHashBuffer = static_cast<cl_uint*>(clEnqueueMapBuffer(
     810             :             kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
     811           0 :             szHostBuffer, 0, NULL, NULL, &err));
     812           0 :         if (CL_SUCCESS != err)
     813           0 :             throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
     814             : 
     815           0 :         for (size_t i = 0; i < nStrings; i++)
     816             :         {
     817           0 :             if (vRef.mpStringArray[i])
     818             :             {
     819           0 :                 const OUString tmp = OUString(vRef.mpStringArray[i]);
     820           0 :                 pHashBuffer[i] = tmp.hashCode();
     821             :             }
     822             :             else
     823             :             {
     824           0 :                 pHashBuffer[i] = 0;
     825             :             }
     826             :         }
     827             :     }
     828             :     else
     829             :     {
     830           0 :         if (nStrings == 0)
     831           0 :             szHostBuffer = sizeof(cl_int); // a dummy small value
     832             :                                            // Marshal as a buffer of NANs
     833             :         mpClmem = clCreateBuffer(kEnv.mpkContext,
     834             :             (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
     835           0 :             szHostBuffer, NULL, &err);
     836           0 :         if (CL_SUCCESS != err)
     837           0 :             throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
     838             :         SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
     839             : 
     840             :         pHashBuffer = static_cast<cl_uint*>(clEnqueueMapBuffer(
     841             :             kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
     842           0 :             szHostBuffer, 0, NULL, NULL, &err));
     843           0 :         if (CL_SUCCESS != err)
     844           0 :             throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
     845             : 
     846           0 :         for (size_t i = 0; i < szHostBuffer / sizeof(cl_int); i++)
     847           0 :             pHashBuffer[i] = 0;
     848             :     }
     849             :     err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
     850           0 :         pHashBuffer, 0, NULL, NULL);
     851           0 :     if (CL_SUCCESS != err)
     852           0 :         throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
     853             : 
     854             :     SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem);
     855           0 :     err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem));
     856           0 :     if (CL_SUCCESS != err)
     857           0 :         throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
     858           0 :     return 1;
     859             : }
     860             : 
     861             : /// A mixed string/numberic vector
     862           0 : class DynamicKernelMixedArgument : public VectorRef
     863             : {
     864             : public:
     865           0 :     DynamicKernelMixedArgument( const ScCalcConfig& config, const std::string& s,
     866             :         FormulaTreeNodeRef ft ) :
     867           0 :         VectorRef(config, s, ft), mStringArgument(config, s + "s", ft) { }
     868           0 :     virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
     869             :     {
     870           0 :         VectorRef::GenSlidingWindowDecl(ss);
     871           0 :         ss << ", ";
     872           0 :         mStringArgument.GenSlidingWindowDecl(ss);
     873           0 :     }
     874           0 :     virtual bool IsMixedArgument() const SAL_OVERRIDE { return true;}
     875           0 :     virtual void GenSlidingWindowFunction( std::stringstream& ) SAL_OVERRIDE { }
     876             :     /// Generate declaration
     877           0 :     virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
     878             :     {
     879           0 :         VectorRef::GenDecl(ss);
     880           0 :         ss << ", ";
     881           0 :         mStringArgument.GenDecl(ss);
     882           0 :     }
     883           0 :     virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
     884             :     {
     885           0 :         VectorRef::GenDeclRef(ss);
     886           0 :         ss << ",";
     887           0 :         mStringArgument.GenDeclRef(ss);
     888           0 :     }
     889           0 :     virtual void GenNumDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
     890             :     {
     891           0 :         VectorRef::GenSlidingWindowDecl(ss);
     892           0 :     }
     893           0 :     virtual void GenStringDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
     894             :     {
     895           0 :         mStringArgument.GenSlidingWindowDecl(ss);
     896           0 :     }
     897           0 :     virtual std::string GenSlidingWindowDeclRef( bool nested ) const SAL_OVERRIDE
     898             :     {
     899           0 :         std::stringstream ss;
     900           0 :         ss << "(!isNan(" << VectorRef::GenSlidingWindowDeclRef();
     901           0 :         ss << ")?" << VectorRef::GenSlidingWindowDeclRef();
     902           0 :         ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
     903           0 :         ss << ")";
     904           0 :         return ss.str();
     905             :     }
     906           0 :     virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
     907             :     {
     908           0 :         std::stringstream ss;
     909           0 :         ss << VectorRef::GenSlidingWindowDeclRef();
     910           0 :         return ss.str();
     911             :     }
     912           0 :     virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
     913             :     {
     914           0 :         std::stringstream ss;
     915           0 :         ss << mStringArgument.GenSlidingWindowDeclRef();
     916           0 :         return ss.str();
     917             :     }
     918           0 :     virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) SAL_OVERRIDE
     919             :     {
     920           0 :         int i = VectorRef::Marshal(k, argno, vw, p);
     921           0 :         i += mStringArgument.Marshal(k, argno + i, vw, p);
     922           0 :         return i;
     923             :     }
     924             : 
     925             : protected:
     926             :     DynamicKernelStringArgument mStringArgument;
     927             : };
     928             : 
     929             : /// Handling a Double Vector that is used as a sliding window input
     930             : /// to either a sliding window average or sum-of-products
     931             : /// Generate a sequential loop for reductions
     932             : class OpAverage;
     933             : class OpCount;
     934             : 
     935             : template<class Base>
     936             : class DynamicKernelSlidingArgument : public Base
     937             : {
     938             : public:
     939           0 :     DynamicKernelSlidingArgument( const ScCalcConfig& config, const std::string& s,
     940             :         FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase>& CodeGen,
     941             :         int index = 0 ) :
     942           0 :         Base(config, s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
     943             :     {
     944           0 :         FormulaToken* t = ft->GetFormulaToken();
     945           0 :         if (t->GetType() != formula::svDoubleVectorRef)
     946           0 :             throw Unhandled();
     947           0 :         mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
     948           0 :         bIsStartFixed = mpDVR->IsStartFixed();
     949           0 :         bIsEndFixed = mpDVR->IsEndFixed();
     950           0 :     }
     951             :     // Should only be called by SumIfs. Yikes!
     952           0 :     virtual bool NeedParallelReduction() const
     953             :     {
     954             :         assert(dynamic_cast<OpSumIfs*>(mpCodeGen.get()));
     955           0 :         return GetWindowSize() > 100 &&
     956           0 :                ((GetStartFixed() && GetEndFixed()) ||
     957           0 :             (!GetStartFixed() && !GetEndFixed()));
     958             :     }
     959           0 :     virtual void GenSlidingWindowFunction( std::stringstream& ) { }
     960             : 
     961           0 :     virtual std::string GenSlidingWindowDeclRef( bool nested = false ) const
     962             :     {
     963           0 :         size_t nArrayLength = mpDVR->GetArrayLength();
     964           0 :         std::stringstream ss;
     965           0 :         if (!bIsStartFixed && !bIsEndFixed)
     966             :         {
     967           0 :             if (nested)
     968           0 :                 ss << "((i+gid0) <" << nArrayLength << "?";
     969           0 :             ss << Base::GetName() << "[i + gid0]";
     970           0 :             if (nested)
     971           0 :                 ss << ":NAN)";
     972             :         }
     973             :         else
     974             :         {
     975           0 :             if (nested)
     976           0 :                 ss << "(i <" << nArrayLength << "?";
     977           0 :             ss << Base::GetName() << "[i]";
     978           0 :             if (nested)
     979           0 :                 ss << ":NAN)";
     980             :         }
     981           0 :         return ss.str();
     982             :     }
     983             :     /// Controls how the elements in the DoubleVectorRef are traversed
     984           0 :     virtual size_t GenReductionLoopHeader(
     985             :         std::stringstream& ss, bool& needBody )
     986             :     {
     987             :         assert(mpDVR);
     988           0 :         size_t nCurWindowSize = mpDVR->GetRefRowSize();
     989             : 
     990             :         {
     991           0 :             if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
     992             :             {
     993           0 :                 ss << "for (int i = ";
     994           0 :                 ss << "gid0; i < " << mpDVR->GetArrayLength();
     995           0 :                 ss << " && i < " << nCurWindowSize  << "; i++){\n\t\t";
     996           0 :                 needBody = true;
     997           0 :                 return nCurWindowSize;
     998             :             }
     999           0 :             else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
    1000             :             {
    1001           0 :                 ss << "for (int i = ";
    1002           0 :                 ss << "0; i < " << mpDVR->GetArrayLength();
    1003           0 :                 ss << " && i < gid0+" << nCurWindowSize << "; i++){\n\t\t";
    1004           0 :                 needBody = true;
    1005           0 :                 return nCurWindowSize;
    1006             :             }
    1007           0 :             else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
    1008             :             {
    1009           0 :                 ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
    1010           0 :                 ss << "{int i;\n\t";
    1011           0 :                 std::stringstream temp1, temp2;
    1012           0 :                 int outLoopSize = UNROLLING_FACTOR;
    1013           0 :                 if (nCurWindowSize / outLoopSize != 0)
    1014             :                 {
    1015           0 :                     ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
    1016           0 :                     for (int count = 0; count < outLoopSize; count++)
    1017             :                     {
    1018           0 :                         ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
    1019           0 :                         if (count == 0)
    1020             :                         {
    1021           0 :                             temp1 << "if(i + gid0 < " << mpDVR->GetArrayLength();
    1022           0 :                             temp1 << "){\n\t\t";
    1023           0 :                             temp1 << "tmp = legalize(";
    1024           0 :                             temp1 <<  mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
    1025           0 :                             temp1 << ", tmp);\n\t\t\t";
    1026           0 :                             temp1 << "}\n\t";
    1027             :                         }
    1028           0 :                         ss << temp1.str();
    1029             :                     }
    1030           0 :                     ss << "}\n\t";
    1031             :                 }
    1032             :                 // The residual of mod outLoopSize
    1033           0 :                 for (size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
    1034             :                 {
    1035           0 :                     ss << "i = " << count << ";\n\t";
    1036           0 :                     if (count == nCurWindowSize / outLoopSize * outLoopSize)
    1037             :                     {
    1038           0 :                         temp2 << "if(i + gid0 < " << mpDVR->GetArrayLength();
    1039           0 :                         temp2 << "){\n\t\t";
    1040           0 :                         temp2 << "tmp = legalize(";
    1041           0 :                         temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
    1042           0 :                         temp2 << ", tmp);\n\t\t\t";
    1043           0 :                         temp2 << "}\n\t";
    1044             :                     }
    1045           0 :                     ss << temp2.str();
    1046             :                 }
    1047           0 :                 ss << "}\n";
    1048           0 :                 needBody = false;
    1049           0 :                 return nCurWindowSize;
    1050             :             }
    1051             :             // (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
    1052             :             else
    1053             :             {
    1054           0 :                 ss << "\n\t";
    1055           0 :                 ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
    1056           0 :                 ss << "{int i;\n\t";
    1057           0 :                 std::stringstream temp1, temp2;
    1058           0 :                 int outLoopSize = UNROLLING_FACTOR;
    1059           0 :                 if (nCurWindowSize / outLoopSize != 0)
    1060             :                 {
    1061           0 :                     ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
    1062           0 :                     for (int count = 0; count < outLoopSize; count++)
    1063             :                     {
    1064           0 :                         ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
    1065           0 :                         if (count == 0)
    1066             :                         {
    1067           0 :                             temp1 << "tmp = legalize(";
    1068           0 :                             temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
    1069           0 :                             temp1 << ", tmp);\n\t\t\t";
    1070             :                         }
    1071           0 :                         ss << temp1.str();
    1072             :                     }
    1073           0 :                     ss << "}\n\t";
    1074             :                 }
    1075             :                 // The residual of mod outLoopSize
    1076           0 :                 for (size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
    1077             :                 {
    1078           0 :                     ss << "i = " << count << ";\n\t";
    1079           0 :                     if (count == nCurWindowSize / outLoopSize * outLoopSize)
    1080             :                     {
    1081           0 :                         temp2 << "tmp = legalize(";
    1082           0 :                         temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
    1083           0 :                         temp2 << ", tmp);\n\t\t\t";
    1084             :                     }
    1085           0 :                     ss << temp2.str();
    1086             :                 }
    1087           0 :                 ss << "}\n";
    1088           0 :                 needBody = false;
    1089           0 :                 return nCurWindowSize;
    1090             :             }
    1091             :         }
    1092             :     }
    1093           0 :     ~DynamicKernelSlidingArgument()
    1094             :     {
    1095           0 :         if (mpClmem2)
    1096             :         {
    1097             :             cl_int err;
    1098           0 :             err = clReleaseMemObject(mpClmem2);
    1099             :             SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
    1100           0 :             mpClmem2 = NULL;
    1101             :         }
    1102           0 :     }
    1103             : 
    1104           0 :     size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
    1105             : 
    1106           0 :     size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
    1107             : 
    1108           0 :     size_t GetStartFixed() const { return bIsStartFixed; }
    1109             : 
    1110           0 :     size_t GetEndFixed() const { return bIsEndFixed; }
    1111             : 
    1112             : protected:
    1113             :     bool bIsStartFixed, bIsEndFixed;
    1114             :     const formula::DoubleVectorRefToken* mpDVR;
    1115             :     // from parent nodes
    1116             :     boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
    1117             :     // controls whether to invoke the reduction kernel during marshaling or not
    1118             :     cl_mem mpClmem2;
    1119             : };
    1120             : 
    1121             : /// A mixed string/numberic vector
    1122           0 : class DynamicKernelMixedSlidingArgument : public VectorRef
    1123             : {
    1124             : public:
    1125           0 :     DynamicKernelMixedSlidingArgument( const ScCalcConfig& config, const std::string& s,
    1126             :         FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase>& CodeGen,
    1127             :         int index = 0 ) :
    1128             :         VectorRef(config, s, ft),
    1129             :         mDoubleArgument(mCalcConfig, s, ft, CodeGen, index),
    1130           0 :         mStringArgument(mCalcConfig, s + "s", ft, CodeGen, index) { }
    1131           0 :     virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
    1132             :     {
    1133           0 :         mDoubleArgument.GenSlidingWindowDecl(ss);
    1134           0 :         ss << ", ";
    1135           0 :         mStringArgument.GenSlidingWindowDecl(ss);
    1136           0 :     }
    1137           0 :     virtual void GenSlidingWindowFunction( std::stringstream& ) SAL_OVERRIDE { }
    1138             :     /// Generate declaration
    1139           0 :     virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
    1140             :     {
    1141           0 :         mDoubleArgument.GenDecl(ss);
    1142           0 :         ss << ", ";
    1143           0 :         mStringArgument.GenDecl(ss);
    1144           0 :     }
    1145           0 :     virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
    1146             :     {
    1147           0 :         mDoubleArgument.GenDeclRef(ss);
    1148           0 :         ss << ",";
    1149           0 :         mStringArgument.GenDeclRef(ss);
    1150           0 :     }
    1151           0 :     virtual std::string GenSlidingWindowDeclRef( bool nested ) const SAL_OVERRIDE
    1152             :     {
    1153           0 :         std::stringstream ss;
    1154           0 :         ss << "(!isNan(" << mDoubleArgument.GenSlidingWindowDeclRef();
    1155           0 :         ss << ")?" << mDoubleArgument.GenSlidingWindowDeclRef();
    1156           0 :         ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
    1157           0 :         ss << ")";
    1158           0 :         return ss.str();
    1159             :     }
    1160           0 :     virtual bool IsMixedArgument() const SAL_OVERRIDE { return true;}
    1161           0 :     virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
    1162             :     {
    1163           0 :         std::stringstream ss;
    1164           0 :         ss << mDoubleArgument.GenSlidingWindowDeclRef();
    1165           0 :         return ss.str();
    1166             :     }
    1167           0 :     virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
    1168             :     {
    1169           0 :         std::stringstream ss;
    1170           0 :         ss << mStringArgument.GenSlidingWindowDeclRef();
    1171           0 :         return ss.str();
    1172             :     }
    1173           0 :     virtual void GenNumDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
    1174             :     {
    1175           0 :         mDoubleArgument.GenDeclRef(ss);
    1176           0 :     }
    1177           0 :     virtual void GenStringDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
    1178             :     {
    1179           0 :         mStringArgument.GenDeclRef(ss);
    1180           0 :     }
    1181           0 :     virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) SAL_OVERRIDE
    1182             :     {
    1183           0 :         int i = mDoubleArgument.Marshal(k, argno, vw, p);
    1184           0 :         i += mStringArgument.Marshal(k, argno + i, vw, p);
    1185           0 :         return i;
    1186             :     }
    1187             : 
    1188             : protected:
    1189             :     DynamicKernelSlidingArgument<VectorRef> mDoubleArgument;
    1190             :     DynamicKernelSlidingArgument<DynamicKernelStringArgument> mStringArgument;
    1191             : };
    1192             : 
    1193             : /// Holds the symbol table for a given dynamic kernel
    1194           0 : class SymbolTable
    1195             : {
    1196             : public:
    1197             :     typedef std::map<const formula::FormulaToken*, DynamicKernelArgumentRef> ArgumentMap;
    1198             :     // This avoids instability caused by using pointer as the key type
    1199             :     typedef std::list<DynamicKernelArgumentRef> ArgumentList;
    1200           0 :     SymbolTable() : mCurId(0) { }
    1201             :     template<class T>
    1202             :     const DynamicKernelArgument* DeclRefArg( const ScCalcConfig& config, FormulaTreeNodeRef, SlidingFunctionBase* pCodeGen, int nResultSize );
    1203             :     /// Used to generate sliding window helpers
    1204           0 :     void DumpSlidingWindowFunctions( std::stringstream& ss )
    1205             :     {
    1206           0 :         for (ArgumentList::iterator it = mParams.begin(), e = mParams.end(); it != e;
    1207             :             ++it)
    1208             :         {
    1209           0 :             (*it)->GenSlidingWindowFunction(ss);
    1210           0 :             ss << "\n";
    1211             :         }
    1212           0 :     }
    1213             :     /// Memory mapping from host to device and pass buffers to the given kernel as
    1214             :     /// arguments
    1215             :     void Marshal( cl_kernel, int, cl_program );
    1216             : 
    1217             : private:
    1218             :     unsigned int mCurId;
    1219             :     ArgumentMap mSymbols;
    1220             :     ArgumentList mParams;
    1221             : };
    1222             : 
    1223           0 : void SymbolTable::Marshal( cl_kernel k, int nVectorWidth, cl_program pProgram )
    1224             : {
    1225           0 :     int i = 1; //The first argument is reserved for results
    1226           0 :     for (ArgumentList::iterator it = mParams.begin(), e = mParams.end(); it != e;
    1227             :         ++it)
    1228             :     {
    1229           0 :         i += (*it)->Marshal(k, i, nVectorWidth, pProgram);
    1230             :     }
    1231           0 : }
    1232             : 
    1233             : /// Handling a Double Vector that is used as a sliding window input
    1234             : /// Performs parallel reduction based on given operator
    1235             : template<class Base>
    1236             : class ParallelReductionVectorRef : public Base
    1237             : {
    1238             : public:
    1239           0 :     ParallelReductionVectorRef( const ScCalcConfig& config, const std::string& s,
    1240             :         FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase>& CodeGen,
    1241             :         int index = 0 ) :
    1242           0 :         Base(config, s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
    1243             :     {
    1244           0 :         FormulaToken* t = ft->GetFormulaToken();
    1245           0 :         if (t->GetType() != formula::svDoubleVectorRef)
    1246           0 :             throw Unhandled();
    1247           0 :         mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
    1248           0 :         bIsStartFixed = mpDVR->IsStartFixed();
    1249           0 :         bIsEndFixed = mpDVR->IsEndFixed();
    1250           0 :     }
    1251             :     /// Emit the definition for the auxiliary reduction kernel
    1252           0 :     virtual void GenSlidingWindowFunction( std::stringstream& ss )
    1253             :     {
    1254           0 :         if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
    1255             :         {
    1256           0 :             std::string name = Base::GetName();
    1257           0 :             ss << "__kernel void " << name;
    1258           0 :             ss << "_reduction(__global double* A, "
    1259             :                 "__global double *result,int arrayLength,int windowSize){\n";
    1260           0 :             ss << "    double tmp, current_result =" <<
    1261           0 :                 mpCodeGen->GetBottom();
    1262           0 :             ss << ";\n";
    1263           0 :             ss << "    int writePos = get_group_id(1);\n";
    1264           0 :             ss << "    int lidx = get_local_id(0);\n";
    1265           0 :             ss << "    __local double shm_buf[256];\n";
    1266           0 :             if (mpDVR->IsStartFixed())
    1267           0 :                 ss << "    int offset = 0;\n";
    1268             :             else // if (!mpDVR->IsStartFixed())
    1269           0 :                 ss << "    int offset = get_group_id(1);\n";
    1270           0 :             if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
    1271           0 :                 ss << "    int end = windowSize;\n";
    1272           0 :             else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
    1273           0 :                 ss << "    int end = offset + windowSize;\n";
    1274           0 :             else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
    1275           0 :                 ss << "    int end = windowSize + get_group_id(1);\n";
    1276           0 :             else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
    1277           0 :                 ss << "    int end = windowSize;\n";
    1278           0 :             ss << "    end = min(end, arrayLength);\n";
    1279             : 
    1280           0 :             ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
    1281           0 :             ss << "    int loop = arrayLength/512 + 1;\n";
    1282           0 :             ss << "    for (int l=0; l<loop; l++){\n";
    1283           0 :             ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
    1284           0 :             ss << "    int loopOffset = l*512;\n";
    1285           0 :             ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
    1286           0 :             ss << "        tmp = legalize(" << mpCodeGen->Gen2(
    1287           0 :                 "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
    1288           0 :             ss << "        tmp = legalize(" << mpCodeGen->Gen2(
    1289           0 :                 "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
    1290           0 :             ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
    1291           0 :             ss << "        tmp = legalize(" << mpCodeGen->Gen2(
    1292           0 :                 "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
    1293           0 :             ss << "    shm_buf[lidx] = tmp;\n";
    1294           0 :             ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
    1295           0 :             ss << "    for (int i = 128; i >0; i/=2) {\n";
    1296           0 :             ss << "        if (lidx < i)\n";
    1297           0 :             ss << "            shm_buf[lidx] = ";
    1298             :             // Special case count
    1299           0 :             if (dynamic_cast<OpCount*>(mpCodeGen.get()))
    1300           0 :                 ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
    1301             :             else
    1302           0 :                 ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
    1303           0 :             ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
    1304           0 :             ss << "    }\n";
    1305           0 :             ss << "        if (lidx == 0)\n";
    1306           0 :             ss << "            current_result =";
    1307           0 :             if (dynamic_cast<OpCount*>(mpCodeGen.get()))
    1308           0 :                 ss << "current_result + shm_buf[0]";
    1309             :             else
    1310           0 :                 ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
    1311           0 :             ss << ";\n";
    1312           0 :             ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
    1313           0 :             ss << "    }\n";
    1314           0 :             ss << "    if (lidx == 0)\n";
    1315           0 :             ss << "        result[writePos] = current_result;\n";
    1316           0 :             ss << "}\n";
    1317             :         }
    1318             :         else
    1319             :         {
    1320           0 :             std::string name = Base::GetName();
    1321             :             /*sum reduction*/
    1322           0 :             ss << "__kernel void " << name << "_sum";
    1323           0 :             ss << "_reduction(__global double* A, "
    1324             :                 "__global double *result,int arrayLength,int windowSize){\n";
    1325           0 :             ss << "    double tmp, current_result =" <<
    1326           0 :                 mpCodeGen->GetBottom();
    1327           0 :             ss << ";\n";
    1328           0 :             ss << "    int writePos = get_group_id(1);\n";
    1329           0 :             ss << "    int lidx = get_local_id(0);\n";
    1330           0 :             ss << "    __local double shm_buf[256];\n";
    1331           0 :             if (mpDVR->IsStartFixed())
    1332           0 :                 ss << "    int offset = 0;\n";
    1333             :             else // if (!mpDVR->IsStartFixed())
    1334           0 :                 ss << "    int offset = get_group_id(1);\n";
    1335           0 :             if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
    1336           0 :                 ss << "    int end = windowSize;\n";
    1337           0 :             else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
    1338           0 :                 ss << "    int end = offset + windowSize;\n";
    1339           0 :             else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
    1340           0 :                 ss << "    int end = windowSize + get_group_id(1);\n";
    1341           0 :             else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
    1342           0 :                 ss << "    int end = windowSize;\n";
    1343           0 :             ss << "    end = min(end, arrayLength);\n";
    1344           0 :             ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
    1345           0 :             ss << "    int loop = arrayLength/512 + 1;\n";
    1346           0 :             ss << "    for (int l=0; l<loop; l++){\n";
    1347           0 :             ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
    1348           0 :             ss << "    int loopOffset = l*512;\n";
    1349           0 :             ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
    1350           0 :             ss << "        tmp = legalize(";
    1351           0 :             ss << "(A[loopOffset + lidx + offset]+ tmp)";
    1352           0 :             ss << ", tmp);\n";
    1353           0 :             ss << "        tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
    1354           0 :             ss << ", tmp);\n";
    1355           0 :             ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
    1356           0 :             ss << "        tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
    1357           0 :             ss << ", tmp);\n";
    1358           0 :             ss << "    shm_buf[lidx] = tmp;\n";
    1359           0 :             ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
    1360           0 :             ss << "    for (int i = 128; i >0; i/=2) {\n";
    1361           0 :             ss << "        if (lidx < i)\n";
    1362           0 :             ss << "            shm_buf[lidx] = ";
    1363           0 :             ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
    1364           0 :             ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
    1365           0 :             ss << "    }\n";
    1366           0 :             ss << "        if (lidx == 0)\n";
    1367           0 :             ss << "            current_result =";
    1368           0 :             ss << "current_result + shm_buf[0]";
    1369           0 :             ss << ";\n";
    1370           0 :             ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
    1371           0 :             ss << "    }\n";
    1372           0 :             ss << "    if (lidx == 0)\n";
    1373           0 :             ss << "        result[writePos] = current_result;\n";
    1374           0 :             ss << "}\n";
    1375             :             /*count reduction*/
    1376           0 :             ss << "__kernel void " << name << "_count";
    1377           0 :             ss << "_reduction(__global double* A, "
    1378             :                 "__global double *result,int arrayLength,int windowSize){\n";
    1379           0 :             ss << "    double tmp, current_result =" <<
    1380           0 :                 mpCodeGen->GetBottom();
    1381           0 :             ss << ";\n";
    1382           0 :             ss << "    int writePos = get_group_id(1);\n";
    1383           0 :             ss << "    int lidx = get_local_id(0);\n";
    1384           0 :             ss << "    __local double shm_buf[256];\n";
    1385           0 :             if (mpDVR->IsStartFixed())
    1386           0 :                 ss << "    int offset = 0;\n";
    1387             :             else // if (!mpDVR->IsStartFixed())
    1388           0 :                 ss << "    int offset = get_group_id(1);\n";
    1389           0 :             if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
    1390           0 :                 ss << "    int end = windowSize;\n";
    1391           0 :             else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
    1392           0 :                 ss << "    int end = offset + windowSize;\n";
    1393           0 :             else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
    1394           0 :                 ss << "    int end = windowSize + get_group_id(1);\n";
    1395           0 :             else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
    1396           0 :                 ss << "    int end = windowSize;\n";
    1397           0 :             ss << "    end = min(end, arrayLength);\n";
    1398           0 :             ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
    1399           0 :             ss << "    int loop = arrayLength/512 + 1;\n";
    1400           0 :             ss << "    for (int l=0; l<loop; l++){\n";
    1401           0 :             ss << "    tmp = " << mpCodeGen->GetBottom() << ";\n";
    1402           0 :             ss << "    int loopOffset = l*512;\n";
    1403           0 :             ss << "    if((loopOffset + lidx + offset + 256) < end) {\n";
    1404           0 :             ss << "        tmp = legalize((isNan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
    1405           0 :             ss << ", tmp);\n";
    1406           0 :             ss << "        tmp = legalize((isNan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
    1407           0 :             ss << ", tmp);\n";
    1408           0 :             ss << "    } else if ((loopOffset + lidx + offset) < end)\n";
    1409           0 :             ss << "        tmp = legalize((isNan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
    1410           0 :             ss << ", tmp);\n";
    1411           0 :             ss << "    shm_buf[lidx] = tmp;\n";
    1412           0 :             ss << "    barrier(CLK_LOCAL_MEM_FENCE);\n";
    1413           0 :             ss << "    for (int i = 128; i >0; i/=2) {\n";
    1414           0 :             ss << "        if (lidx < i)\n";
    1415           0 :             ss << "            shm_buf[lidx] = ";
    1416           0 :             ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
    1417           0 :             ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
    1418           0 :             ss << "    }\n";
    1419           0 :             ss << "        if (lidx == 0)\n";
    1420           0 :             ss << "            current_result =";
    1421           0 :             ss << "current_result + shm_buf[0];";
    1422           0 :             ss << ";\n";
    1423           0 :             ss << "        barrier(CLK_LOCAL_MEM_FENCE);\n";
    1424           0 :             ss << "    }\n";
    1425           0 :             ss << "    if (lidx == 0)\n";
    1426           0 :             ss << "        result[writePos] = current_result;\n";
    1427           0 :             ss << "}\n";
    1428             :         }
    1429             : 
    1430           0 :     }
    1431           0 :     virtual std::string GenSlidingWindowDeclRef( bool = false ) const
    1432             :     {
    1433           0 :         std::stringstream ss;
    1434           0 :         if (!bIsStartFixed && !bIsEndFixed)
    1435           0 :             ss << Base::GetName() << "[i + gid0]";
    1436             :         else
    1437           0 :             ss << Base::GetName() << "[i]";
    1438           0 :         return ss.str();
    1439             :     }
    1440             :     /// Controls how the elements in the DoubleVectorRef are traversed
    1441           0 :     virtual size_t GenReductionLoopHeader(
    1442             :         std::stringstream& ss, int nResultSize, bool& needBody )
    1443             :     {
    1444             :         assert(mpDVR);
    1445           0 :         size_t nCurWindowSize = mpDVR->GetRefRowSize();
    1446           0 :         std::string temp = Base::GetName() + "[gid0]";
    1447           0 :         ss << "tmp = ";
    1448             :         // Special case count
    1449           0 :         if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
    1450             :         {
    1451           0 :             ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
    1452           0 :             ss << "nCount = nCount-1;\n";
    1453           0 :             ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
    1454           0 :             ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n";
    1455             :         }
    1456           0 :         else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
    1457           0 :             ss << temp << "+ tmp";
    1458             :         else
    1459           0 :             ss << mpCodeGen->Gen2(temp, "tmp");
    1460           0 :         ss << ";\n\t";
    1461           0 :         needBody = false;
    1462           0 :         return nCurWindowSize;
    1463             :     }
    1464             : 
    1465           0 :     virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
    1466             :     {
    1467             :         assert(Base::mpClmem == NULL);
    1468             :         // Obtain cl context
    1469             :         ::opencl::KernelEnv kEnv;
    1470           0 :         ::opencl::setKernelEnv(&kEnv);
    1471             :         cl_int err;
    1472           0 :         size_t nInput = mpDVR->GetArrayLength();
    1473           0 :         size_t nCurWindowSize = mpDVR->GetRefRowSize();
    1474             :         // create clmem buffer
    1475           0 :         if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == NULL)
    1476           0 :             throw Unhandled();
    1477             :         double* pHostBuffer = const_cast<double*>(
    1478           0 :             mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
    1479           0 :         size_t szHostBuffer = nInput * sizeof(double);
    1480           0 :         Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
    1481             :             (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
    1482             :             szHostBuffer,
    1483           0 :             pHostBuffer, &err);
    1484             :         SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
    1485             : 
    1486           0 :         mpClmem2 = clCreateBuffer(kEnv.mpkContext,
    1487             :             CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
    1488           0 :             sizeof(double) * w, NULL, NULL);
    1489           0 :         if (CL_SUCCESS != err)
    1490           0 :             throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
    1491             :         SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
    1492             : 
    1493             :         // reproduce the reduction function name
    1494           0 :         std::string kernelName;
    1495           0 :         if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
    1496           0 :             kernelName = Base::GetName() + "_reduction";
    1497             :         else
    1498           0 :             kernelName = Base::GetName() + "_sum_reduction";
    1499           0 :         cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
    1500           0 :         if (err != CL_SUCCESS)
    1501           0 :             throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
    1502             :         SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
    1503             : 
    1504             :         // set kernel arg of reduction kernel
    1505             :         // TODO(Wei Wei): use unique name for kernel
    1506           0 :         cl_mem buf = Base::GetCLBuffer();
    1507             :         SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
    1508           0 :         err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
    1509           0 :             static_cast<void*>(&buf));
    1510           0 :         if (CL_SUCCESS != err)
    1511           0 :             throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    1512             : 
    1513             :         SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
    1514           0 :         err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
    1515           0 :         if (CL_SUCCESS != err)
    1516           0 :             throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    1517             : 
    1518             :         SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
    1519           0 :         err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
    1520           0 :         if (CL_SUCCESS != err)
    1521           0 :             throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    1522             : 
    1523             :         SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
    1524           0 :         err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
    1525           0 :         if (CL_SUCCESS != err)
    1526           0 :             throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    1527             : 
    1528             :         // set work group size and execute
    1529           0 :         size_t global_work_size[] = { 256, (size_t)w };
    1530           0 :         size_t local_work_size[] = { 256, 1 };
    1531             :         SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
    1532           0 :         err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
    1533           0 :             global_work_size, local_work_size, 0, NULL, NULL);
    1534           0 :         if (CL_SUCCESS != err)
    1535           0 :             throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
    1536           0 :         err = clFinish(kEnv.mpkCmdQueue);
    1537           0 :         if (CL_SUCCESS != err)
    1538           0 :             throw OpenCLError("clFinish", err, __FILE__, __LINE__);
    1539           0 :         if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
    1540             :         {
    1541             :             /*average need more reduction kernel for count computing*/
    1542           0 :             boost::scoped_array<double> pAllBuffer(new double[2 * w]);
    1543             :             double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
    1544             :                 mpClmem2,
    1545             :                 CL_TRUE, CL_MAP_READ, 0,
    1546             :                 sizeof(double) * w, 0, NULL, NULL,
    1547           0 :                 &err));
    1548           0 :             if (err != CL_SUCCESS)
    1549           0 :                 throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
    1550             : 
    1551           0 :             for (int i = 0; i < w; i++)
    1552           0 :                 pAllBuffer[i] = resbuf[i];
    1553           0 :             err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, NULL, NULL);
    1554           0 :             if (err != CL_SUCCESS)
    1555           0 :                 throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
    1556             : 
    1557           0 :             kernelName = Base::GetName() + "_count_reduction";
    1558           0 :             redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
    1559           0 :             if (err != CL_SUCCESS)
    1560           0 :                 throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
    1561             :             SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
    1562             : 
    1563             :             // set kernel arg of reduction kernel
    1564           0 :             buf = Base::GetCLBuffer();
    1565             :             SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
    1566           0 :             err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
    1567           0 :                 static_cast<void*>(&buf));
    1568           0 :             if (CL_SUCCESS != err)
    1569           0 :                 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    1570             : 
    1571             :             SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
    1572           0 :             err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
    1573           0 :             if (CL_SUCCESS != err)
    1574           0 :                 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    1575             : 
    1576             :             SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
    1577           0 :             err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
    1578           0 :             if (CL_SUCCESS != err)
    1579           0 :                 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    1580             : 
    1581             :             SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
    1582           0 :             err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
    1583           0 :             if (CL_SUCCESS != err)
    1584           0 :                 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    1585             : 
    1586             :             // set work group size and execute
    1587           0 :             size_t global_work_size1[] = { 256, (size_t)w };
    1588           0 :             size_t local_work_size1[] = { 256, 1 };
    1589             :             SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
    1590           0 :             err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
    1591           0 :                 global_work_size1, local_work_size1, 0, NULL, NULL);
    1592           0 :             if (CL_SUCCESS != err)
    1593           0 :                 throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
    1594           0 :             err = clFinish(kEnv.mpkCmdQueue);
    1595           0 :             if (CL_SUCCESS != err)
    1596           0 :                 throw OpenCLError("clFinish", err, __FILE__, __LINE__);
    1597           0 :             resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
    1598             :                 mpClmem2,
    1599             :                 CL_TRUE, CL_MAP_READ, 0,
    1600             :                 sizeof(double) * w, 0, NULL, NULL,
    1601           0 :                 &err));
    1602           0 :             if (err != CL_SUCCESS)
    1603           0 :                 throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
    1604           0 :             for (int i = 0; i < w; i++)
    1605           0 :                 pAllBuffer[i + w] = resbuf[i];
    1606           0 :             err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, NULL, NULL);
    1607             :             // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
    1608           0 :             if (CL_SUCCESS != err)
    1609             :                 SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << ::opencl::errorString(err));
    1610           0 :             if (mpClmem2)
    1611             :             {
    1612           0 :                 err = clReleaseMemObject(mpClmem2);
    1613             :                 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
    1614           0 :                 mpClmem2 = NULL;
    1615             :             }
    1616           0 :             mpClmem2 = clCreateBuffer(kEnv.mpkContext,
    1617             :                 (cl_mem_flags)CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
    1618           0 :                 w * sizeof(double) * 2, pAllBuffer.get(), &err);
    1619           0 :             if (CL_SUCCESS != err)
    1620           0 :                 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
    1621           0 :             SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
    1622             :         }
    1623             :         // set kernel arg
    1624             :         SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
    1625           0 :         err = clSetKernelArg(k, argno, sizeof(cl_mem), &(mpClmem2));
    1626           0 :         if (CL_SUCCESS != err)
    1627           0 :             throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    1628           0 :         return 1;
    1629             :     }
    1630           0 :     ~ParallelReductionVectorRef()
    1631             :     {
    1632           0 :         if (mpClmem2)
    1633             :         {
    1634             :             cl_int err;
    1635           0 :             err = clReleaseMemObject(mpClmem2);
    1636             :             SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
    1637           0 :             mpClmem2 = NULL;
    1638             :         }
    1639           0 :     }
    1640             : 
    1641             :     size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
    1642             : 
    1643           0 :     size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
    1644             : 
    1645             :     size_t GetStartFixed() const { return bIsStartFixed; }
    1646             : 
    1647             :     size_t GetEndFixed() const { return bIsEndFixed; }
    1648             : 
    1649             : protected:
    1650             :     bool bIsStartFixed, bIsEndFixed;
    1651             :     const formula::DoubleVectorRefToken* mpDVR;
    1652             :     // from parent nodes
    1653             :     boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
    1654             :     // controls whether to invoke the reduction kernel during marshaling or not
    1655             :     cl_mem mpClmem2;
    1656             : };
    1657             : 
    1658           0 : class Reduction : public SlidingFunctionBase
    1659             : {
    1660             :     int mnResultSize;
    1661             : public:
    1662           0 :     Reduction( int nResultSize ) : mnResultSize(nResultSize) {}
    1663             : 
    1664             :     typedef DynamicKernelSlidingArgument<VectorRef> NumericRange;
    1665             :     typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange;
    1666             :     typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange;
    1667             : 
    1668           0 :     virtual bool HandleNaNArgument( std::stringstream&, unsigned, SubArguments& ) const
    1669             :     {
    1670           0 :         return false;
    1671             :     }
    1672             : 
    1673           0 :     virtual void GenSlidingWindowFunction( std::stringstream& ss,
    1674             :         const std::string& sSymName, SubArguments& vSubArguments ) SAL_OVERRIDE
    1675             :     {
    1676           0 :         ss << "\ndouble " << sSymName;
    1677           0 :         ss << "_" << BinFuncName() << "(";
    1678           0 :         for (size_t i = 0; i < vSubArguments.size(); i++)
    1679             :         {
    1680           0 :             if (i)
    1681           0 :                 ss << ", ";
    1682           0 :             vSubArguments[i]->GenSlidingWindowDecl(ss);
    1683             :         }
    1684           0 :         ss << ") {\n";
    1685           0 :         ss << "double tmp = " << GetBottom() << ";\n";
    1686           0 :         ss << "int gid0 = get_global_id(0);\n";
    1687           0 :         if (isAverage())
    1688           0 :             ss << "int nCount = 0;\n";
    1689           0 :         ss << "double tmpBottom;\n";
    1690           0 :         unsigned i = vSubArguments.size();
    1691           0 :         while (i--)
    1692             :         {
    1693           0 :             if (NumericRange* NR =
    1694           0 :                 dynamic_cast<NumericRange*>(vSubArguments[i].get()))
    1695             :             {
    1696           0 :                 bool needBody; NR->GenReductionLoopHeader(ss, needBody); if (!needBody)
    1697           0 :                     continue;
    1698             :             }
    1699           0 :             else if (ParallelNumericRange* PNR =
    1700           0 :                 dynamic_cast<ParallelNumericRange*>(vSubArguments[i].get()))
    1701             :             {
    1702             :                 //did not handle yet
    1703           0 :                 bool bNeedBody = false;
    1704           0 :                 PNR->GenReductionLoopHeader(ss, mnResultSize, bNeedBody);
    1705           0 :                 if (!bNeedBody)
    1706           0 :                     continue;
    1707             :             }
    1708           0 :             else if (StringRange* SR =
    1709           0 :                 dynamic_cast<StringRange*>(vSubArguments[i].get()))
    1710             :             {
    1711             :                 //did not handle yet
    1712             :                 bool needBody;
    1713           0 :                 SR->GenReductionLoopHeader(ss, needBody);
    1714           0 :                 if (!needBody)
    1715           0 :                     continue;
    1716             :             }
    1717             :             else
    1718             :             {
    1719           0 :                 FormulaToken* pCur = vSubArguments[i]->GetFormulaToken();
    1720             :                 assert(pCur);
    1721             :                 assert(pCur->GetType() != formula::svDoubleVectorRef);
    1722             : 
    1723           0 :                 if (pCur->GetType() == formula::svSingleVectorRef)
    1724             :                 {
    1725             :                     const formula::SingleVectorRefToken* pSVR =
    1726           0 :                         static_cast<const formula::SingleVectorRefToken*>(pCur);
    1727           0 :                     ss << "if (gid0 < " << pSVR->GetArrayLength() << "){\n";
    1728             :                 }
    1729           0 :                 else if (pCur->GetType() == formula::svDouble)
    1730             :                 {
    1731           0 :                     ss << "{\n";
    1732             :                 }
    1733             :             }
    1734           0 :             if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
    1735             :             {
    1736           0 :                 bool bNanHandled = HandleNaNArgument(ss, i, vSubArguments);
    1737             : 
    1738           0 :                 ss << "tmpBottom = " << GetBottom() << ";\n";
    1739             : 
    1740           0 :                 if (!bNanHandled)
    1741             :                 {
    1742           0 :                     ss << "if (isNan(";
    1743           0 :                     ss << vSubArguments[i]->GenSlidingWindowDeclRef();
    1744           0 :                     ss << "))\n";
    1745           0 :                     if (ZeroReturnZero())
    1746           0 :                         ss << "    return 0;\n";
    1747             :                     else
    1748             :                     {
    1749           0 :                         ss << "    tmp = ";
    1750           0 :                         ss << Gen2("tmpBottom", "tmp") << ";\n";
    1751             :                     }
    1752           0 :                     ss << "else\n";
    1753             :                 }
    1754           0 :                 ss << "{";
    1755           0 :                 ss << "        tmp = ";
    1756           0 :                 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
    1757           0 :                 ss << ";\n";
    1758           0 :                 ss << "    }\n";
    1759           0 :                 ss << "}\n";
    1760           0 :                 if (vSubArguments[i]->GetFormulaToken()->GetType() ==
    1761           0 :                         formula::svSingleVectorRef && ZeroReturnZero())
    1762             :                 {
    1763           0 :                     ss << "else{\n";
    1764           0 :                     ss << "        return 0;\n";
    1765           0 :                     ss << "    }\n";
    1766             :                 }
    1767             :             }
    1768             :             else
    1769             :             {
    1770           0 :                 ss << "tmp = ";
    1771           0 :                 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
    1772           0 :                 ss << ";\n";
    1773             :             }
    1774             :         }
    1775           0 :         ss << "return tmp";
    1776           0 :         if (isAverage())
    1777           0 :             ss << "*pow((double)nCount,-1.0)";
    1778           0 :         ss << ";\n}";
    1779           0 :     }
    1780           0 :     virtual bool isAverage() const { return false; }
    1781           0 :     virtual bool takeString() const SAL_OVERRIDE { return false; }
    1782           0 :     virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
    1783             : };
    1784             : 
    1785             : // Strictly binary operators
    1786           0 : class Binary : public SlidingFunctionBase
    1787             : {
    1788             : public:
    1789           0 :     virtual void GenSlidingWindowFunction( std::stringstream& ss,
    1790             :         const std::string& sSymName, SubArguments& vSubArguments ) SAL_OVERRIDE
    1791             :     {
    1792           0 :         ss << "\ndouble " << sSymName;
    1793           0 :         ss << "_" << BinFuncName() << "(";
    1794             :         assert(vSubArguments.size() == 2);
    1795           0 :         for (size_t i = 0; i < vSubArguments.size(); i++)
    1796             :         {
    1797           0 :             if (i)
    1798           0 :                 ss << ", ";
    1799           0 :             vSubArguments[i]->GenSlidingWindowDecl(ss);
    1800             :         }
    1801           0 :         ss << ") {\n\t";
    1802           0 :         ss << "int gid0 = get_global_id(0), i = 0;\n\t";
    1803           0 :         ss << "double tmp = ";
    1804           0 :         ss << Gen2(vSubArguments[0]->GenSlidingWindowDeclRef(false),
    1805           0 :             vSubArguments[1]->GenSlidingWindowDeclRef(false)) << ";\n\t";
    1806           0 :         ss << "return tmp;\n}";
    1807           0 :     }
    1808           0 :     virtual bool takeString() const SAL_OVERRIDE { return true; }
    1809           0 :     virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
    1810             : };
    1811             : 
    1812           0 : class SumOfProduct : public SlidingFunctionBase
    1813             : {
    1814             : public:
    1815           0 :     virtual void GenSlidingWindowFunction( std::stringstream& ss,
    1816             :         const std::string& sSymName, SubArguments& vSubArguments ) SAL_OVERRIDE
    1817             :     {
    1818           0 :         size_t nCurWindowSize = 0;
    1819           0 :         FormulaToken* tmpCur = NULL;
    1820           0 :         const formula::DoubleVectorRefToken* pCurDVR = NULL;
    1821           0 :         ss << "\ndouble " << sSymName;
    1822           0 :         ss << "_" << BinFuncName() << "(";
    1823           0 :         for (size_t i = 0; i < vSubArguments.size(); i++)
    1824             :         {
    1825           0 :             if (i)
    1826           0 :                 ss << ",";
    1827           0 :             vSubArguments[i]->GenSlidingWindowDecl(ss);
    1828           0 :             size_t nCurChildWindowSize = vSubArguments[i]->GetWindowSize();
    1829             :             nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
    1830           0 :                 nCurChildWindowSize : nCurWindowSize;
    1831           0 :             tmpCur = vSubArguments[i]->GetFormulaToken();
    1832           0 :             if (ocPush == tmpCur->GetOpCode())
    1833             :             {
    1834             : 
    1835           0 :                 pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
    1836           0 :                 if (!
    1837           0 :                     ((!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
    1838           0 :                         || (pCurDVR->IsStartFixed() && pCurDVR->IsEndFixed()))
    1839             :                     )
    1840           0 :                     throw Unhandled();
    1841             :             }
    1842             :         }
    1843           0 :         ss << ") {\n";
    1844           0 :         ss << "    double tmp = 0.0;\n";
    1845           0 :         ss << "    int gid0 = get_global_id(0);\n";
    1846             : 
    1847           0 :         ss << "\tint i;\n\t";
    1848           0 :         ss << "int currentCount0;\n";
    1849           0 :         for (unsigned i = 0; i < vSubArguments.size() - 1; i++)
    1850           0 :             ss << "int currentCount" << i + 1 << ";\n";
    1851           0 :         std::stringstream temp3, temp4;
    1852           0 :         int outLoopSize = UNROLLING_FACTOR;
    1853           0 :         if (nCurWindowSize / outLoopSize != 0)
    1854             :         {
    1855           0 :             ss << "for(int outLoop=0; outLoop<" <<
    1856           0 :                 nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
    1857           0 :             for (int count = 0; count < outLoopSize; count++)
    1858             :             {
    1859           0 :                 ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n";
    1860           0 :                 if (count == 0)
    1861             :                 {
    1862           0 :                     for (size_t i = 0; i < vSubArguments.size(); i++)
    1863             :                     {
    1864           0 :                         tmpCur = vSubArguments[i]->GetFormulaToken();
    1865           0 :                         if (ocPush == tmpCur->GetOpCode())
    1866             :                         {
    1867           0 :                             pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
    1868           0 :                             if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
    1869             :                             {
    1870           0 :                                 temp3 << "        currentCount";
    1871           0 :                                 temp3 << i;
    1872           0 :                                 temp3 << " =i+gid0+1;\n";
    1873             :                             }
    1874             :                             else
    1875             :                             {
    1876           0 :                                 temp3 << "        currentCount";
    1877           0 :                                 temp3 << i;
    1878           0 :                                 temp3 << " =i+1;\n";
    1879             :                             }
    1880             :                         }
    1881             :                     }
    1882             : 
    1883           0 :                     temp3 << "tmp = fsum(";
    1884           0 :                     for (size_t i = 0; i < vSubArguments.size(); i++)
    1885             :                     {
    1886           0 :                         if (i)
    1887           0 :                             temp3 << "*";
    1888           0 :                         if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
    1889             :                         {
    1890           0 :                             temp3 << "(";
    1891           0 :                             temp3 << "(currentCount";
    1892           0 :                             temp3 << i;
    1893           0 :                             temp3 << ">";
    1894           0 :                             if (vSubArguments[i]->GetFormulaToken()->GetType() ==
    1895             :                                     formula::svSingleVectorRef)
    1896             :                             {
    1897             :                                 const formula::SingleVectorRefToken* pSVR =
    1898             :                                     static_cast<const formula::SingleVectorRefToken*>
    1899           0 :                                     (vSubArguments[i]->GetFormulaToken());
    1900           0 :                                 temp3 << pSVR->GetArrayLength();
    1901           0 :                                 temp3 << ")||isNan(" << vSubArguments[i]
    1902           0 :                                     ->GenSlidingWindowDeclRef();
    1903           0 :                                 temp3 << ")?0:";
    1904           0 :                                 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef();
    1905           0 :                                 temp3  << ")";
    1906             :                             }
    1907           0 :                             else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
    1908             :                                     formula::svDoubleVectorRef)
    1909             :                             {
    1910             :                                 const formula::DoubleVectorRefToken* pSVR =
    1911             :                                     static_cast<const formula::DoubleVectorRefToken*>
    1912           0 :                                     (vSubArguments[i]->GetFormulaToken());
    1913           0 :                                 temp3 << pSVR->GetArrayLength();
    1914           0 :                                 temp3 << ")||isNan(" << vSubArguments[i]
    1915           0 :                                     ->GenSlidingWindowDeclRef(true);
    1916           0 :                                 temp3 << ")?0:";
    1917           0 :                                 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
    1918           0 :                                 temp3  << ")";
    1919             :                             }
    1920             : 
    1921             :                         }
    1922             :                         else
    1923           0 :                             temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
    1924             :                     }
    1925           0 :                     temp3 << ", tmp);\n\t";
    1926             :                 }
    1927           0 :                 ss << temp3.str();
    1928             :             }
    1929           0 :             ss << "}\n\t";
    1930             :         }
    1931             :         //The residual of mod outLoopSize
    1932           0 :         for (size_t count = nCurWindowSize / outLoopSize * outLoopSize;
    1933             :             count < nCurWindowSize; count++)
    1934             :         {
    1935           0 :             ss << "i =" << count << ";\n";
    1936           0 :             if (count == nCurWindowSize / outLoopSize * outLoopSize)
    1937             :             {
    1938           0 :                 for (size_t i = 0; i < vSubArguments.size(); i++)
    1939             :                 {
    1940           0 :                     tmpCur = vSubArguments[i]->GetFormulaToken();
    1941           0 :                     if (ocPush == tmpCur->GetOpCode())
    1942             :                     {
    1943           0 :                         pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
    1944           0 :                         if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
    1945             :                         {
    1946           0 :                             temp4 << "        currentCount";
    1947           0 :                             temp4 << i;
    1948           0 :                             temp4 << " =i+gid0+1;\n";
    1949             :                         }
    1950             :                         else
    1951             :                         {
    1952           0 :                             temp4 << "        currentCount";
    1953           0 :                             temp4 << i;
    1954           0 :                             temp4 << " =i+1;\n";
    1955             :                         }
    1956             :                     }
    1957             :                 }
    1958             : 
    1959           0 :                 temp4 << "tmp = fsum(";
    1960           0 :                 for (size_t i = 0; i < vSubArguments.size(); i++)
    1961             :                 {
    1962           0 :                     if (i)
    1963           0 :                         temp4 << "*";
    1964           0 :                     if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
    1965             :                     {
    1966           0 :                         temp4 << "(";
    1967           0 :                         temp4 << "(currentCount";
    1968           0 :                         temp4 << i;
    1969           0 :                         temp4 << ">";
    1970           0 :                         if (vSubArguments[i]->GetFormulaToken()->GetType() ==
    1971             :                                 formula::svSingleVectorRef)
    1972             :                         {
    1973             :                             const formula::SingleVectorRefToken* pSVR =
    1974             :                                 static_cast<const formula::SingleVectorRefToken*>
    1975           0 :                                 (vSubArguments[i]->GetFormulaToken());
    1976           0 :                             temp4 << pSVR->GetArrayLength();
    1977           0 :                             temp4 << ")||isNan(" << vSubArguments[i]
    1978           0 :                                 ->GenSlidingWindowDeclRef();
    1979           0 :                             temp4 << ")?0:";
    1980           0 :                             temp4 << vSubArguments[i]->GenSlidingWindowDeclRef();
    1981           0 :                             temp4  << ")";
    1982             :                         }
    1983           0 :                         else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
    1984             :                                 formula::svDoubleVectorRef)
    1985             :                         {
    1986             :                             const formula::DoubleVectorRefToken* pSVR =
    1987             :                                 static_cast<const formula::DoubleVectorRefToken*>
    1988           0 :                                 (vSubArguments[i]->GetFormulaToken());
    1989           0 :                             temp4 << pSVR->GetArrayLength();
    1990           0 :                             temp4 << ")||isNan(" << vSubArguments[i]
    1991           0 :                                 ->GenSlidingWindowDeclRef(true);
    1992           0 :                             temp4 << ")?0:";
    1993           0 :                             temp4 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
    1994           0 :                             temp4  << ")";
    1995             :                         }
    1996             : 
    1997             :                     }
    1998             :                     else
    1999             :                     {
    2000           0 :                         temp4 << vSubArguments[i]
    2001           0 :                             ->GenSlidingWindowDeclRef(true);
    2002             :                     }
    2003             :                 }
    2004           0 :                 temp4 << ", tmp);\n\t";
    2005             :             }
    2006           0 :             ss << temp4.str();
    2007             :         }
    2008           0 :         ss << "return tmp;\n";
    2009           0 :         ss << "}";
    2010           0 :     }
    2011           0 :     virtual bool takeString() const SAL_OVERRIDE { return false; }
    2012           0 :     virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
    2013             : };
    2014             : 
    2015             : /// operator traits
    2016           0 : class OpNop : public Reduction
    2017             : {
    2018             : public:
    2019           0 :     OpNop( int nResultSize ) : Reduction(nResultSize) {}
    2020             : 
    2021           0 :     virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
    2022           0 :     virtual std::string Gen2( const std::string& lhs, const std::string& ) const SAL_OVERRIDE
    2023             :     {
    2024           0 :         return lhs;
    2025             :     }
    2026           0 :     virtual std::string BinFuncName() const SAL_OVERRIDE { return "nop"; }
    2027             : };
    2028             : 
    2029           0 : class OpCount : public Reduction
    2030             : {
    2031             : public:
    2032           0 :     OpCount( int nResultSize ) : Reduction(nResultSize) {}
    2033             : 
    2034           0 :     virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
    2035           0 :     virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
    2036             :     {
    2037           0 :         std::stringstream ss;
    2038           0 :         ss << "(isNan(" << lhs << ")?" << rhs << ":" << rhs << "+1.0)";
    2039           0 :         return ss.str();
    2040             :     }
    2041           0 :     virtual std::string BinFuncName() const SAL_OVERRIDE { return "fcount"; }
    2042             : };
    2043             : 
    2044           0 : class OpEqual : public Binary
    2045             : {
    2046             : public:
    2047           0 :     virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
    2048           0 :     virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
    2049             :     {
    2050           0 :         std::stringstream ss;
    2051           0 :         ss << "strequal(" << lhs << "," << rhs << ")";
    2052           0 :         return ss.str();
    2053             :     }
    2054           0 :     virtual std::string BinFuncName() const SAL_OVERRIDE { return "eq"; }
    2055             : };
    2056             : 
    2057           0 : class OpLessEqual : public Binary
    2058             : {
    2059             : public:
    2060           0 :     virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
    2061           0 :     virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
    2062             :     {
    2063           0 :         std::stringstream ss;
    2064           0 :         ss << "(" << lhs << "<=" << rhs << ")";
    2065           0 :         return ss.str();
    2066             :     }
    2067           0 :     virtual std::string BinFuncName() const SAL_OVERRIDE { return "leq"; }
    2068             : };
    2069             : 
    2070           0 : class OpLess : public Binary
    2071             : {
    2072             : public:
    2073           0 :     virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
    2074           0 :     virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
    2075             :     {
    2076           0 :         std::stringstream ss;
    2077           0 :         ss << "(" << lhs << "<" << rhs << ")";
    2078           0 :         return ss.str();
    2079             :     }
    2080           0 :     virtual std::string BinFuncName() const SAL_OVERRIDE { return "less"; }
    2081             : };
    2082             : 
    2083           0 : class OpGreater : public Binary
    2084             : {
    2085             : public:
    2086           0 :     virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
    2087           0 :     virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
    2088             :     {
    2089           0 :         std::stringstream ss;
    2090           0 :         ss << "(" << lhs << ">" << rhs << ")";
    2091           0 :         return ss.str();
    2092             :     }
    2093           0 :     virtual std::string BinFuncName() const SAL_OVERRIDE { return "gt"; }
    2094             : };
    2095             : 
    2096           0 : class OpSum : public Reduction
    2097             : {
    2098             : public:
    2099           0 :     OpSum( int nResultSize ) : Reduction(nResultSize) {}
    2100             : 
    2101           0 :     virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
    2102           0 :     virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
    2103             :     {
    2104           0 :         std::stringstream ss;
    2105           0 :         ss << "((" << lhs << ")+(" << rhs << "))";
    2106           0 :         return ss.str();
    2107             :     }
    2108           0 :     virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsum"; }
    2109             : };
    2110             : 
    2111           0 : class OpAverage : public Reduction
    2112             : {
    2113             : public:
    2114           0 :     OpAverage( int nResultSize ) : Reduction(nResultSize) {}
    2115             : 
    2116           0 :     virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
    2117           0 :     virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
    2118             :     {
    2119           0 :         std::stringstream ss;
    2120           0 :         ss << "fsum_count(" << lhs << "," << rhs << ", &nCount)";
    2121           0 :         return ss.str();
    2122             :     }
    2123           0 :     virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsum"; }
    2124           0 :     virtual bool isAverage() const SAL_OVERRIDE { return true; }
    2125             : };
    2126             : 
    2127           0 : class OpSub : public Reduction
    2128             : {
    2129             : public:
    2130           0 :     OpSub( int nResultSize ) : Reduction(nResultSize) {}
    2131             : 
    2132           0 :     virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
    2133           0 :     virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
    2134             :     {
    2135           0 :         return lhs + "-" + rhs;
    2136             :     }
    2137           0 :     virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsub"; }
    2138             : };
    2139             : 
    2140           0 : class OpMul : public Reduction
    2141             : {
    2142             : public:
    2143           0 :     OpMul( int nResultSize ) : Reduction(nResultSize) {}
    2144             : 
    2145           0 :     virtual std::string GetBottom() SAL_OVERRIDE { return "1"; }
    2146           0 :     virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
    2147             :     {
    2148           0 :         return lhs + "*" + rhs;
    2149             :     }
    2150           0 :     virtual std::string BinFuncName() const SAL_OVERRIDE { return "fmul"; }
    2151           0 :     virtual bool ZeroReturnZero() SAL_OVERRIDE { return true; }
    2152             : };
    2153             : 
    2154             : /// Technically not a reduction, but fits the framework.
    2155           0 : class OpDiv : public Reduction
    2156             : {
    2157             : public:
    2158           0 :     OpDiv( int nResultSize ) : Reduction(nResultSize) {}
    2159             : 
    2160           0 :     virtual std::string GetBottom() SAL_OVERRIDE { return "1.0"; }
    2161           0 :     virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
    2162             :     {
    2163           0 :         return "(" + rhs + "==0 ? CreateDoubleError(errDivisionByZero) : (" + lhs + "/" + rhs + ") )";
    2164             :     }
    2165           0 :     virtual std::string BinFuncName() const SAL_OVERRIDE { return "fdiv"; }
    2166             : 
    2167           0 :     virtual bool HandleNaNArgument( std::stringstream& ss, unsigned argno, SubArguments& vSubArguments ) const SAL_OVERRIDE
    2168             :     {
    2169           0 :         if (argno == 1)
    2170             :         {
    2171             :             ss <<
    2172           0 :                 "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ")) {\n"
    2173           0 :                 "    if (GetDoubleErrorValue(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ") == errNoValue)\n"
    2174             :                 "        return CreateDoubleError(errDivisionByZero);\n"
    2175           0 :                 "}\n";
    2176           0 :             return true;
    2177             :         }
    2178           0 :         else if (argno == 0)
    2179             :         {
    2180             :             ss <<
    2181           0 :                 "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ")) {\n"
    2182           0 :                 "    if (GetDoubleErrorValue(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ") == errNoValue) {\n"
    2183           0 :                 "        if (" << vSubArguments[1]->GenSlidingWindowDeclRef() << " == 0)\n"
    2184             :                 "            return CreateDoubleError(errDivisionByZero);\n"
    2185             :                 "        return 0;\n"
    2186             :                 "    }\n"
    2187           0 :                 "}\n";
    2188             :         }
    2189           0 :         return false;
    2190             :     }
    2191             : 
    2192             : };
    2193             : 
    2194           0 : class OpMin : public Reduction
    2195             : {
    2196             : public:
    2197           0 :     OpMin( int nResultSize ) : Reduction(nResultSize) {}
    2198             : 
    2199           0 :     virtual std::string GetBottom() SAL_OVERRIDE { return "MAXFLOAT"; }
    2200           0 :     virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
    2201             :     {
    2202           0 :         return "fmin(" + lhs + "," + rhs + ")";
    2203             :     }
    2204           0 :     virtual std::string BinFuncName() const SAL_OVERRIDE { return "min"; }
    2205             : };
    2206             : 
    2207           0 : class OpMax : public Reduction
    2208             : {
    2209             : public:
    2210           0 :     OpMax( int nResultSize ) : Reduction(nResultSize) {}
    2211             : 
    2212           0 :     virtual std::string GetBottom() SAL_OVERRIDE { return "-MAXFLOAT"; }
    2213           0 :     virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
    2214             :     {
    2215           0 :         return "fmax(" + lhs + "," + rhs + ")";
    2216             :     }
    2217           0 :     virtual std::string BinFuncName() const SAL_OVERRIDE { return "max"; }
    2218             : };
    2219             : 
    2220           0 : class OpSumProduct : public SumOfProduct
    2221             : {
    2222             : public:
    2223           0 :     virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
    2224           0 :     virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
    2225             :     {
    2226           0 :         return lhs + "*" + rhs;
    2227             :     }
    2228           0 :     virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsop"; }
    2229             : };
    2230             : namespace {
    2231             : struct SumIfsArgs
    2232             : {
    2233           0 :     SumIfsArgs( cl_mem x ) : mCLMem(x), mConst(0.0) { }
    2234           0 :     SumIfsArgs( double x ) : mCLMem(NULL), mConst(x) { }
    2235             :     cl_mem mCLMem;
    2236             :     double mConst;
    2237             : };
    2238             : }
    2239             : 
    2240             : /// Helper functions that have multiple buffers
    2241             : class DynamicKernelSoPArguments : public DynamicKernelArgument
    2242             : {
    2243             : public:
    2244             :     typedef std::vector<DynamicKernelArgumentRef> SubArgumentsType;
    2245             : 
    2246             :     DynamicKernelSoPArguments( const ScCalcConfig& config,
    2247             :         const std::string& s, const FormulaTreeNodeRef& ft,
    2248             :         SlidingFunctionBase* pCodeGen, int nResultSize );
    2249             : 
    2250             :     /// Create buffer and pass the buffer to a given kernel
    2251           0 :     virtual size_t Marshal( cl_kernel k, int argno, int nVectorWidth, cl_program pProgram ) SAL_OVERRIDE
    2252             :     {
    2253           0 :         unsigned i = 0;
    2254           0 :         for (SubArgumentsType::iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
    2255             :             ++it)
    2256             :         {
    2257           0 :             i += (*it)->Marshal(k, argno + i, nVectorWidth, pProgram);
    2258             :         }
    2259           0 :         if (OpGeoMean* OpSumCodeGen = dynamic_cast<OpGeoMean*>(mpCodeGen.get()))
    2260             :         {
    2261             :             // Obtain cl context
    2262             :             ::opencl::KernelEnv kEnv;
    2263           0 :             ::opencl::setKernelEnv(&kEnv);
    2264             :             cl_int err;
    2265             :             cl_mem pClmem2;
    2266             : 
    2267           0 :             if (OpSumCodeGen->NeedReductionKernel())
    2268             :             {
    2269           0 :                 std::vector<cl_mem> vclmem;
    2270           0 :                 for (SubArgumentsType::iterator it = mvSubArguments.begin(),
    2271           0 :                     e = mvSubArguments.end(); it != e; ++it)
    2272             :                 {
    2273           0 :                     if (VectorRef* VR = dynamic_cast<VectorRef*>(it->get()))
    2274           0 :                         vclmem.push_back(VR->GetCLBuffer());
    2275             :                     else
    2276           0 :                         vclmem.push_back(NULL);
    2277             :                 }
    2278             :                 pClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
    2279           0 :                     sizeof(double) * nVectorWidth, NULL, &err);
    2280           0 :                 if (CL_SUCCESS != err)
    2281           0 :                     throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
    2282             :                 SAL_INFO("sc.opencl", "Created buffer " << pClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
    2283             : 
    2284           0 :                 std::string kernelName = "GeoMean_reduction";
    2285           0 :                 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
    2286           0 :                 if (err != CL_SUCCESS)
    2287           0 :                     throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
    2288             :                 SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
    2289             : 
    2290             :                 // set kernel arg of reduction kernel
    2291           0 :                 for (size_t j = 0; j < vclmem.size(); j++)
    2292             :                 {
    2293             :                     SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": " << (vclmem[j] ? "cl_mem" : "double") << ": " << vclmem[j]);
    2294             :                     err = clSetKernelArg(redKernel, j,
    2295           0 :                         vclmem[j] ? sizeof(cl_mem) : sizeof(double),
    2296           0 :                         static_cast<void*>(&vclmem[j]));
    2297           0 :                     if (CL_SUCCESS != err)
    2298           0 :                         throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    2299             :                 }
    2300             :                 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << pClmem2);
    2301           0 :                 err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), static_cast<void*>(&pClmem2));
    2302           0 :                 if (CL_SUCCESS != err)
    2303           0 :                     throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    2304             : 
    2305             :                 // set work group size and execute
    2306           0 :                 size_t global_work_size[] = { 256, (size_t)nVectorWidth };
    2307           0 :                 size_t local_work_size[] = { 256, 1 };
    2308             :                 SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
    2309             :                 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
    2310           0 :                     global_work_size, local_work_size, 0, NULL, NULL);
    2311           0 :                 if (CL_SUCCESS != err)
    2312           0 :                     throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
    2313           0 :                 err = clFinish(kEnv.mpkCmdQueue);
    2314           0 :                 if (CL_SUCCESS != err)
    2315           0 :                     throw OpenCLError("clFinish", err, __FILE__, __LINE__);
    2316             : 
    2317             :                 // Pass pClmem2 to the "real" kernel
    2318             :                 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << pClmem2);
    2319           0 :                 err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&pClmem2));
    2320           0 :                 if (CL_SUCCESS != err)
    2321           0 :                     throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    2322             :             }
    2323             :         }
    2324           0 :         if (OpSumIfs* OpSumCodeGen = dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
    2325             :         {
    2326             :             // Obtain cl context
    2327             :             ::opencl::KernelEnv kEnv;
    2328           0 :             ::opencl::setKernelEnv(&kEnv);
    2329             :             cl_int err;
    2330           0 :             DynamicKernelArgument* Arg = mvSubArguments[0].get();
    2331             :             DynamicKernelSlidingArgument<VectorRef>* slidingArgPtr =
    2332           0 :                 static_cast<DynamicKernelSlidingArgument<VectorRef>*>(Arg);
    2333           0 :             mpClmem2 = NULL;
    2334             : 
    2335           0 :             if (OpSumCodeGen->NeedReductionKernel())
    2336             :             {
    2337           0 :                 size_t nInput = slidingArgPtr->GetArrayLength();
    2338           0 :                 size_t nCurWindowSize = slidingArgPtr->GetWindowSize();
    2339           0 :                 std::vector<SumIfsArgs> vclmem;
    2340             : 
    2341           0 :                 for (SubArgumentsType::iterator it = mvSubArguments.begin(),
    2342           0 :                     e = mvSubArguments.end(); it != e; ++it)
    2343             :                 {
    2344           0 :                     if (VectorRef* VR = dynamic_cast<VectorRef*>(it->get()))
    2345           0 :                         vclmem.push_back(SumIfsArgs(VR->GetCLBuffer()));
    2346           0 :                     else if (DynamicKernelConstantArgument* CA = dynamic_cast<DynamicKernelConstantArgument*>(it->get()))
    2347           0 :                         vclmem.push_back(SumIfsArgs(CA->GetDouble()));
    2348             :                     else
    2349           0 :                         vclmem.push_back(SumIfsArgs(nullptr));
    2350             :                 }
    2351             :                 mpClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
    2352           0 :                     sizeof(double) * nVectorWidth, NULL, &err);
    2353           0 :                 if (CL_SUCCESS != err)
    2354           0 :                     throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
    2355             :                 SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
    2356             : 
    2357           0 :                 std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction";
    2358           0 :                 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
    2359           0 :                 if (err != CL_SUCCESS)
    2360           0 :                     throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
    2361             :                 SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
    2362             : 
    2363             :                 // set kernel arg of reduction kernel
    2364           0 :                 for (size_t j = 0; j < vclmem.size(); j++)
    2365             :                 {
    2366           0 :                     if (vclmem[j].mCLMem)
    2367             :                         SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": cl_mem: " << vclmem[j].mCLMem);
    2368             :                     else
    2369             :                         SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": double: " << vclmem[j].mConst);
    2370             :                     err = clSetKernelArg(redKernel, j,
    2371           0 :                         vclmem[j].mCLMem ? sizeof(cl_mem) : sizeof(double),
    2372           0 :                         vclmem[j].mCLMem ? static_cast<void*>(&vclmem[j].mCLMem) :
    2373           0 :                                            static_cast<void*>(&vclmem[j].mConst));
    2374           0 :                     if (CL_SUCCESS != err)
    2375           0 :                         throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    2376             :                 }
    2377             :                 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << mpClmem2);
    2378           0 :                 err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), static_cast<void*>(&mpClmem2));
    2379           0 :                 if (CL_SUCCESS != err)
    2380           0 :                     throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    2381             : 
    2382             :                 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 1) << ": cl_int: " << nInput);
    2383           0 :                 err = clSetKernelArg(redKernel, vclmem.size() + 1, sizeof(cl_int), static_cast<void*>(&nInput));
    2384           0 :                 if (CL_SUCCESS != err)
    2385           0 :                     throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    2386             : 
    2387             :                 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 2) << ": cl_int: " << nCurWindowSize);
    2388           0 :                 err = clSetKernelArg(redKernel, vclmem.size() + 2, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
    2389           0 :                 if (CL_SUCCESS != err)
    2390           0 :                     throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    2391             :                 // set work group size and execute
    2392           0 :                 size_t global_work_size[] = { 256, (size_t)nVectorWidth };
    2393           0 :                 size_t local_work_size[] = { 256, 1 };
    2394             :                 SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
    2395             :                 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
    2396           0 :                     global_work_size, local_work_size, 0, NULL, NULL);
    2397           0 :                 if (CL_SUCCESS != err)
    2398           0 :                     throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
    2399             : 
    2400           0 :                 err = clFinish(kEnv.mpkCmdQueue);
    2401           0 :                 if (CL_SUCCESS != err)
    2402           0 :                     throw OpenCLError("clFinish", err, __FILE__, __LINE__);
    2403             : 
    2404             :                 SAL_INFO("sc.opencl", "Relasing kernel " << redKernel);
    2405           0 :                 err = clReleaseKernel(redKernel);
    2406             :                 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << ::opencl::errorString(err));
    2407             : 
    2408             :                 // Pass mpClmem2 to the "real" kernel
    2409             :                 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
    2410           0 :                 err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem2));
    2411           0 :                 if (CL_SUCCESS != err)
    2412           0 :                     throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    2413             :             }
    2414             :         }
    2415           0 :         return i;
    2416             :     }
    2417             : 
    2418           0 :     virtual void GenSlidingWindowFunction( std::stringstream& ss ) SAL_OVERRIDE
    2419             :     {
    2420           0 :         for (size_t i = 0; i < mvSubArguments.size(); i++)
    2421           0 :             mvSubArguments[i]->GenSlidingWindowFunction(ss);
    2422           0 :         mpCodeGen->GenSlidingWindowFunction(ss, mSymName, mvSubArguments);
    2423           0 :     }
    2424           0 :     virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
    2425             :     {
    2426           0 :         for (size_t i = 0; i < mvSubArguments.size(); i++)
    2427             :         {
    2428           0 :             if (i)
    2429           0 :                 ss << ",";
    2430           0 :             mvSubArguments[i]->GenDeclRef(ss);
    2431             :         }
    2432           0 :     }
    2433           0 :     virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
    2434             :     {
    2435           0 :         for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
    2436             :             ++it)
    2437             :         {
    2438           0 :             if (it != mvSubArguments.begin())
    2439           0 :                 ss << ", ";
    2440           0 :             (*it)->GenDecl(ss);
    2441             :         }
    2442           0 :     }
    2443             : 
    2444           0 :     virtual size_t GetWindowSize() const SAL_OVERRIDE
    2445             :     {
    2446           0 :         size_t nCurWindowSize = 0;
    2447           0 :         for (size_t i = 0; i < mvSubArguments.size(); i++)
    2448             :         {
    2449           0 :             size_t nCurChildWindowSize = mvSubArguments[i]->GetWindowSize();
    2450             :             nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
    2451           0 :                 nCurChildWindowSize : nCurWindowSize;
    2452             :         }
    2453           0 :         return nCurWindowSize;
    2454             :     }
    2455             : 
    2456             :     /// When declared as input to a sliding window function
    2457           0 :     virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
    2458             :     {
    2459           0 :         for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
    2460             :             ++it)
    2461             :         {
    2462           0 :             if (it != mvSubArguments.begin())
    2463           0 :                 ss << ", ";
    2464           0 :             (*it)->GenSlidingWindowDecl(ss);
    2465             :         }
    2466           0 :     }
    2467             :     /// Generate either a function call to each children
    2468             :     /// or directly inline it if we are already inside a loop
    2469           0 :     virtual std::string GenSlidingWindowDeclRef( bool nested = false ) const SAL_OVERRIDE
    2470             :     {
    2471           0 :         std::stringstream ss;
    2472           0 :         if (!nested)
    2473             :         {
    2474           0 :             ss << mSymName << "_" << mpCodeGen->BinFuncName() << "(";
    2475           0 :             for (size_t i = 0; i < mvSubArguments.size(); i++)
    2476             :             {
    2477           0 :                 if (i)
    2478           0 :                     ss << ", ";
    2479           0 :                 mvSubArguments[i]->GenDeclRef(ss);
    2480             :             }
    2481           0 :             ss << ")";
    2482             :         }
    2483             :         else
    2484             :         {
    2485           0 :             if (mvSubArguments.size() != 2)
    2486           0 :                 throw Unhandled();
    2487             :             bool bArgument1_NeedNested =
    2488           0 :                 mvSubArguments[0]->GetFormulaToken()->GetType()
    2489           0 :                 != formula::svSingleVectorRef;
    2490             :             bool bArgument2_NeedNested =
    2491           0 :                 mvSubArguments[1]->GetFormulaToken()->GetType()
    2492           0 :                 != formula::svSingleVectorRef;
    2493           0 :             ss << "(";
    2494           0 :             ss << mpCodeGen->
    2495           0 :                 Gen2(mvSubArguments[0]
    2496           0 :                 ->GenSlidingWindowDeclRef(bArgument1_NeedNested),
    2497           0 :                 mvSubArguments[1]
    2498           0 :                 ->GenSlidingWindowDeclRef(bArgument2_NeedNested));
    2499           0 :             ss << ")";
    2500             :         }
    2501           0 :         return ss.str();
    2502             :     }
    2503           0 :     virtual std::string DumpOpName() const SAL_OVERRIDE
    2504             :     {
    2505           0 :         std::string t = "_" + mpCodeGen->BinFuncName();
    2506           0 :         for (size_t i = 0; i < mvSubArguments.size(); i++)
    2507           0 :             t = t + mvSubArguments[i]->DumpOpName();
    2508           0 :         return t;
    2509             :     }
    2510           0 :     virtual void DumpInlineFun( std::set<std::string>& decls,
    2511             :         std::set<std::string>& funs ) const SAL_OVERRIDE
    2512             :     {
    2513           0 :         mpCodeGen->BinInlineFun(decls, funs);
    2514           0 :         for (size_t i = 0; i < mvSubArguments.size(); i++)
    2515           0 :             mvSubArguments[i]->DumpInlineFun(decls, funs);
    2516           0 :     }
    2517           0 :     virtual ~DynamicKernelSoPArguments()
    2518           0 :     {
    2519           0 :         if (mpClmem2)
    2520             :         {
    2521             :             cl_int err;
    2522           0 :             err = clReleaseMemObject(mpClmem2);
    2523             :             SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
    2524           0 :             mpClmem2 = NULL;
    2525             :         }
    2526           0 :     }
    2527             : 
    2528             : private:
    2529             :     SubArgumentsType mvSubArguments;
    2530             :     boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
    2531             :     cl_mem mpClmem2;
    2532             : };
    2533             : 
    2534           0 : DynamicKernelArgumentRef SoPHelper( const ScCalcConfig& config,
    2535             :     const std::string& ts, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen,
    2536             :     int nResultSize )
    2537             : {
    2538           0 :     return DynamicKernelArgumentRef(new DynamicKernelSoPArguments(config, ts, ft, pCodeGen, nResultSize));
    2539             : }
    2540             : 
    2541             : template<class Base>
    2542           0 : DynamicKernelArgument* VectorRefFactory( const ScCalcConfig& config, const std::string& s,
    2543             :     const FormulaTreeNodeRef& ft,
    2544             :     boost::shared_ptr<SlidingFunctionBase>& pCodeGen,
    2545             :     int index )
    2546             : {
    2547             :     //Black lists ineligible classes here ..
    2548             :     // SUMIFS does not perform parallel reduction at DoubleVectorRef level
    2549           0 :     if (dynamic_cast<OpSumIfs*>(pCodeGen.get()))
    2550             :     {
    2551           0 :         if (index == 0) // the first argument of OpSumIfs cannot be strings anyway
    2552           0 :             return new DynamicKernelSlidingArgument<VectorRef>(config, s, ft, pCodeGen, index);
    2553           0 :         return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
    2554             :     }
    2555             :     // AVERAGE is not supported yet
    2556             :     //Average has been supported by reduction kernel
    2557             :     /*else if (dynamic_cast<OpAverage*>(pCodeGen.get()))
    2558             :     {
    2559             :         return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
    2560             :     }*/
    2561             :     // MUL is not supported yet
    2562           0 :     else if (dynamic_cast<OpMul*>(pCodeGen.get()))
    2563             :     {
    2564           0 :         return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
    2565             :     }
    2566             :     // Sub is not a reduction per se
    2567           0 :     else if (dynamic_cast<OpSub*>(pCodeGen.get()))
    2568             :     {
    2569           0 :         return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
    2570             :     }
    2571             :     // Only child class of Reduction is supported
    2572           0 :     else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
    2573             :     {
    2574           0 :         return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
    2575             :     }
    2576             : 
    2577             :     const formula::DoubleVectorRefToken* pDVR =
    2578             :         static_cast<const formula::DoubleVectorRefToken*>(
    2579           0 :         ft->GetFormulaToken());
    2580             :     // Window being too small to justify a parallel reduction
    2581           0 :     if (pDVR->GetRefRowSize() < REDUCE_THRESHOLD)
    2582           0 :         return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
    2583           0 :     if ((pDVR->IsStartFixed() && pDVR->IsEndFixed()) ||
    2584           0 :         (!pDVR->IsStartFixed() && !pDVR->IsEndFixed()))
    2585           0 :         return new ParallelReductionVectorRef<Base>(config, s, ft, pCodeGen, index);
    2586             :     else // Other cases are not supported as well
    2587           0 :         return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
    2588             : }
    2589             : 
    2590           0 : DynamicKernelSoPArguments::DynamicKernelSoPArguments(const ScCalcConfig& config,
    2591             :     const std::string& s, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen, int nResultSize ) :
    2592           0 :     DynamicKernelArgument(config, s, ft), mpCodeGen(pCodeGen), mpClmem2(NULL)
    2593             : {
    2594           0 :     size_t nChildren = ft->Children.size();
    2595             : 
    2596           0 :     for (size_t i = 0; i < nChildren; i++)
    2597             :     {
    2598           0 :         FormulaTreeNodeRef rChild = ft->Children[i];
    2599           0 :         if (!rChild)
    2600           0 :             throw Unhandled();
    2601           0 :         FormulaToken* pChild = rChild->GetFormulaToken();
    2602           0 :         if (!pChild)
    2603           0 :             throw Unhandled();
    2604           0 :         OpCode opc = pChild->GetOpCode();
    2605           0 :         std::stringstream tmpname;
    2606           0 :         tmpname << s << "_" << i;
    2607           0 :         std::string ts = tmpname.str();
    2608           0 :         switch (opc)
    2609             :         {
    2610             :             case ocPush:
    2611           0 :                 if (pChild->GetType() == formula::svDoubleVectorRef)
    2612             :                 {
    2613             :                     const formula::DoubleVectorRefToken* pDVR =
    2614           0 :                         static_cast<const formula::DoubleVectorRefToken*>(pChild);
    2615             : 
    2616           0 :                     for (size_t j = 0; j < pDVR->GetArrays().size(); ++j)
    2617             :                     {
    2618             :                         SAL_INFO("sc.opencl", "j=" << j << " mpNumericArray=" << pDVR->GetArrays()[j].mpNumericArray <<
    2619             :                                  " mpStringArray=" << pDVR->GetArrays()[j].mpStringArray <<
    2620             :                                  " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
    2621             :                                  " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
    2622             : 
    2623           0 :                         if (pDVR->GetArrays()[j].mpNumericArray ||
    2624           0 :                             (pDVR->GetArrays()[j].mpNumericArray == NULL &&
    2625           0 :                                 pDVR->GetArrays()[j].mpStringArray == NULL))
    2626             :                         {
    2627           0 :                             if (pDVR->GetArrays()[j].mpNumericArray &&
    2628           0 :                                 pCodeGen->takeNumeric() &&
    2629           0 :                                 pDVR->GetArrays()[j].mpStringArray &&
    2630           0 :                                 pCodeGen->takeString())
    2631             :                             {
    2632             :                                 mvSubArguments.push_back(
    2633             :                                     DynamicKernelArgumentRef(
    2634             :                                         new DynamicKernelMixedSlidingArgument(mCalcConfig,
    2635           0 :                                             ts, ft->Children[i], mpCodeGen, j)));
    2636             :                             }
    2637             :                             else
    2638             :                             {
    2639             :                                 mvSubArguments.push_back(
    2640             :                                     DynamicKernelArgumentRef(VectorRefFactory<VectorRef>(mCalcConfig,
    2641           0 :                                             ts, ft->Children[i], mpCodeGen, j)));
    2642             :                             }
    2643             :                         }
    2644             :                         else
    2645             :                             mvSubArguments.push_back(
    2646             :                                 DynamicKernelArgumentRef(VectorRefFactory
    2647             :                                     <DynamicKernelStringArgument>(mCalcConfig,
    2648           0 :                                         ts, ft->Children[i], mpCodeGen, j)));
    2649             :                     }
    2650             :                 }
    2651           0 :                 else if (pChild->GetType() == formula::svSingleVectorRef)
    2652             :                 {
    2653             :                     const formula::SingleVectorRefToken* pSVR =
    2654           0 :                         static_cast<const formula::SingleVectorRefToken*>(pChild);
    2655             : 
    2656             :                     SAL_INFO("sc.opencl", "mpNumericArray=" << pSVR->GetArray().mpNumericArray <<
    2657             :                              " mpStringArray=" << pSVR->GetArray().mpStringArray <<
    2658             :                              " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
    2659             :                              " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
    2660             : 
    2661           0 :                     if (pSVR->GetArray().mpNumericArray &&
    2662           0 :                         pCodeGen->takeNumeric() &&
    2663           0 :                         pSVR->GetArray().mpStringArray &&
    2664           0 :                         pCodeGen->takeString())
    2665             :                     {
    2666             :                         // Function takes numbers or strings, there are both
    2667             :                         mvSubArguments.push_back(
    2668             :                             DynamicKernelArgumentRef(new DynamicKernelMixedArgument(mCalcConfig,
    2669           0 :                                     ts, ft->Children[i])));
    2670             :                     }
    2671           0 :                     else if (pSVR->GetArray().mpNumericArray &&
    2672           0 :                         pCodeGen->takeNumeric() &&
    2673           0 :                         (pSVR->GetArray().mpStringArray == NULL || mCalcConfig.meStringConversion == ScCalcConfig::StringConversion::ZERO))
    2674             :                     {
    2675             :                         // Function takes numbers, and either there
    2676             :                         // are no strings, or there are strings but
    2677             :                         // they are to be treated as zero
    2678             :                         mvSubArguments.push_back(
    2679             :                             DynamicKernelArgumentRef(new VectorRef(mCalcConfig, ts,
    2680           0 :                                     ft->Children[i])));
    2681             :                     }
    2682           0 :                     else if (pSVR->GetArray().mpNumericArray == NULL &&
    2683           0 :                         pCodeGen->takeNumeric() &&
    2684           0 :                         pSVR->GetArray().mpStringArray &&
    2685           0 :                         mCalcConfig.meStringConversion == ScCalcConfig::StringConversion::ZERO)
    2686             :                     {
    2687             :                         // Function takes numbers, and there are only
    2688             :                         // strings, but they are to be treated as zero
    2689             :                         mvSubArguments.push_back(
    2690             :                             DynamicKernelArgumentRef(new VectorRef(mCalcConfig, ts,
    2691           0 :                                     ft->Children[i])));
    2692             :                     }
    2693           0 :                     else if (pSVR->GetArray().mpStringArray &&
    2694           0 :                         pCodeGen->takeString())
    2695             :                     {
    2696             :                         // There are strings, and the function takes
    2697             :                         // strings.
    2698             : 
    2699             :                         mvSubArguments.push_back(
    2700             :                             DynamicKernelArgumentRef(new DynamicKernelStringArgument(mCalcConfig,
    2701           0 :                                     ts, ft->Children[i])));
    2702             :                     }
    2703           0 :                     else if (pSVR->GetArray().mpStringArray == NULL &&
    2704           0 :                         pSVR->GetArray().mpNumericArray == NULL)
    2705             :                     {
    2706             :                         // There are only empty cells. Push as an
    2707             :                         // array of NANs
    2708             :                         mvSubArguments.push_back(
    2709             :                             DynamicKernelArgumentRef(new VectorRef(mCalcConfig, ts,
    2710           0 :                                     ft->Children[i])));
    2711             :                     }
    2712             :                     else
    2713             :                         throw UnhandledToken(pChild,
    2714           0 :                             "Got unhandled case here", __FILE__, __LINE__);
    2715             :                 }
    2716           0 :                 else if (pChild->GetType() == formula::svDouble)
    2717             :                 {
    2718             :                     mvSubArguments.push_back(
    2719             :                         DynamicKernelArgumentRef(new DynamicKernelConstantArgument(mCalcConfig, ts,
    2720           0 :                                 ft->Children[i])));
    2721             :                 }
    2722           0 :                 else if (pChild->GetType() == formula::svString
    2723           0 :                     && pCodeGen->takeString())
    2724             :                 {
    2725             :                     mvSubArguments.push_back(
    2726             :                         DynamicKernelArgumentRef(new ConstStringArgument(mCalcConfig, ts,
    2727           0 :                                 ft->Children[i])));
    2728             :                 }
    2729             :                 else
    2730             :                 {
    2731           0 :                     throw UnhandledToken(pChild, ("unhandled operand " + StackVarEnumToString(pChild->GetType()) + " for ocPush").c_str());
    2732             :                 }
    2733           0 :                 break;
    2734             :             case ocDiv:
    2735           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDiv(nResultSize), nResultSize));
    2736           0 :                 break;
    2737             :             case ocMul:
    2738           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpMul(nResultSize), nResultSize));
    2739           0 :                 break;
    2740             :             case ocSub:
    2741           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSub(nResultSize), nResultSize));
    2742           0 :                 break;
    2743             :             case ocAdd:
    2744             :             case ocSum:
    2745           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSum(nResultSize), nResultSize));
    2746           0 :                 break;
    2747             :             case ocAverage:
    2748           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpAverage(nResultSize), nResultSize));
    2749           0 :                 break;
    2750             :             case ocMin:
    2751           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpMin(nResultSize), nResultSize));
    2752           0 :                 break;
    2753             :             case ocMax:
    2754           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpMax(nResultSize), nResultSize));
    2755           0 :                 break;
    2756             :             case ocCount:
    2757           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCount(nResultSize), nResultSize));
    2758           0 :                 break;
    2759             :             case ocSumProduct:
    2760           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSumProduct, nResultSize));
    2761           0 :                 break;
    2762             :             case ocIRR:
    2763           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpIRR, nResultSize));
    2764           0 :                 break;
    2765             :             case ocMIRR:
    2766           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpMIRR, nResultSize));
    2767           0 :                 break;
    2768             :             case ocPMT:
    2769           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpPMT, nResultSize));
    2770           0 :                 break;
    2771             :             case ocRate:
    2772           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpIntrate, nResultSize));
    2773           0 :                 break;
    2774             :             case ocRRI:
    2775           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpRRI, nResultSize));
    2776           0 :                 break;
    2777             :             case ocPpmt:
    2778           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpPPMT, nResultSize));
    2779           0 :                 break;
    2780             :             case ocFisher:
    2781           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpFisher, nResultSize));
    2782           0 :                 break;
    2783             :             case ocFisherInv:
    2784           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpFisherInv, nResultSize));
    2785           0 :                 break;
    2786             :             case ocGamma:
    2787           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGamma, nResultSize));
    2788           0 :                 break;
    2789             :             case ocSLN:
    2790           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSLN, nResultSize));
    2791           0 :                 break;
    2792             :             case ocGammaLn:
    2793           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGammaLn, nResultSize));
    2794           0 :                 break;
    2795             :             case ocGauss:
    2796           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGauss, nResultSize));
    2797           0 :                 break;
    2798             :             /*case ocGeoMean:
    2799             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGeoMean));
    2800             :                 break;*/
    2801             :             case ocHarMean:
    2802           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpHarMean, nResultSize));
    2803           0 :                 break;
    2804             :             case ocLessEqual:
    2805           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpLessEqual, nResultSize));
    2806           0 :                 break;
    2807             :             case ocLess:
    2808           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpLess, nResultSize));
    2809           0 :                 break;
    2810             :             case ocEqual:
    2811           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpEqual, nResultSize));
    2812           0 :                 break;
    2813             :             case ocGreater:
    2814           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGreater, nResultSize));
    2815           0 :                 break;
    2816             :             case ocSYD:
    2817           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSYD, nResultSize));
    2818           0 :                 break;
    2819             :             case ocCorrel:
    2820           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCorrel, nResultSize));
    2821           0 :                 break;
    2822             :             case ocCos:
    2823           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCos, nResultSize));
    2824           0 :                 break;
    2825             :             case ocNegBinomVert :
    2826           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpNegbinomdist, nResultSize));
    2827           0 :                 break;
    2828             :             case ocPearson:
    2829           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpPearson, nResultSize));
    2830           0 :                 break;
    2831             :             case ocRSQ:
    2832           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpRsq, nResultSize));
    2833           0 :                 break;
    2834             :             case ocCosecant:
    2835           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCsc, nResultSize));
    2836           0 :                 break;
    2837             :             case ocISPMT:
    2838           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpISPMT, nResultSize));
    2839           0 :                 break;
    2840             :             case ocDuration:
    2841             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2842           0 :                         ft->Children[i], new OpDuration, nResultSize));
    2843           0 :                 break;
    2844             :             case ocSinHyp:
    2845             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2846           0 :                         ft->Children[i], new OpSinh, nResultSize));
    2847           0 :                 break;
    2848             :             case ocAbs:
    2849             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2850           0 :                         ft->Children[i], new OpAbs, nResultSize));
    2851           0 :                 break;
    2852             :             case ocPV:
    2853             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2854           0 :                         ft->Children[i], new OpPV, nResultSize));
    2855           0 :                 break;
    2856             :             case ocSin:
    2857             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2858           0 :                         ft->Children[i], new OpSin, nResultSize));
    2859           0 :                 break;
    2860             :             case ocTan:
    2861             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2862           0 :                         ft->Children[i], new OpTan, nResultSize));
    2863           0 :                 break;
    2864             :             case ocTanHyp:
    2865             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2866           0 :                         ft->Children[i], new OpTanH, nResultSize));
    2867           0 :                 break;
    2868             :             case ocStandard:
    2869             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2870           0 :                         ft->Children[i], new OpStandard, nResultSize));
    2871           0 :                 break;
    2872             :             case ocWeibull:
    2873             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2874           0 :                         ft->Children[i], new OpWeibull, nResultSize));
    2875           0 :                 break;
    2876             :             /*case ocMedian:
    2877             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2878             :                          ft->Children[i],new OpMedian));
    2879             :                 break;*/
    2880             :             case ocDDB:
    2881             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2882           0 :                         ft->Children[i], new OpDDB, nResultSize));
    2883           0 :                 break;
    2884             :             case ocFV:
    2885             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2886           0 :                         ft->Children[i], new OpFV, nResultSize));
    2887           0 :                 break;
    2888             :             case ocSumIfs:
    2889             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2890           0 :                         ft->Children[i], new OpSumIfs, nResultSize));
    2891           0 :                 break;
    2892             :                 /*case ocVBD:
    2893             :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2894             :                              ft->Children[i],new OpVDB));
    2895             :                      break;*/
    2896             :             case ocKurt:
    2897             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2898           0 :                         ft->Children[i], new OpKurt, nResultSize));
    2899           0 :                 break;
    2900             :                 /*case ocNper:
    2901             :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2902             :                             ft->Children[i], new OpNper));
    2903             :                      break;*/
    2904             :             case ocNormDist:
    2905             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2906           0 :                         ft->Children[i], new OpNormdist, nResultSize));
    2907           0 :                 break;
    2908             :             case ocArcCos:
    2909             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2910           0 :                         ft->Children[i], new OpArcCos, nResultSize));
    2911           0 :                 break;
    2912             :             case ocSqrt:
    2913             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2914           0 :                         ft->Children[i], new OpSqrt, nResultSize));
    2915           0 :                 break;
    2916             :             case ocArcCosHyp:
    2917             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2918           0 :                         ft->Children[i], new OpArcCosHyp, nResultSize));
    2919           0 :                 break;
    2920             :             case ocNPV:
    2921             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2922           0 :                         ft->Children[i], new OpNPV, nResultSize));
    2923           0 :                 break;
    2924             :             case ocStdNormDist:
    2925             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2926           0 :                         ft->Children[i], new OpNormsdist, nResultSize));
    2927           0 :                 break;
    2928             :             case ocNormInv:
    2929             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2930           0 :                         ft->Children[i], new OpNorminv, nResultSize));
    2931           0 :                 break;
    2932             :             case ocSNormInv:
    2933             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2934           0 :                         ft->Children[i], new OpNormsinv, nResultSize));
    2935           0 :                 break;
    2936             :             case ocPermut:
    2937             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2938           0 :                         ft->Children[i], new OpPermut, nResultSize));
    2939           0 :                 break;
    2940             :             case ocPermutationA:
    2941             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2942           0 :                         ft->Children[i], new OpPermutationA, nResultSize));
    2943           0 :                 break;
    2944             :             case ocPhi:
    2945             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2946           0 :                         ft->Children[i], new OpPhi, nResultSize));
    2947           0 :                 break;
    2948             :             case ocIpmt:
    2949             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2950           0 :                         ft->Children[i], new OpIPMT, nResultSize));
    2951           0 :                 break;
    2952             :             case ocConfidence:
    2953             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2954           0 :                         ft->Children[i], new OpConfidence, nResultSize));
    2955           0 :                 break;
    2956             :             case ocIntercept:
    2957             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2958           0 :                         ft->Children[i], new OpIntercept, nResultSize));
    2959           0 :                 break;
    2960             :             case ocDB:
    2961           0 :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    2962           0 :                         new OpDB, nResultSize));
    2963           0 :                 break;
    2964             :             case ocLogInv:
    2965             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2966           0 :                         ft->Children[i], new OpLogInv, nResultSize));
    2967           0 :                 break;
    2968             :             case ocArcCot:
    2969             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2970           0 :                         ft->Children[i], new OpArcCot, nResultSize));
    2971           0 :                 break;
    2972             :             case ocCosHyp:
    2973             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2974           0 :                         ft->Children[i], new OpCosh, nResultSize));
    2975           0 :                 break;
    2976             :             case ocCritBinom:
    2977             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2978           0 :                         ft->Children[i], new OpCritBinom, nResultSize));
    2979           0 :                 break;
    2980             :             case ocArcCotHyp:
    2981             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2982           0 :                         ft->Children[i], new OpArcCotHyp, nResultSize));
    2983           0 :                 break;
    2984             :             case ocArcSin:
    2985             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2986           0 :                         ft->Children[i], new OpArcSin, nResultSize));
    2987           0 :                 break;
    2988             :             case ocArcSinHyp:
    2989             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2990           0 :                         ft->Children[i], new OpArcSinHyp, nResultSize));
    2991           0 :                 break;
    2992             :             case ocArcTan:
    2993             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2994           0 :                         ft->Children[i], new OpArcTan, nResultSize));
    2995           0 :                 break;
    2996             :             case ocArcTanHyp:
    2997             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    2998           0 :                         ft->Children[i], new OpArcTanH, nResultSize));
    2999           0 :                 break;
    3000             :             case ocBitAnd:
    3001             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3002           0 :                         ft->Children[i], new OpBitAnd, nResultSize));
    3003           0 :                 break;
    3004             :             case ocForecast:
    3005             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3006           0 :                         ft->Children[i], new OpForecast, nResultSize));
    3007           0 :                 break;
    3008             :             case ocLogNormDist:
    3009             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3010           0 :                         ft->Children[i], new OpLogNormDist, nResultSize));
    3011           0 :                 break;
    3012             :             /*case ocGammaDist:
    3013             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3014             :                          ft->Children[i], new OpGammaDist));
    3015             :                 break;*/
    3016             :             case ocLn:
    3017             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3018           0 :                         ft->Children[i], new OpLn, nResultSize));
    3019           0 :                 break;
    3020             :             case ocRound:
    3021             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3022           0 :                         ft->Children[i], new OpRound, nResultSize));
    3023           0 :                 break;
    3024             :             case ocCot:
    3025             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3026           0 :                         ft->Children[i], new OpCot, nResultSize));
    3027           0 :                 break;
    3028             :             case ocCotHyp:
    3029             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3030           0 :                         ft->Children[i], new OpCoth, nResultSize));
    3031           0 :                 break;
    3032             :             case ocFDist:
    3033             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3034           0 :                         ft->Children[i], new OpFdist, nResultSize));
    3035           0 :                 break;
    3036             :             case ocVar:
    3037             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3038           0 :                         ft->Children[i], new OpVar, nResultSize));
    3039           0 :                 break;
    3040             :             /*case ocChiDist:
    3041             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3042             :                          ft->Children[i],new OpChiDist));
    3043             :                 break;*/
    3044             :             case ocPow:
    3045             :             case ocPower:
    3046             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3047           0 :                         ft->Children[i], new OpPower, nResultSize));
    3048           0 :                 break;
    3049             :             case ocOdd:
    3050             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3051           0 :                         ft->Children[i], new OpOdd, nResultSize));
    3052           0 :                 break;
    3053             :             /*case ocChiSqDist:
    3054             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3055             :                          ft->Children[i],new OpChiSqDist));
    3056             :                 break;
    3057             :             case ocChiSqInv:
    3058             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3059             :                          ft->Children[i],new OpChiSqInv));
    3060             :                 break;
    3061             :             case ocGammaInv:
    3062             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3063             :                          ft->Children[i], new OpGammaInv));
    3064             :                 break;*/
    3065             :             case ocFloor:
    3066             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3067           0 :                         ft->Children[i], new OpFloor, nResultSize));
    3068           0 :                 break;
    3069             :             /*case ocFInv:
    3070             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3071             :                          ft->Children[i], new OpFInv));
    3072             :                 break;*/
    3073             :             case ocFTest:
    3074             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3075           0 :                         ft->Children[i], new OpFTest, nResultSize));
    3076           0 :                 break;
    3077             :             case ocB:
    3078             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3079           0 :                         ft->Children[i], new OpB, nResultSize));
    3080           0 :                 break;
    3081             :             case ocBetaDist:
    3082             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3083           0 :                         ft->Children[i], new OpBetaDist, nResultSize));
    3084           0 :                 break;
    3085             :             case ocCosecantHyp:
    3086             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3087           0 :                         ft->Children[i], new OpCscH, nResultSize));
    3088           0 :                 break;
    3089             :             case ocExp:
    3090             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3091           0 :                         ft->Children[i], new OpExp, nResultSize));
    3092           0 :                 break;
    3093             :             case ocLog10:
    3094             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3095           0 :                         ft->Children[i], new OpLog10, nResultSize));
    3096           0 :                 break;
    3097             :             case ocExpDist:
    3098             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3099           0 :                         ft->Children[i], new OpExponDist, nResultSize));
    3100           0 :                 break;
    3101             :             case ocAverageIfs:
    3102             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3103           0 :                         ft->Children[i], new OpAverageIfs, nResultSize));
    3104           0 :                 break;
    3105             :             case ocCountIfs:
    3106             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3107           0 :                         ft->Children[i], new OpCountIfs, nResultSize));
    3108           0 :                 break;
    3109             :             case ocCombinA:
    3110             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3111           0 :                         ft->Children[i], new OpCombinA, nResultSize));
    3112           0 :                 break;
    3113             :             case ocEven:
    3114             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3115           0 :                         ft->Children[i], new OpEven, nResultSize));
    3116           0 :                 break;
    3117             :             case ocLog:
    3118             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3119           0 :                         ft->Children[i], new OpLog, nResultSize));
    3120           0 :                 break;
    3121             :             case ocMod:
    3122             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3123           0 :                         ft->Children[i], new OpMod, nResultSize));
    3124           0 :                 break;
    3125             :             case ocTrunc:
    3126             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3127           0 :                         ft->Children[i], new OpTrunc, nResultSize));
    3128           0 :                 break;
    3129             :             case ocSkew:
    3130             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3131           0 :                         ft->Children[i], new OpSkew, nResultSize));
    3132           0 :                 break;
    3133             :             case ocArcTan2:
    3134             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3135           0 :                         ft->Children[i], new OpArcTan2, nResultSize));
    3136           0 :                 break;
    3137             :             case ocBitOr:
    3138             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3139           0 :                         ft->Children[i], new OpBitOr, nResultSize));
    3140           0 :                 break;
    3141             :             case ocBitLshift:
    3142             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3143           0 :                         ft->Children[i], new OpBitLshift, nResultSize));
    3144           0 :                 break;
    3145             :             case ocBitRshift:
    3146             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3147           0 :                         ft->Children[i], new OpBitRshift, nResultSize));
    3148           0 :                 break;
    3149             :             case ocBitXor:
    3150             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3151           0 :                         ft->Children[i], new OpBitXor, nResultSize));
    3152           0 :                 break;
    3153             :             /*case ocChiInv:
    3154             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3155             :                          ft->Children[i],new OpChiInv));
    3156             :                 break;*/
    3157             :             case ocPoissonDist:
    3158             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3159           0 :                         ft->Children[i], new OpPoisson, nResultSize));
    3160           0 :                 break;
    3161             :             case ocSumSQ:
    3162             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3163           0 :                         ft->Children[i], new OpSumSQ, nResultSize));
    3164           0 :                 break;
    3165             :             case ocSkewp:
    3166             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3167           0 :                         ft->Children[i], new OpSkewp, nResultSize));
    3168           0 :                 break;
    3169             :             case ocBinomDist:
    3170             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3171           0 :                         ft->Children[i], new OpBinomdist, nResultSize));
    3172           0 :                 break;
    3173             :             case ocVarP:
    3174             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3175           0 :                         ft->Children[i], new OpVarP, nResultSize));
    3176           0 :                 break;
    3177             :             case ocCeil:
    3178             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3179           0 :                         ft->Children[i], new OpCeil, nResultSize));
    3180           0 :                 break;
    3181             :             case ocCombin:
    3182             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3183           0 :                         ft->Children[i], new OpCombin, nResultSize));
    3184           0 :                 break;
    3185             :             case ocDevSq:
    3186             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3187           0 :                         ft->Children[i], new OpDevSq, nResultSize));
    3188           0 :                 break;
    3189             :             case ocStDev:
    3190             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3191           0 :                         ft->Children[i], new OpStDev, nResultSize));
    3192           0 :                 break;
    3193             :             case ocSlope:
    3194             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3195           0 :                         ft->Children[i], new OpSlope, nResultSize));
    3196           0 :                 break;
    3197             :             case ocSTEYX:
    3198             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3199           0 :                         ft->Children[i], new OpSTEYX, nResultSize));
    3200           0 :                 break;
    3201             :             case ocZTest:
    3202             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3203           0 :                         ft->Children[i], new OpZTest, nResultSize));
    3204           0 :                 break;
    3205             :             case ocPi:
    3206             :                 mvSubArguments.push_back(
    3207             :                     DynamicKernelArgumentRef(new DynamicKernelPiArgument(mCalcConfig, ts,
    3208           0 :                             ft->Children[i])));
    3209           0 :                 break;
    3210             :             case ocRandom:
    3211             :                 mvSubArguments.push_back(
    3212             :                     DynamicKernelArgumentRef(new DynamicKernelRandomArgument(mCalcConfig, ts,
    3213           0 :                             ft->Children[i])));
    3214           0 :                 break;
    3215             :             case ocProduct:
    3216             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3217           0 :                         ft->Children[i], new OpProduct, nResultSize));
    3218           0 :                 break;
    3219             :             /*case ocHypGeomDist:
    3220             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3221             :                          ft->Children[i],new OpHypGeomDist));
    3222             :                 break;*/
    3223             :             case ocSumX2MY2:
    3224             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3225           0 :                         ft->Children[i], new OpSumX2MY2, nResultSize));
    3226           0 :                 break;
    3227             :             case ocSumX2DY2:
    3228             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3229           0 :                         ft->Children[i], new OpSumX2PY2, nResultSize));
    3230           0 :                 break;
    3231             :             /*case ocBetaInv:
    3232             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3233             :                          ft->Children[i],new OpBetainv));
    3234             :                  break;*/
    3235             :             case ocTTest:
    3236             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3237           0 :                         ft->Children[i], new OpTTest, nResultSize));
    3238           0 :                 break;
    3239             :             case ocTDist:
    3240             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3241           0 :                         ft->Children[i], new OpTDist, nResultSize));
    3242           0 :                 break;
    3243             :             /*case ocTInv:
    3244             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3245             :                          ft->Children[i], new OpTInv));
    3246             :                  break;*/
    3247             :             case ocSumXMY2:
    3248             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3249           0 :                         ft->Children[i], new OpSumXMY2, nResultSize));
    3250           0 :                 break;
    3251             :             case ocStDevP:
    3252             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3253           0 :                         ft->Children[i], new OpStDevP, nResultSize));
    3254           0 :                 break;
    3255             :             case ocCovar:
    3256             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3257           0 :                         ft->Children[i], new OpCovar, nResultSize));
    3258           0 :                 break;
    3259             :             case ocAnd:
    3260             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3261           0 :                         ft->Children[i], new OpAnd, nResultSize));
    3262           0 :                 break;
    3263             :             case ocVLookup:
    3264             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3265           0 :                         ft->Children[i], new OpVLookup, nResultSize));
    3266           0 :                 break;
    3267             :             case ocOr:
    3268             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3269           0 :                         ft->Children[i], new OpOr, nResultSize));
    3270           0 :                 break;
    3271             :             case ocNot:
    3272             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3273           0 :                         ft->Children[i], new OpNot, nResultSize));
    3274           0 :                 break;
    3275             :             case ocXor:
    3276             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3277           0 :                         ft->Children[i], new OpXor, nResultSize));
    3278           0 :                 break;
    3279             :             case ocDBMax:
    3280             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3281           0 :                         ft->Children[i], new OpDmax, nResultSize));
    3282           0 :                 break;
    3283             :             case ocDBMin:
    3284             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3285           0 :                         ft->Children[i], new OpDmin, nResultSize));
    3286           0 :                 break;
    3287             :             case ocDBProduct:
    3288             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3289           0 :                         ft->Children[i], new OpDproduct, nResultSize));
    3290           0 :                 break;
    3291             :             case ocDBAverage:
    3292             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3293           0 :                         ft->Children[i], new OpDaverage, nResultSize));
    3294           0 :                 break;
    3295             :             case ocDBStdDev:
    3296             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3297           0 :                         ft->Children[i], new OpDstdev, nResultSize));
    3298           0 :                 break;
    3299             :             case ocDBStdDevP:
    3300             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3301           0 :                         ft->Children[i], new OpDstdevp, nResultSize));
    3302           0 :                 break;
    3303             :             case ocDBSum:
    3304             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3305           0 :                         ft->Children[i], new OpDsum, nResultSize));
    3306           0 :                 break;
    3307             :             case ocDBVar:
    3308             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3309           0 :                         ft->Children[i], new OpDvar, nResultSize));
    3310           0 :                 break;
    3311             :             case ocDBVarP:
    3312             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3313           0 :                         ft->Children[i], new OpDvarp, nResultSize));
    3314           0 :                 break;
    3315             :             case ocAverageIf:
    3316             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3317           0 :                         ft->Children[i], new OpAverageIf, nResultSize));
    3318           0 :                 break;
    3319             :             case ocDBCount:
    3320             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3321           0 :                         ft->Children[i], new OpDcount, nResultSize));
    3322           0 :                 break;
    3323             :             case ocDBCount2:
    3324             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3325           0 :                         ft->Children[i], new OpDcount2, nResultSize));
    3326           0 :                 break;
    3327             :             case ocDeg:
    3328             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3329           0 :                         ft->Children[i], new OpDeg, nResultSize));
    3330           0 :                 break;
    3331             :             case ocRoundUp:
    3332             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3333           0 :                         ft->Children[i], new OpRoundUp, nResultSize));
    3334           0 :                 break;
    3335             :             case ocRoundDown:
    3336             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3337           0 :                         ft->Children[i], new OpRoundDown, nResultSize));
    3338           0 :                 break;
    3339             :             case ocInt:
    3340             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3341           0 :                         ft->Children[i], new OpInt, nResultSize));
    3342           0 :                 break;
    3343             :             case ocRad:
    3344             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3345           0 :                         ft->Children[i], new OpRadians, nResultSize));
    3346           0 :                 break;
    3347             :             case ocCountIf:
    3348             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3349           0 :                         ft->Children[i], new OpCountIf, nResultSize));
    3350           0 :                 break;
    3351             :             case ocIsEven:
    3352             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3353           0 :                         ft->Children[i], new OpIsEven, nResultSize));
    3354           0 :                 break;
    3355             :             case ocIsOdd:
    3356             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3357           0 :                         ft->Children[i], new OpIsOdd, nResultSize));
    3358           0 :                 break;
    3359             :             case ocFact:
    3360             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3361           0 :                         ft->Children[i], new OpFact, nResultSize));
    3362           0 :                 break;
    3363             :             case ocMinA:
    3364             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3365           0 :                         ft->Children[i], new OpMinA, nResultSize));
    3366           0 :                 break;
    3367             :             case ocCount2:
    3368             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3369           0 :                         ft->Children[i], new OpCountA, nResultSize));
    3370           0 :                 break;
    3371             :             case ocMaxA:
    3372             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3373           0 :                         ft->Children[i], new OpMaxA, nResultSize));
    3374           0 :                 break;
    3375             :             case ocAverageA:
    3376             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3377           0 :                         ft->Children[i], new OpAverageA, nResultSize));
    3378           0 :                 break;
    3379             :             case ocVarA:
    3380             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3381           0 :                         ft->Children[i], new OpVarA, nResultSize));
    3382           0 :                 break;
    3383             :             case ocVarPA:
    3384             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3385           0 :                         ft->Children[i], new OpVarPA, nResultSize));
    3386           0 :                 break;
    3387             :             case ocStDevA:
    3388             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3389           0 :                         ft->Children[i], new OpStDevA, nResultSize));
    3390           0 :                 break;
    3391             :             case ocStDevPA:
    3392             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3393           0 :                         ft->Children[i], new OpStDevPA, nResultSize));
    3394           0 :                 break;
    3395             :             case ocSecant:
    3396             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3397           0 :                         ft->Children[i], new OpSec, nResultSize));
    3398           0 :                 break;
    3399             :             case ocSecantHyp:
    3400             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3401           0 :                         ft->Children[i], new OpSecH, nResultSize));
    3402           0 :                 break;
    3403             :             case ocSumIf:
    3404             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3405           0 :                         ft->Children[i], new OpSumIf, nResultSize));
    3406           0 :                 break;
    3407             :             case ocNegSub:
    3408             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3409           0 :                         ft->Children[i], new OpNegSub, nResultSize));
    3410           0 :                 break;
    3411             :             case ocAveDev:
    3412             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3413           0 :                         ft->Children[i], new OpAveDev, nResultSize));
    3414           0 :                 break;
    3415             :             case ocIf:
    3416             :                 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3417           0 :                         ft->Children[i], new OpIf, nResultSize));
    3418           0 :                 break;
    3419             :             case ocExternal:
    3420           0 :                 if (!(pChild->GetExternal().compareTo(OUString(
    3421           0 :                                 "com.sun.star.sheet.addin.Analysis.getEffect"))))
    3422             :                 {
    3423           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpEffective, nResultSize));
    3424             :                 }
    3425           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3426           0 :                                 "com.sun.star.sheet.addin.Analysis.getCumipmt"))))
    3427             :                 {
    3428           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCumipmt, nResultSize));
    3429             :                 }
    3430           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3431           0 :                                 "com.sun.star.sheet.addin.Analysis.getNominal"))))
    3432             :                 {
    3433           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpNominal, nResultSize));
    3434             :                 }
    3435           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3436           0 :                                 "com.sun.star.sheet.addin.Analysis.getCumprinc"))))
    3437             :                 {
    3438           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCumprinc, nResultSize));
    3439             :                 }
    3440           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3441           0 :                                 "com.sun.star.sheet.addin.Analysis.getXnpv"))))
    3442             :                 {
    3443           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpXNPV, nResultSize));
    3444             :                 }
    3445           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3446           0 :                                 "com.sun.star.sheet.addin.Analysis.getPricemat"))))
    3447             :                 {
    3448           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpPriceMat, nResultSize));
    3449             :                 }
    3450           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3451           0 :                                 "com.sun.star.sheet.addin.Analysis.getReceived"))))
    3452             :                 {
    3453           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpReceived, nResultSize));
    3454             :                 }
    3455           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3456           0 :                                 "com.sun.star.sheet.addin.Analysis.getTbilleq"))))
    3457             :                 {
    3458           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpTbilleq, nResultSize));
    3459             :                 }
    3460           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3461           0 :                                 "com.sun.star.sheet.addin.Analysis.getTbillprice"))))
    3462             :                 {
    3463           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpTbillprice, nResultSize));
    3464             :                 }
    3465           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3466           0 :                                 "com.sun.star.sheet.addin.Analysis.getTbillyield"))))
    3467             :                 {
    3468           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpTbillyield, nResultSize));
    3469             :                 }
    3470           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3471           0 :                                 "com.sun.star.sheet.addin.Analysis.getFvschedule"))))
    3472             :                 {
    3473           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpFvschedule, nResultSize));
    3474             :                 }
    3475             :                 /*else if ( !(pChild->GetExternal().compareTo(OUString(
    3476             :                     "com.sun.star.sheet.addin.Analysis.getYield"))))
    3477             :                 {
    3478             :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpYield));
    3479             :                 }*/
    3480           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3481           0 :                                 "com.sun.star.sheet.addin.Analysis.getYielddisc"))))
    3482             :                 {
    3483           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpYielddisc, nResultSize));
    3484             :                 }
    3485           0 :                 else    if (!(pChild->GetExternal().compareTo(OUString(
    3486           0 :                                 "com.sun.star.sheet.addin.Analysis.getYieldmat"))))
    3487             :                 {
    3488           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpYieldmat, nResultSize));
    3489             :                 }
    3490           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3491           0 :                                 "com.sun.star.sheet.addin.Analysis.getAccrintm"))))
    3492             :                 {
    3493           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpAccrintm, nResultSize));
    3494             :                 }
    3495           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3496           0 :                                 "com.sun.star.sheet.addin.Analysis.getCoupdaybs"))))
    3497             :                 {
    3498           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCoupdaybs, nResultSize));
    3499             :                 }
    3500           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3501           0 :                                 "com.sun.star.sheet.addin.Analysis.getDollarde"))))
    3502             :                 {
    3503           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDollarde, nResultSize));
    3504             :                 }
    3505           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3506           0 :                                 "com.sun.star.sheet.addin.Analysis.getDollarfr"))))
    3507             :                 {
    3508           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDollarfr, nResultSize));
    3509             :                 }
    3510           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3511           0 :                                 "com.sun.star.sheet.addin.Analysis.getCoupdays"))))
    3512             :                 {
    3513           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCoupdays, nResultSize));
    3514             :                 }
    3515           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3516           0 :                                 "com.sun.star.sheet.addin.Analysis.getCoupdaysnc"))))
    3517             :                 {
    3518           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCoupdaysnc, nResultSize));
    3519             :                 }
    3520           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3521           0 :                                 "com.sun.star.sheet.addin.Analysis.getDisc"))))
    3522             :                 {
    3523           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDISC, nResultSize));
    3524             :                 }
    3525           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3526           0 :                                 "com.sun.star.sheet.addin.Analysis.getIntrate"))))
    3527             :                 {
    3528           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpINTRATE, nResultSize));
    3529             :                 }
    3530           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3531           0 :                                 "com.sun.star.sheet.addin.Analysis.getPrice"))))
    3532             :                 {
    3533             :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3534           0 :                             ft->Children[i], new OpPrice, nResultSize));
    3535             :                 }
    3536           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3537           0 :                                 "com.sun.star.sheet.addin.Analysis.getCoupnum"))))
    3538             :                 {
    3539           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3540           0 :                             new OpCoupnum, nResultSize));
    3541             :                 }
    3542             :                 /*else if ( !(pChild->GetExternal().compareTo(OUString(
    3543             :                    "com.sun.star.sheet.addin.Analysis.getDuration"))))
    3544             :                 {
    3545             :                     mvSubArguments.push_back(
    3546             :                         SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDuration_ADD));
    3547             :                 }*/
    3548           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3549           0 :                                 "com.sun.star.sheet.addin.Analysis.getAmordegrc"))))
    3550             :                 {
    3551           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3552           0 :                             new OpAmordegrc, nResultSize));
    3553             :                 }
    3554           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3555           0 :                                 "com.sun.star.sheet.addin.Analysis.getAmorlinc"))))
    3556             :                 {
    3557           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3558           0 :                             new OpAmorlinc, nResultSize));
    3559             :                 }
    3560           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3561           0 :                                 "com.sun.star.sheet.addin.Analysis.getMduration"))))
    3562             :                 {
    3563           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3564           0 :                             new OpMDuration, nResultSize));
    3565             :                 }
    3566           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3567           0 :                                 "com.sun.star.sheet.addin.Analysis.getXirr"))))
    3568             :                 {
    3569           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3570           0 :                             new OpXirr, nResultSize));
    3571             :                 }
    3572           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3573           0 :                                 "com.sun.star.sheet.addin.Analysis.getOddlprice"))))
    3574             :                 {
    3575             :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3576           0 :                             ft->Children[i], new OpOddlprice, nResultSize));
    3577             :                 }
    3578           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3579           0 :                                 "com.sun.star.sheet.addin.Analysis.getOddlyield"))))
    3580             :                 {
    3581           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3582           0 :                             new OpOddlyield, nResultSize));
    3583             :                 }
    3584           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3585           0 :                                 "com.sun.star.sheet.addin.Analysis.getPricedisc"))))
    3586             :                 {
    3587             :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
    3588           0 :                             ft->Children[i], new OpPriceDisc, nResultSize));
    3589             :                 }
    3590           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3591           0 :                                 "com.sun.star.sheet.addin.Analysis.getCouppcd"))))
    3592             :                 {
    3593           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3594           0 :                             new OpCouppcd, nResultSize));
    3595             :                 }
    3596           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3597           0 :                                 "com.sun.star.sheet.addin.Analysis.getCoupncd"))))
    3598             :                 {
    3599           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3600           0 :                             new OpCoupncd, nResultSize));
    3601             :                 }
    3602           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3603           0 :                                 "com.sun.star.sheet.addin.Analysis.getAccrint"))))
    3604             :                 {
    3605           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3606           0 :                             new OpAccrint, nResultSize));
    3607             :                 }
    3608           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3609           0 :                                 "com.sun.star.sheet.addin.Analysis.getSqrtpi"))))
    3610             :                 {
    3611           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3612           0 :                             new OpSqrtPi, nResultSize));
    3613             :                 }
    3614           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3615           0 :                                 "com.sun.star.sheet.addin.Analysis.getConvert"))))
    3616             :                 {
    3617           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3618           0 :                             new OpConvert, nResultSize));
    3619             :                 }
    3620           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3621           0 :                                 "com.sun.star.sheet.addin.Analysis.getIseven"))))
    3622             :                 {
    3623           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3624           0 :                             new OpIsEven, nResultSize));
    3625             :                 }
    3626           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3627           0 :                                 "com.sun.star.sheet.addin.Analysis.getIsodd"))))
    3628             :                 {
    3629           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3630           0 :                             new OpIsOdd, nResultSize));
    3631             :                 }
    3632           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3633           0 :                                 "com.sun.star.sheet.addin.Analysis.getMround"))))
    3634             :                 {
    3635           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3636           0 :                             new OpMROUND, nResultSize));
    3637             :                 }
    3638           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3639           0 :                                 "com.sun.star.sheet.addin.Analysis.getQuotient"))))
    3640             :                 {
    3641           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3642           0 :                             new OpQuotient, nResultSize));
    3643             :                 }
    3644           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3645           0 :                                 "com.sun.star.sheet.addin.Analysis.getSeriessum"))))
    3646             :                 {
    3647           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3648           0 :                             new OpSeriesSum, nResultSize));
    3649             :                 }
    3650           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3651           0 :                                 "com.sun.star.sheet.addin.Analysis.getBesselj"))))
    3652             :                 {
    3653           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3654           0 :                             new OpBesselj, nResultSize));
    3655             :                 }
    3656           0 :                 else if (!(pChild->GetExternal().compareTo(OUString(
    3657           0 :                                 "com.sun.star.sheet.addin.Analysis.getGestep"))))
    3658             :                 {
    3659           0 :                     mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
    3660           0 :                             new OpGestep, nResultSize));
    3661             :                 }
    3662             :                 else
    3663           0 :                     throw UnhandledToken(pChild, "unhandled opcode");
    3664           0 :                 break;
    3665             : 
    3666             :             default:
    3667           0 :                 throw UnhandledToken(pChild, "unhandled opcode");
    3668             :         }
    3669           0 :     }
    3670           0 : }
    3671             : 
    3672             : class DynamicKernel : public CompiledFormula
    3673             : {
    3674             : public:
    3675             :     DynamicKernel( const ScCalcConfig& config, const FormulaTreeNodeRef& r, int nResultSize );
    3676             :     virtual ~DynamicKernel();
    3677             : 
    3678             :     static DynamicKernel* create( const ScCalcConfig& config, ScTokenArray& rCode, int nResultSize );
    3679             : 
    3680             :     /// OpenCL code generation
    3681             :     void CodeGen();
    3682             : 
    3683             :     /// Produce kernel hash
    3684             :     std::string GetMD5();
    3685             : 
    3686             :     /// Create program, build, and create kerenl
    3687             :     /// TODO cache results based on kernel body hash
    3688             :     /// TODO: abstract OpenCL part out into OpenCL wrapper.
    3689             :     void CreateKernel();
    3690             : 
    3691             :     /// Prepare buffers, marshal them to GPU, and launch the kernel
    3692             :     /// TODO: abstract OpenCL part out into OpenCL wrapper.
    3693             :     void Launch( size_t nr );
    3694             : 
    3695           0 :     cl_mem GetResultBuffer() const { return mpResClmem; }
    3696             : 
    3697             : private:
    3698             :     ScCalcConfig mCalcConfig;
    3699             :     FormulaTreeNodeRef mpRoot;
    3700             :     SymbolTable mSyms;
    3701             :     std::string mKernelSignature, mKernelHash;
    3702             :     std::string mFullProgramSrc;
    3703             :     cl_program mpProgram;
    3704             :     cl_kernel mpKernel;
    3705             :     cl_mem mpResClmem; // Results
    3706             :     std::set<std::string> inlineDecl;
    3707             :     std::set<std::string> inlineFun;
    3708             : 
    3709             :     int mnResultSize;
    3710             : };
    3711             : 
    3712           0 : DynamicKernel::DynamicKernel( const ScCalcConfig& config, const FormulaTreeNodeRef& r, int nResultSize ) :
    3713             :     mCalcConfig(config),
    3714             :     mpRoot(r),
    3715             :     mpProgram(NULL),
    3716             :     mpKernel(NULL),
    3717             :     mpResClmem(NULL),
    3718           0 :     mnResultSize(nResultSize) {}
    3719             : 
    3720           0 : DynamicKernel::~DynamicKernel()
    3721             : {
    3722             :     cl_int err;
    3723           0 :     if (mpResClmem)
    3724             :     {
    3725           0 :         err = clReleaseMemObject(mpResClmem);
    3726             :         SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
    3727             :     }
    3728           0 :     if (mpKernel)
    3729             :     {
    3730             :         SAL_INFO("sc.opencl", "Releasing kernel " << mpKernel);
    3731           0 :         err = clReleaseKernel(mpKernel);
    3732             :         SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << ::opencl::errorString(err));
    3733             :     }
    3734             :     // mpProgram is not going to be released here -- it's cached.
    3735           0 : }
    3736             : 
    3737           0 : void DynamicKernel::CodeGen()
    3738             : {
    3739             :     // Travese the tree of expression and declare symbols used
    3740           0 :     const DynamicKernelArgument* DK = mSyms.DeclRefArg<DynamicKernelSoPArguments>(mCalcConfig, mpRoot, new OpNop(mnResultSize), mnResultSize);
    3741             : 
    3742           0 :     std::stringstream decl;
    3743           0 :     if (::opencl::gpuEnv.mnKhrFp64Flag)
    3744             :     {
    3745           0 :         decl << "#if __OPENCL_VERSION__ < 120\n";
    3746           0 :         decl << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
    3747           0 :         decl << "#endif\n";
    3748             :     }
    3749           0 :     else if (::opencl::gpuEnv.mnAmdFp64Flag)
    3750             :     {
    3751           0 :         decl << "#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
    3752             :     }
    3753             :     // preambles
    3754           0 :     decl << publicFunc;
    3755           0 :     DK->DumpInlineFun(inlineDecl, inlineFun);
    3756           0 :     for (std::set<std::string>::iterator set_iter = inlineDecl.begin();
    3757           0 :         set_iter != inlineDecl.end(); ++set_iter)
    3758             :     {
    3759           0 :         decl << *set_iter;
    3760             :     }
    3761             : 
    3762           0 :     for (std::set<std::string>::iterator set_iter = inlineFun.begin();
    3763           0 :         set_iter != inlineFun.end(); ++set_iter)
    3764             :     {
    3765           0 :         decl << *set_iter;
    3766             :     }
    3767           0 :     mSyms.DumpSlidingWindowFunctions(decl);
    3768           0 :     mKernelSignature = DK->DumpOpName();
    3769           0 :     decl << "__kernel void DynamicKernel" << mKernelSignature;
    3770           0 :     decl << "(__global double *result, ";
    3771           0 :     DK->GenSlidingWindowDecl(decl);
    3772           0 :     decl << ") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
    3773           0 :         DK->GenSlidingWindowDeclRef(false) << ";\n}\n";
    3774           0 :     mFullProgramSrc = decl.str();
    3775             : #ifdef SAL_DETAIL_ENABLE_LOG_INFO
    3776           0 :     std::stringstream area;
    3777           0 :     if (mKernelSignature[0] == '_')
    3778           0 :         area << "sc.opencl.source." << mKernelSignature.substr(1, std::string::npos);
    3779             :     else
    3780           0 :         area << "sc.opencl.source." << mKernelSignature;
    3781           0 :     SAL_INFO(area.str().c_str(), "Program to be compiled:\n" << linenumberify(mFullProgramSrc));
    3782             : #endif
    3783           0 : }
    3784             : 
    3785           0 : std::string DynamicKernel::GetMD5()
    3786             : {
    3787           0 :     if (mKernelHash.empty())
    3788             :     {
    3789           0 :         std::stringstream md5s;
    3790             :         // Compute MD5SUM of kernel body to obtain the name
    3791             :         sal_uInt8 result[RTL_DIGEST_LENGTH_MD5];
    3792             :         rtl_digest_MD5(
    3793           0 :             mFullProgramSrc.c_str(),
    3794           0 :             mFullProgramSrc.length(), result,
    3795           0 :             RTL_DIGEST_LENGTH_MD5);
    3796           0 :         for (int i = 0; i < RTL_DIGEST_LENGTH_MD5; i++)
    3797             :         {
    3798           0 :             md5s << std::hex << (int)result[i];
    3799             :         }
    3800           0 :         mKernelHash = md5s.str();
    3801             :     }
    3802           0 :     return mKernelHash;
    3803             : }
    3804             : 
    3805             : /// Build code
    3806           0 : void DynamicKernel::CreateKernel()
    3807             : {
    3808           0 :     if (mpKernel)
    3809             :         // already created.
    3810           0 :         return;
    3811             : 
    3812             :     cl_int err;
    3813           0 :     std::string kname = "DynamicKernel" + mKernelSignature;
    3814             :     // Compile kernel here!!!
    3815             :     // Obtain cl context
    3816             :     ::opencl::KernelEnv kEnv;
    3817           0 :     ::opencl::setKernelEnv(&kEnv);
    3818           0 :     const char* src = mFullProgramSrc.c_str();
    3819           0 :     static std::string lastOneKernelHash = "";
    3820           0 :     static std::string lastSecondKernelHash = "";
    3821             :     static cl_program lastOneProgram = NULL;
    3822             :     static cl_program lastSecondProgram = NULL;
    3823           0 :     std::string KernelHash = mKernelSignature + GetMD5();
    3824           0 :     if (lastOneKernelHash == KernelHash && lastOneProgram)
    3825             :     {
    3826           0 :         mpProgram = lastOneProgram;
    3827             :     }
    3828           0 :     else if (lastSecondKernelHash == KernelHash && lastSecondProgram)
    3829             :     {
    3830           0 :         mpProgram = lastSecondProgram;
    3831             :     }
    3832             :     else
    3833             :     {   // doesn't match the last compiled formula.
    3834             : 
    3835           0 :         if (lastSecondProgram)
    3836             :         {
    3837             :             SAL_INFO("sc.opencl", "Releasing program " << lastSecondProgram);
    3838           0 :             err = clReleaseProgram(lastSecondProgram);
    3839             :             SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseProgram failed: " << ::opencl::errorString(err));
    3840           0 :             lastSecondProgram = NULL;
    3841             :         }
    3842           0 :         if (::opencl::buildProgramFromBinary("",
    3843           0 :                 &::opencl::gpuEnv, KernelHash.c_str(), 0))
    3844             :         {
    3845           0 :             mpProgram = ::opencl::gpuEnv.mpArryPrograms[0];
    3846           0 :             ::opencl::gpuEnv.mpArryPrograms[0] = NULL;
    3847             :         }
    3848             :         else
    3849             :         {
    3850             :             mpProgram = clCreateProgramWithSource(kEnv.mpkContext, 1,
    3851           0 :                 &src, NULL, &err);
    3852           0 :             if (err != CL_SUCCESS)
    3853           0 :                 throw OpenCLError("clCreateProgramWithSource", err, __FILE__, __LINE__);
    3854             :             SAL_INFO("sc.opencl", "Created program " << mpProgram);
    3855             : 
    3856             :             err = clBuildProgram(mpProgram, 1,
    3857           0 :                 ::opencl::gpuEnv.mpArryDevsID, "", NULL, NULL);
    3858           0 :             if (err != CL_SUCCESS)
    3859             :             {
    3860             : #if OSL_DEBUG_LEVEL > 0
    3861             :                 if (err == CL_BUILD_PROGRAM_FAILURE)
    3862             :                 {
    3863             :                     cl_build_status stat;
    3864             :                     cl_int e = clGetProgramBuildInfo(
    3865             :                         mpProgram, ::opencl::gpuEnv.mpArryDevsID[0],
    3866             :                         CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status),
    3867             :                         &stat, 0);
    3868             :                     SAL_WARN_IF(
    3869             :                         e != CL_SUCCESS, "sc.opencl",
    3870             :                         "after CL_BUILD_PROGRAM_FAILURE,"
    3871             :                         " clGetProgramBuildInfo(CL_PROGRAM_BUILD_STATUS)"
    3872             :                         " fails with " << ::opencl::errorString(e));
    3873             :                     if (e == CL_SUCCESS)
    3874             :                     {
    3875             :                         size_t n;
    3876             :                         e = clGetProgramBuildInfo(
    3877             :                             mpProgram, ::opencl::gpuEnv.mpArryDevsID[0],
    3878             :                             CL_PROGRAM_BUILD_LOG, 0, 0, &n);
    3879             :                         SAL_WARN_IF(
    3880             :                             e != CL_SUCCESS || n == 0, "sc.opencl",
    3881             :                             "after CL_BUILD_PROGRAM_FAILURE,"
    3882             :                             " clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG)"
    3883             :                             " fails with " << ::opencl::errorString(e) << ", n=" << n);
    3884             :                         if (e == CL_SUCCESS && n != 0)
    3885             :                         {
    3886             :                             std::vector<char> log(n);
    3887             :                             e = clGetProgramBuildInfo(
    3888             :                                 mpProgram, ::opencl::gpuEnv.mpArryDevsID[0],
    3889             :                                 CL_PROGRAM_BUILD_LOG, n, &log[0], 0);
    3890             :                             SAL_WARN_IF(
    3891             :                                 e != CL_SUCCESS || n == 0, "sc.opencl",
    3892             :                                 "after CL_BUILD_PROGRAM_FAILURE,"
    3893             :                                 " clGetProgramBuildInfo("
    3894             :                                 "CL_PROGRAM_BUILD_LOG) fails with " << ::opencl::errorString(e));
    3895             :                             if (e == CL_SUCCESS)
    3896             :                                 SAL_WARN(
    3897             :                                     "sc.opencl",
    3898             :                                     "CL_BUILD_PROGRAM_FAILURE, status " << stat
    3899             :                                     << ", log \"" << &log[0] << "\"");
    3900             :                         }
    3901             :                     }
    3902             :                 }
    3903             : #endif
    3904           0 :                 throw OpenCLError("clBuildProgram", err, __FILE__, __LINE__);
    3905             :             }
    3906             :             SAL_INFO("sc.opencl", "Built program " << mpProgram);
    3907             : 
    3908             :             // Generate binary out of compiled kernel.
    3909             :             ::opencl::generatBinFromKernelSource(mpProgram,
    3910           0 :                 (mKernelSignature + GetMD5()).c_str());
    3911             :         }
    3912           0 :         lastSecondKernelHash = lastOneKernelHash;
    3913           0 :         lastSecondProgram = lastOneProgram;
    3914           0 :         lastOneKernelHash = KernelHash;
    3915           0 :         lastOneProgram = mpProgram;
    3916             :     }
    3917           0 :     mpKernel = clCreateKernel(mpProgram, kname.c_str(), &err);
    3918           0 :     if (err != CL_SUCCESS)
    3919           0 :         throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
    3920           0 :     SAL_INFO("sc.opencl", "Created kernel " << mpKernel << " with name " << kname << " in program " << mpProgram);
    3921             : }
    3922             : 
    3923           0 : void DynamicKernel::Launch( size_t nr )
    3924             : {
    3925             :     // Obtain cl context
    3926             :     ::opencl::KernelEnv kEnv;
    3927           0 :     ::opencl::setKernelEnv(&kEnv);
    3928             :     cl_int err;
    3929             :     // The results
    3930             :     mpResClmem = clCreateBuffer(kEnv.mpkContext,
    3931             :         (cl_mem_flags)CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
    3932           0 :         nr * sizeof(double), NULL, &err);
    3933           0 :     if (CL_SUCCESS != err)
    3934           0 :         throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
    3935             :     SAL_INFO("sc.opencl", "Created buffer " << mpResClmem << " size " << nr << "*" << sizeof(double) << "=" << (nr*sizeof(double)));
    3936             : 
    3937             :     SAL_INFO("sc.opencl", "Kernel " << mpKernel << " arg " << 0 << ": cl_mem: " << mpResClmem);
    3938           0 :     err = clSetKernelArg(mpKernel, 0, sizeof(cl_mem), static_cast<void*>(&mpResClmem));
    3939           0 :     if (CL_SUCCESS != err)
    3940           0 :         throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
    3941             :     // The rest of buffers
    3942           0 :     mSyms.Marshal(mpKernel, nr, mpProgram);
    3943           0 :     size_t global_work_size[] = { nr };
    3944             :     SAL_INFO("sc.opencl", "Enqueing kernel " << mpKernel);
    3945             :     err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, mpKernel, 1, NULL,
    3946           0 :         global_work_size, NULL, 0, NULL, NULL);
    3947           0 :     if (CL_SUCCESS != err)
    3948           0 :         throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
    3949           0 :     err = clFlush(kEnv.mpkCmdQueue);
    3950           0 :     if (CL_SUCCESS != err)
    3951           0 :         throw OpenCLError("clFlush", err, __FILE__, __LINE__);
    3952           0 : }
    3953             : 
    3954             : // Symbol lookup. If there is no such symbol created, allocate one
    3955             : // kernel with argument with unique name and return so.
    3956             : // The template argument T must be a subclass of DynamicKernelArgument
    3957             : template<typename T>
    3958           0 : const DynamicKernelArgument* SymbolTable::DeclRefArg( const ScCalcConfig& config,
    3959             :     FormulaTreeNodeRef t, SlidingFunctionBase* pCodeGen, int nResultSize )
    3960             : {
    3961           0 :     FormulaToken* ref = t->GetFormulaToken();
    3962           0 :     ArgumentMap::iterator it = mSymbols.find(ref);
    3963           0 :     if (it == mSymbols.end())
    3964             :     {
    3965             :         // Allocate new symbols
    3966           0 :         std::stringstream ss;
    3967           0 :         ss << "tmp" << mCurId++;
    3968           0 :         DynamicKernelArgumentRef new_arg(new T(config, ss.str(), t, pCodeGen, nResultSize));
    3969           0 :         mSymbols[ref] = new_arg;
    3970           0 :         mParams.push_back(new_arg);
    3971           0 :         return new_arg.get();
    3972             :     }
    3973             :     else
    3974             :     {
    3975           0 :         return it->second.get();
    3976             :     }
    3977             : }
    3978             : 
    3979           0 : FormulaGroupInterpreterOpenCL::FormulaGroupInterpreterOpenCL() :
    3980           0 :     FormulaGroupInterpreter() {}
    3981             : 
    3982           0 : FormulaGroupInterpreterOpenCL::~FormulaGroupInterpreterOpenCL() {}
    3983             : 
    3984           0 : ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix( const ScMatrix& )
    3985             : {
    3986           0 :     return NULL;
    3987             : }
    3988             : 
    3989           0 : DynamicKernel* DynamicKernel::create( const ScCalcConfig& rConfig, ScTokenArray& rCode, int nResultSize )
    3990             : {
    3991             :     // Constructing "AST"
    3992           0 :     FormulaTokenIterator aCode(rCode);
    3993           0 :     std::list<FormulaToken*> aTokenList;
    3994           0 :     std::map<FormulaToken*, FormulaTreeNodeRef> aHashMap;
    3995             :     FormulaToken*  pCur;
    3996           0 :     while ((pCur = const_cast<FormulaToken*>(aCode.Next())) != NULL)
    3997             :     {
    3998           0 :         OpCode eOp = pCur->GetOpCode();
    3999           0 :         if (eOp != ocPush)
    4000             :         {
    4001           0 :             FormulaTreeNodeRef pCurNode(new FormulaTreeNode(pCur));
    4002           0 :             sal_uInt8 nParamCount =  pCur->GetParamCount();
    4003           0 :             for (sal_uInt8 i = 0; i < nParamCount; i++)
    4004             :             {
    4005           0 :                 FormulaToken* pTempFormula = aTokenList.back();
    4006           0 :                 aTokenList.pop_back();
    4007           0 :                 if (pTempFormula->GetOpCode() != ocPush)
    4008             :                 {
    4009           0 :                     if (aHashMap.find(pTempFormula) == aHashMap.end())
    4010           0 :                         return NULL;
    4011           0 :                     pCurNode->Children.push_back(aHashMap[pTempFormula]);
    4012             :                 }
    4013             :                 else
    4014             :                 {
    4015             :                     FormulaTreeNodeRef pChildTreeNode =
    4016             :                         FormulaTreeNodeRef(
    4017           0 :                         new FormulaTreeNode(pTempFormula));
    4018           0 :                     pCurNode->Children.push_back(pChildTreeNode);
    4019             :                 }
    4020             :             }
    4021           0 :             std::reverse(pCurNode->Children.begin(), pCurNode->Children.end());
    4022           0 :             aHashMap[pCur] = pCurNode;
    4023             :         }
    4024           0 :         aTokenList.push_back(pCur);
    4025             :     }
    4026             : 
    4027           0 :     FormulaTreeNodeRef Root = FormulaTreeNodeRef(new FormulaTreeNode(NULL));
    4028           0 :     Root->Children.push_back(aHashMap[aTokenList.back()]);
    4029             : 
    4030           0 :     DynamicKernel* pDynamicKernel = new DynamicKernel(rConfig, Root, nResultSize);
    4031             : 
    4032             :     // OpenCL source code generation and kernel compilation
    4033             :     try
    4034             :     {
    4035           0 :         pDynamicKernel->CodeGen();
    4036           0 :         pDynamicKernel->CreateKernel();
    4037             :     }
    4038           0 :     catch (const UnhandledToken& ut)
    4039             :     {
    4040             :         SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled token: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
    4041           0 :         delete pDynamicKernel;
    4042           0 :         return NULL;
    4043             :     }
    4044           0 :     catch (...)
    4045             :     {
    4046             :         SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled compiler error");
    4047           0 :         return NULL;
    4048             :     }
    4049           0 :     return pDynamicKernel;
    4050             : }
    4051             : 
    4052           0 : CompiledFormula* FormulaGroupInterpreterOpenCL::createCompiledFormula(
    4053             :     ScFormulaCellGroup& rGroup, ScTokenArray& rCode )
    4054             : {
    4055           0 :     return DynamicKernel::create(maCalcConfig, rCode, rGroup.mnLength);
    4056             : }
    4057             : 
    4058             : namespace {
    4059             : 
    4060             : class CLInterpreterResult
    4061             : {
    4062             :     DynamicKernel* mpKernel;
    4063             : 
    4064             :     SCROW mnGroupLength;
    4065             : 
    4066             :     cl_mem mpCLResBuf;
    4067             :     double* mpResBuf;
    4068             : 
    4069             : public:
    4070           0 :     CLInterpreterResult() : mpKernel(NULL), mnGroupLength(0), mpCLResBuf(NULL), mpResBuf(NULL) {}
    4071           0 :     CLInterpreterResult( DynamicKernel* pKernel, SCROW nGroupLength ) :
    4072           0 :         mpKernel(pKernel), mnGroupLength(nGroupLength), mpCLResBuf(NULL), mpResBuf(NULL) {}
    4073             : 
    4074           0 :     bool isValid() const { return mpKernel != NULL; }
    4075             : 
    4076           0 :     void fetchResultFromKernel()
    4077             :     {
    4078           0 :         if (!isValid())
    4079           0 :             return;
    4080             : 
    4081             :         // Map results back
    4082           0 :         mpCLResBuf = mpKernel->GetResultBuffer();
    4083             : 
    4084             :         // Obtain cl context
    4085             :         ::opencl::KernelEnv kEnv;
    4086           0 :         ::opencl::setKernelEnv(&kEnv);
    4087             : 
    4088             :         cl_int err;
    4089             :         mpResBuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
    4090             :             mpCLResBuf,
    4091             :             CL_TRUE, CL_MAP_READ, 0,
    4092             :             mnGroupLength * sizeof(double), 0, NULL, NULL,
    4093           0 :             &err));
    4094             : 
    4095           0 :         if (err != CL_SUCCESS)
    4096             :         {
    4097             :             SAL_WARN("sc.opencl", "clEnqueueMapBuffer failed:: " << ::opencl::errorString(err));
    4098           0 :             mpResBuf = NULL;
    4099           0 :             return;
    4100             :         }
    4101             :     }
    4102             : 
    4103           0 :     bool pushResultToDocument( ScDocument& rDoc, const ScAddress& rTopPos )
    4104             :     {
    4105           0 :         if (!mpResBuf)
    4106           0 :             return false;
    4107             : 
    4108           0 :         rDoc.SetFormulaResults(rTopPos, mpResBuf, mnGroupLength);
    4109             : 
    4110             :         // Obtain cl context
    4111             :         ::opencl::KernelEnv kEnv;
    4112           0 :         ::opencl::setKernelEnv(&kEnv);
    4113             : 
    4114             :         cl_int err;
    4115           0 :         err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpCLResBuf, mpResBuf, 0, NULL, NULL);
    4116             : 
    4117           0 :         if (err != CL_SUCCESS)
    4118             :         {
    4119             :             SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << ::opencl::errorString(err));
    4120           0 :             return false;
    4121             :         }
    4122             : 
    4123           0 :         return true;
    4124             :     }
    4125             : };
    4126             : 
    4127           0 : class CLInterpreterContext
    4128             : {
    4129             :     std::shared_ptr<DynamicKernel> mpKernelStore; /// for managed kernel instance.
    4130             :     DynamicKernel* mpKernel;
    4131             : 
    4132             :     SCROW mnGroupLength;
    4133             : 
    4134             : public:
    4135           0 :     CLInterpreterContext( SCROW nGroupLength ) :
    4136           0 :         mpKernel(NULL), mnGroupLength(nGroupLength) {}
    4137             : 
    4138           0 :     bool isValid() const
    4139             :     {
    4140           0 :         return mpKernel != NULL;
    4141             :     }
    4142             : 
    4143           0 :     void setManagedKernel( DynamicKernel* pKernel )
    4144             :     {
    4145           0 :         mpKernelStore.reset(pKernel);
    4146           0 :         mpKernel = pKernel;
    4147           0 :     }
    4148             : 
    4149             : #if ENABLE_THREADED_OPENCL_KERNEL_COMPILATION
    4150             :     void setUnmanagedKernel( DynamicKernel* pKernel )
    4151             :     {
    4152             :         mpKernel = pKernel;
    4153             :     }
    4154             : #endif
    4155             : 
    4156           0 :     CLInterpreterResult launchKernel()
    4157             :     {
    4158           0 :         if (!isValid())
    4159           0 :             return CLInterpreterResult();
    4160             : 
    4161             :         try
    4162             :         {
    4163             :             // Run the kernel.
    4164           0 :             mpKernel->Launch(mnGroupLength);
    4165             :         }
    4166           0 :         catch (const UnhandledToken& ut)
    4167             :         {
    4168             :             SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled token: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
    4169           0 :             return CLInterpreterResult();
    4170             :         }
    4171           0 :         catch (const OpenCLError& oce)
    4172             :         {
    4173             :             SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCL error from " << oce.mFunction << ": " << ::opencl::errorString(oce.mError) << " at " << oce.mFile << ":" << oce.mLineNumber);
    4174           0 :             return CLInterpreterResult();
    4175             :         }
    4176           0 :         catch (const Unhandled& uh)
    4177             :         {
    4178             :             SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled case at " << uh.mFile << ":" << uh.mLineNumber);
    4179           0 :             return CLInterpreterResult();
    4180             :         }
    4181           0 :         catch (...)
    4182             :         {
    4183             :             SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled compiler error");
    4184           0 :             return CLInterpreterResult();
    4185             :         }
    4186             : 
    4187           0 :         return CLInterpreterResult(mpKernel, mnGroupLength);
    4188             :     }
    4189             : };
    4190             : 
    4191             : 
    4192           0 : CLInterpreterContext createCLInterpreterContext( const ScCalcConfig& rConfig,
    4193             :     ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode )
    4194             : {
    4195           0 :     CLInterpreterContext aCxt(xGroup->mnLength);
    4196             : 
    4197             : #if ENABLE_THREADED_OPENCL_KERNEL_COMPILATION
    4198             :     if (rGroup.meKernelState == sc::OpenCLKernelCompilationScheduled ||
    4199             :         rGroup.meKernelState == sc::OpenCLKernelBinaryCreated)
    4200             :     {
    4201             :         if (rGroup.meKernelState == sc::OpenCLKernelCompilationScheduled)
    4202             :         {
    4203             :             ScFormulaCellGroup::sxCompilationThread->maCompilationDoneCondition.wait();
    4204             :             ScFormulaCellGroup::sxCompilationThread->maCompilationDoneCondition.reset();
    4205             :         }
    4206             : 
    4207             :         // Kernel instance is managed by the formula group.
    4208             :         aCxt.setUnmanagedKernel(static_cast<DynamicKernel*>(xGroup->mpCompiledFormula));
    4209             :     }
    4210             :     else
    4211             :     {
    4212             :         assert(xGroup->meCalcState == sc::GroupCalcRunning);
    4213             :         aCxt.setManagedKernel(static_cast<DynamicKernel*>(DynamicKernel::create(rConfig, rCode, xGroup->mnLength)));
    4214             :     }
    4215             : #else
    4216           0 :     aCxt.setManagedKernel(static_cast<DynamicKernel*>(DynamicKernel::create(rConfig, rCode, xGroup->mnLength)));
    4217             : #endif
    4218             : 
    4219           0 :     return aCxt;
    4220             : }
    4221             : 
    4222           0 : void genRPNTokens( ScDocument& rDoc, const ScAddress& rTopPos, ScTokenArray& rCode )
    4223             : {
    4224           0 :     ScCompiler aComp(&rDoc, rTopPos, rCode);
    4225           0 :     aComp.SetGrammar(rDoc.GetGrammar());
    4226             :     // Disable special ordering for jump commands for the OpenCL interpreter.
    4227           0 :     aComp.EnableJumpCommandReorder(false);
    4228           0 :     aComp.CompileTokenArray(); // Regenerate RPN tokens.
    4229           0 : }
    4230             : 
    4231           0 : bool waitForResults()
    4232             : {
    4233             :     // Obtain cl context
    4234             :     ::opencl::KernelEnv kEnv;
    4235           0 :     ::opencl::setKernelEnv(&kEnv);
    4236             : 
    4237           0 :     cl_int err = clFinish(kEnv.mpkCmdQueue);
    4238           0 :     if (err != CL_SUCCESS)
    4239             :         SAL_WARN("sc.opencl", "clFinish failed: " << ::opencl::errorString(err));
    4240             : 
    4241           0 :     return err == CL_SUCCESS;
    4242             : }
    4243             : 
    4244             : }
    4245             : 
    4246           0 : bool FormulaGroupInterpreterOpenCL::interpret( ScDocument& rDoc,
    4247             :     const ScAddress& rTopPos, ScFormulaCellGroupRef& xGroup,
    4248             :     ScTokenArray& rCode )
    4249             : {
    4250           0 :     MergeCalcConfig(rDoc);
    4251             : 
    4252           0 :     genRPNTokens(rDoc, rTopPos, rCode);
    4253             : 
    4254           0 :     CLInterpreterContext aCxt = createCLInterpreterContext(maCalcConfig, xGroup, rCode);
    4255           0 :     if (!aCxt.isValid())
    4256           0 :         return false;
    4257             : 
    4258           0 :     CLInterpreterResult aRes = aCxt.launchKernel();
    4259           0 :     if (!aRes.isValid())
    4260           0 :         return false;
    4261             : 
    4262           0 :     if (!waitForResults())
    4263           0 :         return false;
    4264             : 
    4265           0 :     aRes.fetchResultFromKernel();
    4266             : 
    4267           0 :     return aRes.pushResultToDocument(rDoc, rTopPos);
    4268             : }
    4269             : 
    4270         156 : }} // namespace sc::opencl
    4271             : 
    4272             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11