Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : */
9 :
10 : #include "formulagroup.hxx"
11 : #include "formulagroupcl.hxx"
12 : #include "clkernelthread.hxx"
13 : #include "grouptokenconverter.hxx"
14 : #include "document.hxx"
15 : #include "formulacell.hxx"
16 : #include "tokenarray.hxx"
17 : #include "compiler.hxx"
18 : #include "interpre.hxx"
19 : #include <formula/random.hxx>
20 : #include <formula/vectortoken.hxx>
21 : #include "scmatrix.hxx"
22 :
23 : #include <opencl/openclwrapper.hxx>
24 :
25 : #include "op_financial.hxx"
26 : #include "op_database.hxx"
27 : #include "op_math.hxx"
28 : #include "op_logical.hxx"
29 : #include "op_statistical.hxx"
30 : #include "op_array.hxx"
31 : #include "op_spreadsheet.hxx"
32 : #include "op_addin.hxx"
33 :
34 : /// CONFIGURATIONS
35 : #define REDUCE_THRESHOLD 201 // set to 4 for correctness testing. priority 1
36 : #define UNROLLING_FACTOR 16 // set to 4 for correctness testing (if no reduce)
37 :
38 : static const char* publicFunc =
39 : "\n"
40 : "#define errIllegalFPOperation 503 // #NUM!\n"
41 : "#define errNoValue 519 // #VALUE!\n"
42 : "#define errDivisionByZero 532 // #DIV/0!\n"
43 : "\n"
44 : "double CreateDoubleError(ulong nErr)\n"
45 : "{\n"
46 : " return nan(nErr);\n"
47 : "}\n"
48 : "\n"
49 : "uint GetDoubleErrorValue(double fVal)\n"
50 : "{\n"
51 : " if (isfinite(fVal))\n"
52 : " return 0;\n"
53 : " if (isinf(fVal))\n"
54 : " return errIllegalFPOperation; // normal INF\n"
55 : " if (as_ulong(fVal) & 0XFFFF0000u)\n"
56 : " return errNoValue; // just a normal NAN\n"
57 : " return (as_ulong(fVal) & 0XFFFF); // any other error\n"
58 : "}\n"
59 : "\n"
60 : "int isNan(double a) { return isnan(a); }\n"
61 : "double fsum_count(double a, double b, __private int *p) {\n"
62 : " bool t = isNan(a);\n"
63 : " (*p) += t?0:1;\n"
64 : " return t?b:a+b;\n"
65 : "}\n"
66 : "double fsum(double a, double b) { return isNan(a)?b:a+b; }\n"
67 : "double legalize(double a, double b) { return isNan(a)?b:a;}\n"
68 : "double fsub(double a, double b) { return a-b; }\n"
69 : "double fdiv(double a, double b) { return a/b; }\n"
70 : "double strequal(unsigned a, unsigned b) { return (a==b)?1.0:0; }\n"
71 : ;
72 :
73 : #ifdef WIN32
74 : #ifndef NAN
75 : namespace {
76 :
77 : const unsigned long __nan[2] = {0xffffffff, 0x7fffffff};
78 :
79 : }
80 : #define NAN (*(const double*) __nan)
81 : #endif
82 : #endif
83 :
84 : #include <list>
85 : #include <map>
86 : #include <iostream>
87 : #include <sstream>
88 : #include <algorithm>
89 :
90 : #include <rtl/digest.h>
91 :
92 : #include <boost/scoped_ptr.hpp>
93 : #include <boost/scoped_array.hpp>
94 :
95 : using namespace formula;
96 :
97 : namespace sc { namespace opencl {
98 :
99 : namespace {
100 :
101 0 : std::string StackVarEnumToString(StackVar const e)
102 : {
103 0 : switch (e)
104 : {
105 : #define CASE(x) case sv##x: return #x
106 0 : CASE(Byte);
107 0 : CASE(Double);
108 0 : CASE(String);
109 0 : CASE(SingleRef);
110 0 : CASE(DoubleRef);
111 0 : CASE(Matrix);
112 0 : CASE(Index);
113 0 : CASE(Jump);
114 0 : CASE(External);
115 0 : CASE(FAP);
116 0 : CASE(JumpMatrix);
117 0 : CASE(RefList);
118 0 : CASE(EmptyCell);
119 0 : CASE(MatrixCell);
120 0 : CASE(HybridCell);
121 0 : CASE(HybridValueCell);
122 0 : CASE(ExternalSingleRef);
123 0 : CASE(ExternalDoubleRef);
124 0 : CASE(ExternalName);
125 0 : CASE(SingleVectorRef);
126 0 : CASE(DoubleVectorRef);
127 0 : CASE(Subroutine);
128 0 : CASE(Error);
129 0 : CASE(Missing);
130 0 : CASE(Sep);
131 0 : CASE(Unknown);
132 : #undef CASE
133 : }
134 0 : return std::to_string(static_cast<int>(e));
135 : }
136 :
137 : #ifdef SAL_DETAIL_ENABLE_LOG_INFO
138 0 : std::string linenumberify(const std::string& s)
139 : {
140 0 : std::stringstream ss;
141 0 : int linenumber = 1;
142 0 : size_t start = 0;
143 : size_t newline;
144 0 : while ((newline = s.find('\n', start)) != std::string::npos)
145 : {
146 0 : ss << "/*" << std::setw(4) << linenumber++ << "*/ " << s.substr(start, newline-start+1);
147 0 : start = newline + 1;
148 : }
149 0 : if (start < s.size())
150 0 : ss << "/*" << std::setw(4) << linenumber++ << "*/ " << s.substr(start, std::string::npos);
151 0 : return ss.str();
152 : }
153 : #endif
154 :
155 : } // anonymous namespace
156 :
157 : /// Map the buffer used by an argument and do necessary argument setting
158 0 : size_t VectorRef::Marshal( cl_kernel k, int argno, int, cl_program )
159 : {
160 0 : FormulaToken* ref = mFormulaTree->GetFormulaToken();
161 0 : double* pHostBuffer = NULL;
162 0 : size_t szHostBuffer = 0;
163 0 : if (ref->GetType() == formula::svSingleVectorRef)
164 : {
165 : const formula::SingleVectorRefToken* pSVR =
166 0 : static_cast<const formula::SingleVectorRefToken*>(ref);
167 :
168 : SAL_INFO("sc.opencl", "SingleVectorRef len=" << pSVR->GetArrayLength() << " mpNumericArray=" << pSVR->GetArray().mpNumericArray << " (mpStringArray=" << pSVR->GetArray().mpStringArray << ")");
169 :
170 0 : pHostBuffer = const_cast<double*>(pSVR->GetArray().mpNumericArray);
171 0 : szHostBuffer = pSVR->GetArrayLength() * sizeof(double);
172 : }
173 0 : else if (ref->GetType() == formula::svDoubleVectorRef)
174 : {
175 : const formula::DoubleVectorRefToken* pDVR =
176 0 : static_cast<const formula::DoubleVectorRefToken*>(ref);
177 :
178 : SAL_INFO("sc.opencl", "DoubleVectorRef index=" << mnIndex << " len=" << pDVR->GetArrayLength() << " mpNumericArray=" << pDVR->GetArrays()[mnIndex].mpNumericArray << " (mpStringArray=" << pDVR->GetArrays()[mnIndex].mpStringArray << ")");
179 :
180 : pHostBuffer = const_cast<double*>(
181 0 : pDVR->GetArrays()[mnIndex].mpNumericArray);
182 0 : szHostBuffer = pDVR->GetArrayLength() * sizeof(double);
183 : }
184 : else
185 : {
186 0 : throw Unhandled();
187 : }
188 : // Obtain cl context
189 : ::opencl::KernelEnv kEnv;
190 0 : ::opencl::setKernelEnv(&kEnv);
191 : cl_int err;
192 0 : if (pHostBuffer)
193 : {
194 : mpClmem = clCreateBuffer(kEnv.mpkContext,
195 : (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
196 : szHostBuffer,
197 0 : pHostBuffer, &err);
198 0 : if (CL_SUCCESS != err)
199 0 : throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
200 : SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer << " using host buffer " << pHostBuffer);
201 : }
202 : else
203 : {
204 0 : if (szHostBuffer == 0)
205 0 : szHostBuffer = sizeof(double); // a dummy small value
206 : // Marshal as a buffer of NANs
207 : mpClmem = clCreateBuffer(kEnv.mpkContext,
208 : (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
209 0 : szHostBuffer, NULL, &err);
210 0 : if (CL_SUCCESS != err)
211 0 : throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
212 : SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
213 :
214 : double* pNanBuffer = static_cast<double*>(clEnqueueMapBuffer(
215 : kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
216 0 : szHostBuffer, 0, NULL, NULL, &err));
217 0 : if (CL_SUCCESS != err)
218 0 : throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
219 :
220 0 : for (size_t i = 0; i < szHostBuffer / sizeof(double); i++)
221 0 : pNanBuffer[i] = NAN;
222 : err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
223 0 : pNanBuffer, 0, NULL, NULL);
224 : // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
225 0 : if (CL_SUCCESS != err)
226 : SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << ::opencl::errorString(err));
227 : }
228 :
229 : SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem);
230 0 : err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem));
231 0 : if (CL_SUCCESS != err)
232 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
233 0 : return 1;
234 : }
235 :
236 : /// Arguments that are actually compile-time constant string
237 : /// Currently, only the hash is passed.
238 : /// TBD(IJSUNG): pass also length and the actual string if there is a
239 : /// hash function collision
240 0 : class ConstStringArgument : public DynamicKernelArgument
241 : {
242 : public:
243 0 : ConstStringArgument( const ScCalcConfig& config, const std::string& s,
244 : FormulaTreeNodeRef ft ) :
245 0 : DynamicKernelArgument(config, s, ft) { }
246 : /// Generate declaration
247 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
248 : {
249 0 : ss << "unsigned " << mSymName;
250 0 : }
251 0 : virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
252 : {
253 0 : ss << GenSlidingWindowDeclRef(false);
254 0 : }
255 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
256 : {
257 0 : GenDecl(ss);
258 0 : }
259 0 : virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
260 : {
261 0 : std::stringstream ss;
262 0 : if (GetFormulaToken()->GetType() != formula::svString)
263 0 : throw Unhandled();
264 0 : FormulaToken* Tok = GetFormulaToken();
265 0 : ss << Tok->GetString().getString().toAsciiUpperCase().hashCode() << "U";
266 0 : return ss.str();
267 : }
268 0 : virtual size_t GetWindowSize() const SAL_OVERRIDE
269 : {
270 0 : return 1;
271 : }
272 : /// Pass the 32-bit hash of the string to the kernel
273 0 : virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
274 : {
275 0 : FormulaToken* ref = mFormulaTree->GetFormulaToken();
276 0 : cl_uint hashCode = 0;
277 0 : if (ref->GetType() == formula::svString)
278 : {
279 0 : const rtl::OUString s = ref->GetString().getString().toAsciiUpperCase();
280 0 : hashCode = s.hashCode();
281 : }
282 : else
283 : {
284 0 : throw Unhandled();
285 : }
286 :
287 : // Pass the scalar result back to the rest of the formula kernel
288 : SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_uint: " << hashCode);
289 0 : cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), static_cast<void*>(&hashCode));
290 0 : if (CL_SUCCESS != err)
291 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
292 0 : return 1;
293 : }
294 : };
295 :
296 : /// Arguments that are actually compile-time constants
297 0 : class DynamicKernelConstantArgument : public DynamicKernelArgument
298 : {
299 : public:
300 0 : DynamicKernelConstantArgument( const ScCalcConfig& config, const std::string& s,
301 : FormulaTreeNodeRef ft ) :
302 0 : DynamicKernelArgument(config, s, ft) { }
303 : /// Generate declaration
304 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
305 : {
306 0 : ss << "double " << mSymName;
307 0 : }
308 0 : virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
309 : {
310 0 : ss << mSymName;
311 0 : }
312 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
313 : {
314 0 : GenDecl(ss);
315 0 : }
316 0 : virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
317 : {
318 0 : if (GetFormulaToken()->GetType() != formula::svDouble)
319 0 : throw Unhandled();
320 0 : return mSymName;
321 : }
322 0 : virtual size_t GetWindowSize() const SAL_OVERRIDE
323 : {
324 0 : return 1;
325 : }
326 0 : double GetDouble() const
327 : {
328 0 : FormulaToken* Tok = GetFormulaToken();
329 0 : if (Tok->GetType() != formula::svDouble)
330 0 : throw Unhandled();
331 0 : return Tok->GetDouble();
332 : }
333 : /// Create buffer and pass the buffer to a given kernel
334 0 : virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
335 : {
336 0 : double tmp = GetDouble();
337 : // Pass the scalar result back to the rest of the formula kernel
338 : SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp);
339 0 : cl_int err = clSetKernelArg(k, argno, sizeof(double), static_cast<void*>(&tmp));
340 0 : if (CL_SUCCESS != err)
341 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
342 0 : return 1;
343 : }
344 : };
345 :
346 0 : class DynamicKernelPiArgument : public DynamicKernelArgument
347 : {
348 : public:
349 0 : DynamicKernelPiArgument( const ScCalcConfig& config, const std::string& s,
350 : FormulaTreeNodeRef ft ) :
351 0 : DynamicKernelArgument(config, s, ft) { }
352 : /// Generate declaration
353 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
354 : {
355 0 : ss << "double " << mSymName;
356 0 : }
357 0 : virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
358 : {
359 0 : ss << "3.14159265358979";
360 0 : }
361 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
362 : {
363 0 : GenDecl(ss);
364 0 : }
365 0 : virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
366 : {
367 0 : return mSymName;
368 : }
369 0 : virtual size_t GetWindowSize() const SAL_OVERRIDE
370 : {
371 0 : return 1;
372 : }
373 : /// Create buffer and pass the buffer to a given kernel
374 0 : virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
375 : {
376 0 : double tmp = 0.0;
377 : // Pass the scalar result back to the rest of the formula kernel
378 : SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp);
379 0 : cl_int err = clSetKernelArg(k, argno, sizeof(double), static_cast<void*>(&tmp));
380 0 : if (CL_SUCCESS != err)
381 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
382 0 : return 1;
383 : }
384 : };
385 :
386 0 : class DynamicKernelRandomArgument : public DynamicKernelArgument
387 : {
388 : public:
389 0 : DynamicKernelRandomArgument( const ScCalcConfig& config, const std::string& s,
390 : FormulaTreeNodeRef ft ) :
391 0 : DynamicKernelArgument(config, s, ft) { }
392 : /// Generate declaration
393 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
394 : {
395 0 : ss << "double " << mSymName;
396 0 : }
397 0 : virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
398 : {
399 0 : ss << mSymName;
400 0 : }
401 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
402 : {
403 0 : ss << "int " << mSymName;
404 0 : }
405 0 : virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
406 : {
407 0 : return mSymName + "_Random(" + mSymName + ")";
408 : }
409 0 : virtual void GenSlidingWindowFunction( std::stringstream& ss ) SAL_OVERRIDE
410 : {
411 : // This string is from the pi_opencl_kernel.i file as
412 : // generated when building the Random123 examples. Unused
413 : // stuff has been removed, and the actual kernel is not the
414 : // same as in the totally different use case of that example,
415 : // of course. Only the code that calculates the counter-based
416 : // random number and what it needs is left.
417 0 : ss << "\
418 : \n\
419 : #ifndef DEFINED_RANDOM123_STUFF\n\
420 : #define DEFINED_RANDOM123_STUFF\n\
421 : \n\
422 : /*\n\
423 : Copyright 2010-2011, D. E. Shaw Research.\n\
424 : All rights reserved.\n\
425 : \n\
426 : Redistribution and use in source and binary forms, with or without\n\
427 : modification, are permitted provided that the following conditions are\n\
428 : met:\n\
429 : \n\
430 : * Redistributions of source code must retain the above copyright\n\
431 : notice, this list of conditions, and the following disclaimer.\n\
432 : \n\
433 : * Redistributions in binary form must reproduce the above copyright\n\
434 : notice, this list of conditions, and the following disclaimer in the\n\
435 : documentation and/or other materials provided with the distribution.\n\
436 : \n\
437 : * Neither the name of D. E. Shaw Research nor the names of its\n\
438 : contributors may be used to endorse or promote products derived from\n\
439 : this software without specific prior written permission.\n\
440 : \n\
441 : THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\
442 : \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n\
443 : LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n\
444 : A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n\
445 : OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n\
446 : SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n\
447 : LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n\
448 : DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n\
449 : THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n\
450 : (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
451 : OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
452 : */\n\
453 : \n\
454 : typedef uint uint32_t;\n\
455 : struct r123array2x32\n\
456 : {\n\
457 : uint32_t v[2];\n\
458 : };\n\
459 : enum r123_enum_threefry32x2\n\
460 : {\n\
461 : R_32x2_0_0 = 13,\n\
462 : R_32x2_1_0 = 15,\n\
463 : R_32x2_2_0 = 26,\n\
464 : R_32x2_3_0 = 6,\n\
465 : R_32x2_4_0 = 17,\n\
466 : R_32x2_5_0 = 29,\n\
467 : R_32x2_6_0 = 16,\n\
468 : R_32x2_7_0 = 24\n\
469 : };\n\
470 : inline uint32_t RotL_32 (uint32_t x, unsigned int N)\n\
471 : __attribute__ ((always_inline));\n\
472 : inline uint32_t\n\
473 : RotL_32 (uint32_t x, unsigned int N)\n\
474 : {\n\
475 : return (x << (N & 31)) | (x >> ((32 - N) & 31));\n\
476 : }\n\
477 : \n\
478 : typedef struct r123array2x32 threefry2x32_ctr_t;\n\
479 : typedef struct r123array2x32 threefry2x32_key_t;\n\
480 : typedef struct r123array2x32 threefry2x32_ukey_t;\n\
481 : inline threefry2x32_key_t\n\
482 : threefry2x32keyinit (threefry2x32_ukey_t uk)\n\
483 : {\n\
484 : return uk;\n\
485 : }\n\
486 : \n\
487 : inline threefry2x32_ctr_t threefry2x32_R (unsigned int Nrounds,\n\
488 : threefry2x32_ctr_t in,\n\
489 : threefry2x32_key_t k)\n\
490 : __attribute__ ((always_inline));\n\
491 : inline threefry2x32_ctr_t\n\
492 : threefry2x32_R (unsigned int Nrounds, threefry2x32_ctr_t in,\n\
493 : threefry2x32_key_t k)\n\
494 : {\n\
495 : threefry2x32_ctr_t X;\n\
496 : uint32_t ks[2 + 1];\n\
497 : int i;\n\
498 : ks[2] = 0x1BD11BDA;\n\
499 : for (i = 0; i < 2; i++) {\n\
500 : ks[i] = k.v[i];\n\
501 : X.v[i] = in.v[i];\n\
502 : ks[2] ^= k.v[i];\n\
503 : }\n\
504 : X.v[0] += ks[0];\n\
505 : X.v[1] += ks[1];\n\
506 : if (Nrounds > 0) {\n\
507 : X.v[0] += X.v[1];\n\
508 : X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
509 : X.v[1] ^= X.v[0];\n\
510 : }\n\
511 : if (Nrounds > 1) {\n\
512 : X.v[0] += X.v[1];\n\
513 : X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
514 : X.v[1] ^= X.v[0];\n\
515 : }\n\
516 : if (Nrounds > 2) {\n\
517 : X.v[0] += X.v[1];\n\
518 : X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
519 : X.v[1] ^= X.v[0];\n\
520 : }\n\
521 : if (Nrounds > 3) {\n\
522 : X.v[0] += X.v[1];\n\
523 : X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
524 : X.v[1] ^= X.v[0];\n\
525 : }\n\
526 : if (Nrounds > 3) {\n\
527 : X.v[0] += ks[1];\n\
528 : X.v[1] += ks[2];\n\
529 : X.v[1] += 1;\n\
530 : }\n\
531 : if (Nrounds > 4) {\n\
532 : X.v[0] += X.v[1];\n\
533 : X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
534 : X.v[1] ^= X.v[0];\n\
535 : }\n\
536 : if (Nrounds > 5) {\n\
537 : X.v[0] += X.v[1];\n\
538 : X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
539 : X.v[1] ^= X.v[0];\n\
540 : }\n\
541 : if (Nrounds > 6) {\n\
542 : X.v[0] += X.v[1];\n\
543 : X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
544 : X.v[1] ^= X.v[0];\n\
545 : }\n\
546 : if (Nrounds > 7) {\n\
547 : X.v[0] += X.v[1];\n\
548 : X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
549 : X.v[1] ^= X.v[0];\n\
550 : }\n\
551 : if (Nrounds > 7) {\n\
552 : X.v[0] += ks[2];\n\
553 : X.v[1] += ks[0];\n\
554 : X.v[1] += 2;\n\
555 : }\n\
556 : if (Nrounds > 8) {\n\
557 : X.v[0] += X.v[1];\n\
558 : X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
559 : X.v[1] ^= X.v[0];\n\
560 : }\n\
561 : if (Nrounds > 9) {\n\
562 : X.v[0] += X.v[1];\n\
563 : X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
564 : X.v[1] ^= X.v[0];\n\
565 : }\n\
566 : if (Nrounds > 10) {\n\
567 : X.v[0] += X.v[1];\n\
568 : X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
569 : X.v[1] ^= X.v[0];\n\
570 : }\n\
571 : if (Nrounds > 11) {\n\
572 : X.v[0] += X.v[1];\n\
573 : X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
574 : X.v[1] ^= X.v[0];\n\
575 : }\n\
576 : if (Nrounds > 11) {\n\
577 : X.v[0] += ks[0];\n\
578 : X.v[1] += ks[1];\n\
579 : X.v[1] += 3;\n\
580 : }\n\
581 : if (Nrounds > 12) {\n\
582 : X.v[0] += X.v[1];\n\
583 : X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
584 : X.v[1] ^= X.v[0];\n\
585 : }\n\
586 : if (Nrounds > 13) {\n\
587 : X.v[0] += X.v[1];\n\
588 : X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
589 : X.v[1] ^= X.v[0];\n\
590 : }\n\
591 : if (Nrounds > 14) {\n\
592 : X.v[0] += X.v[1];\n\
593 : X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
594 : X.v[1] ^= X.v[0];\n\
595 : }\n\
596 : if (Nrounds > 15) {\n\
597 : X.v[0] += X.v[1];\n\
598 : X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
599 : X.v[1] ^= X.v[0];\n\
600 : }\n\
601 : if (Nrounds > 15) {\n\
602 : X.v[0] += ks[1];\n\
603 : X.v[1] += ks[2];\n\
604 : X.v[1] += 4;\n\
605 : }\n\
606 : if (Nrounds > 16) {\n\
607 : X.v[0] += X.v[1];\n\
608 : X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
609 : X.v[1] ^= X.v[0];\n\
610 : }\n\
611 : if (Nrounds > 17) {\n\
612 : X.v[0] += X.v[1];\n\
613 : X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
614 : X.v[1] ^= X.v[0];\n\
615 : }\n\
616 : if (Nrounds > 18) {\n\
617 : X.v[0] += X.v[1];\n\
618 : X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
619 : X.v[1] ^= X.v[0];\n\
620 : }\n\
621 : if (Nrounds > 19) {\n\
622 : X.v[0] += X.v[1];\n\
623 : X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
624 : X.v[1] ^= X.v[0];\n\
625 : }\n\
626 : if (Nrounds > 19) {\n\
627 : X.v[0] += ks[2];\n\
628 : X.v[1] += ks[0];\n\
629 : X.v[1] += 5;\n\
630 : }\n\
631 : if (Nrounds > 20) {\n\
632 : X.v[0] += X.v[1];\n\
633 : X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
634 : X.v[1] ^= X.v[0];\n\
635 : }\n\
636 : if (Nrounds > 21) {\n\
637 : X.v[0] += X.v[1];\n\
638 : X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
639 : X.v[1] ^= X.v[0];\n\
640 : }\n\
641 : if (Nrounds > 22) {\n\
642 : X.v[0] += X.v[1];\n\
643 : X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
644 : X.v[1] ^= X.v[0];\n\
645 : }\n\
646 : if (Nrounds > 23) {\n\
647 : X.v[0] += X.v[1];\n\
648 : X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
649 : X.v[1] ^= X.v[0];\n\
650 : }\n\
651 : if (Nrounds > 23) {\n\
652 : X.v[0] += ks[0];\n\
653 : X.v[1] += ks[1];\n\
654 : X.v[1] += 6;\n\
655 : }\n\
656 : if (Nrounds > 24) {\n\
657 : X.v[0] += X.v[1];\n\
658 : X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
659 : X.v[1] ^= X.v[0];\n\
660 : }\n\
661 : if (Nrounds > 25) {\n\
662 : X.v[0] += X.v[1];\n\
663 : X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
664 : X.v[1] ^= X.v[0];\n\
665 : }\n\
666 : if (Nrounds > 26) {\n\
667 : X.v[0] += X.v[1];\n\
668 : X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
669 : X.v[1] ^= X.v[0];\n\
670 : }\n\
671 : if (Nrounds > 27) {\n\
672 : X.v[0] += X.v[1];\n\
673 : X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
674 : X.v[1] ^= X.v[0];\n\
675 : }\n\
676 : if (Nrounds > 27) {\n\
677 : X.v[0] += ks[1];\n\
678 : X.v[1] += ks[2];\n\
679 : X.v[1] += 7;\n\
680 : }\n\
681 : if (Nrounds > 28) {\n\
682 : X.v[0] += X.v[1];\n\
683 : X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
684 : X.v[1] ^= X.v[0];\n\
685 : }\n\
686 : if (Nrounds > 29) {\n\
687 : X.v[0] += X.v[1];\n\
688 : X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
689 : X.v[1] ^= X.v[0];\n\
690 : }\n\
691 : if (Nrounds > 30) {\n\
692 : X.v[0] += X.v[1];\n\
693 : X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
694 : X.v[1] ^= X.v[0];\n\
695 : }\n\
696 : if (Nrounds > 31) {\n\
697 : X.v[0] += X.v[1];\n\
698 : X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
699 : X.v[1] ^= X.v[0];\n\
700 : }\n\
701 : if (Nrounds > 31) {\n\
702 : X.v[0] += ks[2];\n\
703 : X.v[1] += ks[0];\n\
704 : X.v[1] += 8;\n\
705 : }\n\
706 : return X;\n\
707 : }\n\
708 : \n\
709 : enum r123_enum_threefry2x32\n\
710 : { threefry2x32_rounds = 20 };\n\
711 : inline threefry2x32_ctr_t threefry2x32 (threefry2x32_ctr_t in,\n\
712 : threefry2x32_key_t k)\n\
713 : __attribute__ ((always_inline));\n\
714 : inline threefry2x32_ctr_t\n\
715 : threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\
716 : {\n\
717 : return threefry2x32_R (threefry2x32_rounds, in, k);\n\
718 : }\n\
719 : #endif\n\
720 : \n\
721 0 : ";
722 0 : ss << "double " << mSymName << "_Random (int seed)\n\
723 : {\n\
724 : unsigned tid = get_global_id(0);\n\
725 : threefry2x32_key_t k = { {tid, 0xdecafbad} };\n\
726 : threefry2x32_ctr_t c = { {seed, 0xf00dcafe} };\n\
727 : c = threefry2x32_R(threefry2x32_rounds, c, k);\n\
728 0 : const double factor = 1./(" << SAL_MAX_UINT32 << ".0 + 1.0);\n\
729 : const double halffactor = 0.5*factor;\n\
730 : return c.v[0] * factor + halffactor;\n\
731 : }\n\
732 0 : ";
733 0 : }
734 0 : virtual size_t GetWindowSize() const SAL_OVERRIDE
735 : {
736 0 : return 1;
737 : }
738 : /// Create buffer and pass the buffer to a given kernel
739 0 : virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
740 : {
741 0 : cl_int seed = formula::rng::nRandom(0, SAL_MAX_INT32);
742 : // Pass the scalar result back to the rest of the formula kernel
743 : SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_int: " << seed);
744 0 : cl_int err = clSetKernelArg(k, argno, sizeof(cl_int), static_cast<void*>(&seed));
745 0 : if (CL_SUCCESS != err)
746 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
747 0 : return 1;
748 : }
749 : };
750 :
751 : /// A vector of strings
752 0 : class DynamicKernelStringArgument : public VectorRef
753 : {
754 : public:
755 0 : DynamicKernelStringArgument( const ScCalcConfig& config, const std::string& s,
756 : FormulaTreeNodeRef ft, int index = 0 ) :
757 0 : VectorRef(config, s, ft, index) { }
758 :
759 0 : virtual void GenSlidingWindowFunction( std::stringstream& ) SAL_OVERRIDE { }
760 : /// Generate declaration
761 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
762 : {
763 0 : ss << "__global unsigned int *" << mSymName;
764 0 : }
765 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
766 : {
767 0 : DynamicKernelStringArgument::GenDecl(ss);
768 0 : }
769 : virtual size_t Marshal( cl_kernel, int, int, cl_program ) SAL_OVERRIDE;
770 : };
771 :
772 : /// Marshal a string vector reference
773 0 : size_t DynamicKernelStringArgument::Marshal( cl_kernel k, int argno, int, cl_program )
774 : {
775 0 : FormulaToken* ref = mFormulaTree->GetFormulaToken();
776 : // Obtain cl context
777 : ::opencl::KernelEnv kEnv;
778 0 : ::opencl::setKernelEnv(&kEnv);
779 : cl_int err;
780 0 : formula::VectorRefArray vRef;
781 0 : size_t nStrings = 0;
782 0 : if (ref->GetType() == formula::svSingleVectorRef)
783 : {
784 : const formula::SingleVectorRefToken* pSVR =
785 0 : static_cast<const formula::SingleVectorRefToken*>(ref);
786 0 : nStrings = pSVR->GetArrayLength();
787 0 : vRef = pSVR->GetArray();
788 : }
789 0 : else if (ref->GetType() == formula::svDoubleVectorRef)
790 : {
791 : const formula::DoubleVectorRefToken* pDVR =
792 0 : static_cast<const formula::DoubleVectorRefToken*>(ref);
793 0 : nStrings = pDVR->GetArrayLength();
794 0 : vRef = pDVR->GetArrays()[mnIndex];
795 : }
796 0 : size_t szHostBuffer = nStrings * sizeof(cl_int);
797 0 : cl_uint* pHashBuffer = NULL;
798 :
799 0 : if (vRef.mpStringArray != NULL)
800 : {
801 : // Marshal strings. Right now we pass hashes of these string
802 : mpClmem = clCreateBuffer(kEnv.mpkContext,
803 : (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
804 0 : szHostBuffer, NULL, &err);
805 0 : if (CL_SUCCESS != err)
806 0 : throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
807 : SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
808 :
809 : pHashBuffer = static_cast<cl_uint*>(clEnqueueMapBuffer(
810 : kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
811 0 : szHostBuffer, 0, NULL, NULL, &err));
812 0 : if (CL_SUCCESS != err)
813 0 : throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
814 :
815 0 : for (size_t i = 0; i < nStrings; i++)
816 : {
817 0 : if (vRef.mpStringArray[i])
818 : {
819 0 : const OUString tmp = OUString(vRef.mpStringArray[i]);
820 0 : pHashBuffer[i] = tmp.hashCode();
821 : }
822 : else
823 : {
824 0 : pHashBuffer[i] = 0;
825 : }
826 : }
827 : }
828 : else
829 : {
830 0 : if (nStrings == 0)
831 0 : szHostBuffer = sizeof(cl_int); // a dummy small value
832 : // Marshal as a buffer of NANs
833 : mpClmem = clCreateBuffer(kEnv.mpkContext,
834 : (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
835 0 : szHostBuffer, NULL, &err);
836 0 : if (CL_SUCCESS != err)
837 0 : throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
838 : SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
839 :
840 : pHashBuffer = static_cast<cl_uint*>(clEnqueueMapBuffer(
841 : kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
842 0 : szHostBuffer, 0, NULL, NULL, &err));
843 0 : if (CL_SUCCESS != err)
844 0 : throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
845 :
846 0 : for (size_t i = 0; i < szHostBuffer / sizeof(cl_int); i++)
847 0 : pHashBuffer[i] = 0;
848 : }
849 : err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
850 0 : pHashBuffer, 0, NULL, NULL);
851 0 : if (CL_SUCCESS != err)
852 0 : throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
853 :
854 : SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem);
855 0 : err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem));
856 0 : if (CL_SUCCESS != err)
857 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
858 0 : return 1;
859 : }
860 :
861 : /// A mixed string/numberic vector
862 0 : class DynamicKernelMixedArgument : public VectorRef
863 : {
864 : public:
865 0 : DynamicKernelMixedArgument( const ScCalcConfig& config, const std::string& s,
866 : FormulaTreeNodeRef ft ) :
867 0 : VectorRef(config, s, ft), mStringArgument(config, s + "s", ft) { }
868 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
869 : {
870 0 : VectorRef::GenSlidingWindowDecl(ss);
871 0 : ss << ", ";
872 0 : mStringArgument.GenSlidingWindowDecl(ss);
873 0 : }
874 0 : virtual bool IsMixedArgument() const SAL_OVERRIDE { return true;}
875 0 : virtual void GenSlidingWindowFunction( std::stringstream& ) SAL_OVERRIDE { }
876 : /// Generate declaration
877 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
878 : {
879 0 : VectorRef::GenDecl(ss);
880 0 : ss << ", ";
881 0 : mStringArgument.GenDecl(ss);
882 0 : }
883 0 : virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
884 : {
885 0 : VectorRef::GenDeclRef(ss);
886 0 : ss << ",";
887 0 : mStringArgument.GenDeclRef(ss);
888 0 : }
889 0 : virtual void GenNumDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
890 : {
891 0 : VectorRef::GenSlidingWindowDecl(ss);
892 0 : }
893 0 : virtual void GenStringDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
894 : {
895 0 : mStringArgument.GenSlidingWindowDecl(ss);
896 0 : }
897 0 : virtual std::string GenSlidingWindowDeclRef( bool nested ) const SAL_OVERRIDE
898 : {
899 0 : std::stringstream ss;
900 0 : ss << "(!isNan(" << VectorRef::GenSlidingWindowDeclRef();
901 0 : ss << ")?" << VectorRef::GenSlidingWindowDeclRef();
902 0 : ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
903 0 : ss << ")";
904 0 : return ss.str();
905 : }
906 0 : virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
907 : {
908 0 : std::stringstream ss;
909 0 : ss << VectorRef::GenSlidingWindowDeclRef();
910 0 : return ss.str();
911 : }
912 0 : virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
913 : {
914 0 : std::stringstream ss;
915 0 : ss << mStringArgument.GenSlidingWindowDeclRef();
916 0 : return ss.str();
917 : }
918 0 : virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) SAL_OVERRIDE
919 : {
920 0 : int i = VectorRef::Marshal(k, argno, vw, p);
921 0 : i += mStringArgument.Marshal(k, argno + i, vw, p);
922 0 : return i;
923 : }
924 :
925 : protected:
926 : DynamicKernelStringArgument mStringArgument;
927 : };
928 :
929 : /// Handling a Double Vector that is used as a sliding window input
930 : /// to either a sliding window average or sum-of-products
931 : /// Generate a sequential loop for reductions
932 : class OpAverage;
933 : class OpCount;
934 :
935 : template<class Base>
936 : class DynamicKernelSlidingArgument : public Base
937 : {
938 : public:
939 0 : DynamicKernelSlidingArgument( const ScCalcConfig& config, const std::string& s,
940 : FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase>& CodeGen,
941 : int index = 0 ) :
942 0 : Base(config, s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
943 : {
944 0 : FormulaToken* t = ft->GetFormulaToken();
945 0 : if (t->GetType() != formula::svDoubleVectorRef)
946 0 : throw Unhandled();
947 0 : mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
948 0 : bIsStartFixed = mpDVR->IsStartFixed();
949 0 : bIsEndFixed = mpDVR->IsEndFixed();
950 0 : }
951 : // Should only be called by SumIfs. Yikes!
952 0 : virtual bool NeedParallelReduction() const
953 : {
954 : assert(dynamic_cast<OpSumIfs*>(mpCodeGen.get()));
955 0 : return GetWindowSize() > 100 &&
956 0 : ((GetStartFixed() && GetEndFixed()) ||
957 0 : (!GetStartFixed() && !GetEndFixed()));
958 : }
959 0 : virtual void GenSlidingWindowFunction( std::stringstream& ) { }
960 :
961 0 : virtual std::string GenSlidingWindowDeclRef( bool nested = false ) const
962 : {
963 0 : size_t nArrayLength = mpDVR->GetArrayLength();
964 0 : std::stringstream ss;
965 0 : if (!bIsStartFixed && !bIsEndFixed)
966 : {
967 0 : if (nested)
968 0 : ss << "((i+gid0) <" << nArrayLength << "?";
969 0 : ss << Base::GetName() << "[i + gid0]";
970 0 : if (nested)
971 0 : ss << ":NAN)";
972 : }
973 : else
974 : {
975 0 : if (nested)
976 0 : ss << "(i <" << nArrayLength << "?";
977 0 : ss << Base::GetName() << "[i]";
978 0 : if (nested)
979 0 : ss << ":NAN)";
980 : }
981 0 : return ss.str();
982 : }
983 : /// Controls how the elements in the DoubleVectorRef are traversed
984 0 : virtual size_t GenReductionLoopHeader(
985 : std::stringstream& ss, bool& needBody )
986 : {
987 : assert(mpDVR);
988 0 : size_t nCurWindowSize = mpDVR->GetRefRowSize();
989 :
990 : {
991 0 : if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
992 : {
993 0 : ss << "for (int i = ";
994 0 : ss << "gid0; i < " << mpDVR->GetArrayLength();
995 0 : ss << " && i < " << nCurWindowSize << "; i++){\n\t\t";
996 0 : needBody = true;
997 0 : return nCurWindowSize;
998 : }
999 0 : else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1000 : {
1001 0 : ss << "for (int i = ";
1002 0 : ss << "0; i < " << mpDVR->GetArrayLength();
1003 0 : ss << " && i < gid0+" << nCurWindowSize << "; i++){\n\t\t";
1004 0 : needBody = true;
1005 0 : return nCurWindowSize;
1006 : }
1007 0 : else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1008 : {
1009 0 : ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
1010 0 : ss << "{int i;\n\t";
1011 0 : std::stringstream temp1, temp2;
1012 0 : int outLoopSize = UNROLLING_FACTOR;
1013 0 : if (nCurWindowSize / outLoopSize != 0)
1014 : {
1015 0 : ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1016 0 : for (int count = 0; count < outLoopSize; count++)
1017 : {
1018 0 : ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
1019 0 : if (count == 0)
1020 : {
1021 0 : temp1 << "if(i + gid0 < " << mpDVR->GetArrayLength();
1022 0 : temp1 << "){\n\t\t";
1023 0 : temp1 << "tmp = legalize(";
1024 0 : temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1025 0 : temp1 << ", tmp);\n\t\t\t";
1026 0 : temp1 << "}\n\t";
1027 : }
1028 0 : ss << temp1.str();
1029 : }
1030 0 : ss << "}\n\t";
1031 : }
1032 : // The residual of mod outLoopSize
1033 0 : for (size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1034 : {
1035 0 : ss << "i = " << count << ";\n\t";
1036 0 : if (count == nCurWindowSize / outLoopSize * outLoopSize)
1037 : {
1038 0 : temp2 << "if(i + gid0 < " << mpDVR->GetArrayLength();
1039 0 : temp2 << "){\n\t\t";
1040 0 : temp2 << "tmp = legalize(";
1041 0 : temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1042 0 : temp2 << ", tmp);\n\t\t\t";
1043 0 : temp2 << "}\n\t";
1044 : }
1045 0 : ss << temp2.str();
1046 : }
1047 0 : ss << "}\n";
1048 0 : needBody = false;
1049 0 : return nCurWindowSize;
1050 : }
1051 : // (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1052 : else
1053 : {
1054 0 : ss << "\n\t";
1055 0 : ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
1056 0 : ss << "{int i;\n\t";
1057 0 : std::stringstream temp1, temp2;
1058 0 : int outLoopSize = UNROLLING_FACTOR;
1059 0 : if (nCurWindowSize / outLoopSize != 0)
1060 : {
1061 0 : ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1062 0 : for (int count = 0; count < outLoopSize; count++)
1063 : {
1064 0 : ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
1065 0 : if (count == 0)
1066 : {
1067 0 : temp1 << "tmp = legalize(";
1068 0 : temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1069 0 : temp1 << ", tmp);\n\t\t\t";
1070 : }
1071 0 : ss << temp1.str();
1072 : }
1073 0 : ss << "}\n\t";
1074 : }
1075 : // The residual of mod outLoopSize
1076 0 : for (size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1077 : {
1078 0 : ss << "i = " << count << ";\n\t";
1079 0 : if (count == nCurWindowSize / outLoopSize * outLoopSize)
1080 : {
1081 0 : temp2 << "tmp = legalize(";
1082 0 : temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1083 0 : temp2 << ", tmp);\n\t\t\t";
1084 : }
1085 0 : ss << temp2.str();
1086 : }
1087 0 : ss << "}\n";
1088 0 : needBody = false;
1089 0 : return nCurWindowSize;
1090 : }
1091 : }
1092 : }
1093 0 : ~DynamicKernelSlidingArgument()
1094 : {
1095 0 : if (mpClmem2)
1096 : {
1097 : cl_int err;
1098 0 : err = clReleaseMemObject(mpClmem2);
1099 : SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
1100 0 : mpClmem2 = NULL;
1101 : }
1102 0 : }
1103 :
1104 0 : size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
1105 :
1106 0 : size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
1107 :
1108 0 : size_t GetStartFixed() const { return bIsStartFixed; }
1109 :
1110 0 : size_t GetEndFixed() const { return bIsEndFixed; }
1111 :
1112 : protected:
1113 : bool bIsStartFixed, bIsEndFixed;
1114 : const formula::DoubleVectorRefToken* mpDVR;
1115 : // from parent nodes
1116 : boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
1117 : // controls whether to invoke the reduction kernel during marshaling or not
1118 : cl_mem mpClmem2;
1119 : };
1120 :
1121 : /// A mixed string/numberic vector
1122 0 : class DynamicKernelMixedSlidingArgument : public VectorRef
1123 : {
1124 : public:
1125 0 : DynamicKernelMixedSlidingArgument( const ScCalcConfig& config, const std::string& s,
1126 : FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase>& CodeGen,
1127 : int index = 0 ) :
1128 : VectorRef(config, s, ft),
1129 : mDoubleArgument(mCalcConfig, s, ft, CodeGen, index),
1130 0 : mStringArgument(mCalcConfig, s + "s", ft, CodeGen, index) { }
1131 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
1132 : {
1133 0 : mDoubleArgument.GenSlidingWindowDecl(ss);
1134 0 : ss << ", ";
1135 0 : mStringArgument.GenSlidingWindowDecl(ss);
1136 0 : }
1137 0 : virtual void GenSlidingWindowFunction( std::stringstream& ) SAL_OVERRIDE { }
1138 : /// Generate declaration
1139 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
1140 : {
1141 0 : mDoubleArgument.GenDecl(ss);
1142 0 : ss << ", ";
1143 0 : mStringArgument.GenDecl(ss);
1144 0 : }
1145 0 : virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
1146 : {
1147 0 : mDoubleArgument.GenDeclRef(ss);
1148 0 : ss << ",";
1149 0 : mStringArgument.GenDeclRef(ss);
1150 0 : }
1151 0 : virtual std::string GenSlidingWindowDeclRef( bool nested ) const SAL_OVERRIDE
1152 : {
1153 0 : std::stringstream ss;
1154 0 : ss << "(!isNan(" << mDoubleArgument.GenSlidingWindowDeclRef();
1155 0 : ss << ")?" << mDoubleArgument.GenSlidingWindowDeclRef();
1156 0 : ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
1157 0 : ss << ")";
1158 0 : return ss.str();
1159 : }
1160 0 : virtual bool IsMixedArgument() const SAL_OVERRIDE { return true;}
1161 0 : virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
1162 : {
1163 0 : std::stringstream ss;
1164 0 : ss << mDoubleArgument.GenSlidingWindowDeclRef();
1165 0 : return ss.str();
1166 : }
1167 0 : virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
1168 : {
1169 0 : std::stringstream ss;
1170 0 : ss << mStringArgument.GenSlidingWindowDeclRef();
1171 0 : return ss.str();
1172 : }
1173 0 : virtual void GenNumDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
1174 : {
1175 0 : mDoubleArgument.GenDeclRef(ss);
1176 0 : }
1177 0 : virtual void GenStringDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
1178 : {
1179 0 : mStringArgument.GenDeclRef(ss);
1180 0 : }
1181 0 : virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) SAL_OVERRIDE
1182 : {
1183 0 : int i = mDoubleArgument.Marshal(k, argno, vw, p);
1184 0 : i += mStringArgument.Marshal(k, argno + i, vw, p);
1185 0 : return i;
1186 : }
1187 :
1188 : protected:
1189 : DynamicKernelSlidingArgument<VectorRef> mDoubleArgument;
1190 : DynamicKernelSlidingArgument<DynamicKernelStringArgument> mStringArgument;
1191 : };
1192 :
1193 : /// Holds the symbol table for a given dynamic kernel
1194 0 : class SymbolTable
1195 : {
1196 : public:
1197 : typedef std::map<const formula::FormulaToken*, DynamicKernelArgumentRef> ArgumentMap;
1198 : // This avoids instability caused by using pointer as the key type
1199 : typedef std::list<DynamicKernelArgumentRef> ArgumentList;
1200 0 : SymbolTable() : mCurId(0) { }
1201 : template<class T>
1202 : const DynamicKernelArgument* DeclRefArg( const ScCalcConfig& config, FormulaTreeNodeRef, SlidingFunctionBase* pCodeGen, int nResultSize );
1203 : /// Used to generate sliding window helpers
1204 0 : void DumpSlidingWindowFunctions( std::stringstream& ss )
1205 : {
1206 0 : for (ArgumentList::iterator it = mParams.begin(), e = mParams.end(); it != e;
1207 : ++it)
1208 : {
1209 0 : (*it)->GenSlidingWindowFunction(ss);
1210 0 : ss << "\n";
1211 : }
1212 0 : }
1213 : /// Memory mapping from host to device and pass buffers to the given kernel as
1214 : /// arguments
1215 : void Marshal( cl_kernel, int, cl_program );
1216 :
1217 : private:
1218 : unsigned int mCurId;
1219 : ArgumentMap mSymbols;
1220 : ArgumentList mParams;
1221 : };
1222 :
1223 0 : void SymbolTable::Marshal( cl_kernel k, int nVectorWidth, cl_program pProgram )
1224 : {
1225 0 : int i = 1; //The first argument is reserved for results
1226 0 : for (ArgumentList::iterator it = mParams.begin(), e = mParams.end(); it != e;
1227 : ++it)
1228 : {
1229 0 : i += (*it)->Marshal(k, i, nVectorWidth, pProgram);
1230 : }
1231 0 : }
1232 :
1233 : /// Handling a Double Vector that is used as a sliding window input
1234 : /// Performs parallel reduction based on given operator
1235 : template<class Base>
1236 : class ParallelReductionVectorRef : public Base
1237 : {
1238 : public:
1239 0 : ParallelReductionVectorRef( const ScCalcConfig& config, const std::string& s,
1240 : FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase>& CodeGen,
1241 : int index = 0 ) :
1242 0 : Base(config, s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
1243 : {
1244 0 : FormulaToken* t = ft->GetFormulaToken();
1245 0 : if (t->GetType() != formula::svDoubleVectorRef)
1246 0 : throw Unhandled();
1247 0 : mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
1248 0 : bIsStartFixed = mpDVR->IsStartFixed();
1249 0 : bIsEndFixed = mpDVR->IsEndFixed();
1250 0 : }
1251 : /// Emit the definition for the auxiliary reduction kernel
1252 0 : virtual void GenSlidingWindowFunction( std::stringstream& ss )
1253 : {
1254 0 : if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
1255 : {
1256 0 : std::string name = Base::GetName();
1257 0 : ss << "__kernel void " << name;
1258 0 : ss << "_reduction(__global double* A, "
1259 : "__global double *result,int arrayLength,int windowSize){\n";
1260 0 : ss << " double tmp, current_result =" <<
1261 0 : mpCodeGen->GetBottom();
1262 0 : ss << ";\n";
1263 0 : ss << " int writePos = get_group_id(1);\n";
1264 0 : ss << " int lidx = get_local_id(0);\n";
1265 0 : ss << " __local double shm_buf[256];\n";
1266 0 : if (mpDVR->IsStartFixed())
1267 0 : ss << " int offset = 0;\n";
1268 : else // if (!mpDVR->IsStartFixed())
1269 0 : ss << " int offset = get_group_id(1);\n";
1270 0 : if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1271 0 : ss << " int end = windowSize;\n";
1272 0 : else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1273 0 : ss << " int end = offset + windowSize;\n";
1274 0 : else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1275 0 : ss << " int end = windowSize + get_group_id(1);\n";
1276 0 : else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1277 0 : ss << " int end = windowSize;\n";
1278 0 : ss << " end = min(end, arrayLength);\n";
1279 :
1280 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1281 0 : ss << " int loop = arrayLength/512 + 1;\n";
1282 0 : ss << " for (int l=0; l<loop; l++){\n";
1283 0 : ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
1284 0 : ss << " int loopOffset = l*512;\n";
1285 0 : ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
1286 0 : ss << " tmp = legalize(" << mpCodeGen->Gen2(
1287 0 : "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
1288 0 : ss << " tmp = legalize(" << mpCodeGen->Gen2(
1289 0 : "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
1290 0 : ss << " } else if ((loopOffset + lidx + offset) < end)\n";
1291 0 : ss << " tmp = legalize(" << mpCodeGen->Gen2(
1292 0 : "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
1293 0 : ss << " shm_buf[lidx] = tmp;\n";
1294 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1295 0 : ss << " for (int i = 128; i >0; i/=2) {\n";
1296 0 : ss << " if (lidx < i)\n";
1297 0 : ss << " shm_buf[lidx] = ";
1298 : // Special case count
1299 0 : if (dynamic_cast<OpCount*>(mpCodeGen.get()))
1300 0 : ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
1301 : else
1302 0 : ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
1303 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1304 0 : ss << " }\n";
1305 0 : ss << " if (lidx == 0)\n";
1306 0 : ss << " current_result =";
1307 0 : if (dynamic_cast<OpCount*>(mpCodeGen.get()))
1308 0 : ss << "current_result + shm_buf[0]";
1309 : else
1310 0 : ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
1311 0 : ss << ";\n";
1312 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1313 0 : ss << " }\n";
1314 0 : ss << " if (lidx == 0)\n";
1315 0 : ss << " result[writePos] = current_result;\n";
1316 0 : ss << "}\n";
1317 : }
1318 : else
1319 : {
1320 0 : std::string name = Base::GetName();
1321 : /*sum reduction*/
1322 0 : ss << "__kernel void " << name << "_sum";
1323 0 : ss << "_reduction(__global double* A, "
1324 : "__global double *result,int arrayLength,int windowSize){\n";
1325 0 : ss << " double tmp, current_result =" <<
1326 0 : mpCodeGen->GetBottom();
1327 0 : ss << ";\n";
1328 0 : ss << " int writePos = get_group_id(1);\n";
1329 0 : ss << " int lidx = get_local_id(0);\n";
1330 0 : ss << " __local double shm_buf[256];\n";
1331 0 : if (mpDVR->IsStartFixed())
1332 0 : ss << " int offset = 0;\n";
1333 : else // if (!mpDVR->IsStartFixed())
1334 0 : ss << " int offset = get_group_id(1);\n";
1335 0 : if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1336 0 : ss << " int end = windowSize;\n";
1337 0 : else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1338 0 : ss << " int end = offset + windowSize;\n";
1339 0 : else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1340 0 : ss << " int end = windowSize + get_group_id(1);\n";
1341 0 : else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1342 0 : ss << " int end = windowSize;\n";
1343 0 : ss << " end = min(end, arrayLength);\n";
1344 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1345 0 : ss << " int loop = arrayLength/512 + 1;\n";
1346 0 : ss << " for (int l=0; l<loop; l++){\n";
1347 0 : ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
1348 0 : ss << " int loopOffset = l*512;\n";
1349 0 : ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
1350 0 : ss << " tmp = legalize(";
1351 0 : ss << "(A[loopOffset + lidx + offset]+ tmp)";
1352 0 : ss << ", tmp);\n";
1353 0 : ss << " tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
1354 0 : ss << ", tmp);\n";
1355 0 : ss << " } else if ((loopOffset + lidx + offset) < end)\n";
1356 0 : ss << " tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
1357 0 : ss << ", tmp);\n";
1358 0 : ss << " shm_buf[lidx] = tmp;\n";
1359 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1360 0 : ss << " for (int i = 128; i >0; i/=2) {\n";
1361 0 : ss << " if (lidx < i)\n";
1362 0 : ss << " shm_buf[lidx] = ";
1363 0 : ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
1364 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1365 0 : ss << " }\n";
1366 0 : ss << " if (lidx == 0)\n";
1367 0 : ss << " current_result =";
1368 0 : ss << "current_result + shm_buf[0]";
1369 0 : ss << ";\n";
1370 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1371 0 : ss << " }\n";
1372 0 : ss << " if (lidx == 0)\n";
1373 0 : ss << " result[writePos] = current_result;\n";
1374 0 : ss << "}\n";
1375 : /*count reduction*/
1376 0 : ss << "__kernel void " << name << "_count";
1377 0 : ss << "_reduction(__global double* A, "
1378 : "__global double *result,int arrayLength,int windowSize){\n";
1379 0 : ss << " double tmp, current_result =" <<
1380 0 : mpCodeGen->GetBottom();
1381 0 : ss << ";\n";
1382 0 : ss << " int writePos = get_group_id(1);\n";
1383 0 : ss << " int lidx = get_local_id(0);\n";
1384 0 : ss << " __local double shm_buf[256];\n";
1385 0 : if (mpDVR->IsStartFixed())
1386 0 : ss << " int offset = 0;\n";
1387 : else // if (!mpDVR->IsStartFixed())
1388 0 : ss << " int offset = get_group_id(1);\n";
1389 0 : if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1390 0 : ss << " int end = windowSize;\n";
1391 0 : else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1392 0 : ss << " int end = offset + windowSize;\n";
1393 0 : else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1394 0 : ss << " int end = windowSize + get_group_id(1);\n";
1395 0 : else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1396 0 : ss << " int end = windowSize;\n";
1397 0 : ss << " end = min(end, arrayLength);\n";
1398 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1399 0 : ss << " int loop = arrayLength/512 + 1;\n";
1400 0 : ss << " for (int l=0; l<loop; l++){\n";
1401 0 : ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
1402 0 : ss << " int loopOffset = l*512;\n";
1403 0 : ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
1404 0 : ss << " tmp = legalize((isNan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
1405 0 : ss << ", tmp);\n";
1406 0 : ss << " tmp = legalize((isNan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
1407 0 : ss << ", tmp);\n";
1408 0 : ss << " } else if ((loopOffset + lidx + offset) < end)\n";
1409 0 : ss << " tmp = legalize((isNan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
1410 0 : ss << ", tmp);\n";
1411 0 : ss << " shm_buf[lidx] = tmp;\n";
1412 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1413 0 : ss << " for (int i = 128; i >0; i/=2) {\n";
1414 0 : ss << " if (lidx < i)\n";
1415 0 : ss << " shm_buf[lidx] = ";
1416 0 : ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
1417 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1418 0 : ss << " }\n";
1419 0 : ss << " if (lidx == 0)\n";
1420 0 : ss << " current_result =";
1421 0 : ss << "current_result + shm_buf[0];";
1422 0 : ss << ";\n";
1423 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1424 0 : ss << " }\n";
1425 0 : ss << " if (lidx == 0)\n";
1426 0 : ss << " result[writePos] = current_result;\n";
1427 0 : ss << "}\n";
1428 : }
1429 :
1430 0 : }
1431 0 : virtual std::string GenSlidingWindowDeclRef( bool = false ) const
1432 : {
1433 0 : std::stringstream ss;
1434 0 : if (!bIsStartFixed && !bIsEndFixed)
1435 0 : ss << Base::GetName() << "[i + gid0]";
1436 : else
1437 0 : ss << Base::GetName() << "[i]";
1438 0 : return ss.str();
1439 : }
1440 : /// Controls how the elements in the DoubleVectorRef are traversed
1441 0 : virtual size_t GenReductionLoopHeader(
1442 : std::stringstream& ss, int nResultSize, bool& needBody )
1443 : {
1444 : assert(mpDVR);
1445 0 : size_t nCurWindowSize = mpDVR->GetRefRowSize();
1446 0 : std::string temp = Base::GetName() + "[gid0]";
1447 0 : ss << "tmp = ";
1448 : // Special case count
1449 0 : if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
1450 : {
1451 0 : ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
1452 0 : ss << "nCount = nCount-1;\n";
1453 0 : ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
1454 0 : ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n";
1455 : }
1456 0 : else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
1457 0 : ss << temp << "+ tmp";
1458 : else
1459 0 : ss << mpCodeGen->Gen2(temp, "tmp");
1460 0 : ss << ";\n\t";
1461 0 : needBody = false;
1462 0 : return nCurWindowSize;
1463 : }
1464 :
1465 0 : virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
1466 : {
1467 : assert(Base::mpClmem == NULL);
1468 : // Obtain cl context
1469 : ::opencl::KernelEnv kEnv;
1470 0 : ::opencl::setKernelEnv(&kEnv);
1471 : cl_int err;
1472 0 : size_t nInput = mpDVR->GetArrayLength();
1473 0 : size_t nCurWindowSize = mpDVR->GetRefRowSize();
1474 : // create clmem buffer
1475 0 : if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == NULL)
1476 0 : throw Unhandled();
1477 : double* pHostBuffer = const_cast<double*>(
1478 0 : mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
1479 0 : size_t szHostBuffer = nInput * sizeof(double);
1480 0 : Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
1481 : (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
1482 : szHostBuffer,
1483 0 : pHostBuffer, &err);
1484 : SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
1485 :
1486 0 : mpClmem2 = clCreateBuffer(kEnv.mpkContext,
1487 : CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
1488 0 : sizeof(double) * w, NULL, NULL);
1489 0 : if (CL_SUCCESS != err)
1490 0 : throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
1491 : SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
1492 :
1493 : // reproduce the reduction function name
1494 0 : std::string kernelName;
1495 0 : if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
1496 0 : kernelName = Base::GetName() + "_reduction";
1497 : else
1498 0 : kernelName = Base::GetName() + "_sum_reduction";
1499 0 : cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
1500 0 : if (err != CL_SUCCESS)
1501 0 : throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
1502 : SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
1503 :
1504 : // set kernel arg of reduction kernel
1505 : // TODO(Wei Wei): use unique name for kernel
1506 0 : cl_mem buf = Base::GetCLBuffer();
1507 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
1508 0 : err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
1509 0 : static_cast<void*>(&buf));
1510 0 : if (CL_SUCCESS != err)
1511 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1512 :
1513 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
1514 0 : err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
1515 0 : if (CL_SUCCESS != err)
1516 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1517 :
1518 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
1519 0 : err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
1520 0 : if (CL_SUCCESS != err)
1521 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1522 :
1523 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
1524 0 : err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
1525 0 : if (CL_SUCCESS != err)
1526 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1527 :
1528 : // set work group size and execute
1529 0 : size_t global_work_size[] = { 256, (size_t)w };
1530 0 : size_t local_work_size[] = { 256, 1 };
1531 : SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
1532 0 : err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
1533 0 : global_work_size, local_work_size, 0, NULL, NULL);
1534 0 : if (CL_SUCCESS != err)
1535 0 : throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
1536 0 : err = clFinish(kEnv.mpkCmdQueue);
1537 0 : if (CL_SUCCESS != err)
1538 0 : throw OpenCLError("clFinish", err, __FILE__, __LINE__);
1539 0 : if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
1540 : {
1541 : /*average need more reduction kernel for count computing*/
1542 0 : boost::scoped_array<double> pAllBuffer(new double[2 * w]);
1543 : double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
1544 : mpClmem2,
1545 : CL_TRUE, CL_MAP_READ, 0,
1546 : sizeof(double) * w, 0, NULL, NULL,
1547 0 : &err));
1548 0 : if (err != CL_SUCCESS)
1549 0 : throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
1550 :
1551 0 : for (int i = 0; i < w; i++)
1552 0 : pAllBuffer[i] = resbuf[i];
1553 0 : err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, NULL, NULL);
1554 0 : if (err != CL_SUCCESS)
1555 0 : throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
1556 :
1557 0 : kernelName = Base::GetName() + "_count_reduction";
1558 0 : redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
1559 0 : if (err != CL_SUCCESS)
1560 0 : throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
1561 : SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
1562 :
1563 : // set kernel arg of reduction kernel
1564 0 : buf = Base::GetCLBuffer();
1565 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
1566 0 : err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
1567 0 : static_cast<void*>(&buf));
1568 0 : if (CL_SUCCESS != err)
1569 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1570 :
1571 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
1572 0 : err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
1573 0 : if (CL_SUCCESS != err)
1574 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1575 :
1576 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
1577 0 : err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
1578 0 : if (CL_SUCCESS != err)
1579 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1580 :
1581 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
1582 0 : err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
1583 0 : if (CL_SUCCESS != err)
1584 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1585 :
1586 : // set work group size and execute
1587 0 : size_t global_work_size1[] = { 256, (size_t)w };
1588 0 : size_t local_work_size1[] = { 256, 1 };
1589 : SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
1590 0 : err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
1591 0 : global_work_size1, local_work_size1, 0, NULL, NULL);
1592 0 : if (CL_SUCCESS != err)
1593 0 : throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
1594 0 : err = clFinish(kEnv.mpkCmdQueue);
1595 0 : if (CL_SUCCESS != err)
1596 0 : throw OpenCLError("clFinish", err, __FILE__, __LINE__);
1597 0 : resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
1598 : mpClmem2,
1599 : CL_TRUE, CL_MAP_READ, 0,
1600 : sizeof(double) * w, 0, NULL, NULL,
1601 0 : &err));
1602 0 : if (err != CL_SUCCESS)
1603 0 : throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
1604 0 : for (int i = 0; i < w; i++)
1605 0 : pAllBuffer[i + w] = resbuf[i];
1606 0 : err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, NULL, NULL);
1607 : // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
1608 0 : if (CL_SUCCESS != err)
1609 : SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << ::opencl::errorString(err));
1610 0 : if (mpClmem2)
1611 : {
1612 0 : err = clReleaseMemObject(mpClmem2);
1613 : SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
1614 0 : mpClmem2 = NULL;
1615 : }
1616 0 : mpClmem2 = clCreateBuffer(kEnv.mpkContext,
1617 : (cl_mem_flags)CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1618 0 : w * sizeof(double) * 2, pAllBuffer.get(), &err);
1619 0 : if (CL_SUCCESS != err)
1620 0 : throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
1621 0 : SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
1622 : }
1623 : // set kernel arg
1624 : SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
1625 0 : err = clSetKernelArg(k, argno, sizeof(cl_mem), &(mpClmem2));
1626 0 : if (CL_SUCCESS != err)
1627 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1628 0 : return 1;
1629 : }
1630 0 : ~ParallelReductionVectorRef()
1631 : {
1632 0 : if (mpClmem2)
1633 : {
1634 : cl_int err;
1635 0 : err = clReleaseMemObject(mpClmem2);
1636 : SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
1637 0 : mpClmem2 = NULL;
1638 : }
1639 0 : }
1640 :
1641 : size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
1642 :
1643 0 : size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
1644 :
1645 : size_t GetStartFixed() const { return bIsStartFixed; }
1646 :
1647 : size_t GetEndFixed() const { return bIsEndFixed; }
1648 :
1649 : protected:
1650 : bool bIsStartFixed, bIsEndFixed;
1651 : const formula::DoubleVectorRefToken* mpDVR;
1652 : // from parent nodes
1653 : boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
1654 : // controls whether to invoke the reduction kernel during marshaling or not
1655 : cl_mem mpClmem2;
1656 : };
1657 :
1658 0 : class Reduction : public SlidingFunctionBase
1659 : {
1660 : int mnResultSize;
1661 : public:
1662 0 : Reduction( int nResultSize ) : mnResultSize(nResultSize) {}
1663 :
1664 : typedef DynamicKernelSlidingArgument<VectorRef> NumericRange;
1665 : typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange;
1666 : typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange;
1667 :
1668 0 : virtual bool HandleNaNArgument( std::stringstream&, unsigned, SubArguments& ) const
1669 : {
1670 0 : return false;
1671 : }
1672 :
1673 0 : virtual void GenSlidingWindowFunction( std::stringstream& ss,
1674 : const std::string& sSymName, SubArguments& vSubArguments ) SAL_OVERRIDE
1675 : {
1676 0 : ss << "\ndouble " << sSymName;
1677 0 : ss << "_" << BinFuncName() << "(";
1678 0 : for (size_t i = 0; i < vSubArguments.size(); i++)
1679 : {
1680 0 : if (i)
1681 0 : ss << ", ";
1682 0 : vSubArguments[i]->GenSlidingWindowDecl(ss);
1683 : }
1684 0 : ss << ") {\n";
1685 0 : ss << "double tmp = " << GetBottom() << ";\n";
1686 0 : ss << "int gid0 = get_global_id(0);\n";
1687 0 : if (isAverage())
1688 0 : ss << "int nCount = 0;\n";
1689 0 : ss << "double tmpBottom;\n";
1690 0 : unsigned i = vSubArguments.size();
1691 0 : while (i--)
1692 : {
1693 0 : if (NumericRange* NR =
1694 0 : dynamic_cast<NumericRange*>(vSubArguments[i].get()))
1695 : {
1696 0 : bool needBody; NR->GenReductionLoopHeader(ss, needBody); if (!needBody)
1697 0 : continue;
1698 : }
1699 0 : else if (ParallelNumericRange* PNR =
1700 0 : dynamic_cast<ParallelNumericRange*>(vSubArguments[i].get()))
1701 : {
1702 : //did not handle yet
1703 0 : bool bNeedBody = false;
1704 0 : PNR->GenReductionLoopHeader(ss, mnResultSize, bNeedBody);
1705 0 : if (!bNeedBody)
1706 0 : continue;
1707 : }
1708 0 : else if (StringRange* SR =
1709 0 : dynamic_cast<StringRange*>(vSubArguments[i].get()))
1710 : {
1711 : //did not handle yet
1712 : bool needBody;
1713 0 : SR->GenReductionLoopHeader(ss, needBody);
1714 0 : if (!needBody)
1715 0 : continue;
1716 : }
1717 : else
1718 : {
1719 0 : FormulaToken* pCur = vSubArguments[i]->GetFormulaToken();
1720 : assert(pCur);
1721 : assert(pCur->GetType() != formula::svDoubleVectorRef);
1722 :
1723 0 : if (pCur->GetType() == formula::svSingleVectorRef)
1724 : {
1725 : const formula::SingleVectorRefToken* pSVR =
1726 0 : static_cast<const formula::SingleVectorRefToken*>(pCur);
1727 0 : ss << "if (gid0 < " << pSVR->GetArrayLength() << "){\n";
1728 : }
1729 0 : else if (pCur->GetType() == formula::svDouble)
1730 : {
1731 0 : ss << "{\n";
1732 : }
1733 : }
1734 0 : if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1735 : {
1736 0 : bool bNanHandled = HandleNaNArgument(ss, i, vSubArguments);
1737 :
1738 0 : ss << "tmpBottom = " << GetBottom() << ";\n";
1739 :
1740 0 : if (!bNanHandled)
1741 : {
1742 0 : ss << "if (isNan(";
1743 0 : ss << vSubArguments[i]->GenSlidingWindowDeclRef();
1744 0 : ss << "))\n";
1745 0 : if (ZeroReturnZero())
1746 0 : ss << " return 0;\n";
1747 : else
1748 : {
1749 0 : ss << " tmp = ";
1750 0 : ss << Gen2("tmpBottom", "tmp") << ";\n";
1751 : }
1752 0 : ss << "else\n";
1753 : }
1754 0 : ss << "{";
1755 0 : ss << " tmp = ";
1756 0 : ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1757 0 : ss << ";\n";
1758 0 : ss << " }\n";
1759 0 : ss << "}\n";
1760 0 : if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1761 0 : formula::svSingleVectorRef && ZeroReturnZero())
1762 : {
1763 0 : ss << "else{\n";
1764 0 : ss << " return 0;\n";
1765 0 : ss << " }\n";
1766 : }
1767 : }
1768 : else
1769 : {
1770 0 : ss << "tmp = ";
1771 0 : ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1772 0 : ss << ";\n";
1773 : }
1774 : }
1775 0 : ss << "return tmp";
1776 0 : if (isAverage())
1777 0 : ss << "*pow((double)nCount,-1.0)";
1778 0 : ss << ";\n}";
1779 0 : }
1780 0 : virtual bool isAverage() const { return false; }
1781 0 : virtual bool takeString() const SAL_OVERRIDE { return false; }
1782 0 : virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
1783 : };
1784 :
1785 : // Strictly binary operators
1786 0 : class Binary : public SlidingFunctionBase
1787 : {
1788 : public:
1789 0 : virtual void GenSlidingWindowFunction( std::stringstream& ss,
1790 : const std::string& sSymName, SubArguments& vSubArguments ) SAL_OVERRIDE
1791 : {
1792 0 : ss << "\ndouble " << sSymName;
1793 0 : ss << "_" << BinFuncName() << "(";
1794 : assert(vSubArguments.size() == 2);
1795 0 : for (size_t i = 0; i < vSubArguments.size(); i++)
1796 : {
1797 0 : if (i)
1798 0 : ss << ", ";
1799 0 : vSubArguments[i]->GenSlidingWindowDecl(ss);
1800 : }
1801 0 : ss << ") {\n\t";
1802 0 : ss << "int gid0 = get_global_id(0), i = 0;\n\t";
1803 0 : ss << "double tmp = ";
1804 0 : ss << Gen2(vSubArguments[0]->GenSlidingWindowDeclRef(false),
1805 0 : vSubArguments[1]->GenSlidingWindowDeclRef(false)) << ";\n\t";
1806 0 : ss << "return tmp;\n}";
1807 0 : }
1808 0 : virtual bool takeString() const SAL_OVERRIDE { return true; }
1809 0 : virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
1810 : };
1811 :
1812 0 : class SumOfProduct : public SlidingFunctionBase
1813 : {
1814 : public:
1815 0 : virtual void GenSlidingWindowFunction( std::stringstream& ss,
1816 : const std::string& sSymName, SubArguments& vSubArguments ) SAL_OVERRIDE
1817 : {
1818 0 : size_t nCurWindowSize = 0;
1819 0 : FormulaToken* tmpCur = NULL;
1820 0 : const formula::DoubleVectorRefToken* pCurDVR = NULL;
1821 0 : ss << "\ndouble " << sSymName;
1822 0 : ss << "_" << BinFuncName() << "(";
1823 0 : for (size_t i = 0; i < vSubArguments.size(); i++)
1824 : {
1825 0 : if (i)
1826 0 : ss << ",";
1827 0 : vSubArguments[i]->GenSlidingWindowDecl(ss);
1828 0 : size_t nCurChildWindowSize = vSubArguments[i]->GetWindowSize();
1829 : nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
1830 0 : nCurChildWindowSize : nCurWindowSize;
1831 0 : tmpCur = vSubArguments[i]->GetFormulaToken();
1832 0 : if (ocPush == tmpCur->GetOpCode())
1833 : {
1834 :
1835 0 : pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1836 0 : if (!
1837 0 : ((!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1838 0 : || (pCurDVR->IsStartFixed() && pCurDVR->IsEndFixed()))
1839 : )
1840 0 : throw Unhandled();
1841 : }
1842 : }
1843 0 : ss << ") {\n";
1844 0 : ss << " double tmp = 0.0;\n";
1845 0 : ss << " int gid0 = get_global_id(0);\n";
1846 :
1847 0 : ss << "\tint i;\n\t";
1848 0 : ss << "int currentCount0;\n";
1849 0 : for (unsigned i = 0; i < vSubArguments.size() - 1; i++)
1850 0 : ss << "int currentCount" << i + 1 << ";\n";
1851 0 : std::stringstream temp3, temp4;
1852 0 : int outLoopSize = UNROLLING_FACTOR;
1853 0 : if (nCurWindowSize / outLoopSize != 0)
1854 : {
1855 0 : ss << "for(int outLoop=0; outLoop<" <<
1856 0 : nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1857 0 : for (int count = 0; count < outLoopSize; count++)
1858 : {
1859 0 : ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n";
1860 0 : if (count == 0)
1861 : {
1862 0 : for (size_t i = 0; i < vSubArguments.size(); i++)
1863 : {
1864 0 : tmpCur = vSubArguments[i]->GetFormulaToken();
1865 0 : if (ocPush == tmpCur->GetOpCode())
1866 : {
1867 0 : pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1868 0 : if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1869 : {
1870 0 : temp3 << " currentCount";
1871 0 : temp3 << i;
1872 0 : temp3 << " =i+gid0+1;\n";
1873 : }
1874 : else
1875 : {
1876 0 : temp3 << " currentCount";
1877 0 : temp3 << i;
1878 0 : temp3 << " =i+1;\n";
1879 : }
1880 : }
1881 : }
1882 :
1883 0 : temp3 << "tmp = fsum(";
1884 0 : for (size_t i = 0; i < vSubArguments.size(); i++)
1885 : {
1886 0 : if (i)
1887 0 : temp3 << "*";
1888 0 : if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1889 : {
1890 0 : temp3 << "(";
1891 0 : temp3 << "(currentCount";
1892 0 : temp3 << i;
1893 0 : temp3 << ">";
1894 0 : if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1895 : formula::svSingleVectorRef)
1896 : {
1897 : const formula::SingleVectorRefToken* pSVR =
1898 : static_cast<const formula::SingleVectorRefToken*>
1899 0 : (vSubArguments[i]->GetFormulaToken());
1900 0 : temp3 << pSVR->GetArrayLength();
1901 0 : temp3 << ")||isNan(" << vSubArguments[i]
1902 0 : ->GenSlidingWindowDeclRef();
1903 0 : temp3 << ")?0:";
1904 0 : temp3 << vSubArguments[i]->GenSlidingWindowDeclRef();
1905 0 : temp3 << ")";
1906 : }
1907 0 : else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1908 : formula::svDoubleVectorRef)
1909 : {
1910 : const formula::DoubleVectorRefToken* pSVR =
1911 : static_cast<const formula::DoubleVectorRefToken*>
1912 0 : (vSubArguments[i]->GetFormulaToken());
1913 0 : temp3 << pSVR->GetArrayLength();
1914 0 : temp3 << ")||isNan(" << vSubArguments[i]
1915 0 : ->GenSlidingWindowDeclRef(true);
1916 0 : temp3 << ")?0:";
1917 0 : temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1918 0 : temp3 << ")";
1919 : }
1920 :
1921 : }
1922 : else
1923 0 : temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1924 : }
1925 0 : temp3 << ", tmp);\n\t";
1926 : }
1927 0 : ss << temp3.str();
1928 : }
1929 0 : ss << "}\n\t";
1930 : }
1931 : //The residual of mod outLoopSize
1932 0 : for (size_t count = nCurWindowSize / outLoopSize * outLoopSize;
1933 : count < nCurWindowSize; count++)
1934 : {
1935 0 : ss << "i =" << count << ";\n";
1936 0 : if (count == nCurWindowSize / outLoopSize * outLoopSize)
1937 : {
1938 0 : for (size_t i = 0; i < vSubArguments.size(); i++)
1939 : {
1940 0 : tmpCur = vSubArguments[i]->GetFormulaToken();
1941 0 : if (ocPush == tmpCur->GetOpCode())
1942 : {
1943 0 : pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1944 0 : if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1945 : {
1946 0 : temp4 << " currentCount";
1947 0 : temp4 << i;
1948 0 : temp4 << " =i+gid0+1;\n";
1949 : }
1950 : else
1951 : {
1952 0 : temp4 << " currentCount";
1953 0 : temp4 << i;
1954 0 : temp4 << " =i+1;\n";
1955 : }
1956 : }
1957 : }
1958 :
1959 0 : temp4 << "tmp = fsum(";
1960 0 : for (size_t i = 0; i < vSubArguments.size(); i++)
1961 : {
1962 0 : if (i)
1963 0 : temp4 << "*";
1964 0 : if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1965 : {
1966 0 : temp4 << "(";
1967 0 : temp4 << "(currentCount";
1968 0 : temp4 << i;
1969 0 : temp4 << ">";
1970 0 : if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1971 : formula::svSingleVectorRef)
1972 : {
1973 : const formula::SingleVectorRefToken* pSVR =
1974 : static_cast<const formula::SingleVectorRefToken*>
1975 0 : (vSubArguments[i]->GetFormulaToken());
1976 0 : temp4 << pSVR->GetArrayLength();
1977 0 : temp4 << ")||isNan(" << vSubArguments[i]
1978 0 : ->GenSlidingWindowDeclRef();
1979 0 : temp4 << ")?0:";
1980 0 : temp4 << vSubArguments[i]->GenSlidingWindowDeclRef();
1981 0 : temp4 << ")";
1982 : }
1983 0 : else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1984 : formula::svDoubleVectorRef)
1985 : {
1986 : const formula::DoubleVectorRefToken* pSVR =
1987 : static_cast<const formula::DoubleVectorRefToken*>
1988 0 : (vSubArguments[i]->GetFormulaToken());
1989 0 : temp4 << pSVR->GetArrayLength();
1990 0 : temp4 << ")||isNan(" << vSubArguments[i]
1991 0 : ->GenSlidingWindowDeclRef(true);
1992 0 : temp4 << ")?0:";
1993 0 : temp4 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1994 0 : temp4 << ")";
1995 : }
1996 :
1997 : }
1998 : else
1999 : {
2000 0 : temp4 << vSubArguments[i]
2001 0 : ->GenSlidingWindowDeclRef(true);
2002 : }
2003 : }
2004 0 : temp4 << ", tmp);\n\t";
2005 : }
2006 0 : ss << temp4.str();
2007 : }
2008 0 : ss << "return tmp;\n";
2009 0 : ss << "}";
2010 0 : }
2011 0 : virtual bool takeString() const SAL_OVERRIDE { return false; }
2012 0 : virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
2013 : };
2014 :
2015 : /// operator traits
2016 0 : class OpNop : public Reduction
2017 : {
2018 : public:
2019 0 : OpNop( int nResultSize ) : Reduction(nResultSize) {}
2020 :
2021 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2022 0 : virtual std::string Gen2( const std::string& lhs, const std::string& ) const SAL_OVERRIDE
2023 : {
2024 0 : return lhs;
2025 : }
2026 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "nop"; }
2027 : };
2028 :
2029 0 : class OpCount : public Reduction
2030 : {
2031 : public:
2032 0 : OpCount( int nResultSize ) : Reduction(nResultSize) {}
2033 :
2034 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2035 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2036 : {
2037 0 : std::stringstream ss;
2038 0 : ss << "(isNan(" << lhs << ")?" << rhs << ":" << rhs << "+1.0)";
2039 0 : return ss.str();
2040 : }
2041 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "fcount"; }
2042 : };
2043 :
2044 0 : class OpEqual : public Binary
2045 : {
2046 : public:
2047 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2048 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2049 : {
2050 0 : std::stringstream ss;
2051 0 : ss << "strequal(" << lhs << "," << rhs << ")";
2052 0 : return ss.str();
2053 : }
2054 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "eq"; }
2055 : };
2056 :
2057 0 : class OpLessEqual : public Binary
2058 : {
2059 : public:
2060 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2061 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2062 : {
2063 0 : std::stringstream ss;
2064 0 : ss << "(" << lhs << "<=" << rhs << ")";
2065 0 : return ss.str();
2066 : }
2067 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "leq"; }
2068 : };
2069 :
2070 0 : class OpLess : public Binary
2071 : {
2072 : public:
2073 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2074 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2075 : {
2076 0 : std::stringstream ss;
2077 0 : ss << "(" << lhs << "<" << rhs << ")";
2078 0 : return ss.str();
2079 : }
2080 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "less"; }
2081 : };
2082 :
2083 0 : class OpGreater : public Binary
2084 : {
2085 : public:
2086 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2087 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2088 : {
2089 0 : std::stringstream ss;
2090 0 : ss << "(" << lhs << ">" << rhs << ")";
2091 0 : return ss.str();
2092 : }
2093 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "gt"; }
2094 : };
2095 :
2096 0 : class OpSum : public Reduction
2097 : {
2098 : public:
2099 0 : OpSum( int nResultSize ) : Reduction(nResultSize) {}
2100 :
2101 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2102 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2103 : {
2104 0 : std::stringstream ss;
2105 0 : ss << "((" << lhs << ")+(" << rhs << "))";
2106 0 : return ss.str();
2107 : }
2108 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsum"; }
2109 : };
2110 :
2111 0 : class OpAverage : public Reduction
2112 : {
2113 : public:
2114 0 : OpAverage( int nResultSize ) : Reduction(nResultSize) {}
2115 :
2116 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2117 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2118 : {
2119 0 : std::stringstream ss;
2120 0 : ss << "fsum_count(" << lhs << "," << rhs << ", &nCount)";
2121 0 : return ss.str();
2122 : }
2123 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsum"; }
2124 0 : virtual bool isAverage() const SAL_OVERRIDE { return true; }
2125 : };
2126 :
2127 0 : class OpSub : public Reduction
2128 : {
2129 : public:
2130 0 : OpSub( int nResultSize ) : Reduction(nResultSize) {}
2131 :
2132 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2133 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2134 : {
2135 0 : return lhs + "-" + rhs;
2136 : }
2137 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsub"; }
2138 : };
2139 :
2140 0 : class OpMul : public Reduction
2141 : {
2142 : public:
2143 0 : OpMul( int nResultSize ) : Reduction(nResultSize) {}
2144 :
2145 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "1"; }
2146 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2147 : {
2148 0 : return lhs + "*" + rhs;
2149 : }
2150 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "fmul"; }
2151 0 : virtual bool ZeroReturnZero() SAL_OVERRIDE { return true; }
2152 : };
2153 :
2154 : /// Technically not a reduction, but fits the framework.
2155 0 : class OpDiv : public Reduction
2156 : {
2157 : public:
2158 0 : OpDiv( int nResultSize ) : Reduction(nResultSize) {}
2159 :
2160 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "1.0"; }
2161 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2162 : {
2163 0 : return "(" + rhs + "==0 ? CreateDoubleError(errDivisionByZero) : (" + lhs + "/" + rhs + ") )";
2164 : }
2165 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "fdiv"; }
2166 :
2167 0 : virtual bool HandleNaNArgument( std::stringstream& ss, unsigned argno, SubArguments& vSubArguments ) const SAL_OVERRIDE
2168 : {
2169 0 : if (argno == 1)
2170 : {
2171 : ss <<
2172 0 : "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ")) {\n"
2173 0 : " if (GetDoubleErrorValue(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ") == errNoValue)\n"
2174 : " return CreateDoubleError(errDivisionByZero);\n"
2175 0 : "}\n";
2176 0 : return true;
2177 : }
2178 0 : else if (argno == 0)
2179 : {
2180 : ss <<
2181 0 : "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ")) {\n"
2182 0 : " if (GetDoubleErrorValue(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ") == errNoValue) {\n"
2183 0 : " if (" << vSubArguments[1]->GenSlidingWindowDeclRef() << " == 0)\n"
2184 : " return CreateDoubleError(errDivisionByZero);\n"
2185 : " return 0;\n"
2186 : " }\n"
2187 0 : "}\n";
2188 : }
2189 0 : return false;
2190 : }
2191 :
2192 : };
2193 :
2194 0 : class OpMin : public Reduction
2195 : {
2196 : public:
2197 0 : OpMin( int nResultSize ) : Reduction(nResultSize) {}
2198 :
2199 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "MAXFLOAT"; }
2200 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2201 : {
2202 0 : return "fmin(" + lhs + "," + rhs + ")";
2203 : }
2204 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "min"; }
2205 : };
2206 :
2207 0 : class OpMax : public Reduction
2208 : {
2209 : public:
2210 0 : OpMax( int nResultSize ) : Reduction(nResultSize) {}
2211 :
2212 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "-MAXFLOAT"; }
2213 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2214 : {
2215 0 : return "fmax(" + lhs + "," + rhs + ")";
2216 : }
2217 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "max"; }
2218 : };
2219 :
2220 0 : class OpSumProduct : public SumOfProduct
2221 : {
2222 : public:
2223 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2224 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2225 : {
2226 0 : return lhs + "*" + rhs;
2227 : }
2228 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsop"; }
2229 : };
2230 : namespace {
2231 : struct SumIfsArgs
2232 : {
2233 0 : SumIfsArgs( cl_mem x ) : mCLMem(x), mConst(0.0) { }
2234 0 : SumIfsArgs( double x ) : mCLMem(NULL), mConst(x) { }
2235 : cl_mem mCLMem;
2236 : double mConst;
2237 : };
2238 : }
2239 :
2240 : /// Helper functions that have multiple buffers
2241 : class DynamicKernelSoPArguments : public DynamicKernelArgument
2242 : {
2243 : public:
2244 : typedef std::vector<DynamicKernelArgumentRef> SubArgumentsType;
2245 :
2246 : DynamicKernelSoPArguments( const ScCalcConfig& config,
2247 : const std::string& s, const FormulaTreeNodeRef& ft,
2248 : SlidingFunctionBase* pCodeGen, int nResultSize );
2249 :
2250 : /// Create buffer and pass the buffer to a given kernel
2251 0 : virtual size_t Marshal( cl_kernel k, int argno, int nVectorWidth, cl_program pProgram ) SAL_OVERRIDE
2252 : {
2253 0 : unsigned i = 0;
2254 0 : for (SubArgumentsType::iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2255 : ++it)
2256 : {
2257 0 : i += (*it)->Marshal(k, argno + i, nVectorWidth, pProgram);
2258 : }
2259 0 : if (OpGeoMean* OpSumCodeGen = dynamic_cast<OpGeoMean*>(mpCodeGen.get()))
2260 : {
2261 : // Obtain cl context
2262 : ::opencl::KernelEnv kEnv;
2263 0 : ::opencl::setKernelEnv(&kEnv);
2264 : cl_int err;
2265 : cl_mem pClmem2;
2266 :
2267 0 : if (OpSumCodeGen->NeedReductionKernel())
2268 : {
2269 0 : std::vector<cl_mem> vclmem;
2270 0 : for (SubArgumentsType::iterator it = mvSubArguments.begin(),
2271 0 : e = mvSubArguments.end(); it != e; ++it)
2272 : {
2273 0 : if (VectorRef* VR = dynamic_cast<VectorRef*>(it->get()))
2274 0 : vclmem.push_back(VR->GetCLBuffer());
2275 : else
2276 0 : vclmem.push_back(NULL);
2277 : }
2278 : pClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
2279 0 : sizeof(double) * nVectorWidth, NULL, &err);
2280 0 : if (CL_SUCCESS != err)
2281 0 : throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2282 : SAL_INFO("sc.opencl", "Created buffer " << pClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
2283 :
2284 0 : std::string kernelName = "GeoMean_reduction";
2285 0 : cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2286 0 : if (err != CL_SUCCESS)
2287 0 : throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2288 : SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
2289 :
2290 : // set kernel arg of reduction kernel
2291 0 : for (size_t j = 0; j < vclmem.size(); j++)
2292 : {
2293 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": " << (vclmem[j] ? "cl_mem" : "double") << ": " << vclmem[j]);
2294 : err = clSetKernelArg(redKernel, j,
2295 0 : vclmem[j] ? sizeof(cl_mem) : sizeof(double),
2296 0 : static_cast<void*>(&vclmem[j]));
2297 0 : if (CL_SUCCESS != err)
2298 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2299 : }
2300 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << pClmem2);
2301 0 : err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), static_cast<void*>(&pClmem2));
2302 0 : if (CL_SUCCESS != err)
2303 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2304 :
2305 : // set work group size and execute
2306 0 : size_t global_work_size[] = { 256, (size_t)nVectorWidth };
2307 0 : size_t local_work_size[] = { 256, 1 };
2308 : SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
2309 : err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
2310 0 : global_work_size, local_work_size, 0, NULL, NULL);
2311 0 : if (CL_SUCCESS != err)
2312 0 : throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2313 0 : err = clFinish(kEnv.mpkCmdQueue);
2314 0 : if (CL_SUCCESS != err)
2315 0 : throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2316 :
2317 : // Pass pClmem2 to the "real" kernel
2318 : SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << pClmem2);
2319 0 : err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&pClmem2));
2320 0 : if (CL_SUCCESS != err)
2321 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2322 : }
2323 : }
2324 0 : if (OpSumIfs* OpSumCodeGen = dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
2325 : {
2326 : // Obtain cl context
2327 : ::opencl::KernelEnv kEnv;
2328 0 : ::opencl::setKernelEnv(&kEnv);
2329 : cl_int err;
2330 0 : DynamicKernelArgument* Arg = mvSubArguments[0].get();
2331 : DynamicKernelSlidingArgument<VectorRef>* slidingArgPtr =
2332 0 : static_cast<DynamicKernelSlidingArgument<VectorRef>*>(Arg);
2333 0 : mpClmem2 = NULL;
2334 :
2335 0 : if (OpSumCodeGen->NeedReductionKernel())
2336 : {
2337 0 : size_t nInput = slidingArgPtr->GetArrayLength();
2338 0 : size_t nCurWindowSize = slidingArgPtr->GetWindowSize();
2339 0 : std::vector<SumIfsArgs> vclmem;
2340 :
2341 0 : for (SubArgumentsType::iterator it = mvSubArguments.begin(),
2342 0 : e = mvSubArguments.end(); it != e; ++it)
2343 : {
2344 0 : if (VectorRef* VR = dynamic_cast<VectorRef*>(it->get()))
2345 0 : vclmem.push_back(SumIfsArgs(VR->GetCLBuffer()));
2346 0 : else if (DynamicKernelConstantArgument* CA = dynamic_cast<DynamicKernelConstantArgument*>(it->get()))
2347 0 : vclmem.push_back(SumIfsArgs(CA->GetDouble()));
2348 : else
2349 0 : vclmem.push_back(SumIfsArgs(nullptr));
2350 : }
2351 : mpClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
2352 0 : sizeof(double) * nVectorWidth, NULL, &err);
2353 0 : if (CL_SUCCESS != err)
2354 0 : throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2355 : SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
2356 :
2357 0 : std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction";
2358 0 : cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2359 0 : if (err != CL_SUCCESS)
2360 0 : throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2361 : SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
2362 :
2363 : // set kernel arg of reduction kernel
2364 0 : for (size_t j = 0; j < vclmem.size(); j++)
2365 : {
2366 0 : if (vclmem[j].mCLMem)
2367 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": cl_mem: " << vclmem[j].mCLMem);
2368 : else
2369 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": double: " << vclmem[j].mConst);
2370 : err = clSetKernelArg(redKernel, j,
2371 0 : vclmem[j].mCLMem ? sizeof(cl_mem) : sizeof(double),
2372 0 : vclmem[j].mCLMem ? static_cast<void*>(&vclmem[j].mCLMem) :
2373 0 : static_cast<void*>(&vclmem[j].mConst));
2374 0 : if (CL_SUCCESS != err)
2375 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2376 : }
2377 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << mpClmem2);
2378 0 : err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), static_cast<void*>(&mpClmem2));
2379 0 : if (CL_SUCCESS != err)
2380 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2381 :
2382 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 1) << ": cl_int: " << nInput);
2383 0 : err = clSetKernelArg(redKernel, vclmem.size() + 1, sizeof(cl_int), static_cast<void*>(&nInput));
2384 0 : if (CL_SUCCESS != err)
2385 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2386 :
2387 : SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 2) << ": cl_int: " << nCurWindowSize);
2388 0 : err = clSetKernelArg(redKernel, vclmem.size() + 2, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2389 0 : if (CL_SUCCESS != err)
2390 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2391 : // set work group size and execute
2392 0 : size_t global_work_size[] = { 256, (size_t)nVectorWidth };
2393 0 : size_t local_work_size[] = { 256, 1 };
2394 : SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
2395 : err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
2396 0 : global_work_size, local_work_size, 0, NULL, NULL);
2397 0 : if (CL_SUCCESS != err)
2398 0 : throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2399 :
2400 0 : err = clFinish(kEnv.mpkCmdQueue);
2401 0 : if (CL_SUCCESS != err)
2402 0 : throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2403 :
2404 : SAL_INFO("sc.opencl", "Relasing kernel " << redKernel);
2405 0 : err = clReleaseKernel(redKernel);
2406 : SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << ::opencl::errorString(err));
2407 :
2408 : // Pass mpClmem2 to the "real" kernel
2409 : SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
2410 0 : err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem2));
2411 0 : if (CL_SUCCESS != err)
2412 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2413 : }
2414 : }
2415 0 : return i;
2416 : }
2417 :
2418 0 : virtual void GenSlidingWindowFunction( std::stringstream& ss ) SAL_OVERRIDE
2419 : {
2420 0 : for (size_t i = 0; i < mvSubArguments.size(); i++)
2421 0 : mvSubArguments[i]->GenSlidingWindowFunction(ss);
2422 0 : mpCodeGen->GenSlidingWindowFunction(ss, mSymName, mvSubArguments);
2423 0 : }
2424 0 : virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
2425 : {
2426 0 : for (size_t i = 0; i < mvSubArguments.size(); i++)
2427 : {
2428 0 : if (i)
2429 0 : ss << ",";
2430 0 : mvSubArguments[i]->GenDeclRef(ss);
2431 : }
2432 0 : }
2433 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
2434 : {
2435 0 : for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2436 : ++it)
2437 : {
2438 0 : if (it != mvSubArguments.begin())
2439 0 : ss << ", ";
2440 0 : (*it)->GenDecl(ss);
2441 : }
2442 0 : }
2443 :
2444 0 : virtual size_t GetWindowSize() const SAL_OVERRIDE
2445 : {
2446 0 : size_t nCurWindowSize = 0;
2447 0 : for (size_t i = 0; i < mvSubArguments.size(); i++)
2448 : {
2449 0 : size_t nCurChildWindowSize = mvSubArguments[i]->GetWindowSize();
2450 : nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
2451 0 : nCurChildWindowSize : nCurWindowSize;
2452 : }
2453 0 : return nCurWindowSize;
2454 : }
2455 :
2456 : /// When declared as input to a sliding window function
2457 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
2458 : {
2459 0 : for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2460 : ++it)
2461 : {
2462 0 : if (it != mvSubArguments.begin())
2463 0 : ss << ", ";
2464 0 : (*it)->GenSlidingWindowDecl(ss);
2465 : }
2466 0 : }
2467 : /// Generate either a function call to each children
2468 : /// or directly inline it if we are already inside a loop
2469 0 : virtual std::string GenSlidingWindowDeclRef( bool nested = false ) const SAL_OVERRIDE
2470 : {
2471 0 : std::stringstream ss;
2472 0 : if (!nested)
2473 : {
2474 0 : ss << mSymName << "_" << mpCodeGen->BinFuncName() << "(";
2475 0 : for (size_t i = 0; i < mvSubArguments.size(); i++)
2476 : {
2477 0 : if (i)
2478 0 : ss << ", ";
2479 0 : mvSubArguments[i]->GenDeclRef(ss);
2480 : }
2481 0 : ss << ")";
2482 : }
2483 : else
2484 : {
2485 0 : if (mvSubArguments.size() != 2)
2486 0 : throw Unhandled();
2487 : bool bArgument1_NeedNested =
2488 0 : mvSubArguments[0]->GetFormulaToken()->GetType()
2489 0 : != formula::svSingleVectorRef;
2490 : bool bArgument2_NeedNested =
2491 0 : mvSubArguments[1]->GetFormulaToken()->GetType()
2492 0 : != formula::svSingleVectorRef;
2493 0 : ss << "(";
2494 0 : ss << mpCodeGen->
2495 0 : Gen2(mvSubArguments[0]
2496 0 : ->GenSlidingWindowDeclRef(bArgument1_NeedNested),
2497 0 : mvSubArguments[1]
2498 0 : ->GenSlidingWindowDeclRef(bArgument2_NeedNested));
2499 0 : ss << ")";
2500 : }
2501 0 : return ss.str();
2502 : }
2503 0 : virtual std::string DumpOpName() const SAL_OVERRIDE
2504 : {
2505 0 : std::string t = "_" + mpCodeGen->BinFuncName();
2506 0 : for (size_t i = 0; i < mvSubArguments.size(); i++)
2507 0 : t = t + mvSubArguments[i]->DumpOpName();
2508 0 : return t;
2509 : }
2510 0 : virtual void DumpInlineFun( std::set<std::string>& decls,
2511 : std::set<std::string>& funs ) const SAL_OVERRIDE
2512 : {
2513 0 : mpCodeGen->BinInlineFun(decls, funs);
2514 0 : for (size_t i = 0; i < mvSubArguments.size(); i++)
2515 0 : mvSubArguments[i]->DumpInlineFun(decls, funs);
2516 0 : }
2517 0 : virtual ~DynamicKernelSoPArguments()
2518 0 : {
2519 0 : if (mpClmem2)
2520 : {
2521 : cl_int err;
2522 0 : err = clReleaseMemObject(mpClmem2);
2523 : SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
2524 0 : mpClmem2 = NULL;
2525 : }
2526 0 : }
2527 :
2528 : private:
2529 : SubArgumentsType mvSubArguments;
2530 : boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
2531 : cl_mem mpClmem2;
2532 : };
2533 :
2534 0 : DynamicKernelArgumentRef SoPHelper( const ScCalcConfig& config,
2535 : const std::string& ts, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen,
2536 : int nResultSize )
2537 : {
2538 0 : return DynamicKernelArgumentRef(new DynamicKernelSoPArguments(config, ts, ft, pCodeGen, nResultSize));
2539 : }
2540 :
2541 : template<class Base>
2542 0 : DynamicKernelArgument* VectorRefFactory( const ScCalcConfig& config, const std::string& s,
2543 : const FormulaTreeNodeRef& ft,
2544 : boost::shared_ptr<SlidingFunctionBase>& pCodeGen,
2545 : int index )
2546 : {
2547 : //Black lists ineligible classes here ..
2548 : // SUMIFS does not perform parallel reduction at DoubleVectorRef level
2549 0 : if (dynamic_cast<OpSumIfs*>(pCodeGen.get()))
2550 : {
2551 0 : if (index == 0) // the first argument of OpSumIfs cannot be strings anyway
2552 0 : return new DynamicKernelSlidingArgument<VectorRef>(config, s, ft, pCodeGen, index);
2553 0 : return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2554 : }
2555 : // AVERAGE is not supported yet
2556 : //Average has been supported by reduction kernel
2557 : /*else if (dynamic_cast<OpAverage*>(pCodeGen.get()))
2558 : {
2559 : return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2560 : }*/
2561 : // MUL is not supported yet
2562 0 : else if (dynamic_cast<OpMul*>(pCodeGen.get()))
2563 : {
2564 0 : return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2565 : }
2566 : // Sub is not a reduction per se
2567 0 : else if (dynamic_cast<OpSub*>(pCodeGen.get()))
2568 : {
2569 0 : return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2570 : }
2571 : // Only child class of Reduction is supported
2572 0 : else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
2573 : {
2574 0 : return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2575 : }
2576 :
2577 : const formula::DoubleVectorRefToken* pDVR =
2578 : static_cast<const formula::DoubleVectorRefToken*>(
2579 0 : ft->GetFormulaToken());
2580 : // Window being too small to justify a parallel reduction
2581 0 : if (pDVR->GetRefRowSize() < REDUCE_THRESHOLD)
2582 0 : return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2583 0 : if ((pDVR->IsStartFixed() && pDVR->IsEndFixed()) ||
2584 0 : (!pDVR->IsStartFixed() && !pDVR->IsEndFixed()))
2585 0 : return new ParallelReductionVectorRef<Base>(config, s, ft, pCodeGen, index);
2586 : else // Other cases are not supported as well
2587 0 : return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2588 : }
2589 :
2590 0 : DynamicKernelSoPArguments::DynamicKernelSoPArguments(const ScCalcConfig& config,
2591 : const std::string& s, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen, int nResultSize ) :
2592 0 : DynamicKernelArgument(config, s, ft), mpCodeGen(pCodeGen), mpClmem2(NULL)
2593 : {
2594 0 : size_t nChildren = ft->Children.size();
2595 :
2596 0 : for (size_t i = 0; i < nChildren; i++)
2597 : {
2598 0 : FormulaTreeNodeRef rChild = ft->Children[i];
2599 0 : if (!rChild)
2600 0 : throw Unhandled();
2601 0 : FormulaToken* pChild = rChild->GetFormulaToken();
2602 0 : if (!pChild)
2603 0 : throw Unhandled();
2604 0 : OpCode opc = pChild->GetOpCode();
2605 0 : std::stringstream tmpname;
2606 0 : tmpname << s << "_" << i;
2607 0 : std::string ts = tmpname.str();
2608 0 : switch (opc)
2609 : {
2610 : case ocPush:
2611 0 : if (pChild->GetType() == formula::svDoubleVectorRef)
2612 : {
2613 : const formula::DoubleVectorRefToken* pDVR =
2614 0 : static_cast<const formula::DoubleVectorRefToken*>(pChild);
2615 :
2616 0 : for (size_t j = 0; j < pDVR->GetArrays().size(); ++j)
2617 : {
2618 : SAL_INFO("sc.opencl", "j=" << j << " mpNumericArray=" << pDVR->GetArrays()[j].mpNumericArray <<
2619 : " mpStringArray=" << pDVR->GetArrays()[j].mpStringArray <<
2620 : " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
2621 : " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
2622 :
2623 0 : if (pDVR->GetArrays()[j].mpNumericArray ||
2624 0 : (pDVR->GetArrays()[j].mpNumericArray == NULL &&
2625 0 : pDVR->GetArrays()[j].mpStringArray == NULL))
2626 : {
2627 0 : if (pDVR->GetArrays()[j].mpNumericArray &&
2628 0 : pCodeGen->takeNumeric() &&
2629 0 : pDVR->GetArrays()[j].mpStringArray &&
2630 0 : pCodeGen->takeString())
2631 : {
2632 : mvSubArguments.push_back(
2633 : DynamicKernelArgumentRef(
2634 : new DynamicKernelMixedSlidingArgument(mCalcConfig,
2635 0 : ts, ft->Children[i], mpCodeGen, j)));
2636 : }
2637 : else
2638 : {
2639 : mvSubArguments.push_back(
2640 : DynamicKernelArgumentRef(VectorRefFactory<VectorRef>(mCalcConfig,
2641 0 : ts, ft->Children[i], mpCodeGen, j)));
2642 : }
2643 : }
2644 : else
2645 : mvSubArguments.push_back(
2646 : DynamicKernelArgumentRef(VectorRefFactory
2647 : <DynamicKernelStringArgument>(mCalcConfig,
2648 0 : ts, ft->Children[i], mpCodeGen, j)));
2649 : }
2650 : }
2651 0 : else if (pChild->GetType() == formula::svSingleVectorRef)
2652 : {
2653 : const formula::SingleVectorRefToken* pSVR =
2654 0 : static_cast<const formula::SingleVectorRefToken*>(pChild);
2655 :
2656 : SAL_INFO("sc.opencl", "mpNumericArray=" << pSVR->GetArray().mpNumericArray <<
2657 : " mpStringArray=" << pSVR->GetArray().mpStringArray <<
2658 : " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
2659 : " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
2660 :
2661 0 : if (pSVR->GetArray().mpNumericArray &&
2662 0 : pCodeGen->takeNumeric() &&
2663 0 : pSVR->GetArray().mpStringArray &&
2664 0 : pCodeGen->takeString())
2665 : {
2666 : // Function takes numbers or strings, there are both
2667 : mvSubArguments.push_back(
2668 : DynamicKernelArgumentRef(new DynamicKernelMixedArgument(mCalcConfig,
2669 0 : ts, ft->Children[i])));
2670 : }
2671 0 : else if (pSVR->GetArray().mpNumericArray &&
2672 0 : pCodeGen->takeNumeric() &&
2673 0 : (pSVR->GetArray().mpStringArray == NULL || mCalcConfig.meStringConversion == ScCalcConfig::StringConversion::ZERO))
2674 : {
2675 : // Function takes numbers, and either there
2676 : // are no strings, or there are strings but
2677 : // they are to be treated as zero
2678 : mvSubArguments.push_back(
2679 : DynamicKernelArgumentRef(new VectorRef(mCalcConfig, ts,
2680 0 : ft->Children[i])));
2681 : }
2682 0 : else if (pSVR->GetArray().mpNumericArray == NULL &&
2683 0 : pCodeGen->takeNumeric() &&
2684 0 : pSVR->GetArray().mpStringArray &&
2685 0 : mCalcConfig.meStringConversion == ScCalcConfig::StringConversion::ZERO)
2686 : {
2687 : // Function takes numbers, and there are only
2688 : // strings, but they are to be treated as zero
2689 : mvSubArguments.push_back(
2690 : DynamicKernelArgumentRef(new VectorRef(mCalcConfig, ts,
2691 0 : ft->Children[i])));
2692 : }
2693 0 : else if (pSVR->GetArray().mpStringArray &&
2694 0 : pCodeGen->takeString())
2695 : {
2696 : // There are strings, and the function takes
2697 : // strings.
2698 :
2699 : mvSubArguments.push_back(
2700 : DynamicKernelArgumentRef(new DynamicKernelStringArgument(mCalcConfig,
2701 0 : ts, ft->Children[i])));
2702 : }
2703 0 : else if (pSVR->GetArray().mpStringArray == NULL &&
2704 0 : pSVR->GetArray().mpNumericArray == NULL)
2705 : {
2706 : // There are only empty cells. Push as an
2707 : // array of NANs
2708 : mvSubArguments.push_back(
2709 : DynamicKernelArgumentRef(new VectorRef(mCalcConfig, ts,
2710 0 : ft->Children[i])));
2711 : }
2712 : else
2713 : throw UnhandledToken(pChild,
2714 0 : "Got unhandled case here", __FILE__, __LINE__);
2715 : }
2716 0 : else if (pChild->GetType() == formula::svDouble)
2717 : {
2718 : mvSubArguments.push_back(
2719 : DynamicKernelArgumentRef(new DynamicKernelConstantArgument(mCalcConfig, ts,
2720 0 : ft->Children[i])));
2721 : }
2722 0 : else if (pChild->GetType() == formula::svString
2723 0 : && pCodeGen->takeString())
2724 : {
2725 : mvSubArguments.push_back(
2726 : DynamicKernelArgumentRef(new ConstStringArgument(mCalcConfig, ts,
2727 0 : ft->Children[i])));
2728 : }
2729 : else
2730 : {
2731 0 : throw UnhandledToken(pChild, ("unhandled operand " + StackVarEnumToString(pChild->GetType()) + " for ocPush").c_str());
2732 : }
2733 0 : break;
2734 : case ocDiv:
2735 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDiv(nResultSize), nResultSize));
2736 0 : break;
2737 : case ocMul:
2738 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpMul(nResultSize), nResultSize));
2739 0 : break;
2740 : case ocSub:
2741 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSub(nResultSize), nResultSize));
2742 0 : break;
2743 : case ocAdd:
2744 : case ocSum:
2745 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSum(nResultSize), nResultSize));
2746 0 : break;
2747 : case ocAverage:
2748 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpAverage(nResultSize), nResultSize));
2749 0 : break;
2750 : case ocMin:
2751 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpMin(nResultSize), nResultSize));
2752 0 : break;
2753 : case ocMax:
2754 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpMax(nResultSize), nResultSize));
2755 0 : break;
2756 : case ocCount:
2757 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCount(nResultSize), nResultSize));
2758 0 : break;
2759 : case ocSumProduct:
2760 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSumProduct, nResultSize));
2761 0 : break;
2762 : case ocIRR:
2763 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpIRR, nResultSize));
2764 0 : break;
2765 : case ocMIRR:
2766 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpMIRR, nResultSize));
2767 0 : break;
2768 : case ocPMT:
2769 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpPMT, nResultSize));
2770 0 : break;
2771 : case ocRate:
2772 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpIntrate, nResultSize));
2773 0 : break;
2774 : case ocRRI:
2775 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpRRI, nResultSize));
2776 0 : break;
2777 : case ocPpmt:
2778 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpPPMT, nResultSize));
2779 0 : break;
2780 : case ocFisher:
2781 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpFisher, nResultSize));
2782 0 : break;
2783 : case ocFisherInv:
2784 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpFisherInv, nResultSize));
2785 0 : break;
2786 : case ocGamma:
2787 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGamma, nResultSize));
2788 0 : break;
2789 : case ocSLN:
2790 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSLN, nResultSize));
2791 0 : break;
2792 : case ocGammaLn:
2793 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGammaLn, nResultSize));
2794 0 : break;
2795 : case ocGauss:
2796 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGauss, nResultSize));
2797 0 : break;
2798 : /*case ocGeoMean:
2799 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGeoMean));
2800 : break;*/
2801 : case ocHarMean:
2802 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpHarMean, nResultSize));
2803 0 : break;
2804 : case ocLessEqual:
2805 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpLessEqual, nResultSize));
2806 0 : break;
2807 : case ocLess:
2808 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpLess, nResultSize));
2809 0 : break;
2810 : case ocEqual:
2811 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpEqual, nResultSize));
2812 0 : break;
2813 : case ocGreater:
2814 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGreater, nResultSize));
2815 0 : break;
2816 : case ocSYD:
2817 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSYD, nResultSize));
2818 0 : break;
2819 : case ocCorrel:
2820 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCorrel, nResultSize));
2821 0 : break;
2822 : case ocCos:
2823 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCos, nResultSize));
2824 0 : break;
2825 : case ocNegBinomVert :
2826 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpNegbinomdist, nResultSize));
2827 0 : break;
2828 : case ocPearson:
2829 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpPearson, nResultSize));
2830 0 : break;
2831 : case ocRSQ:
2832 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpRsq, nResultSize));
2833 0 : break;
2834 : case ocCosecant:
2835 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCsc, nResultSize));
2836 0 : break;
2837 : case ocISPMT:
2838 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpISPMT, nResultSize));
2839 0 : break;
2840 : case ocDuration:
2841 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2842 0 : ft->Children[i], new OpDuration, nResultSize));
2843 0 : break;
2844 : case ocSinHyp:
2845 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2846 0 : ft->Children[i], new OpSinh, nResultSize));
2847 0 : break;
2848 : case ocAbs:
2849 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2850 0 : ft->Children[i], new OpAbs, nResultSize));
2851 0 : break;
2852 : case ocPV:
2853 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2854 0 : ft->Children[i], new OpPV, nResultSize));
2855 0 : break;
2856 : case ocSin:
2857 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2858 0 : ft->Children[i], new OpSin, nResultSize));
2859 0 : break;
2860 : case ocTan:
2861 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2862 0 : ft->Children[i], new OpTan, nResultSize));
2863 0 : break;
2864 : case ocTanHyp:
2865 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2866 0 : ft->Children[i], new OpTanH, nResultSize));
2867 0 : break;
2868 : case ocStandard:
2869 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2870 0 : ft->Children[i], new OpStandard, nResultSize));
2871 0 : break;
2872 : case ocWeibull:
2873 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2874 0 : ft->Children[i], new OpWeibull, nResultSize));
2875 0 : break;
2876 : /*case ocMedian:
2877 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2878 : ft->Children[i],new OpMedian));
2879 : break;*/
2880 : case ocDDB:
2881 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2882 0 : ft->Children[i], new OpDDB, nResultSize));
2883 0 : break;
2884 : case ocFV:
2885 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2886 0 : ft->Children[i], new OpFV, nResultSize));
2887 0 : break;
2888 : case ocSumIfs:
2889 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2890 0 : ft->Children[i], new OpSumIfs, nResultSize));
2891 0 : break;
2892 : /*case ocVBD:
2893 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2894 : ft->Children[i],new OpVDB));
2895 : break;*/
2896 : case ocKurt:
2897 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2898 0 : ft->Children[i], new OpKurt, nResultSize));
2899 0 : break;
2900 : /*case ocNper:
2901 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2902 : ft->Children[i], new OpNper));
2903 : break;*/
2904 : case ocNormDist:
2905 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2906 0 : ft->Children[i], new OpNormdist, nResultSize));
2907 0 : break;
2908 : case ocArcCos:
2909 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2910 0 : ft->Children[i], new OpArcCos, nResultSize));
2911 0 : break;
2912 : case ocSqrt:
2913 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2914 0 : ft->Children[i], new OpSqrt, nResultSize));
2915 0 : break;
2916 : case ocArcCosHyp:
2917 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2918 0 : ft->Children[i], new OpArcCosHyp, nResultSize));
2919 0 : break;
2920 : case ocNPV:
2921 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2922 0 : ft->Children[i], new OpNPV, nResultSize));
2923 0 : break;
2924 : case ocStdNormDist:
2925 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2926 0 : ft->Children[i], new OpNormsdist, nResultSize));
2927 0 : break;
2928 : case ocNormInv:
2929 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2930 0 : ft->Children[i], new OpNorminv, nResultSize));
2931 0 : break;
2932 : case ocSNormInv:
2933 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2934 0 : ft->Children[i], new OpNormsinv, nResultSize));
2935 0 : break;
2936 : case ocPermut:
2937 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2938 0 : ft->Children[i], new OpPermut, nResultSize));
2939 0 : break;
2940 : case ocPermutationA:
2941 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2942 0 : ft->Children[i], new OpPermutationA, nResultSize));
2943 0 : break;
2944 : case ocPhi:
2945 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2946 0 : ft->Children[i], new OpPhi, nResultSize));
2947 0 : break;
2948 : case ocIpmt:
2949 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2950 0 : ft->Children[i], new OpIPMT, nResultSize));
2951 0 : break;
2952 : case ocConfidence:
2953 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2954 0 : ft->Children[i], new OpConfidence, nResultSize));
2955 0 : break;
2956 : case ocIntercept:
2957 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2958 0 : ft->Children[i], new OpIntercept, nResultSize));
2959 0 : break;
2960 : case ocDB:
2961 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
2962 0 : new OpDB, nResultSize));
2963 0 : break;
2964 : case ocLogInv:
2965 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2966 0 : ft->Children[i], new OpLogInv, nResultSize));
2967 0 : break;
2968 : case ocArcCot:
2969 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2970 0 : ft->Children[i], new OpArcCot, nResultSize));
2971 0 : break;
2972 : case ocCosHyp:
2973 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2974 0 : ft->Children[i], new OpCosh, nResultSize));
2975 0 : break;
2976 : case ocCritBinom:
2977 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2978 0 : ft->Children[i], new OpCritBinom, nResultSize));
2979 0 : break;
2980 : case ocArcCotHyp:
2981 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2982 0 : ft->Children[i], new OpArcCotHyp, nResultSize));
2983 0 : break;
2984 : case ocArcSin:
2985 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2986 0 : ft->Children[i], new OpArcSin, nResultSize));
2987 0 : break;
2988 : case ocArcSinHyp:
2989 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2990 0 : ft->Children[i], new OpArcSinHyp, nResultSize));
2991 0 : break;
2992 : case ocArcTan:
2993 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2994 0 : ft->Children[i], new OpArcTan, nResultSize));
2995 0 : break;
2996 : case ocArcTanHyp:
2997 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2998 0 : ft->Children[i], new OpArcTanH, nResultSize));
2999 0 : break;
3000 : case ocBitAnd:
3001 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3002 0 : ft->Children[i], new OpBitAnd, nResultSize));
3003 0 : break;
3004 : case ocForecast:
3005 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3006 0 : ft->Children[i], new OpForecast, nResultSize));
3007 0 : break;
3008 : case ocLogNormDist:
3009 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3010 0 : ft->Children[i], new OpLogNormDist, nResultSize));
3011 0 : break;
3012 : /*case ocGammaDist:
3013 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3014 : ft->Children[i], new OpGammaDist));
3015 : break;*/
3016 : case ocLn:
3017 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3018 0 : ft->Children[i], new OpLn, nResultSize));
3019 0 : break;
3020 : case ocRound:
3021 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3022 0 : ft->Children[i], new OpRound, nResultSize));
3023 0 : break;
3024 : case ocCot:
3025 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3026 0 : ft->Children[i], new OpCot, nResultSize));
3027 0 : break;
3028 : case ocCotHyp:
3029 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3030 0 : ft->Children[i], new OpCoth, nResultSize));
3031 0 : break;
3032 : case ocFDist:
3033 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3034 0 : ft->Children[i], new OpFdist, nResultSize));
3035 0 : break;
3036 : case ocVar:
3037 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3038 0 : ft->Children[i], new OpVar, nResultSize));
3039 0 : break;
3040 : /*case ocChiDist:
3041 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3042 : ft->Children[i],new OpChiDist));
3043 : break;*/
3044 : case ocPow:
3045 : case ocPower:
3046 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3047 0 : ft->Children[i], new OpPower, nResultSize));
3048 0 : break;
3049 : case ocOdd:
3050 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3051 0 : ft->Children[i], new OpOdd, nResultSize));
3052 0 : break;
3053 : /*case ocChiSqDist:
3054 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3055 : ft->Children[i],new OpChiSqDist));
3056 : break;
3057 : case ocChiSqInv:
3058 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3059 : ft->Children[i],new OpChiSqInv));
3060 : break;
3061 : case ocGammaInv:
3062 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3063 : ft->Children[i], new OpGammaInv));
3064 : break;*/
3065 : case ocFloor:
3066 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3067 0 : ft->Children[i], new OpFloor, nResultSize));
3068 0 : break;
3069 : /*case ocFInv:
3070 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3071 : ft->Children[i], new OpFInv));
3072 : break;*/
3073 : case ocFTest:
3074 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3075 0 : ft->Children[i], new OpFTest, nResultSize));
3076 0 : break;
3077 : case ocB:
3078 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3079 0 : ft->Children[i], new OpB, nResultSize));
3080 0 : break;
3081 : case ocBetaDist:
3082 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3083 0 : ft->Children[i], new OpBetaDist, nResultSize));
3084 0 : break;
3085 : case ocCosecantHyp:
3086 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3087 0 : ft->Children[i], new OpCscH, nResultSize));
3088 0 : break;
3089 : case ocExp:
3090 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3091 0 : ft->Children[i], new OpExp, nResultSize));
3092 0 : break;
3093 : case ocLog10:
3094 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3095 0 : ft->Children[i], new OpLog10, nResultSize));
3096 0 : break;
3097 : case ocExpDist:
3098 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3099 0 : ft->Children[i], new OpExponDist, nResultSize));
3100 0 : break;
3101 : case ocAverageIfs:
3102 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3103 0 : ft->Children[i], new OpAverageIfs, nResultSize));
3104 0 : break;
3105 : case ocCountIfs:
3106 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3107 0 : ft->Children[i], new OpCountIfs, nResultSize));
3108 0 : break;
3109 : case ocCombinA:
3110 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3111 0 : ft->Children[i], new OpCombinA, nResultSize));
3112 0 : break;
3113 : case ocEven:
3114 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3115 0 : ft->Children[i], new OpEven, nResultSize));
3116 0 : break;
3117 : case ocLog:
3118 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3119 0 : ft->Children[i], new OpLog, nResultSize));
3120 0 : break;
3121 : case ocMod:
3122 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3123 0 : ft->Children[i], new OpMod, nResultSize));
3124 0 : break;
3125 : case ocTrunc:
3126 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3127 0 : ft->Children[i], new OpTrunc, nResultSize));
3128 0 : break;
3129 : case ocSkew:
3130 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3131 0 : ft->Children[i], new OpSkew, nResultSize));
3132 0 : break;
3133 : case ocArcTan2:
3134 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3135 0 : ft->Children[i], new OpArcTan2, nResultSize));
3136 0 : break;
3137 : case ocBitOr:
3138 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3139 0 : ft->Children[i], new OpBitOr, nResultSize));
3140 0 : break;
3141 : case ocBitLshift:
3142 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3143 0 : ft->Children[i], new OpBitLshift, nResultSize));
3144 0 : break;
3145 : case ocBitRshift:
3146 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3147 0 : ft->Children[i], new OpBitRshift, nResultSize));
3148 0 : break;
3149 : case ocBitXor:
3150 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3151 0 : ft->Children[i], new OpBitXor, nResultSize));
3152 0 : break;
3153 : /*case ocChiInv:
3154 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3155 : ft->Children[i],new OpChiInv));
3156 : break;*/
3157 : case ocPoissonDist:
3158 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3159 0 : ft->Children[i], new OpPoisson, nResultSize));
3160 0 : break;
3161 : case ocSumSQ:
3162 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3163 0 : ft->Children[i], new OpSumSQ, nResultSize));
3164 0 : break;
3165 : case ocSkewp:
3166 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3167 0 : ft->Children[i], new OpSkewp, nResultSize));
3168 0 : break;
3169 : case ocBinomDist:
3170 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3171 0 : ft->Children[i], new OpBinomdist, nResultSize));
3172 0 : break;
3173 : case ocVarP:
3174 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3175 0 : ft->Children[i], new OpVarP, nResultSize));
3176 0 : break;
3177 : case ocCeil:
3178 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3179 0 : ft->Children[i], new OpCeil, nResultSize));
3180 0 : break;
3181 : case ocCombin:
3182 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3183 0 : ft->Children[i], new OpCombin, nResultSize));
3184 0 : break;
3185 : case ocDevSq:
3186 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3187 0 : ft->Children[i], new OpDevSq, nResultSize));
3188 0 : break;
3189 : case ocStDev:
3190 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3191 0 : ft->Children[i], new OpStDev, nResultSize));
3192 0 : break;
3193 : case ocSlope:
3194 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3195 0 : ft->Children[i], new OpSlope, nResultSize));
3196 0 : break;
3197 : case ocSTEYX:
3198 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3199 0 : ft->Children[i], new OpSTEYX, nResultSize));
3200 0 : break;
3201 : case ocZTest:
3202 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3203 0 : ft->Children[i], new OpZTest, nResultSize));
3204 0 : break;
3205 : case ocPi:
3206 : mvSubArguments.push_back(
3207 : DynamicKernelArgumentRef(new DynamicKernelPiArgument(mCalcConfig, ts,
3208 0 : ft->Children[i])));
3209 0 : break;
3210 : case ocRandom:
3211 : mvSubArguments.push_back(
3212 : DynamicKernelArgumentRef(new DynamicKernelRandomArgument(mCalcConfig, ts,
3213 0 : ft->Children[i])));
3214 0 : break;
3215 : case ocProduct:
3216 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3217 0 : ft->Children[i], new OpProduct, nResultSize));
3218 0 : break;
3219 : /*case ocHypGeomDist:
3220 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3221 : ft->Children[i],new OpHypGeomDist));
3222 : break;*/
3223 : case ocSumX2MY2:
3224 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3225 0 : ft->Children[i], new OpSumX2MY2, nResultSize));
3226 0 : break;
3227 : case ocSumX2DY2:
3228 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3229 0 : ft->Children[i], new OpSumX2PY2, nResultSize));
3230 0 : break;
3231 : /*case ocBetaInv:
3232 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3233 : ft->Children[i],new OpBetainv));
3234 : break;*/
3235 : case ocTTest:
3236 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3237 0 : ft->Children[i], new OpTTest, nResultSize));
3238 0 : break;
3239 : case ocTDist:
3240 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3241 0 : ft->Children[i], new OpTDist, nResultSize));
3242 0 : break;
3243 : /*case ocTInv:
3244 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3245 : ft->Children[i], new OpTInv));
3246 : break;*/
3247 : case ocSumXMY2:
3248 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3249 0 : ft->Children[i], new OpSumXMY2, nResultSize));
3250 0 : break;
3251 : case ocStDevP:
3252 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3253 0 : ft->Children[i], new OpStDevP, nResultSize));
3254 0 : break;
3255 : case ocCovar:
3256 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3257 0 : ft->Children[i], new OpCovar, nResultSize));
3258 0 : break;
3259 : case ocAnd:
3260 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3261 0 : ft->Children[i], new OpAnd, nResultSize));
3262 0 : break;
3263 : case ocVLookup:
3264 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3265 0 : ft->Children[i], new OpVLookup, nResultSize));
3266 0 : break;
3267 : case ocOr:
3268 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3269 0 : ft->Children[i], new OpOr, nResultSize));
3270 0 : break;
3271 : case ocNot:
3272 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3273 0 : ft->Children[i], new OpNot, nResultSize));
3274 0 : break;
3275 : case ocXor:
3276 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3277 0 : ft->Children[i], new OpXor, nResultSize));
3278 0 : break;
3279 : case ocDBMax:
3280 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3281 0 : ft->Children[i], new OpDmax, nResultSize));
3282 0 : break;
3283 : case ocDBMin:
3284 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3285 0 : ft->Children[i], new OpDmin, nResultSize));
3286 0 : break;
3287 : case ocDBProduct:
3288 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3289 0 : ft->Children[i], new OpDproduct, nResultSize));
3290 0 : break;
3291 : case ocDBAverage:
3292 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3293 0 : ft->Children[i], new OpDaverage, nResultSize));
3294 0 : break;
3295 : case ocDBStdDev:
3296 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3297 0 : ft->Children[i], new OpDstdev, nResultSize));
3298 0 : break;
3299 : case ocDBStdDevP:
3300 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3301 0 : ft->Children[i], new OpDstdevp, nResultSize));
3302 0 : break;
3303 : case ocDBSum:
3304 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3305 0 : ft->Children[i], new OpDsum, nResultSize));
3306 0 : break;
3307 : case ocDBVar:
3308 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3309 0 : ft->Children[i], new OpDvar, nResultSize));
3310 0 : break;
3311 : case ocDBVarP:
3312 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3313 0 : ft->Children[i], new OpDvarp, nResultSize));
3314 0 : break;
3315 : case ocAverageIf:
3316 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3317 0 : ft->Children[i], new OpAverageIf, nResultSize));
3318 0 : break;
3319 : case ocDBCount:
3320 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3321 0 : ft->Children[i], new OpDcount, nResultSize));
3322 0 : break;
3323 : case ocDBCount2:
3324 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3325 0 : ft->Children[i], new OpDcount2, nResultSize));
3326 0 : break;
3327 : case ocDeg:
3328 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3329 0 : ft->Children[i], new OpDeg, nResultSize));
3330 0 : break;
3331 : case ocRoundUp:
3332 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3333 0 : ft->Children[i], new OpRoundUp, nResultSize));
3334 0 : break;
3335 : case ocRoundDown:
3336 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3337 0 : ft->Children[i], new OpRoundDown, nResultSize));
3338 0 : break;
3339 : case ocInt:
3340 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3341 0 : ft->Children[i], new OpInt, nResultSize));
3342 0 : break;
3343 : case ocRad:
3344 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3345 0 : ft->Children[i], new OpRadians, nResultSize));
3346 0 : break;
3347 : case ocCountIf:
3348 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3349 0 : ft->Children[i], new OpCountIf, nResultSize));
3350 0 : break;
3351 : case ocIsEven:
3352 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3353 0 : ft->Children[i], new OpIsEven, nResultSize));
3354 0 : break;
3355 : case ocIsOdd:
3356 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3357 0 : ft->Children[i], new OpIsOdd, nResultSize));
3358 0 : break;
3359 : case ocFact:
3360 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3361 0 : ft->Children[i], new OpFact, nResultSize));
3362 0 : break;
3363 : case ocMinA:
3364 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3365 0 : ft->Children[i], new OpMinA, nResultSize));
3366 0 : break;
3367 : case ocCount2:
3368 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3369 0 : ft->Children[i], new OpCountA, nResultSize));
3370 0 : break;
3371 : case ocMaxA:
3372 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3373 0 : ft->Children[i], new OpMaxA, nResultSize));
3374 0 : break;
3375 : case ocAverageA:
3376 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3377 0 : ft->Children[i], new OpAverageA, nResultSize));
3378 0 : break;
3379 : case ocVarA:
3380 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3381 0 : ft->Children[i], new OpVarA, nResultSize));
3382 0 : break;
3383 : case ocVarPA:
3384 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3385 0 : ft->Children[i], new OpVarPA, nResultSize));
3386 0 : break;
3387 : case ocStDevA:
3388 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3389 0 : ft->Children[i], new OpStDevA, nResultSize));
3390 0 : break;
3391 : case ocStDevPA:
3392 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3393 0 : ft->Children[i], new OpStDevPA, nResultSize));
3394 0 : break;
3395 : case ocSecant:
3396 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3397 0 : ft->Children[i], new OpSec, nResultSize));
3398 0 : break;
3399 : case ocSecantHyp:
3400 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3401 0 : ft->Children[i], new OpSecH, nResultSize));
3402 0 : break;
3403 : case ocSumIf:
3404 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3405 0 : ft->Children[i], new OpSumIf, nResultSize));
3406 0 : break;
3407 : case ocNegSub:
3408 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3409 0 : ft->Children[i], new OpNegSub, nResultSize));
3410 0 : break;
3411 : case ocAveDev:
3412 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3413 0 : ft->Children[i], new OpAveDev, nResultSize));
3414 0 : break;
3415 : case ocIf:
3416 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3417 0 : ft->Children[i], new OpIf, nResultSize));
3418 0 : break;
3419 : case ocExternal:
3420 0 : if (!(pChild->GetExternal().compareTo(OUString(
3421 0 : "com.sun.star.sheet.addin.Analysis.getEffect"))))
3422 : {
3423 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpEffective, nResultSize));
3424 : }
3425 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3426 0 : "com.sun.star.sheet.addin.Analysis.getCumipmt"))))
3427 : {
3428 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCumipmt, nResultSize));
3429 : }
3430 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3431 0 : "com.sun.star.sheet.addin.Analysis.getNominal"))))
3432 : {
3433 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpNominal, nResultSize));
3434 : }
3435 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3436 0 : "com.sun.star.sheet.addin.Analysis.getCumprinc"))))
3437 : {
3438 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCumprinc, nResultSize));
3439 : }
3440 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3441 0 : "com.sun.star.sheet.addin.Analysis.getXnpv"))))
3442 : {
3443 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpXNPV, nResultSize));
3444 : }
3445 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3446 0 : "com.sun.star.sheet.addin.Analysis.getPricemat"))))
3447 : {
3448 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpPriceMat, nResultSize));
3449 : }
3450 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3451 0 : "com.sun.star.sheet.addin.Analysis.getReceived"))))
3452 : {
3453 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpReceived, nResultSize));
3454 : }
3455 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3456 0 : "com.sun.star.sheet.addin.Analysis.getTbilleq"))))
3457 : {
3458 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpTbilleq, nResultSize));
3459 : }
3460 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3461 0 : "com.sun.star.sheet.addin.Analysis.getTbillprice"))))
3462 : {
3463 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpTbillprice, nResultSize));
3464 : }
3465 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3466 0 : "com.sun.star.sheet.addin.Analysis.getTbillyield"))))
3467 : {
3468 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpTbillyield, nResultSize));
3469 : }
3470 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3471 0 : "com.sun.star.sheet.addin.Analysis.getFvschedule"))))
3472 : {
3473 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpFvschedule, nResultSize));
3474 : }
3475 : /*else if ( !(pChild->GetExternal().compareTo(OUString(
3476 : "com.sun.star.sheet.addin.Analysis.getYield"))))
3477 : {
3478 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpYield));
3479 : }*/
3480 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3481 0 : "com.sun.star.sheet.addin.Analysis.getYielddisc"))))
3482 : {
3483 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpYielddisc, nResultSize));
3484 : }
3485 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3486 0 : "com.sun.star.sheet.addin.Analysis.getYieldmat"))))
3487 : {
3488 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpYieldmat, nResultSize));
3489 : }
3490 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3491 0 : "com.sun.star.sheet.addin.Analysis.getAccrintm"))))
3492 : {
3493 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpAccrintm, nResultSize));
3494 : }
3495 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3496 0 : "com.sun.star.sheet.addin.Analysis.getCoupdaybs"))))
3497 : {
3498 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCoupdaybs, nResultSize));
3499 : }
3500 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3501 0 : "com.sun.star.sheet.addin.Analysis.getDollarde"))))
3502 : {
3503 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDollarde, nResultSize));
3504 : }
3505 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3506 0 : "com.sun.star.sheet.addin.Analysis.getDollarfr"))))
3507 : {
3508 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDollarfr, nResultSize));
3509 : }
3510 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3511 0 : "com.sun.star.sheet.addin.Analysis.getCoupdays"))))
3512 : {
3513 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCoupdays, nResultSize));
3514 : }
3515 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3516 0 : "com.sun.star.sheet.addin.Analysis.getCoupdaysnc"))))
3517 : {
3518 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCoupdaysnc, nResultSize));
3519 : }
3520 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3521 0 : "com.sun.star.sheet.addin.Analysis.getDisc"))))
3522 : {
3523 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDISC, nResultSize));
3524 : }
3525 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3526 0 : "com.sun.star.sheet.addin.Analysis.getIntrate"))))
3527 : {
3528 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpINTRATE, nResultSize));
3529 : }
3530 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3531 0 : "com.sun.star.sheet.addin.Analysis.getPrice"))))
3532 : {
3533 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3534 0 : ft->Children[i], new OpPrice, nResultSize));
3535 : }
3536 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3537 0 : "com.sun.star.sheet.addin.Analysis.getCoupnum"))))
3538 : {
3539 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3540 0 : new OpCoupnum, nResultSize));
3541 : }
3542 : /*else if ( !(pChild->GetExternal().compareTo(OUString(
3543 : "com.sun.star.sheet.addin.Analysis.getDuration"))))
3544 : {
3545 : mvSubArguments.push_back(
3546 : SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDuration_ADD));
3547 : }*/
3548 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3549 0 : "com.sun.star.sheet.addin.Analysis.getAmordegrc"))))
3550 : {
3551 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3552 0 : new OpAmordegrc, nResultSize));
3553 : }
3554 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3555 0 : "com.sun.star.sheet.addin.Analysis.getAmorlinc"))))
3556 : {
3557 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3558 0 : new OpAmorlinc, nResultSize));
3559 : }
3560 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3561 0 : "com.sun.star.sheet.addin.Analysis.getMduration"))))
3562 : {
3563 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3564 0 : new OpMDuration, nResultSize));
3565 : }
3566 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3567 0 : "com.sun.star.sheet.addin.Analysis.getXirr"))))
3568 : {
3569 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3570 0 : new OpXirr, nResultSize));
3571 : }
3572 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3573 0 : "com.sun.star.sheet.addin.Analysis.getOddlprice"))))
3574 : {
3575 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3576 0 : ft->Children[i], new OpOddlprice, nResultSize));
3577 : }
3578 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3579 0 : "com.sun.star.sheet.addin.Analysis.getOddlyield"))))
3580 : {
3581 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3582 0 : new OpOddlyield, nResultSize));
3583 : }
3584 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3585 0 : "com.sun.star.sheet.addin.Analysis.getPricedisc"))))
3586 : {
3587 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3588 0 : ft->Children[i], new OpPriceDisc, nResultSize));
3589 : }
3590 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3591 0 : "com.sun.star.sheet.addin.Analysis.getCouppcd"))))
3592 : {
3593 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3594 0 : new OpCouppcd, nResultSize));
3595 : }
3596 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3597 0 : "com.sun.star.sheet.addin.Analysis.getCoupncd"))))
3598 : {
3599 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3600 0 : new OpCoupncd, nResultSize));
3601 : }
3602 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3603 0 : "com.sun.star.sheet.addin.Analysis.getAccrint"))))
3604 : {
3605 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3606 0 : new OpAccrint, nResultSize));
3607 : }
3608 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3609 0 : "com.sun.star.sheet.addin.Analysis.getSqrtpi"))))
3610 : {
3611 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3612 0 : new OpSqrtPi, nResultSize));
3613 : }
3614 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3615 0 : "com.sun.star.sheet.addin.Analysis.getConvert"))))
3616 : {
3617 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3618 0 : new OpConvert, nResultSize));
3619 : }
3620 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3621 0 : "com.sun.star.sheet.addin.Analysis.getIseven"))))
3622 : {
3623 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3624 0 : new OpIsEven, nResultSize));
3625 : }
3626 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3627 0 : "com.sun.star.sheet.addin.Analysis.getIsodd"))))
3628 : {
3629 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3630 0 : new OpIsOdd, nResultSize));
3631 : }
3632 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3633 0 : "com.sun.star.sheet.addin.Analysis.getMround"))))
3634 : {
3635 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3636 0 : new OpMROUND, nResultSize));
3637 : }
3638 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3639 0 : "com.sun.star.sheet.addin.Analysis.getQuotient"))))
3640 : {
3641 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3642 0 : new OpQuotient, nResultSize));
3643 : }
3644 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3645 0 : "com.sun.star.sheet.addin.Analysis.getSeriessum"))))
3646 : {
3647 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3648 0 : new OpSeriesSum, nResultSize));
3649 : }
3650 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3651 0 : "com.sun.star.sheet.addin.Analysis.getBesselj"))))
3652 : {
3653 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3654 0 : new OpBesselj, nResultSize));
3655 : }
3656 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3657 0 : "com.sun.star.sheet.addin.Analysis.getGestep"))))
3658 : {
3659 0 : mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3660 0 : new OpGestep, nResultSize));
3661 : }
3662 : else
3663 0 : throw UnhandledToken(pChild, "unhandled opcode");
3664 0 : break;
3665 :
3666 : default:
3667 0 : throw UnhandledToken(pChild, "unhandled opcode");
3668 : }
3669 0 : }
3670 0 : }
3671 :
3672 : class DynamicKernel : public CompiledFormula
3673 : {
3674 : public:
3675 : DynamicKernel( const ScCalcConfig& config, const FormulaTreeNodeRef& r, int nResultSize );
3676 : virtual ~DynamicKernel();
3677 :
3678 : static DynamicKernel* create( const ScCalcConfig& config, ScTokenArray& rCode, int nResultSize );
3679 :
3680 : /// OpenCL code generation
3681 : void CodeGen();
3682 :
3683 : /// Produce kernel hash
3684 : std::string GetMD5();
3685 :
3686 : /// Create program, build, and create kerenl
3687 : /// TODO cache results based on kernel body hash
3688 : /// TODO: abstract OpenCL part out into OpenCL wrapper.
3689 : void CreateKernel();
3690 :
3691 : /// Prepare buffers, marshal them to GPU, and launch the kernel
3692 : /// TODO: abstract OpenCL part out into OpenCL wrapper.
3693 : void Launch( size_t nr );
3694 :
3695 0 : cl_mem GetResultBuffer() const { return mpResClmem; }
3696 :
3697 : private:
3698 : ScCalcConfig mCalcConfig;
3699 : FormulaTreeNodeRef mpRoot;
3700 : SymbolTable mSyms;
3701 : std::string mKernelSignature, mKernelHash;
3702 : std::string mFullProgramSrc;
3703 : cl_program mpProgram;
3704 : cl_kernel mpKernel;
3705 : cl_mem mpResClmem; // Results
3706 : std::set<std::string> inlineDecl;
3707 : std::set<std::string> inlineFun;
3708 :
3709 : int mnResultSize;
3710 : };
3711 :
3712 0 : DynamicKernel::DynamicKernel( const ScCalcConfig& config, const FormulaTreeNodeRef& r, int nResultSize ) :
3713 : mCalcConfig(config),
3714 : mpRoot(r),
3715 : mpProgram(NULL),
3716 : mpKernel(NULL),
3717 : mpResClmem(NULL),
3718 0 : mnResultSize(nResultSize) {}
3719 :
3720 0 : DynamicKernel::~DynamicKernel()
3721 : {
3722 : cl_int err;
3723 0 : if (mpResClmem)
3724 : {
3725 0 : err = clReleaseMemObject(mpResClmem);
3726 : SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
3727 : }
3728 0 : if (mpKernel)
3729 : {
3730 : SAL_INFO("sc.opencl", "Releasing kernel " << mpKernel);
3731 0 : err = clReleaseKernel(mpKernel);
3732 : SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << ::opencl::errorString(err));
3733 : }
3734 : // mpProgram is not going to be released here -- it's cached.
3735 0 : }
3736 :
3737 0 : void DynamicKernel::CodeGen()
3738 : {
3739 : // Travese the tree of expression and declare symbols used
3740 0 : const DynamicKernelArgument* DK = mSyms.DeclRefArg<DynamicKernelSoPArguments>(mCalcConfig, mpRoot, new OpNop(mnResultSize), mnResultSize);
3741 :
3742 0 : std::stringstream decl;
3743 0 : if (::opencl::gpuEnv.mnKhrFp64Flag)
3744 : {
3745 0 : decl << "#if __OPENCL_VERSION__ < 120\n";
3746 0 : decl << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
3747 0 : decl << "#endif\n";
3748 : }
3749 0 : else if (::opencl::gpuEnv.mnAmdFp64Flag)
3750 : {
3751 0 : decl << "#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
3752 : }
3753 : // preambles
3754 0 : decl << publicFunc;
3755 0 : DK->DumpInlineFun(inlineDecl, inlineFun);
3756 0 : for (std::set<std::string>::iterator set_iter = inlineDecl.begin();
3757 0 : set_iter != inlineDecl.end(); ++set_iter)
3758 : {
3759 0 : decl << *set_iter;
3760 : }
3761 :
3762 0 : for (std::set<std::string>::iterator set_iter = inlineFun.begin();
3763 0 : set_iter != inlineFun.end(); ++set_iter)
3764 : {
3765 0 : decl << *set_iter;
3766 : }
3767 0 : mSyms.DumpSlidingWindowFunctions(decl);
3768 0 : mKernelSignature = DK->DumpOpName();
3769 0 : decl << "__kernel void DynamicKernel" << mKernelSignature;
3770 0 : decl << "(__global double *result, ";
3771 0 : DK->GenSlidingWindowDecl(decl);
3772 0 : decl << ") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
3773 0 : DK->GenSlidingWindowDeclRef(false) << ";\n}\n";
3774 0 : mFullProgramSrc = decl.str();
3775 : #ifdef SAL_DETAIL_ENABLE_LOG_INFO
3776 0 : std::stringstream area;
3777 0 : if (mKernelSignature[0] == '_')
3778 0 : area << "sc.opencl.source." << mKernelSignature.substr(1, std::string::npos);
3779 : else
3780 0 : area << "sc.opencl.source." << mKernelSignature;
3781 0 : SAL_INFO(area.str().c_str(), "Program to be compiled:\n" << linenumberify(mFullProgramSrc));
3782 : #endif
3783 0 : }
3784 :
3785 0 : std::string DynamicKernel::GetMD5()
3786 : {
3787 0 : if (mKernelHash.empty())
3788 : {
3789 0 : std::stringstream md5s;
3790 : // Compute MD5SUM of kernel body to obtain the name
3791 : sal_uInt8 result[RTL_DIGEST_LENGTH_MD5];
3792 : rtl_digest_MD5(
3793 0 : mFullProgramSrc.c_str(),
3794 0 : mFullProgramSrc.length(), result,
3795 0 : RTL_DIGEST_LENGTH_MD5);
3796 0 : for (int i = 0; i < RTL_DIGEST_LENGTH_MD5; i++)
3797 : {
3798 0 : md5s << std::hex << (int)result[i];
3799 : }
3800 0 : mKernelHash = md5s.str();
3801 : }
3802 0 : return mKernelHash;
3803 : }
3804 :
3805 : /// Build code
3806 0 : void DynamicKernel::CreateKernel()
3807 : {
3808 0 : if (mpKernel)
3809 : // already created.
3810 0 : return;
3811 :
3812 : cl_int err;
3813 0 : std::string kname = "DynamicKernel" + mKernelSignature;
3814 : // Compile kernel here!!!
3815 : // Obtain cl context
3816 : ::opencl::KernelEnv kEnv;
3817 0 : ::opencl::setKernelEnv(&kEnv);
3818 0 : const char* src = mFullProgramSrc.c_str();
3819 0 : static std::string lastOneKernelHash = "";
3820 0 : static std::string lastSecondKernelHash = "";
3821 : static cl_program lastOneProgram = NULL;
3822 : static cl_program lastSecondProgram = NULL;
3823 0 : std::string KernelHash = mKernelSignature + GetMD5();
3824 0 : if (lastOneKernelHash == KernelHash && lastOneProgram)
3825 : {
3826 0 : mpProgram = lastOneProgram;
3827 : }
3828 0 : else if (lastSecondKernelHash == KernelHash && lastSecondProgram)
3829 : {
3830 0 : mpProgram = lastSecondProgram;
3831 : }
3832 : else
3833 : { // doesn't match the last compiled formula.
3834 :
3835 0 : if (lastSecondProgram)
3836 : {
3837 : SAL_INFO("sc.opencl", "Releasing program " << lastSecondProgram);
3838 0 : err = clReleaseProgram(lastSecondProgram);
3839 : SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseProgram failed: " << ::opencl::errorString(err));
3840 0 : lastSecondProgram = NULL;
3841 : }
3842 0 : if (::opencl::buildProgramFromBinary("",
3843 0 : &::opencl::gpuEnv, KernelHash.c_str(), 0))
3844 : {
3845 0 : mpProgram = ::opencl::gpuEnv.mpArryPrograms[0];
3846 0 : ::opencl::gpuEnv.mpArryPrograms[0] = NULL;
3847 : }
3848 : else
3849 : {
3850 : mpProgram = clCreateProgramWithSource(kEnv.mpkContext, 1,
3851 0 : &src, NULL, &err);
3852 0 : if (err != CL_SUCCESS)
3853 0 : throw OpenCLError("clCreateProgramWithSource", err, __FILE__, __LINE__);
3854 : SAL_INFO("sc.opencl", "Created program " << mpProgram);
3855 :
3856 : err = clBuildProgram(mpProgram, 1,
3857 0 : ::opencl::gpuEnv.mpArryDevsID, "", NULL, NULL);
3858 0 : if (err != CL_SUCCESS)
3859 : {
3860 : #if OSL_DEBUG_LEVEL > 0
3861 : if (err == CL_BUILD_PROGRAM_FAILURE)
3862 : {
3863 : cl_build_status stat;
3864 : cl_int e = clGetProgramBuildInfo(
3865 : mpProgram, ::opencl::gpuEnv.mpArryDevsID[0],
3866 : CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status),
3867 : &stat, 0);
3868 : SAL_WARN_IF(
3869 : e != CL_SUCCESS, "sc.opencl",
3870 : "after CL_BUILD_PROGRAM_FAILURE,"
3871 : " clGetProgramBuildInfo(CL_PROGRAM_BUILD_STATUS)"
3872 : " fails with " << ::opencl::errorString(e));
3873 : if (e == CL_SUCCESS)
3874 : {
3875 : size_t n;
3876 : e = clGetProgramBuildInfo(
3877 : mpProgram, ::opencl::gpuEnv.mpArryDevsID[0],
3878 : CL_PROGRAM_BUILD_LOG, 0, 0, &n);
3879 : SAL_WARN_IF(
3880 : e != CL_SUCCESS || n == 0, "sc.opencl",
3881 : "after CL_BUILD_PROGRAM_FAILURE,"
3882 : " clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG)"
3883 : " fails with " << ::opencl::errorString(e) << ", n=" << n);
3884 : if (e == CL_SUCCESS && n != 0)
3885 : {
3886 : std::vector<char> log(n);
3887 : e = clGetProgramBuildInfo(
3888 : mpProgram, ::opencl::gpuEnv.mpArryDevsID[0],
3889 : CL_PROGRAM_BUILD_LOG, n, &log[0], 0);
3890 : SAL_WARN_IF(
3891 : e != CL_SUCCESS || n == 0, "sc.opencl",
3892 : "after CL_BUILD_PROGRAM_FAILURE,"
3893 : " clGetProgramBuildInfo("
3894 : "CL_PROGRAM_BUILD_LOG) fails with " << ::opencl::errorString(e));
3895 : if (e == CL_SUCCESS)
3896 : SAL_WARN(
3897 : "sc.opencl",
3898 : "CL_BUILD_PROGRAM_FAILURE, status " << stat
3899 : << ", log \"" << &log[0] << "\"");
3900 : }
3901 : }
3902 : }
3903 : #endif
3904 0 : throw OpenCLError("clBuildProgram", err, __FILE__, __LINE__);
3905 : }
3906 : SAL_INFO("sc.opencl", "Built program " << mpProgram);
3907 :
3908 : // Generate binary out of compiled kernel.
3909 : ::opencl::generatBinFromKernelSource(mpProgram,
3910 0 : (mKernelSignature + GetMD5()).c_str());
3911 : }
3912 0 : lastSecondKernelHash = lastOneKernelHash;
3913 0 : lastSecondProgram = lastOneProgram;
3914 0 : lastOneKernelHash = KernelHash;
3915 0 : lastOneProgram = mpProgram;
3916 : }
3917 0 : mpKernel = clCreateKernel(mpProgram, kname.c_str(), &err);
3918 0 : if (err != CL_SUCCESS)
3919 0 : throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
3920 0 : SAL_INFO("sc.opencl", "Created kernel " << mpKernel << " with name " << kname << " in program " << mpProgram);
3921 : }
3922 :
3923 0 : void DynamicKernel::Launch( size_t nr )
3924 : {
3925 : // Obtain cl context
3926 : ::opencl::KernelEnv kEnv;
3927 0 : ::opencl::setKernelEnv(&kEnv);
3928 : cl_int err;
3929 : // The results
3930 : mpResClmem = clCreateBuffer(kEnv.mpkContext,
3931 : (cl_mem_flags)CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
3932 0 : nr * sizeof(double), NULL, &err);
3933 0 : if (CL_SUCCESS != err)
3934 0 : throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
3935 : SAL_INFO("sc.opencl", "Created buffer " << mpResClmem << " size " << nr << "*" << sizeof(double) << "=" << (nr*sizeof(double)));
3936 :
3937 : SAL_INFO("sc.opencl", "Kernel " << mpKernel << " arg " << 0 << ": cl_mem: " << mpResClmem);
3938 0 : err = clSetKernelArg(mpKernel, 0, sizeof(cl_mem), static_cast<void*>(&mpResClmem));
3939 0 : if (CL_SUCCESS != err)
3940 0 : throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
3941 : // The rest of buffers
3942 0 : mSyms.Marshal(mpKernel, nr, mpProgram);
3943 0 : size_t global_work_size[] = { nr };
3944 : SAL_INFO("sc.opencl", "Enqueing kernel " << mpKernel);
3945 : err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, mpKernel, 1, NULL,
3946 0 : global_work_size, NULL, 0, NULL, NULL);
3947 0 : if (CL_SUCCESS != err)
3948 0 : throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
3949 0 : err = clFlush(kEnv.mpkCmdQueue);
3950 0 : if (CL_SUCCESS != err)
3951 0 : throw OpenCLError("clFlush", err, __FILE__, __LINE__);
3952 0 : }
3953 :
3954 : // Symbol lookup. If there is no such symbol created, allocate one
3955 : // kernel with argument with unique name and return so.
3956 : // The template argument T must be a subclass of DynamicKernelArgument
3957 : template<typename T>
3958 0 : const DynamicKernelArgument* SymbolTable::DeclRefArg( const ScCalcConfig& config,
3959 : FormulaTreeNodeRef t, SlidingFunctionBase* pCodeGen, int nResultSize )
3960 : {
3961 0 : FormulaToken* ref = t->GetFormulaToken();
3962 0 : ArgumentMap::iterator it = mSymbols.find(ref);
3963 0 : if (it == mSymbols.end())
3964 : {
3965 : // Allocate new symbols
3966 0 : std::stringstream ss;
3967 0 : ss << "tmp" << mCurId++;
3968 0 : DynamicKernelArgumentRef new_arg(new T(config, ss.str(), t, pCodeGen, nResultSize));
3969 0 : mSymbols[ref] = new_arg;
3970 0 : mParams.push_back(new_arg);
3971 0 : return new_arg.get();
3972 : }
3973 : else
3974 : {
3975 0 : return it->second.get();
3976 : }
3977 : }
3978 :
3979 0 : FormulaGroupInterpreterOpenCL::FormulaGroupInterpreterOpenCL() :
3980 0 : FormulaGroupInterpreter() {}
3981 :
3982 0 : FormulaGroupInterpreterOpenCL::~FormulaGroupInterpreterOpenCL() {}
3983 :
3984 0 : ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix( const ScMatrix& )
3985 : {
3986 0 : return NULL;
3987 : }
3988 :
3989 0 : DynamicKernel* DynamicKernel::create( const ScCalcConfig& rConfig, ScTokenArray& rCode, int nResultSize )
3990 : {
3991 : // Constructing "AST"
3992 0 : FormulaTokenIterator aCode(rCode);
3993 0 : std::list<FormulaToken*> aTokenList;
3994 0 : std::map<FormulaToken*, FormulaTreeNodeRef> aHashMap;
3995 : FormulaToken* pCur;
3996 0 : while ((pCur = const_cast<FormulaToken*>(aCode.Next())) != NULL)
3997 : {
3998 0 : OpCode eOp = pCur->GetOpCode();
3999 0 : if (eOp != ocPush)
4000 : {
4001 0 : FormulaTreeNodeRef pCurNode(new FormulaTreeNode(pCur));
4002 0 : sal_uInt8 nParamCount = pCur->GetParamCount();
4003 0 : for (sal_uInt8 i = 0; i < nParamCount; i++)
4004 : {
4005 0 : FormulaToken* pTempFormula = aTokenList.back();
4006 0 : aTokenList.pop_back();
4007 0 : if (pTempFormula->GetOpCode() != ocPush)
4008 : {
4009 0 : if (aHashMap.find(pTempFormula) == aHashMap.end())
4010 0 : return NULL;
4011 0 : pCurNode->Children.push_back(aHashMap[pTempFormula]);
4012 : }
4013 : else
4014 : {
4015 : FormulaTreeNodeRef pChildTreeNode =
4016 : FormulaTreeNodeRef(
4017 0 : new FormulaTreeNode(pTempFormula));
4018 0 : pCurNode->Children.push_back(pChildTreeNode);
4019 : }
4020 : }
4021 0 : std::reverse(pCurNode->Children.begin(), pCurNode->Children.end());
4022 0 : aHashMap[pCur] = pCurNode;
4023 : }
4024 0 : aTokenList.push_back(pCur);
4025 : }
4026 :
4027 0 : FormulaTreeNodeRef Root = FormulaTreeNodeRef(new FormulaTreeNode(NULL));
4028 0 : Root->Children.push_back(aHashMap[aTokenList.back()]);
4029 :
4030 0 : DynamicKernel* pDynamicKernel = new DynamicKernel(rConfig, Root, nResultSize);
4031 :
4032 : // OpenCL source code generation and kernel compilation
4033 : try
4034 : {
4035 0 : pDynamicKernel->CodeGen();
4036 0 : pDynamicKernel->CreateKernel();
4037 : }
4038 0 : catch (const UnhandledToken& ut)
4039 : {
4040 : SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled token: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
4041 0 : delete pDynamicKernel;
4042 0 : return NULL;
4043 : }
4044 0 : catch (...)
4045 : {
4046 : SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled compiler error");
4047 0 : return NULL;
4048 : }
4049 0 : return pDynamicKernel;
4050 : }
4051 :
4052 0 : CompiledFormula* FormulaGroupInterpreterOpenCL::createCompiledFormula(
4053 : ScFormulaCellGroup& rGroup, ScTokenArray& rCode )
4054 : {
4055 0 : return DynamicKernel::create(maCalcConfig, rCode, rGroup.mnLength);
4056 : }
4057 :
4058 : namespace {
4059 :
4060 : class CLInterpreterResult
4061 : {
4062 : DynamicKernel* mpKernel;
4063 :
4064 : SCROW mnGroupLength;
4065 :
4066 : cl_mem mpCLResBuf;
4067 : double* mpResBuf;
4068 :
4069 : public:
4070 0 : CLInterpreterResult() : mpKernel(NULL), mnGroupLength(0), mpCLResBuf(NULL), mpResBuf(NULL) {}
4071 0 : CLInterpreterResult( DynamicKernel* pKernel, SCROW nGroupLength ) :
4072 0 : mpKernel(pKernel), mnGroupLength(nGroupLength), mpCLResBuf(NULL), mpResBuf(NULL) {}
4073 :
4074 0 : bool isValid() const { return mpKernel != NULL; }
4075 :
4076 0 : void fetchResultFromKernel()
4077 : {
4078 0 : if (!isValid())
4079 0 : return;
4080 :
4081 : // Map results back
4082 0 : mpCLResBuf = mpKernel->GetResultBuffer();
4083 :
4084 : // Obtain cl context
4085 : ::opencl::KernelEnv kEnv;
4086 0 : ::opencl::setKernelEnv(&kEnv);
4087 :
4088 : cl_int err;
4089 : mpResBuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
4090 : mpCLResBuf,
4091 : CL_TRUE, CL_MAP_READ, 0,
4092 : mnGroupLength * sizeof(double), 0, NULL, NULL,
4093 0 : &err));
4094 :
4095 0 : if (err != CL_SUCCESS)
4096 : {
4097 : SAL_WARN("sc.opencl", "clEnqueueMapBuffer failed:: " << ::opencl::errorString(err));
4098 0 : mpResBuf = NULL;
4099 0 : return;
4100 : }
4101 : }
4102 :
4103 0 : bool pushResultToDocument( ScDocument& rDoc, const ScAddress& rTopPos )
4104 : {
4105 0 : if (!mpResBuf)
4106 0 : return false;
4107 :
4108 0 : rDoc.SetFormulaResults(rTopPos, mpResBuf, mnGroupLength);
4109 :
4110 : // Obtain cl context
4111 : ::opencl::KernelEnv kEnv;
4112 0 : ::opencl::setKernelEnv(&kEnv);
4113 :
4114 : cl_int err;
4115 0 : err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpCLResBuf, mpResBuf, 0, NULL, NULL);
4116 :
4117 0 : if (err != CL_SUCCESS)
4118 : {
4119 : SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << ::opencl::errorString(err));
4120 0 : return false;
4121 : }
4122 :
4123 0 : return true;
4124 : }
4125 : };
4126 :
4127 0 : class CLInterpreterContext
4128 : {
4129 : std::shared_ptr<DynamicKernel> mpKernelStore; /// for managed kernel instance.
4130 : DynamicKernel* mpKernel;
4131 :
4132 : SCROW mnGroupLength;
4133 :
4134 : public:
4135 0 : CLInterpreterContext( SCROW nGroupLength ) :
4136 0 : mpKernel(NULL), mnGroupLength(nGroupLength) {}
4137 :
4138 0 : bool isValid() const
4139 : {
4140 0 : return mpKernel != NULL;
4141 : }
4142 :
4143 0 : void setManagedKernel( DynamicKernel* pKernel )
4144 : {
4145 0 : mpKernelStore.reset(pKernel);
4146 0 : mpKernel = pKernel;
4147 0 : }
4148 :
4149 : #if ENABLE_THREADED_OPENCL_KERNEL_COMPILATION
4150 : void setUnmanagedKernel( DynamicKernel* pKernel )
4151 : {
4152 : mpKernel = pKernel;
4153 : }
4154 : #endif
4155 :
4156 0 : CLInterpreterResult launchKernel()
4157 : {
4158 0 : if (!isValid())
4159 0 : return CLInterpreterResult();
4160 :
4161 : try
4162 : {
4163 : // Run the kernel.
4164 0 : mpKernel->Launch(mnGroupLength);
4165 : }
4166 0 : catch (const UnhandledToken& ut)
4167 : {
4168 : SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled token: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
4169 0 : return CLInterpreterResult();
4170 : }
4171 0 : catch (const OpenCLError& oce)
4172 : {
4173 : SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCL error from " << oce.mFunction << ": " << ::opencl::errorString(oce.mError) << " at " << oce.mFile << ":" << oce.mLineNumber);
4174 0 : return CLInterpreterResult();
4175 : }
4176 0 : catch (const Unhandled& uh)
4177 : {
4178 : SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled case at " << uh.mFile << ":" << uh.mLineNumber);
4179 0 : return CLInterpreterResult();
4180 : }
4181 0 : catch (...)
4182 : {
4183 : SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled compiler error");
4184 0 : return CLInterpreterResult();
4185 : }
4186 :
4187 0 : return CLInterpreterResult(mpKernel, mnGroupLength);
4188 : }
4189 : };
4190 :
4191 :
4192 0 : CLInterpreterContext createCLInterpreterContext( const ScCalcConfig& rConfig,
4193 : ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode )
4194 : {
4195 0 : CLInterpreterContext aCxt(xGroup->mnLength);
4196 :
4197 : #if ENABLE_THREADED_OPENCL_KERNEL_COMPILATION
4198 : if (rGroup.meKernelState == sc::OpenCLKernelCompilationScheduled ||
4199 : rGroup.meKernelState == sc::OpenCLKernelBinaryCreated)
4200 : {
4201 : if (rGroup.meKernelState == sc::OpenCLKernelCompilationScheduled)
4202 : {
4203 : ScFormulaCellGroup::sxCompilationThread->maCompilationDoneCondition.wait();
4204 : ScFormulaCellGroup::sxCompilationThread->maCompilationDoneCondition.reset();
4205 : }
4206 :
4207 : // Kernel instance is managed by the formula group.
4208 : aCxt.setUnmanagedKernel(static_cast<DynamicKernel*>(xGroup->mpCompiledFormula));
4209 : }
4210 : else
4211 : {
4212 : assert(xGroup->meCalcState == sc::GroupCalcRunning);
4213 : aCxt.setManagedKernel(static_cast<DynamicKernel*>(DynamicKernel::create(rConfig, rCode, xGroup->mnLength)));
4214 : }
4215 : #else
4216 0 : aCxt.setManagedKernel(static_cast<DynamicKernel*>(DynamicKernel::create(rConfig, rCode, xGroup->mnLength)));
4217 : #endif
4218 :
4219 0 : return aCxt;
4220 : }
4221 :
4222 0 : void genRPNTokens( ScDocument& rDoc, const ScAddress& rTopPos, ScTokenArray& rCode )
4223 : {
4224 0 : ScCompiler aComp(&rDoc, rTopPos, rCode);
4225 0 : aComp.SetGrammar(rDoc.GetGrammar());
4226 : // Disable special ordering for jump commands for the OpenCL interpreter.
4227 0 : aComp.EnableJumpCommandReorder(false);
4228 0 : aComp.CompileTokenArray(); // Regenerate RPN tokens.
4229 0 : }
4230 :
4231 0 : bool waitForResults()
4232 : {
4233 : // Obtain cl context
4234 : ::opencl::KernelEnv kEnv;
4235 0 : ::opencl::setKernelEnv(&kEnv);
4236 :
4237 0 : cl_int err = clFinish(kEnv.mpkCmdQueue);
4238 0 : if (err != CL_SUCCESS)
4239 : SAL_WARN("sc.opencl", "clFinish failed: " << ::opencl::errorString(err));
4240 :
4241 0 : return err == CL_SUCCESS;
4242 : }
4243 :
4244 : }
4245 :
4246 0 : bool FormulaGroupInterpreterOpenCL::interpret( ScDocument& rDoc,
4247 : const ScAddress& rTopPos, ScFormulaCellGroupRef& xGroup,
4248 : ScTokenArray& rCode )
4249 : {
4250 0 : MergeCalcConfig(rDoc);
4251 :
4252 0 : genRPNTokens(rDoc, rTopPos, rCode);
4253 :
4254 0 : CLInterpreterContext aCxt = createCLInterpreterContext(maCalcConfig, xGroup, rCode);
4255 0 : if (!aCxt.isValid())
4256 0 : return false;
4257 :
4258 0 : CLInterpreterResult aRes = aCxt.launchKernel();
4259 0 : if (!aRes.isValid())
4260 0 : return false;
4261 :
4262 0 : if (!waitForResults())
4263 0 : return false;
4264 :
4265 0 : aRes.fetchResultFromKernel();
4266 :
4267 0 : return aRes.pushResultToDocument(rDoc, rTopPos);
4268 : }
4269 :
4270 156 : }} // namespace sc::opencl
4271 :
4272 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|