Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : */
9 :
10 : #include "formulagroup.hxx"
11 : #include "formulagroupcl.hxx"
12 : #include "clkernelthread.hxx"
13 : #include "grouptokenconverter.hxx"
14 : #include "document.hxx"
15 : #include "formulacell.hxx"
16 : #include "tokenarray.hxx"
17 : #include "compiler.hxx"
18 : #include "interpre.hxx"
19 : #include <formula/vectortoken.hxx>
20 : #include "scmatrix.hxx"
21 :
22 : #include "openclwrapper.hxx"
23 :
24 : #include "op_financial.hxx"
25 : #include "op_database.hxx"
26 : #include "op_math.hxx"
27 : #include "op_logical.hxx"
28 : #include "op_statistical.hxx"
29 : #include "op_array.hxx"
30 : #include "op_spreadsheet.hxx"
31 : #include "op_addin.hxx"
32 : /// CONFIGURATIONS
33 : // Comment out this to turn off FMIN and FMAX intrinsics
34 : #define USE_FMIN_FMAX 1
35 : #define REDUCE_THRESHOLD 201 // set to 4 for correctness testing. priority 1
36 : #define UNROLLING_FACTOR 16 // set to 4 for correctness testing (if no reduce)
37 : #include "formulagroupcl_public.hxx"
// Fallback definition of the NAN macro, compiled only when WIN32 is defined
// and no NAN macro is already available.
38 : #ifdef WIN32
39 : #ifndef NAN
40 : namespace {
41 :
// Two words that the NAN macro below reads back as one double.
// NOTE(review): presumably relies on unsigned long being 32 bits wide and on
// little-endian word order, giving the bit pattern 0x7fffffffffffffff - confirm.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C++.
42 : const unsigned long __nan[2] = {0xffffffff, 0x7fffffff};
43 :
44 : }
// Reinterprets the word pair above through a pointer to const double.
45 : #define NAN (*(const double*) __nan)
46 : #endif
47 : #endif
48 :
49 : #include <list>
50 : #include <map>
51 : #include <iostream>
52 : #include <sstream>
53 : #include <algorithm>
54 : #define MD5_KERNEL 1
55 : #ifdef MD5_KERNEL
56 : #include <rtl/digest.h>
57 : #endif
58 :
59 : #include <boost/scoped_ptr.hpp>
60 : #include <boost/scoped_array.hpp>
61 :
62 : #undef NO_FALLBACK_TO_SWINTERP /* undef this for non-TDD runs */
63 :
64 : using namespace formula;
65 :
66 : namespace sc { namespace opencl {
67 :
68 : namespace {
69 :
70 : #ifdef SAL_DETAIL_ENABLE_LOG_INFO
/// Prefix every line of @p s with a C comment holding its 1-based line number.
///
/// Each line (including its terminating '\n', if any) is emitted as
/// "/*" + number right-aligned in a field of width 4 + "*/ " + line.
/// A trailing fragment without a final newline is numbered as well; an empty
/// input yields an empty string.
///
/// @param s text to annotate; taken by reference so that large kernel sources
///          are not copied on every call.
/// @return the annotated text.
std::string linenumberify(const std::string& s)
{
    std::stringstream ss;
    int linenumber = 1;
    size_t start = 0;
    size_t newline;
    while ((newline = s.find('\n', start)) != std::string::npos)
    {
        ss << "/*" << std::setw(4) << linenumber++ << "*/ ";
        // Copy the line together with its '\n' straight from the source
        // buffer instead of materialising a substr() temporary.
        ss.write(s.data() + start, static_cast<std::streamsize>(newline - start + 1));
        start = newline + 1;
    }
    if (start < s.size())
    {
        // Last line has no terminating newline.
        ss << "/*" << std::setw(4) << linenumber << "*/ ";
        ss.write(s.data() + start, static_cast<std::streamsize>(s.size() - start));
    }
    return ss.str();
}
86 : #endif
87 :
88 : } // anonymous namespace
89 :
90 : /// Map the buffer used by an argument and do necessary argument setting
91 0 : size_t VectorRef::Marshal( cl_kernel k, int argno, int, cl_program )
92 : {
93 0 : FormulaToken* ref = mFormulaTree->GetFormulaToken();
94 0 : double* pHostBuffer = NULL;
95 0 : size_t szHostBuffer = 0;
96 0 : if (ref->GetType() == formula::svSingleVectorRef)
97 : {
98 : const formula::SingleVectorRefToken* pSVR =
99 0 : static_cast<const formula::SingleVectorRefToken*>(ref);
100 0 : pHostBuffer = const_cast<double*>(pSVR->GetArray().mpNumericArray);
101 0 : szHostBuffer = pSVR->GetArrayLength() * sizeof(double);
102 : }
103 0 : else if (ref->GetType() == formula::svDoubleVectorRef)
104 : {
105 : const formula::DoubleVectorRefToken* pDVR =
106 0 : static_cast<const formula::DoubleVectorRefToken*>(ref);
107 : pHostBuffer = const_cast<double*>(
108 0 : pDVR->GetArrays()[mnIndex].mpNumericArray);
109 0 : szHostBuffer = pDVR->GetArrayLength() * sizeof(double);
110 : }
111 : else
112 : {
113 0 : throw Unhandled();
114 : }
115 : // Obtain cl context
116 : KernelEnv kEnv;
117 0 : OpenCLDevice::setKernelEnv(&kEnv);
118 : cl_int err;
119 0 : if (pHostBuffer)
120 : {
121 : mpClmem = clCreateBuffer(kEnv.mpkContext,
122 : (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
123 : szHostBuffer,
124 0 : pHostBuffer, &err);
125 0 : if (CL_SUCCESS != err)
126 0 : throw OpenCLError(err, __FILE__, __LINE__);
127 : }
128 : else
129 : {
130 0 : if (szHostBuffer == 0)
131 0 : szHostBuffer = sizeof(double); // a dummy small value
132 : // Marshal as a buffer of NANs
133 : mpClmem = clCreateBuffer(kEnv.mpkContext,
134 : (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
135 0 : szHostBuffer, NULL, &err);
136 0 : if (CL_SUCCESS != err)
137 0 : throw OpenCLError(err, __FILE__, __LINE__);
138 : double* pNanBuffer = (double*)clEnqueueMapBuffer(
139 : kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
140 0 : szHostBuffer, 0, NULL, NULL, &err);
141 0 : if (CL_SUCCESS != err)
142 0 : throw OpenCLError(err, __FILE__, __LINE__);
143 0 : for (size_t i = 0; i < szHostBuffer / sizeof(double); i++)
144 0 : pNanBuffer[i] = NAN;
145 : err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
146 0 : pNanBuffer, 0, NULL, NULL);
147 : }
148 :
149 0 : err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem);
150 0 : if (CL_SUCCESS != err)
151 0 : throw OpenCLError(err, __FILE__, __LINE__);
152 0 : return 1;
153 : }
154 :
155 : /// Arguments that are actually compile-time constant string
156 : /// Currently, only the hash is passed.
157 : /// TBD(IJSUNG): pass also length and the actual string if there is a
158 : /// hash function collision
159 0 : class ConstStringArgument : public DynamicKernelArgument
160 : {
161 : public:
162 0 : ConstStringArgument( const std::string& s,
163 : FormulaTreeNodeRef ft ) :
164 0 : DynamicKernelArgument(s, ft) { }
165 : /// Generate declaration
166 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
167 : {
168 0 : ss << "unsigned " << mSymName;
169 0 : }
170 0 : virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
171 : {
172 0 : ss << GenSlidingWindowDeclRef(false);
173 0 : }
174 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
175 : {
176 0 : GenDecl(ss);
177 0 : }
178 0 : virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
179 : {
180 0 : std::stringstream ss;
181 0 : if (GetFormulaToken()->GetType() != formula::svString)
182 0 : throw Unhandled();
183 0 : FormulaToken* Tok = GetFormulaToken();
184 0 : ss << Tok->GetString().getString().toAsciiUpperCase().hashCode() << "U";
185 0 : return ss.str();
186 : }
187 0 : virtual size_t GetWindowSize() const SAL_OVERRIDE
188 : {
189 0 : return 1;
190 : }
191 : /// Pass the 32-bit hash of the string to the kernel
192 0 : virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
193 : {
194 0 : FormulaToken* ref = mFormulaTree->GetFormulaToken();
195 0 : cl_uint hashCode = 0;
196 0 : if (ref->GetType() == formula::svString)
197 : {
198 0 : const rtl::OUString s = ref->GetString().getString().toAsciiUpperCase();
199 0 : hashCode = s.hashCode();
200 : }
201 : else
202 : {
203 0 : throw Unhandled();
204 : }
205 : // marshaling
206 : // Obtain cl context
207 : KernelEnv kEnv;
208 0 : OpenCLDevice::setKernelEnv(&kEnv);
209 : // Pass the scalar result back to the rest of the formula kernel
210 0 : cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), (void*)&hashCode);
211 0 : if (CL_SUCCESS != err)
212 0 : throw OpenCLError(err, __FILE__, __LINE__);
213 0 : return 1;
214 : }
215 : };
216 :
217 : /// Arguments that are actually compile-time constants
218 0 : class DynamicKernelConstantArgument : public DynamicKernelArgument
219 : {
220 : public:
221 0 : DynamicKernelConstantArgument( const std::string& s,
222 : FormulaTreeNodeRef ft ) :
223 0 : DynamicKernelArgument(s, ft) { }
224 : /// Generate declaration
225 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
226 : {
227 0 : ss << "double " << mSymName;
228 0 : }
229 0 : virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
230 : {
231 0 : ss << mSymName;
232 0 : }
233 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
234 : {
235 0 : GenDecl(ss);
236 0 : }
237 0 : virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
238 : {
239 0 : if (GetFormulaToken()->GetType() != formula::svDouble)
240 0 : throw Unhandled();
241 0 : return mSymName;
242 : }
243 0 : virtual size_t GetWindowSize() const SAL_OVERRIDE
244 : {
245 0 : return 1;
246 : }
247 0 : double GetDouble() const
248 : {
249 0 : FormulaToken* Tok = GetFormulaToken();
250 0 : if (Tok->GetType() != formula::svDouble)
251 0 : throw Unhandled();
252 0 : return Tok->GetDouble();
253 : }
254 : /// Create buffer and pass the buffer to a given kernel
255 0 : virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
256 : {
257 0 : double tmp = GetDouble();
258 : // Pass the scalar result back to the rest of the formula kernel
259 0 : cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
260 0 : if (CL_SUCCESS != err)
261 0 : throw OpenCLError(err, __FILE__, __LINE__);
262 0 : return 1;
263 : }
264 0 : virtual cl_mem GetCLBuffer() const { return NULL; }
265 : };
266 :
267 0 : class DynamicKernelPiArgument : public DynamicKernelArgument
268 : {
269 : public:
270 0 : DynamicKernelPiArgument( const std::string& s,
271 : FormulaTreeNodeRef ft ) :
272 0 : DynamicKernelArgument(s, ft) { }
273 : /// Generate declaration
274 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
275 : {
276 0 : ss << "double " << mSymName;
277 0 : }
278 0 : virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
279 : {
280 0 : ss << "3.14159265358979";
281 0 : }
282 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
283 : {
284 0 : GenDecl(ss);
285 0 : }
286 0 : virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
287 : {
288 0 : return mSymName;
289 : }
290 0 : virtual size_t GetWindowSize() const SAL_OVERRIDE
291 : {
292 0 : return 1;
293 : }
294 : /// Create buffer and pass the buffer to a given kernel
295 0 : virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
296 : {
297 0 : double tmp = 0.0;
298 : // Pass the scalar result back to the rest of the formula kernel
299 0 : cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
300 0 : if (CL_SUCCESS != err)
301 0 : throw OpenCLError(err, __FILE__, __LINE__);
302 0 : return 1;
303 : }
304 : };
305 :
306 0 : class DynamicKernelRandomArgument : public DynamicKernelArgument
307 : {
308 : public:
309 0 : DynamicKernelRandomArgument( const std::string& s,
310 : FormulaTreeNodeRef ft ) :
311 0 : DynamicKernelArgument(s, ft) { }
312 : /// Generate declaration
313 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
314 : {
315 0 : ss << "double " << mSymName;
316 0 : }
317 0 : virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
318 : {
319 0 : ss << mSymName;
320 0 : }
321 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
322 : {
323 0 : GenDecl(ss);
324 0 : }
325 0 : virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
326 : {
327 0 : return mSymName + "_Random()";
328 : }
329 0 : virtual void GenSlidingWindowFunction( std::stringstream& ss ) SAL_OVERRIDE
330 : {
331 0 : ss << "\ndouble " << mSymName;
332 0 : ss << "_Random ()\n{\n";
333 0 : ss << " int i, gid0=get_global_id(0);;\n";
334 0 : ss << " double tmp = 0;\n";
335 0 : ss << " double M = 2147483647;\n";
336 0 : ss << " double Lamda = 32719;\n";
337 0 : ss << " double f;\n";
338 0 : ss << " f = gid0 + 1;\n";
339 0 : ss << " int k;\n";
340 0 : ss << " for(i = 1;i <= 100; ++i){\n";
341 0 : ss << " f = Lamda * f;\n";
342 0 : ss << " k = (int)(f * pow(M,-1.0));\n";
343 0 : ss << " f = f - M * k;\n";
344 0 : ss << " }\n";
345 0 : ss << " tmp = f * pow(M,-1.0);\n";
346 0 : ss << " return tmp;\n";
347 0 : ss << "}";
348 0 : }
349 0 : virtual size_t GetWindowSize() const SAL_OVERRIDE
350 : {
351 0 : return 1;
352 : }
353 : /// Create buffer and pass the buffer to a given kernel
354 0 : virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
355 : {
356 0 : double tmp = 0.0;
357 : // Pass the scalar result back to the rest of the formula kernel
358 0 : cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
359 0 : if (CL_SUCCESS != err)
360 0 : throw OpenCLError(err, __FILE__, __LINE__);
361 0 : return 1;
362 : }
363 : };
364 :
365 : /// A vector of strings
366 0 : class DynamicKernelStringArgument : public VectorRef
367 : {
368 : public:
369 0 : DynamicKernelStringArgument( const std::string& s,
370 : FormulaTreeNodeRef ft, int index = 0 ) :
371 0 : VectorRef(s, ft, index) { }
372 :
373 0 : virtual void GenSlidingWindowFunction( std::stringstream& ) SAL_OVERRIDE { }
374 : /// Generate declaration
375 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
376 : {
377 0 : ss << "__global unsigned int *" << mSymName;
378 0 : }
379 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
380 : {
381 0 : DynamicKernelStringArgument::GenDecl(ss);
382 0 : }
383 : virtual size_t Marshal( cl_kernel, int, int, cl_program ) SAL_OVERRIDE;
384 : };
385 :
386 : /// Marshal a string vector reference
387 0 : size_t DynamicKernelStringArgument::Marshal( cl_kernel k, int argno, int, cl_program )
388 : {
389 0 : FormulaToken* ref = mFormulaTree->GetFormulaToken();
390 : // Obtain cl context
391 : KernelEnv kEnv;
392 0 : OpenCLDevice::setKernelEnv(&kEnv);
393 : cl_int err;
394 0 : formula::VectorRefArray vRef;
395 0 : size_t nStrings = 0;
396 0 : if (ref->GetType() == formula::svSingleVectorRef)
397 : {
398 : const formula::SingleVectorRefToken* pSVR =
399 0 : static_cast<const formula::SingleVectorRefToken*>(ref);
400 0 : nStrings = pSVR->GetArrayLength();
401 0 : vRef = pSVR->GetArray();
402 : }
403 0 : else if (ref->GetType() == formula::svDoubleVectorRef)
404 : {
405 : const formula::DoubleVectorRefToken* pDVR =
406 0 : static_cast<const formula::DoubleVectorRefToken*>(ref);
407 0 : nStrings = pDVR->GetArrayLength();
408 0 : vRef = pDVR->GetArrays()[mnIndex];
409 : }
410 0 : size_t szHostBuffer = nStrings * sizeof(cl_int);
411 0 : cl_uint* pHashBuffer = NULL;
412 :
413 0 : if (vRef.mpStringArray != NULL)
414 : {
415 : // Marshal strings. Right now we pass hashes of these string
416 : mpClmem = clCreateBuffer(kEnv.mpkContext,
417 : (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
418 0 : szHostBuffer, NULL, &err);
419 0 : if (CL_SUCCESS != err)
420 0 : throw OpenCLError(err, __FILE__, __LINE__);
421 :
422 : pHashBuffer = (cl_uint*)clEnqueueMapBuffer(
423 : kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
424 0 : szHostBuffer, 0, NULL, NULL, &err);
425 0 : if (CL_SUCCESS != err)
426 0 : throw OpenCLError(err, __FILE__, __LINE__);
427 :
428 0 : for (size_t i = 0; i < nStrings; i++)
429 : {
430 0 : if (vRef.mpStringArray[i])
431 : {
432 0 : const OUString tmp = OUString(vRef.mpStringArray[i]);
433 0 : pHashBuffer[i] = tmp.hashCode();
434 : }
435 : else
436 : {
437 0 : pHashBuffer[i] = 0;
438 : }
439 : }
440 : }
441 : else
442 : {
443 0 : if (nStrings == 0)
444 0 : szHostBuffer = sizeof(cl_int); // a dummy small value
445 : // Marshal as a buffer of NANs
446 : mpClmem = clCreateBuffer(kEnv.mpkContext,
447 : (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
448 0 : szHostBuffer, NULL, &err);
449 0 : if (CL_SUCCESS != err)
450 0 : throw OpenCLError(err, __FILE__, __LINE__);
451 :
452 : pHashBuffer = (cl_uint*)clEnqueueMapBuffer(
453 : kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
454 0 : szHostBuffer, 0, NULL, NULL, &err);
455 0 : if (CL_SUCCESS != err)
456 0 : throw OpenCLError(err, __FILE__, __LINE__);
457 :
458 0 : for (size_t i = 0; i < szHostBuffer / sizeof(cl_int); i++)
459 0 : pHashBuffer[i] = 0;
460 : }
461 : err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
462 0 : pHashBuffer, 0, NULL, NULL);
463 0 : if (CL_SUCCESS != err)
464 0 : throw OpenCLError(err, __FILE__, __LINE__);
465 :
466 0 : err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem);
467 0 : if (CL_SUCCESS != err)
468 0 : throw OpenCLError(err, __FILE__, __LINE__);
469 0 : return 1;
470 : }
471 :
472 : /// A mixed string/numberic vector
473 0 : class DynamicKernelMixedArgument : public VectorRef
474 : {
475 : public:
476 0 : DynamicKernelMixedArgument( const std::string& s,
477 : FormulaTreeNodeRef ft ) :
478 0 : VectorRef(s, ft), mStringArgument(s + "s", ft) { }
479 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
480 : {
481 0 : VectorRef::GenSlidingWindowDecl(ss);
482 0 : ss << ", ";
483 0 : mStringArgument.GenSlidingWindowDecl(ss);
484 0 : }
485 0 : virtual bool IsMixedArgument() const SAL_OVERRIDE { return true;}
486 0 : virtual void GenSlidingWindowFunction( std::stringstream& ) SAL_OVERRIDE { }
487 : /// Generate declaration
488 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
489 : {
490 0 : VectorRef::GenDecl(ss);
491 0 : ss << ", ";
492 0 : mStringArgument.GenDecl(ss);
493 0 : }
494 0 : virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
495 : {
496 0 : VectorRef::GenDeclRef(ss);
497 0 : ss << ",";
498 0 : mStringArgument.GenDeclRef(ss);
499 0 : }
500 0 : virtual void GenNumDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
501 : {
502 0 : VectorRef::GenSlidingWindowDecl(ss);
503 0 : }
504 0 : virtual void GenStringDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
505 : {
506 0 : mStringArgument.GenSlidingWindowDecl(ss);
507 0 : }
508 0 : virtual std::string GenSlidingWindowDeclRef( bool nested ) const SAL_OVERRIDE
509 : {
510 0 : std::stringstream ss;
511 0 : ss << "(!isNan(" << VectorRef::GenSlidingWindowDeclRef();
512 0 : ss << ")?" << VectorRef::GenSlidingWindowDeclRef();
513 0 : ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
514 0 : ss << ")";
515 0 : return ss.str();
516 : }
517 0 : virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
518 : {
519 0 : std::stringstream ss;
520 0 : ss << VectorRef::GenSlidingWindowDeclRef();
521 0 : return ss.str();
522 : }
523 0 : virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
524 : {
525 0 : std::stringstream ss;
526 0 : ss << mStringArgument.GenSlidingWindowDeclRef();
527 0 : return ss.str();
528 : }
529 0 : virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) SAL_OVERRIDE
530 : {
531 0 : int i = VectorRef::Marshal(k, argno, vw, p);
532 0 : i += mStringArgument.Marshal(k, argno + i, vw, p);
533 0 : return i;
534 : }
535 :
536 : protected:
537 : DynamicKernelStringArgument mStringArgument;
538 : };
539 :
540 : /// Handling a Double Vector that is used as a sliding window input
541 : /// to either a sliding window average or sum-of-products
542 : /// Generate a sequential loop for reductions
543 : class OpSum; // Forward Declaration
544 : class OpAverage; // Forward Declaration
545 : class OpMin; // Forward Declaration
546 : class OpMax; // Forward Declaration
547 : class OpCount; // Forward Declaration
548 :
549 : template<class Base>
550 : class DynamicKernelSlidingArgument : public Base
551 : {
552 : public:
553 0 : DynamicKernelSlidingArgument( const std::string& s,
554 : FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase>& CodeGen,
555 : int index = 0 ) :
556 0 : Base(s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
557 : {
558 0 : FormulaToken* t = ft->GetFormulaToken();
559 0 : if (t->GetType() != formula::svDoubleVectorRef)
560 0 : throw Unhandled();
561 0 : mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
562 0 : bIsStartFixed = mpDVR->IsStartFixed();
563 0 : bIsEndFixed = mpDVR->IsEndFixed();
564 0 : }
565 : // Should only be called by SumIfs. Yikes!
566 0 : virtual bool NeedParallelReduction() const
567 : {
568 : assert(dynamic_cast<OpSumIfs*>(mpCodeGen.get()));
569 0 : return GetWindowSize() > 100 &&
570 0 : ((GetStartFixed() && GetEndFixed()) ||
571 0 : (!GetStartFixed() && !GetEndFixed()));
572 : }
573 0 : virtual void GenSlidingWindowFunction( std::stringstream& ) { }
574 :
575 0 : virtual std::string GenSlidingWindowDeclRef( bool nested = false ) const
576 : {
577 0 : size_t nArrayLength = mpDVR->GetArrayLength();
578 0 : std::stringstream ss;
579 0 : if (!bIsStartFixed && !bIsEndFixed)
580 : {
581 0 : if (nested)
582 0 : ss << "((i+gid0) <" << nArrayLength << "?";
583 0 : ss << Base::GetName() << "[i + gid0]";
584 0 : if (nested)
585 0 : ss << ":NAN)";
586 : }
587 : else
588 : {
589 0 : if (nested)
590 0 : ss << "(i <" << nArrayLength << "?";
591 0 : ss << Base::GetName() << "[i]";
592 0 : if (nested)
593 0 : ss << ":NAN)";
594 : }
595 0 : return ss.str();
596 : }
597 : /// Controls how the elements in the DoubleVectorRef are traversed
598 0 : virtual size_t GenReductionLoopHeader(
599 : std::stringstream& ss, bool& needBody )
600 : {
601 : assert(mpDVR);
602 0 : size_t nCurWindowSize = mpDVR->GetRefRowSize();
603 : // original for loop
604 : #ifndef UNROLLING_FACTOR
605 : needBody = true;
606 : // No need to generate a for-loop for degenerated cases
607 : if (nCurWindowSize == 1)
608 : {
609 : ss << "if (gid0 <" << mpDVR->GetArrayLength();
610 : ss << ")\n\t{\tint i = 0;\n\t\t";
611 : return nCurWindowSize;
612 : }
613 :
614 : ss << "for (int i = ";
615 : if (!bIsStartFixed && bIsEndFixed)
616 : {
617 : #ifdef ISNAN
618 : ss << "gid0; i < " << mpDVR->GetArrayLength();
619 : ss << " && i < " << nCurWindowSize << "; i++){\n\t\t";
620 : #else
621 : ss << "gid0; i < " << nCurWindowSize << "; i++)\n\t\t";
622 : #endif
623 : }
624 : else if (bIsStartFixed && !bIsEndFixed)
625 : {
626 : #ifdef ISNAN
627 : ss << "0; i < " << mpDVR->GetArrayLength();
628 : ss << " && i < gid0+" << nCurWindowSize << "; i++){\n\t\t";
629 : #else
630 : ss << "0; i < gid0+" << nCurWindowSize << "; i++)\n\t\t";
631 : #endif
632 : }
633 : else if (!bIsStartFixed && !bIsEndFixed)
634 : {
635 : #ifdef ISNAN
636 : ss << "0; i + gid0 < " << mpDVR->GetArrayLength();
637 : ss << " && i < " << nCurWindowSize << "; i++){\n\t\t";
638 : #else
639 : ss << "0; i < " << nCurWindowSize << "; i++)\n\t\t";
640 : #endif
641 : }
642 : else
643 : {
644 : unsigned limit =
645 : std::min(mpDVR->GetArrayLength(), nCurWindowSize);
646 : ss << "0; i < " << limit << "; i++){\n\t\t";
647 : }
648 : return nCurWindowSize;
649 : #endif
650 :
651 : #ifdef UNROLLING_FACTOR
652 : {
653 0 : if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
654 : {
655 0 : ss << "for (int i = ";
656 0 : ss << "gid0; i < " << mpDVR->GetArrayLength();
657 0 : ss << " && i < " << nCurWindowSize << "; i++){\n\t\t";
658 0 : needBody = true;
659 0 : return nCurWindowSize;
660 : }
661 0 : else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
662 : {
663 0 : ss << "for (int i = ";
664 0 : ss << "0; i < " << mpDVR->GetArrayLength();
665 0 : ss << " && i < gid0+" << nCurWindowSize << "; i++){\n\t\t";
666 0 : needBody = true;
667 0 : return nCurWindowSize;
668 : }
669 0 : else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
670 : {
671 0 : ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
672 0 : ss << "{int i;\n\t";
673 0 : std::stringstream temp1, temp2;
674 0 : int outLoopSize = UNROLLING_FACTOR;
675 0 : if (nCurWindowSize / outLoopSize != 0)
676 : {
677 0 : ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
678 0 : for (int count = 0; count < outLoopSize; count++)
679 : {
680 0 : ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
681 0 : if (count == 0)
682 : {
683 0 : temp1 << "if(i + gid0 < " << mpDVR->GetArrayLength();
684 0 : temp1 << "){\n\t\t";
685 0 : temp1 << "tmp = legalize(";
686 0 : temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
687 0 : temp1 << ", tmp);\n\t\t\t";
688 0 : temp1 << "}\n\t";
689 : }
690 0 : ss << temp1.str();
691 : }
692 0 : ss << "}\n\t";
693 : }
694 : // The residual of mod outLoopSize
695 0 : for (unsigned int count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
696 : {
697 0 : ss << "i = " << count << ";\n\t";
698 0 : if (count == nCurWindowSize / outLoopSize * outLoopSize)
699 : {
700 0 : temp2 << "if(i + gid0 < " << mpDVR->GetArrayLength();
701 0 : temp2 << "){\n\t\t";
702 0 : temp2 << "tmp = legalize(";
703 0 : temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
704 0 : temp2 << ", tmp);\n\t\t\t";
705 0 : temp2 << "}\n\t";
706 : }
707 0 : ss << temp2.str();
708 : }
709 0 : ss << "} // to scope the int i declaration\n";
710 0 : needBody = false;
711 0 : return nCurWindowSize;
712 : }
713 : // (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
714 : else
715 : {
716 0 : ss << "//else situation \n\t";
717 0 : ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
718 0 : ss << "{int i;\n\t";
719 0 : std::stringstream temp1, temp2;
720 0 : int outLoopSize = UNROLLING_FACTOR;
721 0 : if (nCurWindowSize / outLoopSize != 0)
722 : {
723 0 : ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
724 0 : for (int count = 0; count < outLoopSize; count++)
725 : {
726 0 : ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
727 0 : if (count == 0)
728 : {
729 0 : temp1 << "tmp = legalize(";
730 0 : temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
731 0 : temp1 << ", tmp);\n\t\t\t";
732 : }
733 0 : ss << temp1.str();
734 : }
735 0 : ss << "}\n\t";
736 : }
737 : // The residual of mod outLoopSize
738 0 : for (unsigned int count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
739 : {
740 0 : ss << "i = " << count << ";\n\t";
741 0 : if (count == nCurWindowSize / outLoopSize * outLoopSize)
742 : {
743 0 : temp2 << "tmp = legalize(";
744 0 : temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
745 0 : temp2 << ", tmp);\n\t\t\t";
746 : }
747 0 : ss << temp2.str();
748 : }
749 0 : ss << "} // to scope the int i declaration\n";
750 0 : needBody = false;
751 0 : return nCurWindowSize;
752 : }
753 : }
754 : #endif
755 : }
756 0 : ~DynamicKernelSlidingArgument()
757 : {
758 0 : if (mpClmem2)
759 : {
760 0 : clReleaseMemObject(mpClmem2);
761 0 : mpClmem2 = NULL;
762 : }
763 0 : }
764 :
765 0 : size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
766 :
767 0 : size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
768 :
769 0 : size_t GetStartFixed() const { return bIsStartFixed; }
770 :
771 0 : size_t GetEndFixed() const { return bIsEndFixed; }
772 :
773 : protected:
774 : bool bIsStartFixed, bIsEndFixed;
775 : const formula::DoubleVectorRefToken* mpDVR;
776 : // from parent nodes
777 : boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
778 : // controls whether to invoke the reduction kernel during marshaling or not
779 : cl_mem mpClmem2;
780 : };
781 :
782 : /// A mixed string/numberic vector
783 0 : class DynamicKernelMixedSlidingArgument : public VectorRef
784 : {
785 : public:
786 0 : DynamicKernelMixedSlidingArgument( const std::string& s,
787 : FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase>& CodeGen,
788 : int index = 0 ) :
789 : VectorRef(s, ft),
790 : mDoubleArgument(s, ft, CodeGen, index),
791 0 : mStringArgument(s + "s", ft, CodeGen, index) { }
792 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
793 : {
794 0 : mDoubleArgument.GenSlidingWindowDecl(ss);
795 0 : ss << ", ";
796 0 : mStringArgument.GenSlidingWindowDecl(ss);
797 0 : }
798 0 : virtual void GenSlidingWindowFunction( std::stringstream& ) SAL_OVERRIDE { }
799 : /// Generate declaration
800 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
801 : {
802 0 : mDoubleArgument.GenDecl(ss);
803 0 : ss << ", ";
804 0 : mStringArgument.GenDecl(ss);
805 0 : }
806 0 : virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
807 : {
808 0 : mDoubleArgument.GenDeclRef(ss);
809 0 : ss << ",";
810 0 : mStringArgument.GenDeclRef(ss);
811 0 : }
812 0 : virtual std::string GenSlidingWindowDeclRef( bool nested ) const SAL_OVERRIDE
813 : {
814 0 : std::stringstream ss;
815 0 : ss << "(!isNan(" << mDoubleArgument.GenSlidingWindowDeclRef();
816 0 : ss << ")?" << mDoubleArgument.GenSlidingWindowDeclRef();
817 0 : ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
818 0 : ss << ")";
819 0 : return ss.str();
820 : }
821 0 : virtual bool IsMixedArgument() const SAL_OVERRIDE { return true;}
822 0 : virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
823 : {
824 0 : std::stringstream ss;
825 0 : ss << mDoubleArgument.GenSlidingWindowDeclRef();
826 0 : return ss.str();
827 : }
828 0 : virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
829 : {
830 0 : std::stringstream ss;
831 0 : ss << mStringArgument.GenSlidingWindowDeclRef();
832 0 : return ss.str();
833 : }
834 0 : virtual void GenNumDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
835 : {
836 0 : mDoubleArgument.GenDeclRef(ss);
837 0 : }
838 0 : virtual void GenStringDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
839 : {
840 0 : mStringArgument.GenDeclRef(ss);
841 0 : }
842 0 : virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) SAL_OVERRIDE
843 : {
844 0 : int i = mDoubleArgument.Marshal(k, argno, vw, p);
845 0 : i += mStringArgument.Marshal(k, argno + i, vw, p);
846 0 : return i;
847 : }
848 :
849 : protected:
850 : DynamicKernelSlidingArgument<VectorRef> mDoubleArgument;
851 : DynamicKernelSlidingArgument<DynamicKernelStringArgument> mStringArgument;
852 : };
853 :
854 : /// Holds the symbol table for a given dynamic kernel
855 0 : class SymbolTable
856 : {
857 : public:
858 : typedef std::map<const formula::FormulaToken*,
859 : boost::shared_ptr<DynamicKernelArgument> > ArgumentMap;
860 : // This avoids instability caused by using pointer as the key type
861 : typedef std::list<boost::shared_ptr<DynamicKernelArgument> > ArgumentList;
862 0 : SymbolTable() : mCurId(0) { }
863 : template<class T>
864 : const DynamicKernelArgument* DeclRefArg( FormulaTreeNodeRef, SlidingFunctionBase* pCodeGen );
865 : /// Used to generate sliding window helpers
866 0 : void DumpSlidingWindowFunctions( std::stringstream& ss )
867 : {
868 0 : for (ArgumentList::iterator it = mParams.begin(), e = mParams.end(); it != e;
869 : ++it)
870 : {
871 0 : (*it)->GenSlidingWindowFunction(ss);
872 0 : ss << "\n";
873 : }
874 0 : }
875 : /// Memory mapping from host to device and pass buffers to the given kernel as
876 : /// arguments
877 : void Marshal( cl_kernel, int, cl_program );
878 : // number of result items.
879 : static int nR;
880 :
881 : private:
882 : unsigned int mCurId;
883 : ArgumentMap mSymbols;
884 : ArgumentList mParams;
885 : };
886 : int SymbolTable::nR = 0;
887 :
888 0 : void SymbolTable::Marshal( cl_kernel k, int nVectorWidth, cl_program pProgram )
889 : {
890 0 : int i = 1; //The first argument is reserved for results
891 0 : for (ArgumentList::iterator it = mParams.begin(), e = mParams.end(); it != e;
892 : ++it)
893 : {
894 0 : i += (*it)->Marshal(k, i, nVectorWidth, pProgram);
895 : }
896 0 : }
897 :
898 : /// Handling a Double Vector that is used as a sliding window input
899 : /// Performs parallel reduction based on given operator
900 : template<class Base>
901 : class ParallelReductionVectorRef : public Base
902 : {
903 : public:
904 0 : ParallelReductionVectorRef( const std::string& s,
905 : FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase>& CodeGen,
906 : int index = 0 ) :
907 0 : Base(s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
908 : {
909 0 : FormulaToken* t = ft->GetFormulaToken();
910 0 : if (t->GetType() != formula::svDoubleVectorRef)
911 0 : throw Unhandled();
912 0 : mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
913 0 : bIsStartFixed = mpDVR->IsStartFixed();
914 0 : bIsEndFixed = mpDVR->IsEndFixed();
915 0 : }
916 : /// Emit the definition for the auxiliary reduction kernel
917 0 : virtual void GenSlidingWindowFunction( std::stringstream& ss )
918 : {
919 0 : if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
920 : {
921 0 : std::string name = Base::GetName();
922 0 : ss << "__kernel void " << name;
923 0 : ss << "_reduction(__global double* A, "
924 : "__global double *result,int arrayLength,int windowSize){\n";
925 0 : ss << " double tmp, current_result =" <<
926 0 : mpCodeGen->GetBottom();
927 0 : ss << ";\n";
928 0 : ss << " int writePos = get_group_id(1);\n";
929 0 : ss << " int lidx = get_local_id(0);\n";
930 0 : ss << " __local double shm_buf[256];\n";
931 0 : if (mpDVR->IsStartFixed())
932 0 : ss << " int offset = 0;\n";
933 : else // if (!mpDVR->IsStartFixed())
934 0 : ss << " int offset = get_group_id(1);\n";
935 0 : if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
936 0 : ss << " int end = windowSize;\n";
937 0 : else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
938 0 : ss << " int end = offset + windowSize;\n";
939 0 : else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
940 0 : ss << " int end = windowSize + get_group_id(1);\n";
941 0 : else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
942 0 : ss << " int end = windowSize;\n";
943 0 : ss << " end = min(end, arrayLength);\n";
944 :
945 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
946 0 : ss << " int loop = arrayLength/512 + 1;\n";
947 0 : ss << " for (int l=0; l<loop; l++){\n";
948 0 : ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
949 0 : ss << " int loopOffset = l*512;\n";
950 0 : ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
951 0 : ss << " tmp = legalize(" << mpCodeGen->Gen2(
952 0 : "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
953 0 : ss << " tmp = legalize(" << mpCodeGen->Gen2(
954 0 : "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
955 0 : ss << " } else if ((loopOffset + lidx + offset) < end)\n";
956 0 : ss << " tmp = legalize(" << mpCodeGen->Gen2(
957 0 : "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
958 0 : ss << " shm_buf[lidx] = tmp;\n";
959 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
960 0 : ss << " for (int i = 128; i >0; i/=2) {\n";
961 0 : ss << " if (lidx < i)\n";
962 0 : ss << " shm_buf[lidx] = ";
963 : // Special case count
964 0 : if (dynamic_cast<OpCount*>(mpCodeGen.get()))
965 0 : ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
966 : else
967 0 : ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
968 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
969 0 : ss << " }\n";
970 0 : ss << " if (lidx == 0)\n";
971 0 : ss << " current_result =";
972 0 : if (dynamic_cast<OpCount*>(mpCodeGen.get()))
973 0 : ss << "current_result + shm_buf[0]";
974 : else
975 0 : ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
976 0 : ss << ";\n";
977 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
978 0 : ss << " }\n";
979 0 : ss << " if (lidx == 0)\n";
980 0 : ss << " result[writePos] = current_result;\n";
981 0 : ss << "}\n";
982 : }
983 : else
984 : {
985 0 : std::string name = Base::GetName();
986 : /*sum reduction*/
987 0 : ss << "__kernel void " << name << "_sum";
988 0 : ss << "_reduction(__global double* A, "
989 : "__global double *result,int arrayLength,int windowSize){\n";
990 0 : ss << " double tmp, current_result =" <<
991 0 : mpCodeGen->GetBottom();
992 0 : ss << ";\n";
993 0 : ss << " int writePos = get_group_id(1);\n";
994 0 : ss << " int lidx = get_local_id(0);\n";
995 0 : ss << " __local double shm_buf[256];\n";
996 0 : if (mpDVR->IsStartFixed())
997 0 : ss << " int offset = 0;\n";
998 : else // if (!mpDVR->IsStartFixed())
999 0 : ss << " int offset = get_group_id(1);\n";
1000 0 : if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1001 0 : ss << " int end = windowSize;\n";
1002 0 : else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1003 0 : ss << " int end = offset + windowSize;\n";
1004 0 : else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1005 0 : ss << " int end = windowSize + get_group_id(1);\n";
1006 0 : else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1007 0 : ss << " int end = windowSize;\n";
1008 0 : ss << " end = min(end, arrayLength);\n";
1009 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1010 0 : ss << " int loop = arrayLength/512 + 1;\n";
1011 0 : ss << " for (int l=0; l<loop; l++){\n";
1012 0 : ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
1013 0 : ss << " int loopOffset = l*512;\n";
1014 0 : ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
1015 0 : ss << " tmp = legalize(";
1016 0 : ss << "(A[loopOffset + lidx + offset]+ tmp)";
1017 0 : ss << ", tmp);\n";
1018 0 : ss << " tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
1019 0 : ss << ", tmp);\n";
1020 0 : ss << " } else if ((loopOffset + lidx + offset) < end)\n";
1021 0 : ss << " tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
1022 0 : ss << ", tmp);\n";
1023 0 : ss << " shm_buf[lidx] = tmp;\n";
1024 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1025 0 : ss << " for (int i = 128; i >0; i/=2) {\n";
1026 0 : ss << " if (lidx < i)\n";
1027 0 : ss << " shm_buf[lidx] = ";
1028 0 : ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
1029 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1030 0 : ss << " }\n";
1031 0 : ss << " if (lidx == 0)\n";
1032 0 : ss << " current_result =";
1033 0 : ss << "current_result + shm_buf[0]";
1034 0 : ss << ";\n";
1035 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1036 0 : ss << " }\n";
1037 0 : ss << " if (lidx == 0)\n";
1038 0 : ss << " result[writePos] = current_result;\n";
1039 0 : ss << "}\n";
1040 : /*count reduction*/
1041 0 : ss << "__kernel void " << name << "_count";
1042 0 : ss << "_reduction(__global double* A, "
1043 : "__global double *result,int arrayLength,int windowSize){\n";
1044 0 : ss << " double tmp, current_result =" <<
1045 0 : mpCodeGen->GetBottom();
1046 0 : ss << ";\n";
1047 0 : ss << " int writePos = get_group_id(1);\n";
1048 0 : ss << " int lidx = get_local_id(0);\n";
1049 0 : ss << " __local double shm_buf[256];\n";
1050 0 : if (mpDVR->IsStartFixed())
1051 0 : ss << " int offset = 0;\n";
1052 : else // if (!mpDVR->IsStartFixed())
1053 0 : ss << " int offset = get_group_id(1);\n";
1054 0 : if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1055 0 : ss << " int end = windowSize;\n";
1056 0 : else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1057 0 : ss << " int end = offset + windowSize;\n";
1058 0 : else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1059 0 : ss << " int end = windowSize + get_group_id(1);\n";
1060 0 : else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1061 0 : ss << " int end = windowSize;\n";
1062 0 : ss << " end = min(end, arrayLength);\n";
1063 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1064 0 : ss << " int loop = arrayLength/512 + 1;\n";
1065 0 : ss << " for (int l=0; l<loop; l++){\n";
1066 0 : ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
1067 0 : ss << " int loopOffset = l*512;\n";
1068 0 : ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
1069 0 : ss << " tmp = legalize((isNan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
1070 0 : ss << ", tmp);\n";
1071 0 : ss << " tmp = legalize((isNan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
1072 0 : ss << ", tmp);\n";
1073 0 : ss << " } else if ((loopOffset + lidx + offset) < end)\n";
1074 0 : ss << " tmp = legalize((isNan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
1075 0 : ss << ", tmp);\n";
1076 0 : ss << " shm_buf[lidx] = tmp;\n";
1077 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1078 0 : ss << " for (int i = 128; i >0; i/=2) {\n";
1079 0 : ss << " if (lidx < i)\n";
1080 0 : ss << " shm_buf[lidx] = ";
1081 0 : ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
1082 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1083 0 : ss << " }\n";
1084 0 : ss << " if (lidx == 0)\n";
1085 0 : ss << " current_result =";
1086 0 : ss << "current_result + shm_buf[0];";
1087 0 : ss << ";\n";
1088 0 : ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1089 0 : ss << " }\n";
1090 0 : ss << " if (lidx == 0)\n";
1091 0 : ss << " result[writePos] = current_result;\n";
1092 0 : ss << "}\n";
1093 : }
1094 :
1095 0 : }
1096 0 : virtual std::string GenSlidingWindowDeclRef( bool = false ) const
1097 : {
1098 0 : std::stringstream ss;
1099 0 : if (!bIsStartFixed && !bIsEndFixed)
1100 0 : ss << Base::GetName() << "[i + gid0]";
1101 : else
1102 0 : ss << Base::GetName() << "[i]";
1103 0 : return ss.str();
1104 : }
1105 : /// Controls how the elements in the DoubleVectorRef are traversed
1106 0 : virtual size_t GenReductionLoopHeader(
1107 : std::stringstream& ss, bool& needBody )
1108 : {
1109 : assert(mpDVR);
1110 0 : size_t nCurWindowSize = mpDVR->GetRefRowSize();
1111 0 : std::string temp = Base::GetName() + "[gid0]";
1112 0 : ss << "tmp = ";
1113 : // Special case count
1114 0 : if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
1115 : {
1116 0 : ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
1117 0 : ss << "nCount = nCount-1;\n";
1118 0 : ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
1119 0 : ss << Base::GetName() << "[gid0+" << SymbolTable::nR << "]" << ";\n";
1120 : }
1121 0 : else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
1122 0 : ss << temp << "+ tmp";
1123 : else
1124 0 : ss << mpCodeGen->Gen2(temp, "tmp");
1125 0 : ss << ";\n\t";
1126 0 : needBody = false;
1127 0 : return nCurWindowSize;
1128 : }
1129 :
1130 0 : virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
1131 : {
1132 : assert(Base::mpClmem == NULL);
1133 : // Obtain cl context
1134 : KernelEnv kEnv;
1135 0 : OpenCLDevice::setKernelEnv(&kEnv);
1136 : cl_int err;
1137 0 : size_t nInput = mpDVR->GetArrayLength();
1138 0 : size_t nCurWindowSize = mpDVR->GetRefRowSize();
1139 : // create clmem buffer
1140 0 : if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == NULL)
1141 0 : throw Unhandled();
1142 : double* pHostBuffer = const_cast<double*>(
1143 0 : mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
1144 0 : size_t szHostBuffer = nInput * sizeof(double);
1145 0 : Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
1146 : (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
1147 : szHostBuffer,
1148 0 : pHostBuffer, &err);
1149 0 : mpClmem2 = clCreateBuffer(kEnv.mpkContext,
1150 : CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
1151 0 : sizeof(double) * w, NULL, NULL);
1152 0 : if (CL_SUCCESS != err)
1153 0 : throw OpenCLError(err, __FILE__, __LINE__);
1154 : // reproduce the reduction function name
1155 0 : std::string kernelName;
1156 0 : if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
1157 0 : kernelName = Base::GetName() + "_reduction";
1158 : else
1159 0 : kernelName = Base::GetName() + "_sum_reduction";
1160 0 : cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
1161 0 : if (err != CL_SUCCESS)
1162 0 : throw OpenCLError(err, __FILE__, __LINE__);
1163 : // set kernel arg of reduction kernel
1164 : // TODO(Wei Wei): use unique name for kernel
1165 0 : cl_mem buf = Base::GetCLBuffer();
1166 0 : err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
1167 0 : (void*)&buf);
1168 0 : if (CL_SUCCESS != err)
1169 0 : throw OpenCLError(err, __FILE__, __LINE__);
1170 :
1171 0 : err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), (void*)&mpClmem2);
1172 0 : if (CL_SUCCESS != err)
1173 0 : throw OpenCLError(err, __FILE__, __LINE__);
1174 :
1175 0 : err = clSetKernelArg(redKernel, 2, sizeof(cl_int), (void*)&nInput);
1176 0 : if (CL_SUCCESS != err)
1177 0 : throw OpenCLError(err, __FILE__, __LINE__);
1178 :
1179 0 : err = clSetKernelArg(redKernel, 3, sizeof(cl_int), (void*)&nCurWindowSize);
1180 0 : if (CL_SUCCESS != err)
1181 0 : throw OpenCLError(err, __FILE__, __LINE__);
1182 :
1183 : // set work group size and execute
1184 0 : size_t global_work_size[] = { 256, (size_t)w };
1185 0 : size_t local_work_size[] = { 256, 1 };
1186 0 : err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
1187 0 : global_work_size, local_work_size, 0, NULL, NULL);
1188 0 : if (CL_SUCCESS != err)
1189 0 : throw OpenCLError(err, __FILE__, __LINE__);
1190 0 : err = clFinish(kEnv.mpkCmdQueue);
1191 0 : if (CL_SUCCESS != err)
1192 0 : throw OpenCLError(err, __FILE__, __LINE__);
1193 0 : if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
1194 : {
1195 : /*average need more reduction kernel for count computing*/
1196 0 : boost::scoped_array<double> pAllBuffer(new double[2 * w]);
1197 : double* resbuf = (double*)clEnqueueMapBuffer(kEnv.mpkCmdQueue,
1198 : mpClmem2,
1199 : CL_TRUE, CL_MAP_READ, 0,
1200 : sizeof(double) * w, 0, NULL, NULL,
1201 0 : &err);
1202 0 : if (err != CL_SUCCESS)
1203 0 : throw OpenCLError(err, __FILE__, __LINE__);
1204 :
1205 0 : for (int i = 0; i < w; i++)
1206 0 : pAllBuffer[i] = resbuf[i];
1207 0 : err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, NULL, NULL);
1208 0 : if (err != CL_SUCCESS)
1209 0 : throw OpenCLError(err, __FILE__, __LINE__);
1210 :
1211 0 : kernelName = Base::GetName() + "_count_reduction";
1212 0 : redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
1213 0 : if (err != CL_SUCCESS)
1214 0 : throw OpenCLError(err, __FILE__, __LINE__);
1215 : // set kernel arg of reduction kernel
1216 0 : buf = Base::GetCLBuffer();
1217 0 : err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
1218 0 : (void*)&buf);
1219 0 : if (CL_SUCCESS != err)
1220 0 : throw OpenCLError(err, __FILE__, __LINE__);
1221 :
1222 0 : err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), (void*)&mpClmem2);
1223 0 : if (CL_SUCCESS != err)
1224 0 : throw OpenCLError(err, __FILE__, __LINE__);
1225 :
1226 0 : err = clSetKernelArg(redKernel, 2, sizeof(cl_int), (void*)&nInput);
1227 0 : if (CL_SUCCESS != err)
1228 0 : throw OpenCLError(err, __FILE__, __LINE__);
1229 :
1230 0 : err = clSetKernelArg(redKernel, 3, sizeof(cl_int), (void*)&nCurWindowSize);
1231 0 : if (CL_SUCCESS != err)
1232 0 : throw OpenCLError(err, __FILE__, __LINE__);
1233 :
1234 : // set work group size and execute
1235 0 : size_t global_work_size1[] = { 256, (size_t)w };
1236 0 : size_t local_work_size1[] = { 256, 1 };
1237 0 : err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
1238 0 : global_work_size1, local_work_size1, 0, NULL, NULL);
1239 0 : if (CL_SUCCESS != err)
1240 0 : throw OpenCLError(err, __FILE__, __LINE__);
1241 0 : err = clFinish(kEnv.mpkCmdQueue);
1242 0 : if (CL_SUCCESS != err)
1243 0 : throw OpenCLError(err, __FILE__, __LINE__);
1244 0 : resbuf = (double*)clEnqueueMapBuffer(kEnv.mpkCmdQueue,
1245 : mpClmem2,
1246 : CL_TRUE, CL_MAP_READ, 0,
1247 : sizeof(double) * w, 0, NULL, NULL,
1248 0 : &err);
1249 0 : if (err != CL_SUCCESS)
1250 0 : throw OpenCLError(err, __FILE__, __LINE__);
1251 0 : for (int i = 0; i < w; i++)
1252 0 : pAllBuffer[i + w] = resbuf[i];
1253 0 : err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, NULL, NULL);
1254 0 : if (mpClmem2)
1255 : {
1256 0 : clReleaseMemObject(mpClmem2);
1257 0 : mpClmem2 = NULL;
1258 : }
1259 0 : mpClmem2 = clCreateBuffer(kEnv.mpkContext,
1260 : (cl_mem_flags)CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1261 0 : w * sizeof(double) * 2, pAllBuffer.get(), &err);
1262 0 : if (CL_SUCCESS != err)
1263 0 : throw OpenCLError(err, __FILE__, __LINE__);
1264 : }
1265 : // set kernel arg
1266 0 : err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&(mpClmem2));
1267 0 : if (CL_SUCCESS != err)
1268 0 : throw OpenCLError(err, __FILE__, __LINE__);
1269 0 : return 1;
1270 : }
1271 0 : ~ParallelReductionVectorRef()
1272 : {
1273 0 : if (mpClmem2)
1274 : {
1275 0 : clReleaseMemObject(mpClmem2);
1276 0 : mpClmem2 = NULL;
1277 : }
1278 0 : }
1279 :
1280 : size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
1281 :
1282 0 : size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
1283 :
1284 : size_t GetStartFixed() const { return bIsStartFixed; }
1285 :
1286 : size_t GetEndFixed() const { return bIsEndFixed; }
1287 :
1288 : protected:
1289 : bool bIsStartFixed, bIsEndFixed;
1290 : const formula::DoubleVectorRefToken* mpDVR;
1291 : // from parent nodes
1292 : boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
// buffer that receives the results of the reduction kernel(s) and is then bound as argument of the consuming kernel
1294 : cl_mem mpClmem2;
1295 : };
1296 :
1297 0 : class Reduction : public SlidingFunctionBase
1298 : {
1299 : public:
1300 : typedef DynamicKernelSlidingArgument<VectorRef> NumericRange;
1301 : typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange;
1302 : typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange;
1303 :
1304 0 : virtual void GenSlidingWindowFunction( std::stringstream& ss,
1305 : const std::string& sSymName, SubArguments& vSubArguments ) SAL_OVERRIDE
1306 : {
1307 0 : ss << "\ndouble " << sSymName;
1308 0 : ss << "_" << BinFuncName() << "(";
1309 0 : for (unsigned i = 0; i < vSubArguments.size(); i++)
1310 : {
1311 0 : if (i)
1312 0 : ss << ", ";
1313 0 : vSubArguments[i]->GenSlidingWindowDecl(ss);
1314 : }
1315 0 : ss << ") {\n";
1316 0 : ss << "double tmp = " << GetBottom() << ";\n";
1317 0 : ss << "int gid0 = get_global_id(0);\n";
1318 0 : if (isAverage())
1319 0 : ss << "int nCount = 0;\n";
1320 0 : ss << "double tmpBottom;\n";
1321 0 : unsigned i = vSubArguments.size();
1322 0 : while (i--)
1323 : {
1324 0 : if (NumericRange* NR =
1325 0 : dynamic_cast<NumericRange*>(vSubArguments[i].get()))
1326 : {
1327 0 : bool needBody; NR->GenReductionLoopHeader(ss, needBody); if (needBody == false)
1328 0 : continue;
1329 : }
1330 0 : else if (ParallelNumericRange* PNR =
1331 0 : dynamic_cast<ParallelNumericRange*>(vSubArguments[i].get()))
1332 : {
1333 : //did not handle yet
1334 0 : bool needBody; PNR->GenReductionLoopHeader(ss, needBody); if (needBody == false)
1335 0 : continue;
1336 : }
1337 0 : else if (StringRange* SR =
1338 0 : dynamic_cast<StringRange*>(vSubArguments[i].get()))
1339 : {
1340 : //did not handle yet
1341 : bool needBody;
1342 0 : SR->GenReductionLoopHeader(ss, needBody);
1343 0 : if (needBody == false)
1344 0 : continue;
1345 : }
1346 : else
1347 : {
1348 0 : FormulaToken* pCur = vSubArguments[i]->GetFormulaToken();
1349 : assert(pCur);
1350 : assert(pCur->GetType() != formula::svDoubleVectorRef);
1351 :
1352 0 : if (pCur->GetType() == formula::svSingleVectorRef)
1353 : {
1354 : const formula::SingleVectorRefToken* pSVR =
1355 0 : static_cast<const formula::SingleVectorRefToken*>(pCur);
1356 0 : ss << "if (gid0 < " << pSVR->GetArrayLength() << "){\n";
1357 : }
1358 0 : else if (pCur->GetType() == formula::svDouble)
1359 : {
1360 0 : ss << "{\n";
1361 : }
1362 : }
1363 0 : if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1364 : {
1365 0 : ss << "tmpBottom = " << GetBottom() << ";\n";
1366 0 : ss << "if (isNan(";
1367 0 : ss << vSubArguments[i]->GenSlidingWindowDeclRef();
1368 0 : ss << "))\n";
1369 0 : if (ZeroReturnZero())
1370 0 : ss << " return 0;\n";
1371 : else
1372 : {
1373 0 : ss << " tmp = ";
1374 0 : ss << Gen2("tmpBottom", "tmp") << ";\n";
1375 : }
1376 0 : ss << "else{\n";
1377 0 : ss << " tmp = ";
1378 0 : ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1379 0 : ss << ";\n";
1380 0 : ss << " }\n";
1381 0 : ss << "}\n";
1382 0 : if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1383 0 : formula::svSingleVectorRef && ZeroReturnZero())
1384 : {
1385 0 : ss << "else{\n";
1386 0 : ss << " return 0;\n";
1387 0 : ss << " }\n";
1388 : }
1389 : }
1390 : else
1391 : {
1392 0 : ss << "tmp = ";
1393 0 : ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1394 0 : ss << ";\n";
1395 : }
1396 : }
1397 0 : ss << "return tmp";
1398 0 : if (isAverage())
1399 0 : ss << "*pow((double)nCount,-1.0)";
1400 0 : ss << ";\n}";
1401 0 : }
1402 0 : virtual bool isAverage() const { return false; }
1403 0 : virtual bool takeString() const SAL_OVERRIDE { return false; }
1404 0 : virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
1405 : };
1406 :
1407 : // Strictly binary operators
1408 0 : class Binary : public SlidingFunctionBase
1409 : {
1410 : public:
1411 0 : virtual void GenSlidingWindowFunction( std::stringstream& ss,
1412 : const std::string& sSymName, SubArguments& vSubArguments ) SAL_OVERRIDE
1413 : {
1414 0 : ss << "\ndouble " << sSymName;
1415 0 : ss << "_" << BinFuncName() << "(";
1416 : assert(vSubArguments.size() == 2);
1417 0 : for (unsigned i = 0; i < vSubArguments.size(); i++)
1418 : {
1419 0 : if (i)
1420 0 : ss << ", ";
1421 0 : vSubArguments[i]->GenSlidingWindowDecl(ss);
1422 : }
1423 0 : ss << ") {\n\t";
1424 0 : ss << "int gid0 = get_global_id(0), i = 0;\n\t";
1425 0 : ss << "double tmp = ";
1426 0 : ss << Gen2(vSubArguments[0]->GenSlidingWindowDeclRef(false),
1427 0 : vSubArguments[1]->GenSlidingWindowDeclRef(false)) << ";\n\t";
1428 0 : ss << "return tmp;\n}";
1429 0 : }
1430 0 : virtual bool takeString() const SAL_OVERRIDE { return true; }
1431 0 : virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
1432 : };
1433 :
1434 0 : class SumOfProduct : public SlidingFunctionBase
1435 : {
1436 : public:
1437 0 : virtual void GenSlidingWindowFunction( std::stringstream& ss,
1438 : const std::string& sSymName, SubArguments& vSubArguments ) SAL_OVERRIDE
1439 : {
1440 0 : size_t nCurWindowSize = 0;
1441 0 : FormulaToken* tmpCur = NULL;
1442 0 : const formula::DoubleVectorRefToken* pCurDVR = NULL;
1443 0 : ss << "\ndouble " << sSymName;
1444 0 : ss << "_" << BinFuncName() << "(";
1445 0 : for (unsigned i = 0; i < vSubArguments.size(); i++)
1446 : {
1447 0 : if (i)
1448 0 : ss << ",";
1449 0 : vSubArguments[i]->GenSlidingWindowDecl(ss);
1450 0 : size_t nCurChildWindowSize = vSubArguments[i]->GetWindowSize();
1451 : nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
1452 0 : nCurChildWindowSize : nCurWindowSize;
1453 0 : tmpCur = vSubArguments[i]->GetFormulaToken();
1454 0 : if (ocPush == tmpCur->GetOpCode())
1455 : {
1456 :
1457 : pCurDVR = static_cast<
1458 0 : const formula::DoubleVectorRefToken*>(tmpCur);
1459 0 : if (!
1460 0 : ((!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1461 0 : || (pCurDVR->IsStartFixed() && pCurDVR->IsEndFixed()))
1462 : )
1463 0 : throw Unhandled();
1464 : }
1465 : }
1466 0 : ss << ") {\n";
1467 0 : ss << " double tmp = 0.0;\n";
1468 0 : ss << " int gid0 = get_global_id(0);\n";
1469 : #ifndef UNROLLING_FACTOR
1470 : ss << " int i ;\n";
1471 : ss << " for (i = 0; i < " << nCurWindowSize << "; i++)\n";
1472 : ss << " {\n";
1473 : for (unsigned i = 0; i < vSubArguments.size(); i++)
1474 : {
1475 : tmpCur = vSubArguments[i]->GetFormulaToken();
1476 : if (ocPush == tmpCur->GetOpCode())
1477 : {
1478 : pCurDVR = static_cast<
1479 : const formula::DoubleVectorRefToken*>(tmpCur);
1480 : if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1481 : {
1482 : ss << " int currentCount";
1483 : ss << i;
1484 : ss << " =i+gid0+1;\n";
1485 : }
1486 : else
1487 : {
1488 : ss << " int currentCount";
1489 : ss << i;
1490 : ss << " =i+1;\n";
1491 : }
1492 : }
1493 : }
1494 : ss << " tmp += fsum(";
1495 : for (unsigned i = 0; i < vSubArguments.size(); i++)
1496 : {
1497 : if (i)
1498 : ss << "*";
1499 : #ifdef ISNAN
1500 : if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1501 : {
1502 : ss << "(";
1503 : ss << "(currentCount";
1504 : ss << i;
1505 : ss << ">";
1506 : if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1507 : formula::svSingleVectorRef)
1508 : {
1509 : const formula::SingleVectorRefToken* pSVR =
1510 : static_cast<const formula::SingleVectorRefToken*>
1511 : (vSubArguments[i]->GetFormulaToken());
1512 : ss << pSVR->GetArrayLength();
1513 : }
1514 : else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1515 : formula::svDoubleVectorRef)
1516 : {
1517 : const formula::DoubleVectorRefToken* pSVR =
1518 : static_cast<const formula::DoubleVectorRefToken*>
1519 : (vSubArguments[i]->GetFormulaToken());
1520 : ss << pSVR->GetArrayLength();
1521 : }
1522 : ss << ")||isNan(" << vSubArguments[i]
1523 : ->GenSlidingWindowDeclRef(true);
1524 : ss << ")?0:";
1525 : ss << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1526 : ss << ")";
1527 : }
1528 : else
1529 : ss << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1530 : #else
1531 : ss << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1532 : #endif
1533 : }
1534 : ss << ", 0.0);\n\t}\n\t";
1535 : ss << "return tmp;\n";
1536 : ss << "}";
1537 : #endif
1538 :
1539 : #ifdef UNROLLING_FACTOR
1540 0 : ss << "\tint i;\n\t";
1541 0 : ss << "int currentCount0;\n";
1542 0 : for (unsigned i = 0; i < vSubArguments.size() - 1; i++)
1543 0 : ss << "int currentCount" << i + 1 << ";\n";
1544 0 : std::stringstream temp3, temp4;
1545 0 : int outLoopSize = UNROLLING_FACTOR;
1546 0 : if (nCurWindowSize / outLoopSize != 0)
1547 : {
1548 0 : ss << "for(int outLoop=0; outLoop<" <<
1549 0 : nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1550 0 : for (int count = 0; count < outLoopSize; count++)
1551 : {
1552 0 : ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n";
1553 0 : if (count == 0)
1554 : {
1555 0 : for (unsigned i = 0; i < vSubArguments.size(); i++)
1556 : {
1557 0 : tmpCur = vSubArguments[i]->GetFormulaToken();
1558 0 : if (ocPush == tmpCur->GetOpCode())
1559 : {
1560 : pCurDVR = static_cast<
1561 0 : const formula::DoubleVectorRefToken*>(tmpCur);
1562 0 : if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1563 : {
1564 0 : temp3 << " currentCount";
1565 0 : temp3 << i;
1566 0 : temp3 << " =i+gid0+1;\n";
1567 : }
1568 : else
1569 : {
1570 0 : temp3 << " currentCount";
1571 0 : temp3 << i;
1572 0 : temp3 << " =i+1;\n";
1573 : }
1574 : }
1575 : }
1576 :
1577 0 : temp3 << "tmp = fsum(";
1578 0 : for (unsigned i = 0; i < vSubArguments.size(); i++)
1579 : {
1580 0 : if (i)
1581 0 : temp3 << "*";
1582 0 : if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1583 : {
1584 0 : temp3 << "(";
1585 0 : temp3 << "(currentCount";
1586 0 : temp3 << i;
1587 0 : temp3 << ">";
1588 0 : if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1589 : formula::svSingleVectorRef)
1590 : {
1591 : const formula::SingleVectorRefToken* pSVR =
1592 : static_cast<const formula::SingleVectorRefToken*>
1593 0 : (vSubArguments[i]->GetFormulaToken());
1594 0 : temp3 << pSVR->GetArrayLength();
1595 0 : temp3 << ")||isNan(" << vSubArguments[i]
1596 0 : ->GenSlidingWindowDeclRef();
1597 0 : temp3 << ")?0:";
1598 0 : temp3 << vSubArguments[i]->GenSlidingWindowDeclRef();
1599 0 : temp3 << ")";
1600 : }
1601 0 : else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1602 : formula::svDoubleVectorRef)
1603 : {
1604 : const formula::DoubleVectorRefToken* pSVR =
1605 : static_cast<const formula::DoubleVectorRefToken*>
1606 0 : (vSubArguments[i]->GetFormulaToken());
1607 0 : temp3 << pSVR->GetArrayLength();
1608 0 : temp3 << ")||isNan(" << vSubArguments[i]
1609 0 : ->GenSlidingWindowDeclRef(true);
1610 0 : temp3 << ")?0:";
1611 0 : temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1612 0 : temp3 << ")";
1613 : }
1614 :
1615 : }
1616 : else
1617 0 : temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1618 : }
1619 0 : temp3 << ", tmp);\n\t";
1620 : }
1621 0 : ss << temp3.str();
1622 : }
1623 0 : ss << "}\n\t";
1624 : }
1625 : //The residual of mod outLoopSize
1626 0 : for (unsigned int count = nCurWindowSize / outLoopSize * outLoopSize;
1627 0 : count < nCurWindowSize; count++)
1628 : {
1629 0 : ss << "i =" << count << ";\n";
1630 0 : if (count == nCurWindowSize / outLoopSize * outLoopSize)
1631 : {
1632 0 : for (unsigned i = 0; i < vSubArguments.size(); i++)
1633 : {
1634 0 : tmpCur = vSubArguments[i]->GetFormulaToken();
1635 0 : if (ocPush == tmpCur->GetOpCode())
1636 : {
1637 : pCurDVR = static_cast<
1638 0 : const formula::DoubleVectorRefToken*>(tmpCur);
1639 0 : if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1640 : {
1641 0 : temp4 << " currentCount";
1642 0 : temp4 << i;
1643 0 : temp4 << " =i+gid0+1;\n";
1644 : }
1645 : else
1646 : {
1647 0 : temp4 << " currentCount";
1648 0 : temp4 << i;
1649 0 : temp4 << " =i+1;\n";
1650 : }
1651 : }
1652 : }
1653 :
1654 0 : temp4 << "tmp = fsum(";
1655 0 : for (unsigned i = 0; i < vSubArguments.size(); i++)
1656 : {
1657 0 : if (i)
1658 0 : temp4 << "*";
1659 0 : if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1660 : {
1661 0 : temp4 << "(";
1662 0 : temp4 << "(currentCount";
1663 0 : temp4 << i;
1664 0 : temp4 << ">";
1665 0 : if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1666 : formula::svSingleVectorRef)
1667 : {
1668 : const formula::SingleVectorRefToken* pSVR =
1669 : static_cast<const formula::SingleVectorRefToken*>
1670 0 : (vSubArguments[i]->GetFormulaToken());
1671 0 : temp4 << pSVR->GetArrayLength();
1672 0 : temp4 << ")||isNan(" << vSubArguments[i]
1673 0 : ->GenSlidingWindowDeclRef();
1674 0 : temp4 << ")?0:";
1675 0 : temp4 << vSubArguments[i]->GenSlidingWindowDeclRef();
1676 0 : temp4 << ")";
1677 : }
1678 0 : else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1679 : formula::svDoubleVectorRef)
1680 : {
1681 : const formula::DoubleVectorRefToken* pSVR =
1682 : static_cast<const formula::DoubleVectorRefToken*>
1683 0 : (vSubArguments[i]->GetFormulaToken());
1684 0 : temp4 << pSVR->GetArrayLength();
1685 0 : temp4 << ")||isNan(" << vSubArguments[i]
1686 0 : ->GenSlidingWindowDeclRef(true);
1687 0 : temp4 << ")?0:";
1688 0 : temp4 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1689 0 : temp4 << ")";
1690 : }
1691 :
1692 : }
1693 : else
1694 : {
1695 0 : temp4 << vSubArguments[i]
1696 0 : ->GenSlidingWindowDeclRef(true);
1697 : }
1698 : }
1699 0 : temp4 << ", tmp);\n\t";
1700 : }
1701 0 : ss << temp4.str();
1702 : }
1703 0 : ss << "return tmp;\n";
1704 0 : ss << "}";
1705 : #endif
1706 :
1707 0 : }
1708 0 : virtual bool takeString() const SAL_OVERRIDE { return false; }
1709 0 : virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
1710 : };
1711 :
1712 : /// operator traits
1713 0 : class OpNop : public Reduction
1714 : {
1715 : public:
1716 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
1717 0 : virtual std::string Gen2( const std::string& lhs, const std::string& ) const SAL_OVERRIDE
1718 : {
1719 0 : return lhs;
1720 : }
1721 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "nop"; }
1722 : };
1723 :
1724 0 : class OpCount : public Reduction
1725 : {
1726 : public:
1727 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
1728 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
1729 : {
1730 0 : std::stringstream ss;
1731 0 : ss << "(isNan(" << lhs << ")?" << rhs << ":" << rhs << "+1.0)";
1732 0 : return ss.str();
1733 : }
1734 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "fcount"; }
1735 : };
1736 :
1737 0 : class OpEqual : public Binary
1738 : {
1739 : public:
1740 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
1741 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
1742 : {
1743 0 : std::stringstream ss;
1744 0 : ss << "strequal(" << lhs << "," << rhs << ")";
1745 0 : return ss.str();
1746 : }
1747 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "eq"; }
1748 : };
1749 :
1750 0 : class OpLessEqual : public Binary
1751 : {
1752 : public:
1753 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
1754 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
1755 : {
1756 0 : std::stringstream ss;
1757 0 : ss << "(" << lhs << "<=" << rhs << ")";
1758 0 : return ss.str();
1759 : }
1760 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "leq"; }
1761 : };
1762 :
1763 0 : class OpLess : public Binary
1764 : {
1765 : public:
1766 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
1767 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
1768 : {
1769 0 : std::stringstream ss;
1770 0 : ss << "(" << lhs << "<" << rhs << ")";
1771 0 : return ss.str();
1772 : }
1773 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "less"; }
1774 : };
1775 :
1776 0 : class OpGreater : public Binary
1777 : {
1778 : public:
1779 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
1780 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
1781 : {
1782 0 : std::stringstream ss;
1783 0 : ss << "(" << lhs << ">" << rhs << ")";
1784 0 : return ss.str();
1785 : }
1786 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "gt"; }
1787 : };
1788 :
1789 0 : class OpSum : public Reduction
1790 : {
1791 : public:
1792 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
1793 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
1794 : {
1795 0 : std::stringstream ss;
1796 0 : ss << "((" << lhs << ")+(" << rhs << "))";
1797 0 : return ss.str();
1798 : }
1799 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsum"; }
1800 : };
1801 :
1802 0 : class OpAverage : public Reduction
1803 : {
1804 : public:
1805 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
1806 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
1807 : {
1808 0 : std::stringstream ss;
1809 0 : ss << "fsum_count(" << lhs << "," << rhs << ", &nCount)";
1810 0 : return ss.str();
1811 : }
1812 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsum"; }
1813 0 : virtual bool isAverage() const SAL_OVERRIDE { return true; }
1814 : };
1815 :
1816 0 : class OpSub : public Reduction
1817 : {
1818 : public:
1819 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
1820 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
1821 : {
1822 0 : return lhs + "-" + rhs;
1823 : }
1824 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsub"; }
1825 : };
1826 :
1827 0 : class OpMul : public Reduction
1828 : {
1829 : public:
1830 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "1"; }
1831 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
1832 : {
1833 0 : return lhs + "*" + rhs;
1834 : }
1835 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "fmul"; }
1836 0 : virtual bool ZeroReturnZero() SAL_OVERRIDE { return true; }
1837 : };
1838 :
1839 : /// Technically not a reduction, but fits the framework.
1840 0 : class OpDiv : public Reduction
1841 : {
1842 : public:
1843 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "1.0"; }
1844 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
1845 : {
1846 0 : return "(" + lhs + "/" + rhs + ")";
1847 : }
1848 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "fdiv"; }
1849 : };
1850 :
1851 0 : class OpMin : public Reduction
1852 : {
1853 : public:
1854 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "MAXFLOAT"; }
1855 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
1856 : {
1857 0 : return "mcw_fmin(" + lhs + "," + rhs + ")";
1858 : }
1859 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "min"; }
1860 : };
1861 :
1862 0 : class OpMax : public Reduction
1863 : {
1864 : public:
1865 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "-MAXFLOAT"; }
1866 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
1867 : {
1868 0 : return "mcw_fmax(" + lhs + "," + rhs + ")";
1869 : }
1870 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "max"; }
1871 : };
1872 :
1873 0 : class OpSumProduct : public SumOfProduct
1874 : {
1875 : public:
1876 0 : virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
1877 0 : virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
1878 : {
1879 0 : return lhs + "*" + rhs;
1880 : }
1881 0 : virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsop"; }
1882 : };
1883 : namespace {
1884 : struct SumIfsArgs
1885 : {
1886 0 : SumIfsArgs( cl_mem x ) : mCLMem(x), mConst(0.0) { }
1887 0 : SumIfsArgs( double x ) : mCLMem(NULL), mConst(x) { }
1888 : cl_mem mCLMem;
1889 : double mConst;
1890 : };
1891 : }
1892 :
1893 : /// Helper functions that have multiple buffers
1894 : class DynamicKernelSoPArguments : public DynamicKernelArgument
1895 : {
1896 : public:
1897 : typedef boost::shared_ptr<DynamicKernelArgument> SubArgument;
1898 : typedef std::vector<SubArgument> SubArgumentsType;
1899 :
1900 : DynamicKernelSoPArguments(
1901 : const std::string& s, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen );
1902 :
1903 : /// Create buffer and pass the buffer to a given kernel
1904 0 : virtual size_t Marshal( cl_kernel k, int argno, int nVectorWidth, cl_program pProgram ) SAL_OVERRIDE
1905 : {
1906 0 : unsigned i = 0;
1907 0 : for (SubArgumentsType::iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
1908 : ++it)
1909 : {
1910 0 : i += (*it)->Marshal(k, argno + i, nVectorWidth, pProgram);
1911 : }
1912 0 : if (OpGeoMean* OpSumCodeGen = dynamic_cast<OpGeoMean*>(mpCodeGen.get()))
1913 : {
1914 : // Obtain cl context
1915 : KernelEnv kEnv;
1916 0 : OpenCLDevice::setKernelEnv(&kEnv);
1917 : cl_int err;
1918 : cl_mem pClmem2;
1919 :
1920 0 : if (OpSumCodeGen->NeedReductionKernel())
1921 : {
1922 0 : std::vector<cl_mem> vclmem;
1923 0 : for (SubArgumentsType::iterator it = mvSubArguments.begin(),
1924 0 : e = mvSubArguments.end(); it != e; ++it)
1925 : {
1926 0 : if (VectorRef* VR = dynamic_cast<VectorRef*>(it->get()))
1927 0 : vclmem.push_back(VR->GetCLBuffer());
1928 : else
1929 0 : vclmem.push_back(NULL);
1930 : }
1931 : pClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
1932 0 : sizeof(double) * nVectorWidth, NULL, &err);
1933 0 : if (CL_SUCCESS != err)
1934 0 : throw OpenCLError(err, __FILE__, __LINE__);
1935 :
1936 0 : std::string kernelName = "GeoMean_reduction";
1937 0 : cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
1938 0 : if (err != CL_SUCCESS)
1939 0 : throw OpenCLError(err, __FILE__, __LINE__);
1940 : // set kernel arg of reduction kernel
1941 0 : for (size_t j = 0; j < vclmem.size(); j++)
1942 : {
1943 : err = clSetKernelArg(redKernel, j,
1944 0 : vclmem[j] ? sizeof(cl_mem) : sizeof(double),
1945 0 : (void*)&vclmem[j]);
1946 0 : if (CL_SUCCESS != err)
1947 0 : throw OpenCLError(err, __FILE__, __LINE__);
1948 : }
1949 0 : err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), (void*)&pClmem2);
1950 0 : if (CL_SUCCESS != err)
1951 0 : throw OpenCLError(err, __FILE__, __LINE__);
1952 :
1953 : // set work group size and execute
1954 0 : size_t global_work_size[] = { 256, (size_t)nVectorWidth };
1955 0 : size_t local_work_size[] = { 256, 1 };
1956 : err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
1957 0 : global_work_size, local_work_size, 0, NULL, NULL);
1958 0 : if (CL_SUCCESS != err)
1959 0 : throw OpenCLError(err, __FILE__, __LINE__);
1960 0 : err = clFinish(kEnv.mpkCmdQueue);
1961 0 : if (CL_SUCCESS != err)
1962 0 : throw OpenCLError(err, __FILE__, __LINE__);
1963 :
1964 : // Pass pClmem2 to the "real" kernel
1965 0 : err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&pClmem2);
1966 0 : if (CL_SUCCESS != err)
1967 0 : throw OpenCLError(err, __FILE__, __LINE__);
1968 : }
1969 : }
1970 0 : if (OpSumIfs* OpSumCodeGen = dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
1971 : {
1972 : // Obtain cl context
1973 : KernelEnv kEnv;
1974 0 : OpenCLDevice::setKernelEnv(&kEnv);
1975 : cl_int err;
1976 0 : DynamicKernelArgument* Arg = mvSubArguments[0].get();
1977 : DynamicKernelSlidingArgument<VectorRef>* slidingArgPtr =
1978 0 : static_cast<DynamicKernelSlidingArgument<VectorRef>*>(Arg);
1979 0 : mpClmem2 = NULL;
1980 :
1981 0 : if (OpSumCodeGen->NeedReductionKernel())
1982 : {
1983 0 : size_t nInput = slidingArgPtr->GetArrayLength();
1984 0 : size_t nCurWindowSize = slidingArgPtr->GetWindowSize();
1985 0 : std::vector<SumIfsArgs> vclmem;
1986 :
1987 0 : for (SubArgumentsType::iterator it = mvSubArguments.begin(),
1988 0 : e = mvSubArguments.end(); it != e; ++it)
1989 : {
1990 0 : if (VectorRef* VR = dynamic_cast<VectorRef*>(it->get()))
1991 0 : vclmem.push_back(SumIfsArgs(VR->GetCLBuffer()));
1992 0 : else if (DynamicKernelConstantArgument* CA =
1993 : dynamic_cast<
1994 0 : DynamicKernelConstantArgument*>(it->get()))
1995 0 : vclmem.push_back(SumIfsArgs(CA->GetDouble()));
1996 : else
1997 0 : vclmem.push_back(SumIfsArgs((cl_mem)NULL));
1998 : }
1999 : mpClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
2000 0 : sizeof(double) * nVectorWidth, NULL, &err);
2001 0 : if (CL_SUCCESS != err)
2002 0 : throw OpenCLError(err, __FILE__, __LINE__);
2003 :
2004 0 : std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction";
2005 0 : cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2006 0 : if (err != CL_SUCCESS)
2007 0 : throw OpenCLError(err, __FILE__, __LINE__);
2008 :
2009 : // set kernel arg of reduction kernel
2010 0 : for (size_t j = 0; j < vclmem.size(); j++)
2011 : {
2012 : err = clSetKernelArg(redKernel, j,
2013 0 : vclmem[j].mCLMem ? sizeof(cl_mem) : sizeof(double),
2014 0 : vclmem[j].mCLMem ? (void*)&vclmem[j].mCLMem :
2015 0 : (void*)&vclmem[j].mConst);
2016 0 : if (CL_SUCCESS != err)
2017 0 : throw OpenCLError(err, __FILE__, __LINE__);
2018 : }
2019 0 : err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), (void*)&mpClmem2);
2020 0 : if (CL_SUCCESS != err)
2021 0 : throw OpenCLError(err, __FILE__, __LINE__);
2022 :
2023 0 : err = clSetKernelArg(redKernel, vclmem.size() + 1, sizeof(cl_int), (void*)&nInput);
2024 0 : if (CL_SUCCESS != err)
2025 0 : throw OpenCLError(err, __FILE__, __LINE__);
2026 :
2027 0 : err = clSetKernelArg(redKernel, vclmem.size() + 2, sizeof(cl_int), (void*)&nCurWindowSize);
2028 0 : if (CL_SUCCESS != err)
2029 0 : throw OpenCLError(err, __FILE__, __LINE__);
2030 : // set work group size and execute
2031 0 : size_t global_work_size[] = { 256, (size_t)nVectorWidth };
2032 0 : size_t local_work_size[] = { 256, 1 };
2033 : err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
2034 0 : global_work_size, local_work_size, 0, NULL, NULL);
2035 0 : if (CL_SUCCESS != err)
2036 0 : throw OpenCLError(err, __FILE__, __LINE__);
2037 0 : err = clFinish(kEnv.mpkCmdQueue);
2038 0 : if (CL_SUCCESS != err)
2039 0 : throw OpenCLError(err, __FILE__, __LINE__);
2040 0 : clReleaseKernel(redKernel);
2041 : // Pass mpClmem2 to the "real" kernel
2042 0 : err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem2);
2043 0 : if (CL_SUCCESS != err)
2044 0 : throw OpenCLError(err, __FILE__, __LINE__);
2045 : }
2046 : }
2047 0 : return i;
2048 : }
2049 :
2050 0 : virtual void GenSlidingWindowFunction( std::stringstream& ss ) SAL_OVERRIDE
2051 : {
2052 0 : for (unsigned i = 0; i < mvSubArguments.size(); i++)
2053 0 : mvSubArguments[i]->GenSlidingWindowFunction(ss);
2054 0 : mpCodeGen->GenSlidingWindowFunction(ss, mSymName, mvSubArguments);
2055 0 : }
2056 0 : virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
2057 : {
2058 0 : for (unsigned i = 0; i < mvSubArguments.size(); i++)
2059 : {
2060 0 : if (i)
2061 0 : ss << ",";
2062 0 : mvSubArguments[i]->GenDeclRef(ss);
2063 : }
2064 0 : }
2065 0 : virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
2066 : {
2067 0 : for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2068 : ++it)
2069 : {
2070 0 : if (it != mvSubArguments.begin())
2071 0 : ss << ", ";
2072 0 : (*it)->GenDecl(ss);
2073 : }
2074 0 : }
2075 :
2076 0 : virtual size_t GetWindowSize() const SAL_OVERRIDE
2077 : {
2078 0 : size_t nCurWindowSize = 0;
2079 0 : for (unsigned i = 0; i < mvSubArguments.size(); i++)
2080 : {
2081 0 : size_t nCurChildWindowSize = mvSubArguments[i]->GetWindowSize();
2082 : nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
2083 0 : nCurChildWindowSize : nCurWindowSize;
2084 : }
2085 0 : return nCurWindowSize;
2086 : }
2087 :
2088 : /// When declared as input to a sliding window function
2089 0 : virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
2090 : {
2091 0 : for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2092 : ++it)
2093 : {
2094 0 : if (it != mvSubArguments.begin())
2095 0 : ss << ", ";
2096 0 : (*it)->GenSlidingWindowDecl(ss);
2097 : }
2098 0 : }
2099 : /// Generate either a function call to each children
2100 : /// or directly inline it if we are already inside a loop
2101 0 : virtual std::string GenSlidingWindowDeclRef( bool nested = false ) const SAL_OVERRIDE
2102 : {
2103 0 : std::stringstream ss;
2104 0 : if (!nested)
2105 : {
2106 0 : ss << mSymName << "_" << mpCodeGen->BinFuncName() << "(";
2107 0 : for (unsigned i = 0; i < mvSubArguments.size(); i++)
2108 : {
2109 0 : if (i)
2110 0 : ss << ", ";
2111 0 : mvSubArguments[i]->GenDeclRef(ss);
2112 : }
2113 0 : ss << ")";
2114 : }
2115 : else
2116 : {
2117 0 : if (mvSubArguments.size() != 2)
2118 0 : throw Unhandled();
2119 : bool bArgument1_NeedNested =
2120 0 : (mvSubArguments[0]->GetFormulaToken()->GetType()
2121 0 : == formula::svSingleVectorRef) ? false : true;
2122 : bool bArgument2_NeedNested =
2123 0 : (mvSubArguments[1]->GetFormulaToken()->GetType()
2124 0 : == formula::svSingleVectorRef) ? false : true;
2125 0 : ss << "(";
2126 0 : ss << mpCodeGen->
2127 0 : Gen2(mvSubArguments[0]
2128 0 : ->GenSlidingWindowDeclRef(bArgument1_NeedNested),
2129 0 : mvSubArguments[1]
2130 0 : ->GenSlidingWindowDeclRef(bArgument2_NeedNested));
2131 0 : ss << ")";
2132 : }
2133 0 : return ss.str();
2134 : }
2135 0 : virtual std::string DumpOpName() const SAL_OVERRIDE
2136 : {
2137 0 : std::string t = "_" + mpCodeGen->BinFuncName();
2138 0 : for (unsigned i = 0; i < mvSubArguments.size(); i++)
2139 0 : t = t + mvSubArguments[i]->DumpOpName();
2140 0 : return t;
2141 : }
2142 0 : virtual void DumpInlineFun( std::set<std::string>& decls,
2143 : std::set<std::string>& funs ) const SAL_OVERRIDE
2144 : {
2145 0 : mpCodeGen->BinInlineFun(decls, funs);
2146 0 : for (unsigned i = 0; i < mvSubArguments.size(); i++)
2147 0 : mvSubArguments[i]->DumpInlineFun(decls, funs);
2148 0 : }
2149 0 : virtual ~DynamicKernelSoPArguments()
2150 0 : {
2151 0 : if (mpClmem2)
2152 : {
2153 0 : clReleaseMemObject(mpClmem2);
2154 0 : mpClmem2 = NULL;
2155 : }
2156 0 : }
2157 :
2158 : private:
2159 : SubArgumentsType mvSubArguments;
2160 : boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
2161 : cl_mem mpClmem2;
2162 : };
2163 :
2164 0 : boost::shared_ptr<DynamicKernelArgument> SoPHelper(
2165 : const std::string& ts, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen )
2166 : {
2167 0 : return boost::shared_ptr<DynamicKernelArgument>(new DynamicKernelSoPArguments(ts, ft, pCodeGen));
2168 : }
2169 :
2170 : template<class Base>
2171 0 : DynamicKernelArgument* VectorRefFactory( const std::string& s,
2172 : const FormulaTreeNodeRef& ft,
2173 : boost::shared_ptr<SlidingFunctionBase>& pCodeGen,
2174 : int index )
2175 : {
2176 : //Black lists ineligible classes here ..
2177 : // SUMIFS does not perform parallel reduction at DoubleVectorRef level
2178 0 : if (dynamic_cast<OpSumIfs*>(pCodeGen.get()))
2179 : {
2180 0 : if (index == 0) // the first argument of OpSumIfs cannot be strings anyway
2181 0 : return new DynamicKernelSlidingArgument<VectorRef>(s, ft, pCodeGen, index);
2182 0 : return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
2183 : }
2184 : // AVERAGE is not supported yet
2185 : //Average has been supported by reduction kernel
2186 : /*else if (dynamic_cast<OpAverage*>(pCodeGen.get()))
2187 : {
2188 : return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
2189 : }*/
2190 : // MUL is not supported yet
2191 0 : else if (dynamic_cast<OpMul*>(pCodeGen.get()))
2192 : {
2193 0 : return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
2194 : }
2195 : // Sub is not a reduction per se
2196 0 : else if (dynamic_cast<OpSub*>(pCodeGen.get()))
2197 : {
2198 0 : return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
2199 : }
2200 : // Only child class of Reduction is supported
2201 0 : else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
2202 : {
2203 0 : return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
2204 : }
2205 :
2206 : const formula::DoubleVectorRefToken* pDVR =
2207 : static_cast<const formula::DoubleVectorRefToken*>(
2208 0 : ft->GetFormulaToken());
2209 : // Window being too small to justify a parallel reduction
2210 0 : if (pDVR->GetRefRowSize() < REDUCE_THRESHOLD)
2211 0 : return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
2212 0 : if ((pDVR->IsStartFixed() && pDVR->IsEndFixed()) ||
2213 0 : (!pDVR->IsStartFixed() && !pDVR->IsEndFixed()))
2214 0 : return new ParallelReductionVectorRef<Base>(s, ft, pCodeGen, index);
2215 : else // Other cases are not supported as well
2216 0 : return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
2217 : }
2218 :
2219 0 : DynamicKernelSoPArguments::DynamicKernelSoPArguments(
2220 : const std::string& s, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen ) :
2221 0 : DynamicKernelArgument(s, ft), mpCodeGen(pCodeGen), mpClmem2(NULL)
2222 : {
2223 0 : size_t nChildren = ft->Children.size();
2224 :
2225 0 : for (unsigned i = 0; i < nChildren; i++)
2226 : {
2227 0 : FormulaToken* pChild = ft->Children[i]->GetFormulaToken();
2228 0 : if (!pChild)
2229 0 : throw Unhandled();
2230 0 : OpCode opc = pChild->GetOpCode();
2231 0 : std::stringstream tmpname;
2232 0 : tmpname << s << "_" << i;
2233 0 : std::string ts = tmpname.str();
2234 0 : switch (opc)
2235 : {
2236 : case ocPush:
2237 0 : if (pChild->GetType() == formula::svDoubleVectorRef)
2238 : {
2239 : const formula::DoubleVectorRefToken* pDVR =
2240 0 : static_cast<const formula::DoubleVectorRefToken*>(pChild);
2241 0 : for (size_t j = 0; j < pDVR->GetArrays().size(); ++j)
2242 : {
2243 0 : if (pDVR->GetArrays()[j].mpNumericArray ||
2244 0 : (pDVR->GetArrays()[j].mpNumericArray == NULL &&
2245 0 : pDVR->GetArrays()[j].mpStringArray == NULL))
2246 : {
2247 0 : if (pDVR->GetArrays()[j].mpNumericArray &&
2248 0 : pCodeGen->takeNumeric() &&
2249 0 : pDVR->GetArrays()[j].mpStringArray &&
2250 0 : pCodeGen->takeString())
2251 : {
2252 : mvSubArguments.push_back(
2253 : SubArgument(
2254 : new DynamicKernelMixedSlidingArgument(
2255 0 : ts, ft->Children[i], mpCodeGen, j)));
2256 : }
2257 : else
2258 : {
2259 : mvSubArguments.push_back(
2260 : SubArgument(VectorRefFactory<VectorRef>(
2261 0 : ts, ft->Children[i], mpCodeGen, j)));
2262 : }
2263 : }
2264 : else
2265 : mvSubArguments.push_back(
2266 : SubArgument(VectorRefFactory
2267 : <DynamicKernelStringArgument>(
2268 0 : ts, ft->Children[i], mpCodeGen, j)));
2269 : }
2270 : }
2271 0 : else if (pChild->GetType() == formula::svSingleVectorRef)
2272 : {
2273 : const formula::SingleVectorRefToken* pSVR =
2274 0 : static_cast<const formula::SingleVectorRefToken*>(pChild);
2275 0 : if (pSVR->GetArray().mpNumericArray &&
2276 0 : pCodeGen->takeNumeric() &&
2277 0 : pSVR->GetArray().mpStringArray &&
2278 0 : pCodeGen->takeString())
2279 : {
2280 : mvSubArguments.push_back(
2281 : SubArgument(new DynamicKernelMixedArgument(
2282 0 : ts, ft->Children[i])));
2283 : }
2284 0 : else if (pSVR->GetArray().mpNumericArray &&
2285 0 : pCodeGen->takeNumeric())
2286 : {
2287 : mvSubArguments.push_back(
2288 : SubArgument(new VectorRef(ts,
2289 0 : ft->Children[i])));
2290 : }
2291 0 : else if (pSVR->GetArray().mpStringArray &&
2292 0 : pCodeGen->takeString())
2293 : {
2294 : mvSubArguments.push_back(
2295 : SubArgument(new DynamicKernelStringArgument(
2296 0 : ts, ft->Children[i])));
2297 : }
2298 0 : else if (pSVR->GetArray().mpStringArray == NULL &&
2299 0 : pSVR->GetArray().mpNumericArray == NULL)
2300 : {
2301 : // Push as an array of NANs
2302 : mvSubArguments.push_back(
2303 : SubArgument(new VectorRef(ts,
2304 0 : ft->Children[i])));
2305 : }
2306 : else
2307 : throw UnhandledToken(pChild,
2308 0 : "Got unhandled case here", __FILE__, __LINE__);
2309 : }
2310 0 : else if (pChild->GetType() == formula::svDouble)
2311 : {
2312 : mvSubArguments.push_back(
2313 : SubArgument(new DynamicKernelConstantArgument(ts,
2314 0 : ft->Children[i])));
2315 : }
2316 0 : else if (pChild->GetType() == formula::svString
2317 0 : && pCodeGen->takeString())
2318 : {
2319 : mvSubArguments.push_back(
2320 : SubArgument(new ConstStringArgument(ts,
2321 0 : ft->Children[i])));
2322 : }
2323 : else
2324 : {
2325 0 : throw UnhandledToken(pChild, "unknown operand for ocPush");
2326 : }
2327 0 : break;
2328 : case ocDiv:
2329 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDiv));
2330 0 : break;
2331 : case ocMul:
2332 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMul));
2333 0 : break;
2334 : case ocSub:
2335 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSub));
2336 0 : break;
2337 : case ocAdd:
2338 : case ocSum:
2339 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSum));
2340 0 : break;
2341 : case ocAverage:
2342 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpAverage));
2343 0 : break;
2344 : case ocMin:
2345 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMin));
2346 0 : break;
2347 : case ocMax:
2348 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMax));
2349 0 : break;
2350 : case ocCount:
2351 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCount));
2352 0 : break;
2353 : case ocSumProduct:
2354 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSumProduct));
2355 0 : break;
2356 : case ocIRR:
2357 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpIRR));
2358 0 : break;
2359 : case ocMIRR:
2360 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMIRR));
2361 0 : break;
2362 : case ocRMZ:
2363 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPMT));
2364 0 : break;
2365 : case ocZins:
2366 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpIntrate));
2367 0 : break;
2368 : case ocZGZ:
2369 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpRRI));
2370 0 : break;
2371 : case ocKapz:
2372 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPPMT));
2373 0 : break;
2374 : case ocFisher:
2375 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpFisher));
2376 0 : break;
2377 : case ocFisherInv:
2378 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpFisherInv));
2379 0 : break;
2380 : case ocGamma:
2381 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGamma));
2382 0 : break;
2383 : case ocLIA:
2384 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSLN));
2385 0 : break;
2386 : case ocGammaLn:
2387 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGammaLn));
2388 0 : break;
2389 : case ocGauss:
2390 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGauss));
2391 0 : break;
2392 : /*case ocGeoMean:
2393 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGeoMean));
2394 : break;*/
2395 : case ocHarMean:
2396 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpHarMean));
2397 0 : break;
2398 : case ocLessEqual:
2399 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpLessEqual));
2400 0 : break;
2401 : case ocLess:
2402 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpLess));
2403 0 : break;
2404 : case ocEqual:
2405 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpEqual));
2406 0 : break;
2407 : case ocGreater:
2408 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGreater));
2409 0 : break;
2410 : case ocDIA:
2411 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSYD));
2412 0 : break;
2413 : case ocCorrel:
2414 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCorrel));
2415 0 : break;
2416 : case ocCos:
2417 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCos));
2418 0 : break;
2419 : case ocNegBinomVert :
2420 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpNegbinomdist));
2421 0 : break;
2422 : case ocPearson:
2423 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPearson));
2424 0 : break;
2425 : case ocRSQ:
2426 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpRsq));
2427 0 : break;
2428 : case ocCosecant:
2429 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCsc));
2430 0 : break;
2431 : case ocISPMT:
2432 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpISPMT));
2433 0 : break;
2434 : case ocLaufz:
2435 : mvSubArguments.push_back(SoPHelper(ts,
2436 0 : ft->Children[i], new OpDuration));
2437 0 : break;
2438 : case ocSinHyp:
2439 : mvSubArguments.push_back(SoPHelper(ts,
2440 0 : ft->Children[i], new OpSinh));
2441 0 : break;
2442 : case ocAbs:
2443 : mvSubArguments.push_back(SoPHelper(ts,
2444 0 : ft->Children[i], new OpAbs));
2445 0 : break;
2446 : case ocBW:
2447 : mvSubArguments.push_back(SoPHelper(ts,
2448 0 : ft->Children[i], new OpPV));
2449 0 : break;
2450 : case ocSin:
2451 : mvSubArguments.push_back(SoPHelper(ts,
2452 0 : ft->Children[i], new OpSin));
2453 0 : break;
2454 : case ocTan:
2455 : mvSubArguments.push_back(SoPHelper(ts,
2456 0 : ft->Children[i], new OpTan));
2457 0 : break;
2458 : case ocTanHyp:
2459 : mvSubArguments.push_back(SoPHelper(ts,
2460 0 : ft->Children[i], new OpTanH));
2461 0 : break;
2462 : case ocStandard:
2463 : mvSubArguments.push_back(SoPHelper(ts,
2464 0 : ft->Children[i], new OpStandard));
2465 0 : break;
2466 : case ocWeibull:
2467 : mvSubArguments.push_back(SoPHelper(ts,
2468 0 : ft->Children[i], new OpWeibull));
2469 0 : break;
2470 : /*case ocMedian:
2471 : mvSubArguments.push_back(SoPHelper(ts,
2472 : ft->Children[i],new OpMedian));
2473 : break;*/
2474 : case ocGDA:
2475 : mvSubArguments.push_back(SoPHelper(ts,
2476 0 : ft->Children[i], new OpDDB));
2477 0 : break;
2478 : case ocZW:
2479 : mvSubArguments.push_back(SoPHelper(ts,
2480 0 : ft->Children[i], new OpFV));
2481 0 : break;
2482 : case ocSumIfs:
2483 : mvSubArguments.push_back(SoPHelper(ts,
2484 0 : ft->Children[i], new OpSumIfs));
2485 0 : break;
2486 : /*case ocVBD:
2487 : mvSubArguments.push_back(SoPHelper(ts,
2488 : ft->Children[i],new OpVDB));
2489 : break;*/
2490 : case ocKurt:
2491 : mvSubArguments.push_back(SoPHelper(ts,
2492 0 : ft->Children[i], new OpKurt));
2493 0 : break;
2494 : /*case ocZZR:
2495 : mvSubArguments.push_back(SoPHelper(ts,
2496 : ft->Children[i], new OpNper));
2497 : break;*/
2498 : case ocNormDist:
2499 : mvSubArguments.push_back(SoPHelper(ts,
2500 0 : ft->Children[i], new OpNormdist));
2501 0 : break;
2502 : case ocArcCos:
2503 : mvSubArguments.push_back(SoPHelper(ts,
2504 0 : ft->Children[i], new OpArcCos));
2505 0 : break;
2506 : case ocSqrt:
2507 : mvSubArguments.push_back(SoPHelper(ts,
2508 0 : ft->Children[i], new OpSqrt));
2509 0 : break;
2510 : case ocArcCosHyp:
2511 : mvSubArguments.push_back(SoPHelper(ts,
2512 0 : ft->Children[i], new OpArcCosHyp));
2513 0 : break;
2514 : case ocNPV:
2515 : mvSubArguments.push_back(SoPHelper(ts,
2516 0 : ft->Children[i], new OpNPV));
2517 0 : break;
2518 : case ocStdNormDist:
2519 : mvSubArguments.push_back(SoPHelper(ts,
2520 0 : ft->Children[i], new OpNormsdist));
2521 0 : break;
2522 : case ocNormInv:
2523 : mvSubArguments.push_back(SoPHelper(ts,
2524 0 : ft->Children[i], new OpNorminv));
2525 0 : break;
2526 : case ocSNormInv:
2527 : mvSubArguments.push_back(SoPHelper(ts,
2528 0 : ft->Children[i], new OpNormsinv));
2529 0 : break;
2530 : case ocVariationen:
2531 : mvSubArguments.push_back(SoPHelper(ts,
2532 0 : ft->Children[i], new OpVariationen));
2533 0 : break;
2534 : case ocVariationen2:
2535 : mvSubArguments.push_back(SoPHelper(ts,
2536 0 : ft->Children[i], new OpVariationen2));
2537 0 : break;
2538 : case ocPhi:
2539 : mvSubArguments.push_back(SoPHelper(ts,
2540 0 : ft->Children[i], new OpPhi));
2541 0 : break;
2542 : case ocZinsZ:
2543 : mvSubArguments.push_back(SoPHelper(ts,
2544 0 : ft->Children[i], new OpIPMT));
2545 0 : break;
2546 : case ocConfidence:
2547 : mvSubArguments.push_back(SoPHelper(ts,
2548 0 : ft->Children[i], new OpConfidence));
2549 0 : break;
2550 : case ocIntercept:
2551 : mvSubArguments.push_back(SoPHelper(ts,
2552 0 : ft->Children[i], new OpIntercept));
2553 0 : break;
2554 : case ocGDA2:
2555 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2556 0 : new OpDB));
2557 0 : break;
2558 : case ocLogInv:
2559 : mvSubArguments.push_back(SoPHelper(ts,
2560 0 : ft->Children[i], new OpLogInv));
2561 0 : break;
2562 : case ocArcCot:
2563 : mvSubArguments.push_back(SoPHelper(ts,
2564 0 : ft->Children[i], new OpArcCot));
2565 0 : break;
2566 : case ocCosHyp:
2567 : mvSubArguments.push_back(SoPHelper(ts,
2568 0 : ft->Children[i], new OpCosh));
2569 0 : break;
2570 : case ocKritBinom:
2571 : mvSubArguments.push_back(SoPHelper(ts,
2572 0 : ft->Children[i], new OpCritBinom));
2573 0 : break;
2574 : case ocArcCotHyp:
2575 : mvSubArguments.push_back(SoPHelper(ts,
2576 0 : ft->Children[i], new OpArcCotHyp));
2577 0 : break;
2578 : case ocArcSin:
2579 : mvSubArguments.push_back(SoPHelper(ts,
2580 0 : ft->Children[i], new OpArcSin));
2581 0 : break;
2582 : case ocArcSinHyp:
2583 : mvSubArguments.push_back(SoPHelper(ts,
2584 0 : ft->Children[i], new OpArcSinHyp));
2585 0 : break;
2586 : case ocArcTan:
2587 : mvSubArguments.push_back(SoPHelper(ts,
2588 0 : ft->Children[i], new OpArcTan));
2589 0 : break;
2590 : case ocArcTanHyp:
2591 : mvSubArguments.push_back(SoPHelper(ts,
2592 0 : ft->Children[i], new OpArcTanH));
2593 0 : break;
2594 : case ocBitAnd:
2595 : mvSubArguments.push_back(SoPHelper(ts,
2596 0 : ft->Children[i], new OpBitAnd));
2597 0 : break;
2598 : case ocForecast:
2599 : mvSubArguments.push_back(SoPHelper(ts,
2600 0 : ft->Children[i], new OpForecast));
2601 0 : break;
2602 : case ocLogNormDist:
2603 : mvSubArguments.push_back(SoPHelper(ts,
2604 0 : ft->Children[i], new OpLogNormDist));
2605 0 : break;
2606 : /*case ocGammaDist:
2607 : mvSubArguments.push_back(SoPHelper(ts,
2608 : ft->Children[i], new OpGammaDist));
2609 : break;*/
2610 : case ocLn:
2611 : mvSubArguments.push_back(SoPHelper(ts,
2612 0 : ft->Children[i], new OpLn));
2613 0 : break;
2614 : case ocRound:
2615 : mvSubArguments.push_back(SoPHelper(ts,
2616 0 : ft->Children[i], new OpRound));
2617 0 : break;
2618 : case ocCot:
2619 : mvSubArguments.push_back(SoPHelper(ts,
2620 0 : ft->Children[i], new OpCot));
2621 0 : break;
2622 : case ocCotHyp:
2623 : mvSubArguments.push_back(SoPHelper(ts,
2624 0 : ft->Children[i], new OpCoth));
2625 0 : break;
2626 : case ocFDist:
2627 : mvSubArguments.push_back(SoPHelper(ts,
2628 0 : ft->Children[i], new OpFdist));
2629 0 : break;
2630 : case ocVar:
2631 : mvSubArguments.push_back(SoPHelper(ts,
2632 0 : ft->Children[i], new OpVar));
2633 0 : break;
2634 : /*case ocChiDist:
2635 : mvSubArguments.push_back(SoPHelper(ts,
2636 : ft->Children[i],new OpChiDist));
2637 : break;*/
2638 : case ocPow:
2639 : case ocPower:
2640 : mvSubArguments.push_back(SoPHelper(ts,
2641 0 : ft->Children[i], new OpPower));
2642 0 : break;
2643 : case ocOdd:
2644 : mvSubArguments.push_back(SoPHelper(ts,
2645 0 : ft->Children[i], new OpOdd));
2646 0 : break;
2647 : /*case ocChiSqDist:
2648 : mvSubArguments.push_back(SoPHelper(ts,
2649 : ft->Children[i],new OpChiSqDist));
2650 : break;
2651 : case ocChiSqInv:
2652 : mvSubArguments.push_back(SoPHelper(ts,
2653 : ft->Children[i],new OpChiSqInv));
2654 : break;
2655 : case ocGammaInv:
2656 : mvSubArguments.push_back(SoPHelper(ts,
2657 : ft->Children[i], new OpGammaInv));
2658 : break;*/
2659 : case ocFloor:
2660 : mvSubArguments.push_back(SoPHelper(ts,
2661 0 : ft->Children[i], new OpFloor));
2662 0 : break;
2663 : /*case ocFInv:
2664 : mvSubArguments.push_back(SoPHelper(ts,
2665 : ft->Children[i], new OpFInv));
2666 : break;*/
2667 : case ocFTest:
2668 : mvSubArguments.push_back(SoPHelper(ts,
2669 0 : ft->Children[i], new OpFTest));
2670 0 : break;
2671 : case ocB:
2672 : mvSubArguments.push_back(SoPHelper(ts,
2673 0 : ft->Children[i], new OpB));
2674 0 : break;
2675 : case ocBetaDist:
2676 : mvSubArguments.push_back(SoPHelper(ts,
2677 0 : ft->Children[i], new OpBetaDist));
2678 0 : break;
2679 : case ocCosecantHyp:
2680 : mvSubArguments.push_back(SoPHelper(ts,
2681 0 : ft->Children[i], new OpCscH));
2682 0 : break;
2683 : case ocExp:
2684 : mvSubArguments.push_back(SoPHelper(ts,
2685 0 : ft->Children[i], new OpExp));
2686 0 : break;
2687 : case ocLog10:
2688 : mvSubArguments.push_back(SoPHelper(ts,
2689 0 : ft->Children[i], new OpLog10));
2690 0 : break;
2691 : case ocExpDist:
2692 : mvSubArguments.push_back(SoPHelper(ts,
2693 0 : ft->Children[i], new OpExponDist));
2694 0 : break;
2695 : case ocAverageIfs:
2696 : mvSubArguments.push_back(SoPHelper(ts,
2697 0 : ft->Children[i], new OpAverageIfs));
2698 0 : break;
2699 : case ocCountIfs:
2700 : mvSubArguments.push_back(SoPHelper(ts,
2701 0 : ft->Children[i], new OpCountIfs));
2702 0 : break;
2703 : case ocKombin2:
2704 : mvSubArguments.push_back(SoPHelper(ts,
2705 0 : ft->Children[i], new OpCombina));
2706 0 : break;
2707 : case ocEven:
2708 : mvSubArguments.push_back(SoPHelper(ts,
2709 0 : ft->Children[i], new OpEven));
2710 0 : break;
2711 : case ocLog:
2712 : mvSubArguments.push_back(SoPHelper(ts,
2713 0 : ft->Children[i], new OpLog));
2714 0 : break;
2715 : case ocMod:
2716 : mvSubArguments.push_back(SoPHelper(ts,
2717 0 : ft->Children[i], new OpMod));
2718 0 : break;
2719 : case ocTrunc:
2720 : mvSubArguments.push_back(SoPHelper(ts,
2721 0 : ft->Children[i], new OpTrunc));
2722 0 : break;
2723 : case ocSchiefe:
2724 : mvSubArguments.push_back(SoPHelper(ts,
2725 0 : ft->Children[i], new OpSkew));
2726 0 : break;
2727 : case ocArcTan2:
2728 : mvSubArguments.push_back(SoPHelper(ts,
2729 0 : ft->Children[i], new OpArcTan2));
2730 0 : break;
2731 : case ocBitOr:
2732 : mvSubArguments.push_back(SoPHelper(ts,
2733 0 : ft->Children[i], new OpBitOr));
2734 0 : break;
2735 : case ocBitLshift:
2736 : mvSubArguments.push_back(SoPHelper(ts,
2737 0 : ft->Children[i], new OpBitLshift));
2738 0 : break;
2739 : case ocBitRshift:
2740 : mvSubArguments.push_back(SoPHelper(ts,
2741 0 : ft->Children[i], new OpBitRshift));
2742 0 : break;
2743 : case ocBitXor:
2744 : mvSubArguments.push_back(SoPHelper(ts,
2745 0 : ft->Children[i], new OpBitXor));
2746 0 : break;
2747 : /*case ocChiInv:
2748 : mvSubArguments.push_back(SoPHelper(ts,
2749 : ft->Children[i],new OpChiInv));
2750 : break;*/
2751 : case ocPoissonDist:
2752 : mvSubArguments.push_back(SoPHelper(ts,
2753 0 : ft->Children[i], new OpPoisson));
2754 0 : break;
2755 : case ocSumSQ:
2756 : mvSubArguments.push_back(SoPHelper(ts,
2757 0 : ft->Children[i], new OpSumSQ));
2758 0 : break;
2759 : case ocSkewp:
2760 : mvSubArguments.push_back(SoPHelper(ts,
2761 0 : ft->Children[i], new OpSkewp));
2762 0 : break;
2763 : case ocBinomDist:
2764 : mvSubArguments.push_back(SoPHelper(ts,
2765 0 : ft->Children[i], new OpBinomdist));
2766 0 : break;
2767 : case ocVarP:
2768 : mvSubArguments.push_back(SoPHelper(ts,
2769 0 : ft->Children[i], new OpVarP));
2770 0 : break;
2771 : case ocCeil:
2772 : mvSubArguments.push_back(SoPHelper(ts,
2773 0 : ft->Children[i], new OpCeil));
2774 0 : break;
2775 : case ocKombin:
2776 : mvSubArguments.push_back(SoPHelper(ts,
2777 0 : ft->Children[i], new OpKombin));
2778 0 : break;
2779 : case ocDevSq:
2780 : mvSubArguments.push_back(SoPHelper(ts,
2781 0 : ft->Children[i], new OpDevSq));
2782 0 : break;
2783 : case ocStDev:
2784 : mvSubArguments.push_back(SoPHelper(ts,
2785 0 : ft->Children[i], new OpStDev));
2786 0 : break;
2787 : case ocSlope:
2788 : mvSubArguments.push_back(SoPHelper(ts,
2789 0 : ft->Children[i], new OpSlope));
2790 0 : break;
2791 : case ocSTEYX:
2792 : mvSubArguments.push_back(SoPHelper(ts,
2793 0 : ft->Children[i], new OpSTEYX));
2794 0 : break;
2795 : case ocZTest:
2796 : mvSubArguments.push_back(SoPHelper(ts,
2797 0 : ft->Children[i], new OpZTest));
2798 0 : break;
2799 : case ocPi:
2800 : mvSubArguments.push_back(
2801 : SubArgument(new DynamicKernelPiArgument(ts,
2802 0 : ft->Children[i])));
2803 0 : break;
2804 : case ocRandom:
2805 : mvSubArguments.push_back(
2806 : SubArgument(new DynamicKernelRandomArgument(ts,
2807 0 : ft->Children[i])));
2808 0 : break;
2809 : case ocProduct:
2810 : mvSubArguments.push_back(SoPHelper(ts,
2811 0 : ft->Children[i], new OpProduct));
2812 0 : break;
2813 : /*case ocHypGeomDist:
2814 : mvSubArguments.push_back(SoPHelper(ts,
2815 : ft->Children[i],new OpHypGeomDist));
2816 : break;*/
2817 : case ocSumX2MY2:
2818 : mvSubArguments.push_back(SoPHelper(ts,
2819 0 : ft->Children[i], new OpSumX2MY2));
2820 0 : break;
2821 : case ocSumX2DY2:
2822 : mvSubArguments.push_back(SoPHelper(ts,
2823 0 : ft->Children[i], new OpSumX2PY2));
2824 0 : break;
2825 : /*case ocBetaInv:
2826 : mvSubArguments.push_back(SoPHelper(ts,
2827 : ft->Children[i],new OpBetainv));
2828 : break;*/
2829 : case ocTTest:
2830 : mvSubArguments.push_back(SoPHelper(ts,
2831 0 : ft->Children[i], new OpTTest));
2832 0 : break;
2833 : case ocTDist:
2834 : mvSubArguments.push_back(SoPHelper(ts,
2835 0 : ft->Children[i], new OpTDist));
2836 0 : break;
2837 : /*case ocTInv:
2838 : mvSubArguments.push_back(SoPHelper(ts,
2839 : ft->Children[i], new OpTInv));
2840 : break;*/
2841 : case ocSumXMY2:
2842 : mvSubArguments.push_back(SoPHelper(ts,
2843 0 : ft->Children[i], new OpSumXMY2));
2844 0 : break;
2845 : case ocStDevP:
2846 : mvSubArguments.push_back(SoPHelper(ts,
2847 0 : ft->Children[i], new OpStDevP));
2848 0 : break;
2849 : case ocCovar:
2850 : mvSubArguments.push_back(SoPHelper(ts,
2851 0 : ft->Children[i], new OpCovar));
2852 0 : break;
2853 : case ocAnd:
2854 : mvSubArguments.push_back(SoPHelper(ts,
2855 0 : ft->Children[i], new OpAnd));
2856 0 : break;
2857 : case ocVLookup:
2858 : mvSubArguments.push_back(SoPHelper(ts,
2859 0 : ft->Children[i], new OpVLookup));
2860 0 : break;
2861 : case ocOr:
2862 : mvSubArguments.push_back(SoPHelper(ts,
2863 0 : ft->Children[i], new OpOr));
2864 0 : break;
2865 : case ocNot:
2866 : mvSubArguments.push_back(SoPHelper(ts,
2867 0 : ft->Children[i], new OpNot));
2868 0 : break;
2869 : case ocXor:
2870 : mvSubArguments.push_back(SoPHelper(ts,
2871 0 : ft->Children[i], new OpXor));
2872 0 : break;
2873 : case ocDBMax:
2874 : mvSubArguments.push_back(SoPHelper(ts,
2875 0 : ft->Children[i], new OpDmax));
2876 0 : break;
2877 : case ocDBMin:
2878 : mvSubArguments.push_back(SoPHelper(ts,
2879 0 : ft->Children[i], new OpDmin));
2880 0 : break;
2881 : case ocDBProduct:
2882 : mvSubArguments.push_back(SoPHelper(ts,
2883 0 : ft->Children[i], new OpDproduct));
2884 0 : break;
2885 : case ocDBAverage:
2886 : mvSubArguments.push_back(SoPHelper(ts,
2887 0 : ft->Children[i], new OpDaverage));
2888 0 : break;
2889 : case ocDBStdDev:
2890 : mvSubArguments.push_back(SoPHelper(ts,
2891 0 : ft->Children[i], new OpDstdev));
2892 0 : break;
2893 : case ocDBStdDevP:
2894 : mvSubArguments.push_back(SoPHelper(ts,
2895 0 : ft->Children[i], new OpDstdevp));
2896 0 : break;
2897 : case ocDBSum:
2898 : mvSubArguments.push_back(SoPHelper(ts,
2899 0 : ft->Children[i], new OpDsum));
2900 0 : break;
2901 : case ocDBVar:
2902 : mvSubArguments.push_back(SoPHelper(ts,
2903 0 : ft->Children[i], new OpDvar));
2904 0 : break;
2905 : case ocDBVarP:
2906 : mvSubArguments.push_back(SoPHelper(ts,
2907 0 : ft->Children[i], new OpDvarp));
2908 0 : break;
2909 : case ocAverageIf:
2910 : mvSubArguments.push_back(SoPHelper(ts,
2911 0 : ft->Children[i], new OpAverageIf));
2912 0 : break;
2913 : case ocDBCount:
2914 : mvSubArguments.push_back(SoPHelper(ts,
2915 0 : ft->Children[i], new OpDcount));
2916 0 : break;
2917 : case ocDBCount2:
2918 : mvSubArguments.push_back(SoPHelper(ts,
2919 0 : ft->Children[i], new OpDcount2));
2920 0 : break;
2921 : case ocDeg:
2922 : mvSubArguments.push_back(SoPHelper(ts,
2923 0 : ft->Children[i], new OpDeg));
2924 0 : break;
2925 : case ocRoundUp:
2926 : mvSubArguments.push_back(SoPHelper(ts,
2927 0 : ft->Children[i], new OpRoundUp));
2928 0 : break;
2929 : case ocRoundDown:
2930 : mvSubArguments.push_back(SoPHelper(ts,
2931 0 : ft->Children[i], new OpRoundDown));
2932 0 : break;
2933 : case ocInt:
2934 : mvSubArguments.push_back(SoPHelper(ts,
2935 0 : ft->Children[i], new OpInt));
2936 0 : break;
2937 : case ocRad:
2938 : mvSubArguments.push_back(SoPHelper(ts,
2939 0 : ft->Children[i], new OpRadians));
2940 0 : break;
2941 : case ocCountIf:
2942 : mvSubArguments.push_back(SoPHelper(ts,
2943 0 : ft->Children[i], new OpCountIf));
2944 0 : break;
2945 : case ocIsEven:
2946 : mvSubArguments.push_back(SoPHelper(ts,
2947 0 : ft->Children[i], new OpIsEven));
2948 0 : break;
2949 : case ocIsOdd:
2950 : mvSubArguments.push_back(SoPHelper(ts,
2951 0 : ft->Children[i], new OpIsOdd));
2952 0 : break;
2953 : case ocFact:
2954 : mvSubArguments.push_back(SoPHelper(ts,
2955 0 : ft->Children[i], new OpFact));
2956 0 : break;
2957 : case ocMinA:
2958 : mvSubArguments.push_back(SoPHelper(ts,
2959 0 : ft->Children[i], new OpMinA));
2960 0 : break;
2961 : case ocCount2:
2962 : mvSubArguments.push_back(SoPHelper(ts,
2963 0 : ft->Children[i], new OpCountA));
2964 0 : break;
2965 : case ocMaxA:
2966 : mvSubArguments.push_back(SoPHelper(ts,
2967 0 : ft->Children[i], new OpMaxA));
2968 0 : break;
2969 : case ocAverageA:
2970 : mvSubArguments.push_back(SoPHelper(ts,
2971 0 : ft->Children[i], new OpAverageA));
2972 0 : break;
2973 : case ocVarA:
2974 : mvSubArguments.push_back(SoPHelper(ts,
2975 0 : ft->Children[i], new OpVarA));
2976 0 : break;
2977 : case ocVarPA:
2978 : mvSubArguments.push_back(SoPHelper(ts,
2979 0 : ft->Children[i], new OpVarPA));
2980 0 : break;
2981 : case ocStDevA:
2982 : mvSubArguments.push_back(SoPHelper(ts,
2983 0 : ft->Children[i], new OpStDevA));
2984 0 : break;
2985 : case ocStDevPA:
2986 : mvSubArguments.push_back(SoPHelper(ts,
2987 0 : ft->Children[i], new OpStDevPA));
2988 0 : break;
2989 : case ocSecant:
2990 : mvSubArguments.push_back(SoPHelper(ts,
2991 0 : ft->Children[i], new OpSec));
2992 0 : break;
2993 : case ocSecantHyp:
2994 : mvSubArguments.push_back(SoPHelper(ts,
2995 0 : ft->Children[i], new OpSecH));
2996 0 : break;
2997 : case ocSumIf:
2998 : mvSubArguments.push_back(SoPHelper(ts,
2999 0 : ft->Children[i], new OpSumIf));
3000 0 : break;
3001 : case ocNegSub:
3002 : mvSubArguments.push_back(SoPHelper(ts,
3003 0 : ft->Children[i], new OpNegSub));
3004 0 : break;
3005 : case ocAveDev:
3006 : mvSubArguments.push_back(SoPHelper(ts,
3007 0 : ft->Children[i], new OpAveDev));
3008 0 : break;
3009 : case ocIf:
3010 : mvSubArguments.push_back(SoPHelper(ts,
3011 0 : ft->Children[i], new OpIf));
3012 0 : break;
3013 : case ocExternal:
3014 0 : if (!(pChild->GetExternal().compareTo(OUString(
3015 0 : "com.sun.star.sheet.addin.Analysis.getEffect"))))
3016 : {
3017 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpEffective));
3018 : }
3019 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3020 0 : "com.sun.star.sheet.addin.Analysis.getCumipmt"))))
3021 : {
3022 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCumipmt));
3023 : }
3024 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3025 0 : "com.sun.star.sheet.addin.Analysis.getNominal"))))
3026 : {
3027 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpNominal));
3028 : }
3029 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3030 0 : "com.sun.star.sheet.addin.Analysis.getCumprinc"))))
3031 : {
3032 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCumprinc));
3033 : }
3034 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3035 0 : "com.sun.star.sheet.addin.Analysis.getXnpv"))))
3036 : {
3037 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpXNPV));
3038 : }
3039 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3040 0 : "com.sun.star.sheet.addin.Analysis.getPricemat"))))
3041 : {
3042 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPriceMat));
3043 : }
3044 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3045 0 : "com.sun.star.sheet.addin.Analysis.getReceived"))))
3046 : {
3047 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpReceived));
3048 : }
3049 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3050 0 : "com.sun.star.sheet.addin.Analysis.getTbilleq"))))
3051 : {
3052 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpTbilleq));
3053 : }
3054 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3055 0 : "com.sun.star.sheet.addin.Analysis.getTbillprice"))))
3056 : {
3057 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpTbillprice));
3058 : }
3059 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3060 0 : "com.sun.star.sheet.addin.Analysis.getTbillyield"))))
3061 : {
3062 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpTbillyield));
3063 : }
3064 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3065 0 : "com.sun.star.sheet.addin.Analysis.getFvschedule"))))
3066 : {
3067 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpFvschedule));
3068 : }
3069 : /*else if ( !(pChild->GetExternal().compareTo(OUString(
3070 : "com.sun.star.sheet.addin.Analysis.getYield"))))
3071 : {
3072 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpYield));
3073 : }*/
3074 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3075 0 : "com.sun.star.sheet.addin.Analysis.getYielddisc"))))
3076 : {
3077 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpYielddisc));
3078 : }
3079 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3080 0 : "com.sun.star.sheet.addin.Analysis.getYieldmat"))))
3081 : {
3082 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpYieldmat));
3083 : }
3084 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3085 0 : "com.sun.star.sheet.addin.Analysis.getAccrintm"))))
3086 : {
3087 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpAccrintm));
3088 : }
3089 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3090 0 : "com.sun.star.sheet.addin.Analysis.getCoupdaybs"))))
3091 : {
3092 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCoupdaybs));
3093 : }
3094 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3095 0 : "com.sun.star.sheet.addin.Analysis.getDollarde"))))
3096 : {
3097 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDollarde));
3098 : }
3099 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3100 0 : "com.sun.star.sheet.addin.Analysis.getDollarfr"))))
3101 : {
3102 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDollarfr));
3103 : }
3104 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3105 0 : "com.sun.star.sheet.addin.Analysis.getCoupdays"))))
3106 : {
3107 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCoupdays));
3108 : }
3109 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3110 0 : "com.sun.star.sheet.addin.Analysis.getCoupdaysnc"))))
3111 : {
3112 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCoupdaysnc));
3113 : }
3114 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3115 0 : "com.sun.star.sheet.addin.Analysis.getDisc"))))
3116 : {
3117 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDISC));
3118 : }
3119 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3120 0 : "com.sun.star.sheet.addin.Analysis.getIntrate"))))
3121 : {
3122 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpINTRATE));
3123 : }
3124 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3125 0 : "com.sun.star.sheet.addin.Analysis.getPrice"))))
3126 : {
3127 : mvSubArguments.push_back(SoPHelper(ts,
3128 0 : ft->Children[i], new OpPrice));
3129 : }
3130 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3131 0 : "com.sun.star.sheet.addin.Analysis.getCoupnum"))))
3132 : {
3133 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3134 0 : new OpCoupnum));
3135 : }
3136 : /*else if ( !(pChild->GetExternal().compareTo(OUString(
3137 : "com.sun.star.sheet.addin.Analysis.getDuration"))))
3138 : {
3139 : mvSubArguments.push_back(
3140 : SoPHelper(ts, ft->Children[i], new OpDuration_ADD));
3141 : }*/
3142 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3143 0 : "com.sun.star.sheet.addin.Analysis.getAmordegrc"))))
3144 : {
3145 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3146 0 : new OpAmordegrc));
3147 : }
3148 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3149 0 : "com.sun.star.sheet.addin.Analysis.getAmorlinc"))))
3150 : {
3151 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3152 0 : new OpAmorlinc));
3153 : }
3154 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3155 0 : "com.sun.star.sheet.addin.Analysis.getMduration"))))
3156 : {
3157 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3158 0 : new OpMDuration));
3159 : }
3160 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3161 0 : "com.sun.star.sheet.addin.Analysis.getXirr"))))
3162 : {
3163 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3164 0 : new OpXirr));
3165 : }
3166 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3167 0 : "com.sun.star.sheet.addin.Analysis.getOddlprice"))))
3168 : {
3169 : mvSubArguments.push_back(SoPHelper(ts,
3170 0 : ft->Children[i], new OpOddlprice));
3171 : }
3172 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3173 0 : "com.sun.star.sheet.addin.Analysis.getOddlyield"))))
3174 : {
3175 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3176 0 : new OpOddlyield));
3177 : }
3178 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3179 0 : "com.sun.star.sheet.addin.Analysis.getPricedisc"))))
3180 : {
3181 : mvSubArguments.push_back(SoPHelper(ts,
3182 0 : ft->Children[i], new OpPriceDisc));
3183 : }
3184 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3185 0 : "com.sun.star.sheet.addin.Analysis.getCouppcd"))))
3186 : {
3187 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3188 0 : new OpCouppcd));
3189 : }
3190 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3191 0 : "com.sun.star.sheet.addin.Analysis.getCoupncd"))))
3192 : {
3193 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3194 0 : new OpCoupncd));
3195 : }
3196 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3197 0 : "com.sun.star.sheet.addin.Analysis.getAccrint"))))
3198 : {
3199 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3200 0 : new OpAccrint));
3201 : }
3202 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3203 0 : "com.sun.star.sheet.addin.Analysis.getSqrtpi"))))
3204 : {
3205 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3206 0 : new OpSqrtPi));
3207 : }
3208 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3209 0 : "com.sun.star.sheet.addin.Analysis.getConvert"))))
3210 : {
3211 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3212 0 : new OpConvert));
3213 : }
3214 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3215 0 : "com.sun.star.sheet.addin.Analysis.getIseven"))))
3216 : {
3217 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3218 0 : new OpIsEven));
3219 : }
3220 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3221 0 : "com.sun.star.sheet.addin.Analysis.getIsodd"))))
3222 : {
3223 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3224 0 : new OpIsOdd));
3225 : }
3226 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3227 0 : "com.sun.star.sheet.addin.Analysis.getMround"))))
3228 : {
3229 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3230 0 : new OpMROUND));
3231 : }
3232 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3233 0 : "com.sun.star.sheet.addin.Analysis.getQuotient"))))
3234 : {
3235 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3236 0 : new OpQuotient));
3237 : }
3238 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3239 0 : "com.sun.star.sheet.addin.Analysis.getSeriessum"))))
3240 : {
3241 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3242 0 : new OpSeriesSum));
3243 : }
3244 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3245 0 : "com.sun.star.sheet.addin.Analysis.getBesselj"))))
3246 : {
3247 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3248 0 : new OpBesselj));
3249 : }
3250 0 : else if (!(pChild->GetExternal().compareTo(OUString(
3251 0 : "com.sun.star.sheet.addin.Analysis.getGestep"))))
3252 : {
3253 0 : mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
3254 0 : new OpGestep));
3255 : }
3256 : else
3257 0 : throw UnhandledToken(pChild, "unhandled opcode");
3258 0 : break;
3259 :
3260 : default:
3261 0 : throw UnhandledToken(pChild, "unhandled opcode");
3262 : };
3263 0 : }
3264 0 : }
3265 :
3266 : class DynamicKernel : public CompiledFormula
3267 : {
3268 : public:
3269 0 : DynamicKernel( FormulaTreeNodeRef r ) : mpRoot(r),
3270 0 : mpProgram(NULL), mpKernel(NULL), mpResClmem(NULL) { }
3271 : static DynamicKernel* create( ScDocument& rDoc,
3272 : const ScAddress& rTopPos,
3273 : ScTokenArray& rCode );
3274 : /// OpenCL code generation
3275 0 : void CodeGen()
3276 : {
3277 : // Travese the tree of expression and declare symbols used
3278 : const DynamicKernelArgument* DK = mSyms.DeclRefArg<
3279 0 : DynamicKernelSoPArguments>(mpRoot, new OpNop);
3280 :
3281 0 : std::stringstream decl;
3282 0 : if (OpenCLDevice::gpuEnv.mnKhrFp64Flag)
3283 : {
3284 0 : decl << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
3285 : }
3286 0 : else if (OpenCLDevice::gpuEnv.mnAmdFp64Flag)
3287 : {
3288 0 : decl << "#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
3289 : }
3290 : // preambles
3291 0 : decl << publicFunc;
3292 0 : DK->DumpInlineFun(inlineDecl, inlineFun);
3293 0 : for (std::set<std::string>::iterator set_iter = inlineDecl.begin();
3294 0 : set_iter != inlineDecl.end(); ++set_iter)
3295 : {
3296 0 : decl << *set_iter;
3297 : }
3298 :
3299 0 : for (std::set<std::string>::iterator set_iter = inlineFun.begin();
3300 0 : set_iter != inlineFun.end(); ++set_iter)
3301 : {
3302 0 : decl << *set_iter;
3303 : }
3304 0 : mSyms.DumpSlidingWindowFunctions(decl);
3305 0 : mKernelSignature = DK->DumpOpName();
3306 0 : decl << "__kernel void DynamicKernel" << mKernelSignature;
3307 0 : decl << "(__global double *result, ";
3308 0 : DK->GenSlidingWindowDecl(decl);
3309 0 : decl << ") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
3310 0 : DK->GenSlidingWindowDeclRef(false) << ";\n}\n";
3311 0 : mFullProgramSrc = decl.str();
3312 : #ifdef SAL_DETAIL_ENABLE_LOG_INFO
3313 0 : std::stringstream area;
3314 0 : if (mKernelSignature[0] == '_')
3315 0 : area << "sc.opencl.source." << mKernelSignature.substr(1, std::string::npos);
3316 : else
3317 0 : area << "sc.opencl.source." << mKernelSignature;
3318 0 : SAL_INFO(area.str().c_str(), "Program to be compiled:\n" << linenumberify(mFullProgramSrc));
3319 : #endif
3320 0 : }
3321 : /// Produce kernel hash
3322 0 : std::string GetMD5()
3323 : {
3324 : #ifdef MD5_KERNEL
3325 0 : if (mKernelHash.empty())
3326 : {
3327 0 : std::stringstream md5s;
3328 : // Compute MD5SUM of kernel body to obtain the name
3329 : sal_uInt8 result[RTL_DIGEST_LENGTH_MD5];
3330 : rtl_digest_MD5(
3331 0 : mFullProgramSrc.c_str(),
3332 0 : mFullProgramSrc.length(), result,
3333 0 : RTL_DIGEST_LENGTH_MD5);
3334 0 : for (int i = 0; i < RTL_DIGEST_LENGTH_MD5; i++)
3335 : {
3336 0 : md5s << std::hex << (int)result[i];
3337 : }
3338 0 : mKernelHash = md5s.str();
3339 : }
3340 0 : return mKernelHash;
3341 : #else
3342 : return "";
3343 : #endif
3344 : }
3345 : /// Create program, build, and create kerenl
3346 : /// TODO cache results based on kernel body hash
3347 : /// TODO: abstract OpenCL part out into OpenCL wrapper.
3348 : void CreateKernel();
3349 : /// Prepare buffers, marshal them to GPU, and launch the kernel
3350 : /// TODO: abstract OpenCL part out into OpenCL wrapper.
3351 0 : void Launch( size_t nr )
3352 : {
3353 : // Obtain cl context
3354 : KernelEnv kEnv;
3355 0 : OpenCLDevice::setKernelEnv(&kEnv);
3356 : cl_int err;
3357 : // The results
3358 : mpResClmem = clCreateBuffer(kEnv.mpkContext,
3359 : (cl_mem_flags)CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
3360 0 : nr * sizeof(double), NULL, &err);
3361 0 : if (CL_SUCCESS != err)
3362 0 : throw OpenCLError(err, __FILE__, __LINE__);
3363 0 : err = clSetKernelArg(mpKernel, 0, sizeof(cl_mem), (void*)&mpResClmem);
3364 0 : if (CL_SUCCESS != err)
3365 0 : throw OpenCLError(err, __FILE__, __LINE__);
3366 : // The rest of buffers
3367 0 : mSyms.Marshal(mpKernel, nr, mpProgram);
3368 0 : size_t global_work_size[] = { nr };
3369 : err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, mpKernel, 1, NULL,
3370 0 : global_work_size, NULL, 0, NULL, NULL);
3371 0 : if (CL_SUCCESS != err)
3372 0 : throw OpenCLError(err, __FILE__, __LINE__);
3373 0 : err = clFinish(kEnv.mpkCmdQueue);
3374 0 : if (CL_SUCCESS != err)
3375 0 : throw OpenCLError(err, __FILE__, __LINE__);
3376 0 : }
3377 : virtual ~DynamicKernel();
3378 0 : cl_mem GetResultBuffer() const { return mpResClmem; }
3379 :
3380 : private:
3381 :
3382 : FormulaTreeNodeRef mpRoot;
3383 : SymbolTable mSyms;
3384 : std::string mKernelSignature, mKernelHash;
3385 : std::string mFullProgramSrc;
3386 : cl_program mpProgram;
3387 : cl_kernel mpKernel;
3388 : cl_mem mpResClmem; // Results
3389 : std::set<std::string> inlineDecl;
3390 : std::set<std::string> inlineFun;
3391 : };
3392 :
3393 0 : DynamicKernel::~DynamicKernel()
3394 : {
3395 0 : if (mpResClmem)
3396 : {
3397 0 : clReleaseMemObject(mpResClmem);
3398 : }
3399 0 : if (mpKernel)
3400 : {
3401 0 : clReleaseKernel(mpKernel);
3402 : }
3403 : // mpProgram is not going to be released here -- it's cached.
3404 0 : }
3405 : /// Build code
3406 0 : void DynamicKernel::CreateKernel()
3407 : {
3408 0 : if (mpKernel)
3409 : // already created.
3410 0 : return;
3411 :
3412 : cl_int err;
3413 0 : std::string kname = "DynamicKernel" + mKernelSignature;
3414 : // Compile kernel here!!!
3415 : // Obtain cl context
3416 : KernelEnv kEnv;
3417 0 : OpenCLDevice::setKernelEnv(&kEnv);
3418 0 : const char* src = mFullProgramSrc.c_str();
3419 0 : static std::string lastOneKernelHash = "";
3420 0 : static std::string lastSecondKernelHash = "";
3421 : static cl_program lastOneProgram = NULL;
3422 : static cl_program lastSecondProgram = NULL;
3423 0 : std::string KernelHash = mKernelSignature + GetMD5();
3424 0 : if (lastOneKernelHash == KernelHash && lastOneProgram)
3425 : {
3426 0 : mpProgram = lastOneProgram;
3427 : }
3428 0 : else if (lastSecondKernelHash == KernelHash && lastSecondProgram)
3429 : {
3430 0 : mpProgram = lastSecondProgram;
3431 : }
3432 : else
3433 : { // doesn't match the last compiled formula.
3434 :
3435 0 : if (lastSecondProgram)
3436 : {
3437 0 : clReleaseProgram(lastSecondProgram);
3438 : }
3439 0 : if (OpenCLDevice::buildProgramFromBinary("",
3440 0 : &OpenCLDevice::gpuEnv, KernelHash.c_str(), 0))
3441 : {
3442 0 : mpProgram = OpenCLDevice::gpuEnv.mpArryPrograms[0];
3443 0 : OpenCLDevice::gpuEnv.mpArryPrograms[0] = NULL;
3444 : }
3445 : else
3446 : {
3447 : mpProgram = clCreateProgramWithSource(kEnv.mpkContext, 1,
3448 0 : &src, NULL, &err);
3449 0 : if (err != CL_SUCCESS)
3450 0 : throw OpenCLError(err, __FILE__, __LINE__);
3451 : err = clBuildProgram(mpProgram, 1,
3452 0 : OpenCLDevice::gpuEnv.mpArryDevsID, "", NULL, NULL);
3453 0 : if (err != CL_SUCCESS)
3454 : {
3455 : #if OSL_DEBUG_LEVEL > 0
3456 : if (err == CL_BUILD_PROGRAM_FAILURE)
3457 : {
3458 : cl_build_status stat;
3459 : cl_int e = clGetProgramBuildInfo(
3460 : mpProgram, OpenCLDevice::gpuEnv.mpArryDevsID[0],
3461 : CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status),
3462 : &stat, 0);
3463 : SAL_WARN_IF(
3464 : e != CL_SUCCESS, "sc.opencl",
3465 : "after CL_BUILD_PROGRAM_FAILURE,"
3466 : " clGetProgramBuildInfo(CL_PROGRAM_BUILD_STATUS)"
3467 : " fails with " << e);
3468 : if (e == CL_SUCCESS)
3469 : {
3470 : size_t n;
3471 : e = clGetProgramBuildInfo(
3472 : mpProgram, OpenCLDevice::gpuEnv.mpArryDevsID[0],
3473 : CL_PROGRAM_BUILD_LOG, 0, 0, &n);
3474 : SAL_WARN_IF(
3475 : e != CL_SUCCESS || n == 0, "sc.opencl",
3476 : "after CL_BUILD_PROGRAM_FAILURE,"
3477 : " clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG)"
3478 : " fails with " << e << ", n=" << n);
3479 : if (e == CL_SUCCESS && n != 0)
3480 : {
3481 : std::vector<char> log(n);
3482 : e = clGetProgramBuildInfo(
3483 : mpProgram, OpenCLDevice::gpuEnv.mpArryDevsID[0],
3484 : CL_PROGRAM_BUILD_LOG, n, &log[0], 0);
3485 : SAL_WARN_IF(
3486 : e != CL_SUCCESS || n == 0, "sc.opencl",
3487 : "after CL_BUILD_PROGRAM_FAILURE,"
3488 : " clGetProgramBuildInfo("
3489 : "CL_PROGRAM_BUILD_LOG) fails with " << e);
3490 : if (e == CL_SUCCESS)
3491 : SAL_WARN(
3492 : "sc.opencl",
3493 : "CL_BUILD_PROGRAM_FAILURE, status " << stat
3494 : << ", log \"" << &log[0] << "\"");
3495 : }
3496 : }
3497 : }
3498 : #endif
3499 0 : throw OpenCLError(err, __FILE__, __LINE__);
3500 : }
3501 : // Generate binary out of compiled kernel.
3502 : OpenCLDevice::generatBinFromKernelSource(mpProgram,
3503 0 : (mKernelSignature + GetMD5()).c_str());
3504 : }
3505 0 : lastSecondKernelHash = lastOneKernelHash;
3506 0 : lastSecondProgram = lastOneProgram;
3507 0 : lastOneKernelHash = KernelHash;
3508 0 : lastOneProgram = mpProgram;
3509 : }
3510 0 : mpKernel = clCreateKernel(mpProgram, kname.c_str(), &err);
3511 0 : if (err != CL_SUCCESS)
3512 0 : throw OpenCLError(err, __FILE__, __LINE__);
3513 : }
3514 : // Symbol lookup. If there is no such symbol created, allocate one
3515 : // kernel with argument with unique name and return so.
3516 : // The template argument T must be a subclass of DynamicKernelArgument
3517 : template<typename T>
3518 0 : const DynamicKernelArgument* SymbolTable::DeclRefArg(
3519 : FormulaTreeNodeRef t, SlidingFunctionBase* pCodeGen )
3520 : {
3521 0 : FormulaToken* ref = t->GetFormulaToken();
3522 0 : ArgumentMap::iterator it = mSymbols.find(ref);
3523 0 : if (it == mSymbols.end())
3524 : {
3525 : // Allocate new symbols
3526 0 : std::stringstream ss;
3527 0 : ss << "tmp" << mCurId++;
3528 0 : boost::shared_ptr<DynamicKernelArgument> new_arg(new T(ss.str(), t, pCodeGen));
3529 0 : mSymbols[ref] = new_arg;
3530 0 : mParams.push_back(new_arg);
3531 0 : return new_arg.get();
3532 : }
3533 : else
3534 : {
3535 0 : return it->second.get();
3536 : }
3537 : }
3538 :
3539 0 : ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix( const ScMatrix& )
3540 : {
3541 0 : return NULL;
3542 : }
3543 :
3544 0 : DynamicKernel* DynamicKernel::create( ScDocument& /* rDoc */,
3545 : const ScAddress& /* rTopPos */,
3546 : ScTokenArray& rCode )
3547 : {
3548 : // Constructing "AST"
3549 0 : FormulaTokenIterator aCode(rCode);
3550 0 : std::list<FormulaToken*> aTokenList;
3551 0 : std::map<FormulaToken*, FormulaTreeNodeRef> aHashMap;
3552 : FormulaToken* pCur;
3553 0 : while ((pCur = (FormulaToken*)(aCode.Next())) != NULL)
3554 : {
3555 0 : OpCode eOp = pCur->GetOpCode();
3556 0 : if (eOp != ocPush)
3557 : {
3558 0 : FormulaTreeNodeRef pCurNode(new FormulaTreeNode(pCur));
3559 0 : sal_uInt8 nParamCount = pCur->GetParamCount();
3560 0 : for (sal_uInt8 i = 0; i < nParamCount; i++)
3561 : {
3562 0 : FormulaToken* pTempFormula = aTokenList.back();
3563 0 : aTokenList.pop_back();
3564 0 : if (pTempFormula->GetOpCode() != ocPush)
3565 : {
3566 0 : if (aHashMap.find(pTempFormula) == aHashMap.end())
3567 0 : return NULL;
3568 0 : pCurNode->Children.push_back(aHashMap[pTempFormula]);
3569 : }
3570 : else
3571 : {
3572 : FormulaTreeNodeRef pChildTreeNode =
3573 : FormulaTreeNodeRef(
3574 0 : new FormulaTreeNode(pTempFormula));
3575 0 : pCurNode->Children.push_back(pChildTreeNode);
3576 : }
3577 : }
3578 0 : std::reverse(pCurNode->Children.begin(), pCurNode->Children.end());
3579 0 : aHashMap[pCur] = pCurNode;
3580 : }
3581 0 : aTokenList.push_back(pCur);
3582 : }
3583 :
3584 0 : FormulaTreeNodeRef Root = FormulaTreeNodeRef(new FormulaTreeNode(NULL));
3585 0 : Root->Children.push_back(aHashMap[aTokenList.back()]);
3586 :
3587 0 : DynamicKernel* pDynamicKernel = new DynamicKernel(Root);
3588 :
3589 0 : if (!pDynamicKernel)
3590 0 : return NULL;
3591 :
3592 : // OpenCL source code generation and kernel compilation
3593 : try
3594 : {
3595 0 : pDynamicKernel->CodeGen();
3596 0 : pDynamicKernel->CreateKernel();
3597 : }
3598 0 : catch (const UnhandledToken& ut)
3599 : {
3600 : SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled token: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
3601 : #ifdef NO_FALLBACK_TO_SWINTERP
3602 : assert(false);
3603 : #else
3604 0 : delete pDynamicKernel;
3605 0 : return NULL;
3606 : #endif
3607 : }
3608 0 : catch (...)
3609 : {
3610 : SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled compiler error");
3611 0 : return NULL;
3612 : }
3613 0 : return pDynamicKernel;
3614 : }
3615 :
3616 0 : CompiledFormula* FormulaGroupInterpreterOpenCL::createCompiledFormula( ScDocument& rDoc,
3617 : const ScAddress& rTopPos,
3618 : ScFormulaCellGroup& rGroup,
3619 : ScTokenArray& rCode )
3620 : {
3621 0 : SymbolTable::nR = rGroup.mnLength;
3622 0 : return DynamicKernel::create(rDoc, rTopPos, rCode);
3623 : }
3624 :
3625 0 : bool FormulaGroupInterpreterOpenCL::interpret( ScDocument& rDoc,
3626 : const ScAddress& rTopPos, ScFormulaCellGroupRef& xGroup,
3627 : ScTokenArray& rCode )
3628 : {
3629 0 : ScCompiler aComp(&rDoc, rTopPos, rCode);
3630 0 : aComp.SetGrammar(rDoc.GetGrammar());
3631 : // Disable special ordering for jump commands for the OpenCL interpreter.
3632 0 : aComp.EnableJumpCommandReorder(false);
3633 0 : aComp.CompileTokenArray(); // Regenerate RPN tokens.
3634 :
3635 0 : DynamicKernel* pKernel = NULL;
3636 0 : boost::scoped_ptr<DynamicKernel> pLocalKernel;
3637 :
3638 : #if ENABLE_THREADED_OPENCL_KERNEL_COMPILATION
3639 : if (xGroup->meKernelState == sc::OpenCLKernelCompilationScheduled ||
3640 : xGroup->meKernelState == sc::OpenCLKernelBinaryCreated)
3641 : {
3642 : if (xGroup->meKernelState == sc::OpenCLKernelCompilationScheduled)
3643 : {
3644 : ScFormulaCellGroup::sxCompilationThread->maCompilationDoneCondition.wait();
3645 : ScFormulaCellGroup::sxCompilationThread->maCompilationDoneCondition.reset();
3646 : }
3647 :
3648 : pKernel = static_cast<DynamicKernel*>(xGroup->mpCompiledFormula);
3649 : }
3650 : else
3651 : {
3652 : assert(xGroup->meCalcState == sc::GroupCalcRunning);
3653 : pKernel = static_cast<DynamicKernel*>(createCompiledFormula(rDoc, rTopPos, *xGroup, rCode));
3654 : pLocalKernel.reset(pKernel); // to be deleted when done.
3655 : }
3656 : #else
3657 0 : pKernel = static_cast<DynamicKernel*>(createCompiledFormula(rDoc, rTopPos, *xGroup, rCode));
3658 0 : pLocalKernel.reset(pKernel); // to be deleted when done.
3659 : #endif
3660 :
3661 0 : if (!pKernel)
3662 0 : return false;
3663 :
3664 : try
3665 : {
3666 : // Obtain cl context
3667 : KernelEnv kEnv;
3668 0 : OpenCLDevice::setKernelEnv(&kEnv);
3669 : // Run the kernel.
3670 0 : pKernel->Launch(xGroup->mnLength);
3671 : // Map results back
3672 0 : cl_mem res = pKernel->GetResultBuffer();
3673 : cl_int err;
3674 : double* resbuf = (double*)clEnqueueMapBuffer(kEnv.mpkCmdQueue,
3675 : res,
3676 : CL_TRUE, CL_MAP_READ, 0,
3677 0 : xGroup->mnLength * sizeof(double), 0, NULL, NULL,
3678 0 : &err);
3679 0 : if (err != CL_SUCCESS)
3680 0 : throw OpenCLError(err, __FILE__, __LINE__);
3681 0 : rDoc.SetFormulaResults(rTopPos, resbuf, xGroup->mnLength);
3682 0 : err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, res, resbuf, 0, NULL, NULL);
3683 0 : if (err != CL_SUCCESS)
3684 0 : throw OpenCLError(err, __FILE__, __LINE__);
3685 : }
3686 0 : catch (const UnhandledToken& ut)
3687 : {
3688 : SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled token: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
3689 : #ifdef NO_FALLBACK_TO_SWINTERP
3690 : assert(false);
3691 : return true;
3692 : #else
3693 0 : return false;
3694 : #endif
3695 : }
3696 0 : catch (const OpenCLError& oce)
3697 : {
3698 : SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCL error: " << oce.mError << " at " << oce.mFile << ":" << oce.mLineNumber);
3699 : #ifdef NO_FALLBACK_TO_SWINTERP
3700 : assert(false);
3701 : return true;
3702 : #else
3703 0 : return false;
3704 : #endif
3705 : }
3706 0 : catch (const Unhandled& uh)
3707 : {
3708 : SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled case at " << uh.mFile << ":" << uh.mLineNumber);
3709 : #ifdef NO_FALLBACK_TO_SWINTERP
3710 : assert(false);
3711 : return true;
3712 : #else
3713 0 : return false;
3714 : #endif
3715 : }
3716 0 : catch (...)
3717 : {
3718 : SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled compiler error");
3719 : #ifdef NO_FALLBACK_TO_SWINTERP
3720 : assert(false);
3721 : return true;
3722 : #else
3723 0 : return false;
3724 : #endif
3725 : }
3726 0 : return true;
3727 : }
3728 :
3729 228 : }} // namespace sc::opencl
3730 :
3731 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|