Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : */
9 :
10 : #include <config_folders.h>
11 :
12 : #include "opencl_device.hxx"
13 :
14 : #include <comphelper/string.hxx>
15 : #include <opencl/openclconfig.hxx>
16 : #include <opencl/openclwrapper.hxx>
17 : #include <osl/file.hxx>
18 : #include <rtl/bootstrap.hxx>
19 : #include <rtl/digest.h>
20 : #include <rtl/strbuf.hxx>
21 : #include <rtl/ustring.hxx>
22 : #include <sal/config.h>
23 : #include <sal/log.hxx>
24 :
25 : #include <boost/scoped_array.hpp>
26 : #include <unicode/regex.h>
27 :
28 : #include <stdio.h>
29 : #include <stdlib.h>
30 : #include <string.h>
31 :
32 : #include <cmath>
33 :
34 : #ifdef _WIN32
35 : #include <prewin.h>
36 : #include <postwin.h>
37 : #define OPENCL_DLL_NAME "OpenCL.dll"
38 : #elif defined(MACOSX)
39 : #define OPENCL_DLL_NAME NULL
40 : #else
41 : #define OPENCL_DLL_NAME "libOpenCL.so"
42 : #endif
43 :
44 : #define DEVICE_NAME_LENGTH 1024
45 : #define DRIVER_VERSION_LENGTH 1024
46 : #define PLATFORM_VERSION_LENGTH 1024
47 :
// Logs a warning and makes the *enclosing function* return false when
// `status` is not CL_SUCCESS. `name` must be a string literal (it is
// concatenated into the log message). The do/while(false) wrapper makes the
// macro behave like a single statement, so it is safe inside unbraced
// if/else; invoke it with a trailing semicolon.
#define CHECK_OPENCL(status,name) \
    do \
    { \
        if( (status) != CL_SUCCESS ) \
        { \
            SAL_WARN( "opencl", "OpenCL error code " << (status) << " at " SAL_DETAIL_WHERE " from " name ); \
            return false; \
        } \
    } while( false )
54 :
55 : using namespace std;
56 :
57 : namespace opencl {
58 :
59 : GPUEnv gpuEnv;
60 :
61 : namespace {
62 :
63 : bool bIsInited = false;
64 :
65 0 : OString generateMD5(const void* pData, size_t length)
66 : {
67 : sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5];
68 : rtlDigestError aError = rtl_digest_MD5(pData, length,
69 0 : pBuffer, RTL_DIGEST_LENGTH_MD5);
70 : SAL_WARN_IF(aError != rtl_Digest_E_None, "opencl", "md5 generation failed");
71 :
72 0 : OStringBuffer aBuffer;
73 0 : const char* pString = "0123456789ABCDEF";
74 0 : for(size_t i = 0; i < RTL_DIGEST_LENGTH_MD5; ++i)
75 : {
76 0 : sal_uInt8 val = pBuffer[i];
77 0 : aBuffer.append(pString[val/16]);
78 0 : aBuffer.append(pString[val%16]);
79 : }
80 0 : return aBuffer.makeStringAndClear();
81 : }
82 :
83 52 : OString getCacheFolder()
84 : {
85 52 : OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
86 52 : rtl::Bootstrap::expandMacros(url);
87 :
88 52 : osl::Directory::create(url);
89 :
90 52 : return rtl::OUStringToOString(url, RTL_TEXTENCODING_UTF8);
91 : }
92 :
93 52 : OString maCacheFolder = getCacheFolder();
94 :
95 : }
96 :
97 0 : void setKernelEnv( KernelEnv *envInfo )
98 : {
99 0 : envInfo->mpkContext = gpuEnv.mpContext;
100 0 : envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
101 :
102 : assert(gpuEnv.mnCmdQueuePos < OPENCL_CMDQUEUE_SIZE);
103 0 : envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue[gpuEnv.mnCmdQueuePos];
104 0 : }
105 :
106 : namespace {
107 :
108 0 : OString createFileName(cl_device_id deviceId, const char* clFileName)
109 : {
110 0 : OString fileName(clFileName);
111 0 : sal_Int32 nIndex = fileName.lastIndexOf(".cl");
112 0 : if(nIndex > 0)
113 0 : fileName = fileName.copy(0, nIndex);
114 :
115 0 : char deviceName[DEVICE_NAME_LENGTH] = {0};
116 : clGetDeviceInfo(deviceId, CL_DEVICE_NAME,
117 0 : sizeof(deviceName), deviceName, NULL);
118 :
119 0 : char driverVersion[DRIVER_VERSION_LENGTH] = {0};
120 : clGetDeviceInfo(deviceId, CL_DRIVER_VERSION,
121 0 : sizeof(driverVersion), driverVersion, NULL);
122 :
123 : cl_platform_id platformId;
124 : clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM,
125 0 : sizeof(platformId), &platformId, NULL);
126 :
127 0 : char platformVersion[PLATFORM_VERSION_LENGTH] = {0};
128 : clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion),
129 0 : platformVersion, NULL);
130 :
131 : // create hash for deviceName + driver version + platform version
132 0 : OString aString = OString(deviceName) + driverVersion + platformVersion;
133 0 : OString aHash = generateMD5(aString.getStr(), aString.getLength());
134 :
135 0 : return maCacheFolder + fileName + "-" +
136 0 : aHash + ".bin";
137 : }
138 :
139 0 : std::vector<boost::shared_ptr<osl::File> > binaryGenerated( const char * clFileName, cl_context context )
140 : {
141 0 : size_t numDevices=0;
142 :
143 0 : std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles;
144 : cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
145 0 : 0, NULL, &numDevices );
146 0 : numDevices /= sizeof(numDevices);
147 :
148 0 : if(clStatus != CL_SUCCESS)
149 0 : return aGeneratedFiles;
150 :
151 : // grab the handles to all of the devices in the context.
152 0 : boost::scoped_array<cl_device_id> pArryDevsID(new cl_device_id[numDevices]);
153 : clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
154 0 : sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), NULL );
155 :
156 0 : if(clStatus != CL_SUCCESS)
157 0 : return aGeneratedFiles;
158 :
159 0 : for ( size_t i = 0; i < numDevices; i++ )
160 : {
161 0 : if ( pArryDevsID[i] != 0 )
162 : {
163 0 : OString fileName = createFileName(gpuEnv.mpArryDevsID[i], clFileName);
164 0 : osl::File* pNewFile = new osl::File(rtl::OStringToOUString(fileName, RTL_TEXTENCODING_UTF8));
165 0 : if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None)
166 : {
167 0 : aGeneratedFiles.push_back(boost::shared_ptr<osl::File>(pNewFile));
168 : SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: success");
169 : }
170 : else
171 : {
172 : SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: FAIL");
173 0 : delete pNewFile;
174 0 : break;
175 0 : }
176 : }
177 : }
178 :
179 0 : return aGeneratedFiles;
180 : }
181 :
182 0 : bool writeBinaryToFile( const OString& rFileName, const char* binary, size_t numBytes )
183 : {
184 0 : osl::File file(rtl::OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8));
185 : osl::FileBase::RC status = file.open(
186 0 : osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
187 :
188 0 : if(status != osl::FileBase::E_None)
189 0 : return false;
190 :
191 0 : sal_uInt64 nBytesWritten = 0;
192 0 : file.write( binary, numBytes, nBytesWritten );
193 :
194 : assert(numBytes == nBytesWritten);
195 :
196 0 : return true;
197 : }
198 :
199 : }
200 :
201 0 : bool generatBinFromKernelSource( cl_program program, const char * clFileName )
202 : {
203 : cl_uint numDevices;
204 :
205 : cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
206 0 : sizeof(numDevices), &numDevices, NULL );
207 0 : CHECK_OPENCL( clStatus, "clGetProgramInfo" );
208 :
209 0 : std::vector<cl_device_id> pArryDevsID(numDevices);
210 : /* grab the handles to all of the devices in the program. */
211 : clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
212 0 : sizeof(cl_device_id) * numDevices, &pArryDevsID[0], NULL );
213 0 : CHECK_OPENCL( clStatus, "clGetProgramInfo" );
214 :
215 : /* figure out the sizes of each of the binaries. */
216 0 : std::vector<size_t> binarySizes(numDevices);
217 :
218 : clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
219 0 : sizeof(size_t) * numDevices, &binarySizes[0], NULL );
220 0 : CHECK_OPENCL( clStatus, "clGetProgramInfo" );
221 :
222 : /* copy over all of the generated binaries. */
223 0 : boost::scoped_array<char*> binaries(new char*[numDevices]);
224 :
225 0 : for ( size_t i = 0; i < numDevices; i++ )
226 : {
227 0 : if ( binarySizes[i] != 0 )
228 : {
229 0 : binaries[i] = new char[binarySizes[i]];
230 : }
231 : else
232 : {
233 0 : binaries[i] = NULL;
234 : }
235 : }
236 :
237 : clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
238 0 : sizeof(char *) * numDevices, binaries.get(), NULL );
239 0 : CHECK_OPENCL(clStatus,"clGetProgramInfo");
240 :
241 : /* dump out each binary into its own separate file. */
242 0 : for ( size_t i = 0; i < numDevices; i++ )
243 : {
244 :
245 0 : if ( binarySizes[i] != 0 )
246 : {
247 0 : OString fileName = createFileName(pArryDevsID[i], clFileName);
248 0 : if ( !writeBinaryToFile( fileName,
249 0 : binaries[i], binarySizes[i] ) )
250 : SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': FAIL");
251 : else
252 0 : SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': success");
253 : }
254 : }
255 :
256 : // Release all resources and memory
257 0 : for ( size_t i = 0; i < numDevices; i++ )
258 : {
259 0 : delete[] binaries[i];
260 : }
261 :
262 0 : return true;
263 : }
264 :
265 : namespace {
266 :
// Bundle of OpenCL handles that switchOpenCLDevice() fills in and
// initOpenCLAttr() copies into the global gpuEnv.
struct OpenCLEnv
{
    cl_platform_id mpOclPlatformID; // platform the device belongs to
    cl_context mpOclContext; // context created for the device
    cl_device_id mpOclDevsID; // the selected device
    cl_command_queue mpOclCmdQueue[OPENCL_CMDQUEUE_SIZE]; // one queue per slot
};
274 :
275 0 : bool initOpenCLAttr( OpenCLEnv * env )
276 : {
277 0 : if ( gpuEnv.mnIsUserCreated )
278 0 : return true;
279 :
280 0 : gpuEnv.mpContext = env->mpOclContext;
281 0 : gpuEnv.mpPlatformID = env->mpOclPlatformID;
282 0 : gpuEnv.mpDevID = env->mpOclDevsID;
283 :
284 0 : gpuEnv.mnIsUserCreated = 1;
285 :
286 0 : for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
287 0 : gpuEnv.mpCmdQueue[i] = env->mpOclCmdQueue[i];
288 :
289 0 : gpuEnv.mnCmdQueuePos = 0; // default to 0.
290 :
291 0 : return false;
292 : }
293 :
294 0 : void releaseOpenCLEnv( GPUEnv *gpuInfo )
295 : {
296 0 : if ( !bIsInited )
297 : {
298 0 : return;
299 : }
300 :
301 0 : for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
302 : {
303 0 : if (gpuEnv.mpCmdQueue[i])
304 : {
305 0 : clReleaseCommandQueue(gpuEnv.mpCmdQueue[i]);
306 0 : gpuEnv.mpCmdQueue[i] = NULL;
307 : }
308 : }
309 0 : gpuEnv.mnCmdQueuePos = 0;
310 :
311 0 : if ( gpuEnv.mpContext )
312 : {
313 0 : clReleaseContext( gpuEnv.mpContext );
314 0 : gpuEnv.mpContext = NULL;
315 : }
316 0 : bIsInited = false;
317 0 : gpuInfo->mnIsUserCreated = 0;
318 0 : free( gpuInfo->mpArryDevsID );
319 :
320 0 : return;
321 : }
322 :
323 0 : bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
324 : {
325 : cl_int clStatus;
326 : //char options[512];
327 : // create a cl program executable for all the devices specified
328 0 : if (!gpuInfo->mnIsUserCreated)
329 : {
330 : clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
331 0 : buildOption, NULL, NULL);
332 : }
333 : else
334 : {
335 : clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
336 0 : buildOption, NULL, NULL);
337 : }
338 :
339 0 : if ( clStatus != CL_SUCCESS )
340 : {
341 : size_t length;
342 0 : if ( !gpuInfo->mnIsUserCreated )
343 : {
344 0 : clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
345 0 : CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
346 : }
347 : else
348 : {
349 : clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
350 0 : CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
351 : }
352 0 : if ( clStatus != CL_SUCCESS )
353 : {
354 0 : return false;
355 : }
356 :
357 0 : boost::scoped_array<char> buildLog(new char[length]);
358 0 : if ( !gpuInfo->mnIsUserCreated )
359 : {
360 0 : clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
361 0 : CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
362 : }
363 : else
364 : {
365 : clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
366 0 : CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
367 : }
368 0 : if ( clStatus != CL_SUCCESS )
369 : {
370 0 : return false;
371 : }
372 :
373 0 : OString aBuildLogFileURL = maCacheFolder + "kernel-build.log";
374 0 : osl::File aBuildLogFile(rtl::OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8));
375 : osl::FileBase::RC status = aBuildLogFile.open(
376 0 : osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
377 :
378 0 : if(status != osl::FileBase::E_None)
379 0 : return false;
380 :
381 0 : sal_uInt64 nBytesWritten = 0;
382 0 : aBuildLogFile.write( buildLog.get(), length, nBytesWritten );
383 :
384 0 : return false;
385 : }
386 :
387 0 : return true;
388 : }
389 :
390 : }
391 :
392 0 : bool buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
393 : {
394 : size_t numDevices;
395 : cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
396 0 : 0, NULL, &numDevices );
397 0 : numDevices /= sizeof(numDevices);
398 0 : CHECK_OPENCL( clStatus, "clGetContextInfo" );
399 :
400 : std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated(
401 0 : filename, gpuInfo->mpContext );
402 :
403 0 : if (aGeneratedFiles.size() == numDevices)
404 : {
405 0 : boost::scoped_array<size_t> length(new size_t[numDevices]);
406 0 : boost::scoped_array<unsigned char*> pBinary(new unsigned char*[numDevices]);
407 0 : for(size_t i = 0; i < numDevices; ++i)
408 : {
409 : sal_uInt64 nSize;
410 0 : aGeneratedFiles[i]->getSize(nSize);
411 0 : unsigned char* binary = new unsigned char[nSize];
412 : sal_uInt64 nBytesRead;
413 0 : aGeneratedFiles[i]->read(binary, nSize, nBytesRead);
414 0 : if(nSize != nBytesRead)
415 : assert(false);
416 :
417 0 : length[i] = nBytesRead;
418 :
419 0 : pBinary[i] = binary;
420 : }
421 :
422 : // grab the handles to all of the devices in the context.
423 0 : boost::scoped_array<cl_device_id> pArryDevsID(new cl_device_id[numDevices]);
424 : clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
425 0 : sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), NULL );
426 :
427 0 : if(clStatus != CL_SUCCESS)
428 : {
429 0 : for(size_t i = 0; i < numDevices; ++i)
430 : {
431 0 : delete[] pBinary[i];
432 : }
433 0 : return false;
434 : }
435 :
436 : cl_int binary_status;
437 :
438 : gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
439 0 : pArryDevsID.get(), length.get(), const_cast<const unsigned char**>(pBinary.get()),
440 0 : &binary_status, &clStatus );
441 0 : if(clStatus != CL_SUCCESS)
442 : {
443 : // something went wrong, fall back to compiling from source
444 0 : return false;
445 : }
446 : SAL_INFO("opencl", "Created program " << gpuInfo->mpArryPrograms[idx] << " from binary");
447 0 : for(size_t i = 0; i < numDevices; ++i)
448 : {
449 0 : delete[] pBinary[i];
450 0 : }
451 : }
452 :
453 0 : if ( !gpuInfo->mpArryPrograms[idx] )
454 : {
455 0 : return false;
456 : }
457 0 : return buildProgram(buildOption, gpuInfo, idx);
458 : }
459 :
460 : namespace {
461 :
462 0 : void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64)
463 : {
464 0 : bKhrFp64 = false;
465 0 : bAmdFp64 = false;
466 :
467 : // Check device extensions for double type
468 0 : size_t aDevExtInfoSize = 0;
469 :
470 0 : cl_uint clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize );
471 0 : if( clStatus != CL_SUCCESS )
472 0 : return;
473 :
474 0 : boost::scoped_array<char> pExtInfo(new char[aDevExtInfoSize]);
475 :
476 : clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS,
477 0 : sizeof(char) * aDevExtInfoSize, pExtInfo.get(), NULL);
478 :
479 0 : if( clStatus != CL_SUCCESS )
480 0 : return;
481 :
482 0 : if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) )
483 : {
484 0 : bKhrFp64 = true;
485 : }
486 : else
487 : {
488 : // Check if cl_amd_fp64 extension is supported
489 0 : if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) )
490 0 : bAmdFp64 = true;
491 0 : }
492 : }
493 :
494 0 : bool initOpenCLRunEnv( GPUEnv *gpuInfo )
495 : {
496 0 : bool bKhrFp64 = false;
497 0 : bool bAmdFp64 = false;
498 :
499 0 : checkDeviceForDoubleSupport(gpuInfo->mpArryDevsID[0], bKhrFp64, bAmdFp64);
500 :
501 0 : gpuInfo->mnKhrFp64Flag = bKhrFp64;
502 0 : gpuInfo->mnAmdFp64Flag = bAmdFp64;
503 :
504 0 : return false;
505 : }
506 :
507 0 : bool initOpenCLRunEnv( int argc )
508 : {
509 0 : if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) )
510 0 : return true;
511 :
512 0 : if ( !bIsInited )
513 : {
514 0 : if ( !gpuEnv.mnIsUserCreated )
515 0 : memset( &gpuEnv, 0, sizeof(gpuEnv) );
516 :
517 : //initialize devices, context, command_queue
518 0 : bool status = initOpenCLRunEnv( &gpuEnv );
519 0 : if ( status )
520 : {
521 0 : return true;
522 : }
523 : //initialize program, kernelName, kernelCount
524 0 : if( getenv( "SC_FLOAT" ) )
525 : {
526 0 : gpuEnv.mnKhrFp64Flag = false;
527 0 : gpuEnv.mnAmdFp64Flag = false;
528 : }
529 0 : if( gpuEnv.mnKhrFp64Flag )
530 : {
531 : SAL_INFO("opencl", "Use Khr double");
532 : }
533 0 : else if( gpuEnv.mnAmdFp64Flag )
534 : {
535 : SAL_INFO("opencl", "Use AMD double type");
536 : }
537 : else
538 : {
539 : SAL_INFO("opencl", "USE float type");
540 : }
541 0 : bIsInited = true;
542 : }
543 0 : return false;
544 : }
545 :
546 : // based on crashes and hanging during kernel compilation
547 0 : void createDeviceInfo(cl_device_id aDeviceId, OpenCLPlatformInfo& rPlatformInfo)
548 : {
549 0 : OpenCLDeviceInfo aDeviceInfo;
550 0 : aDeviceInfo.device = aDeviceId;
551 :
552 : char pName[DEVICE_NAME_LENGTH];
553 0 : cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, NULL);
554 0 : if(nState != CL_SUCCESS)
555 0 : return;
556 :
557 0 : aDeviceInfo.maName = OUString::createFromAscii(pName);
558 :
559 : char pVendor[DEVICE_NAME_LENGTH];
560 0 : nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, NULL);
561 0 : if(nState != CL_SUCCESS)
562 0 : return;
563 :
564 0 : aDeviceInfo.maVendor = OUString::createFromAscii(pVendor);
565 :
566 : cl_ulong nMemSize;
567 0 : nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, NULL);
568 0 : if(nState != CL_SUCCESS)
569 0 : return;
570 :
571 0 : aDeviceInfo.mnMemory = nMemSize;
572 :
573 : cl_uint nClockFrequency;
574 0 : nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, NULL);
575 0 : if(nState != CL_SUCCESS)
576 0 : return;
577 :
578 0 : aDeviceInfo.mnFrequency = nClockFrequency;
579 :
580 : cl_uint nComputeUnits;
581 0 : nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, NULL);
582 0 : if(nState != CL_SUCCESS)
583 0 : return;
584 :
585 : char pDriver[DEVICE_NAME_LENGTH];
586 0 : nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, NULL);
587 :
588 0 : if(nState != CL_SUCCESS)
589 0 : return;
590 :
591 0 : aDeviceInfo.maDriver = OUString::createFromAscii(pDriver);
592 :
593 0 : bool bKhrFp64 = false;
594 0 : bool bAmdFp64 = false;
595 0 : checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64);
596 :
597 : // only list devices that support double
598 0 : if(!bKhrFp64 && !bAmdFp64)
599 0 : return;
600 :
601 0 : aDeviceInfo.mnComputeUnits = nComputeUnits;
602 :
603 0 : if(!OpenCLConfig::get().checkImplementation(rPlatformInfo, aDeviceInfo))
604 0 : rPlatformInfo.maDevices.push_back(aDeviceInfo);
605 : }
606 :
607 0 : bool createPlatformInfo(cl_platform_id nPlatformId, OpenCLPlatformInfo& rPlatformInfo)
608 : {
609 0 : rPlatformInfo.platform = nPlatformId;
610 : char pName[64];
611 : cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64,
612 0 : pName, NULL);
613 0 : if(nState != CL_SUCCESS)
614 0 : return false;
615 0 : rPlatformInfo.maName = OUString::createFromAscii(pName);
616 :
617 : char pVendor[64];
618 : nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64,
619 0 : pVendor, NULL);
620 0 : if(nState != CL_SUCCESS)
621 0 : return false;
622 :
623 0 : rPlatformInfo.maVendor = OUString::createFromAscii(pVendor);
624 :
625 : cl_uint nDevices;
626 0 : nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, NULL, &nDevices);
627 0 : if(nState != CL_SUCCESS)
628 0 : return false;
629 :
630 : // memory leak that does not matter
631 : // memory is stored in static variable that lives through the whole program
632 0 : cl_device_id* pDevices = new cl_device_id[nDevices];
633 0 : nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices, NULL);
634 0 : if(nState != CL_SUCCESS)
635 0 : return false;
636 :
637 0 : for(size_t i = 0; i < nDevices; ++i)
638 : {
639 0 : createDeviceInfo(pDevices[i], rPlatformInfo);
640 : }
641 :
642 0 : return true;
643 : }
644 :
645 : }
646 :
647 224 : const std::vector<OpenCLPlatformInfo>& fillOpenCLInfo()
648 : {
649 224 : static std::vector<OpenCLPlatformInfo> aPlatforms;
650 224 : if(!aPlatforms.empty())
651 0 : return aPlatforms;
652 :
653 224 : int status = clewInit(OPENCL_DLL_NAME);
654 224 : if (status < 0)
655 0 : return aPlatforms;
656 :
657 : cl_uint nPlatforms;
658 224 : cl_int nState = clGetPlatformIDs(0, NULL, &nPlatforms);
659 :
660 224 : if(nState != CL_SUCCESS)
661 224 : return aPlatforms;
662 :
663 : // memory leak that does not matter,
664 : // memory is stored in static instance aPlatforms
665 0 : cl_platform_id* pPlatforms = new cl_platform_id[nPlatforms];
666 0 : nState = clGetPlatformIDs(nPlatforms, pPlatforms, NULL);
667 :
668 0 : if(nState != CL_SUCCESS)
669 0 : return aPlatforms;
670 :
671 0 : for(size_t i = 0; i < nPlatforms; ++i)
672 : {
673 0 : OpenCLPlatformInfo aPlatformInfo;
674 0 : if(createPlatformInfo(pPlatforms[i], aPlatformInfo))
675 0 : aPlatforms.push_back(aPlatformInfo);
676 0 : }
677 :
678 0 : return aPlatforms;
679 : }
680 :
681 : namespace {
682 :
683 0 : cl_device_id findDeviceIdByDeviceString(const OUString& rString, const std::vector<OpenCLPlatformInfo>& rPlatforms)
684 : {
685 0 : std::vector<OpenCLPlatformInfo>::const_iterator it = rPlatforms.begin(), itEnd = rPlatforms.end();
686 0 : for(; it != itEnd; ++it)
687 : {
688 0 : std::vector<OpenCLDeviceInfo>::const_iterator itr = it->maDevices.begin(), itrEnd = it->maDevices.end();
689 0 : for(; itr != itrEnd; ++itr)
690 : {
691 0 : OUString aDeviceId = it->maVendor + " " + itr->maName;
692 0 : if(rString == aDeviceId)
693 : {
694 0 : return static_cast<cl_device_id>(itr->device);
695 : }
696 0 : }
697 : }
698 :
699 0 : return NULL;
700 : }
701 :
702 0 : void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId)
703 : {
704 : cl_platform_id platformId;
705 : cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM,
706 0 : sizeof(platformId), &platformId, NULL);
707 :
708 0 : if(nState != CL_SUCCESS)
709 0 : return;
710 :
711 0 : const std::vector<OpenCLPlatformInfo>& rPlatforms = fillOpenCLInfo();
712 0 : for(size_t i = 0; i < rPlatforms.size(); ++i)
713 : {
714 0 : cl_platform_id platId = static_cast<cl_platform_id>(rPlatforms[i].platform);
715 0 : if(platId != platformId)
716 0 : continue;
717 :
718 0 : for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j)
719 : {
720 0 : cl_device_id id = static_cast<cl_device_id>(rPlatforms[i].maDevices[j].device);
721 0 : if(id == aDeviceId)
722 : {
723 0 : rDeviceId = j;
724 0 : rPlatformId = i;
725 0 : return;
726 : }
727 : }
728 : }
729 : }
730 :
731 : }
732 :
733 224 : bool switchOpenCLDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation)
734 : {
735 224 : if(fillOpenCLInfo().empty())
736 224 : return false;
737 :
738 0 : cl_device_id pDeviceId = NULL;
739 0 : if(pDevice)
740 0 : pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo());
741 :
742 0 : if(!pDeviceId || bAutoSelect)
743 : {
744 0 : int status = clewInit(OPENCL_DLL_NAME);
745 0 : if (status < 0)
746 0 : return false;
747 :
748 0 : OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
749 0 : rtl::Bootstrap::expandMacros(url);
750 0 : OUString path;
751 0 : osl::FileBase::getSystemPathFromFileURL(url,path);
752 0 : OString dsFileName = rtl::OUStringToOString(path, RTL_TEXTENCODING_UTF8);
753 0 : ds_device pSelectedDevice = getDeviceSelection(dsFileName.getStr(), bForceEvaluation);
754 0 : pDeviceId = pSelectedDevice.oclDeviceID;
755 :
756 : }
757 :
758 0 : if(gpuEnv.mpDevID == pDeviceId)
759 : {
760 : // we don't need to change anything
761 : // still the same device
762 0 : return pDeviceId != NULL;
763 : }
764 :
765 : cl_platform_id platformId;
766 : cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM,
767 0 : sizeof(platformId), &platformId, NULL);
768 :
769 : cl_context_properties cps[3];
770 0 : cps[0] = CL_CONTEXT_PLATFORM;
771 0 : cps[1] = reinterpret_cast<cl_context_properties>(platformId);
772 0 : cps[2] = 0;
773 0 : cl_context context = clCreateContext( cps, 1, &pDeviceId, NULL, NULL, &nState );
774 0 : if (nState != CL_SUCCESS)
775 : SAL_WARN("opencl", "clCreateContext failed: " << nState);
776 :
777 0 : if(nState != CL_SUCCESS || context == NULL)
778 : {
779 0 : if(context != NULL)
780 0 : clReleaseContext(context);
781 :
782 : SAL_WARN("opencl", "failed to set/switch opencl device");
783 0 : return false;
784 : }
785 : SAL_INFO("opencl", "Created context " << context << " for platform " << platformId << ", device " << pDeviceId);
786 :
787 : cl_command_queue command_queue[OPENCL_CMDQUEUE_SIZE];
788 0 : for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
789 : {
790 : command_queue[i] = clCreateCommandQueue(
791 0 : context, pDeviceId, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &nState);
792 0 : if (nState != CL_SUCCESS)
793 : SAL_WARN("opencl", "clCreateCommandQueue failed: " << nState);
794 :
795 0 : if (command_queue[i] == NULL || nState != CL_SUCCESS)
796 : {
797 : // Release all command queues created so far.
798 0 : for (int j = 0; j <= i; ++j)
799 : {
800 0 : if (command_queue[j])
801 : {
802 0 : clReleaseCommandQueue(command_queue[j]);
803 0 : command_queue[j] = NULL;
804 : }
805 : }
806 :
807 0 : clReleaseContext(context);
808 : SAL_WARN("opencl", "failed to set/switch opencl device");
809 0 : return false;
810 : }
811 :
812 : SAL_INFO("opencl", "Created command queue " << command_queue[i] << " for context " << context);
813 : }
814 :
815 0 : setOpenCLCmdQueuePosition(0); // Call this just to avoid the method being deleted from unused function deleter.
816 :
817 0 : releaseOpenCLEnv(&gpuEnv);
818 : OpenCLEnv env;
819 0 : env.mpOclPlatformID = platformId;
820 0 : env.mpOclContext = context;
821 0 : env.mpOclDevsID = pDeviceId;
822 :
823 0 : for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
824 0 : env.mpOclCmdQueue[i] = command_queue[i];
825 :
826 0 : initOpenCLAttr(&env);
827 :
828 : // why do we need this at all?
829 :
830 : // (Assuming the above question refers to the mpArryDevsID
831 : // initialisation below.) Because otherwise the code crashes in
832 : // initOpenCLRunEnv(). Confused? You should be.
833 :
834 0 : gpuEnv.mpArryDevsID = static_cast<cl_device_id*>(malloc( sizeof(cl_device_id) ));
835 0 : gpuEnv.mpArryDevsID[0] = pDeviceId;
836 :
837 0 : return !initOpenCLRunEnv(0);
838 : }
839 :
840 0 : void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId)
841 : {
842 0 : int status = clewInit(OPENCL_DLL_NAME);
843 0 : if (status < 0)
844 0 : return;
845 :
846 0 : cl_device_id id = gpuEnv.mpDevID;
847 0 : findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId);
848 : }
849 :
850 0 : void setOpenCLCmdQueuePosition( int nPos )
851 : {
852 0 : if (nPos < 0 || nPos >= OPENCL_CMDQUEUE_SIZE)
853 : // Out of range. Ignore this.
854 0 : return;
855 :
856 0 : gpuEnv.mnCmdQueuePos = nPos;
857 : }
858 :
859 0 : const char* errorString(cl_int nError)
860 : {
861 : #define CASE(val) case CL_##val: return #val
862 0 : switch (nError)
863 : {
864 0 : CASE(SUCCESS);
865 0 : CASE(DEVICE_NOT_FOUND);
866 0 : CASE(DEVICE_NOT_AVAILABLE);
867 0 : CASE(COMPILER_NOT_AVAILABLE);
868 0 : CASE(MEM_OBJECT_ALLOCATION_FAILURE);
869 0 : CASE(OUT_OF_RESOURCES);
870 0 : CASE(OUT_OF_HOST_MEMORY);
871 0 : CASE(PROFILING_INFO_NOT_AVAILABLE);
872 0 : CASE(MEM_COPY_OVERLAP);
873 0 : CASE(IMAGE_FORMAT_MISMATCH);
874 0 : CASE(IMAGE_FORMAT_NOT_SUPPORTED);
875 0 : CASE(BUILD_PROGRAM_FAILURE);
876 0 : CASE(MAP_FAILURE);
877 0 : CASE(INVALID_VALUE);
878 0 : CASE(INVALID_DEVICE_TYPE);
879 0 : CASE(INVALID_PLATFORM);
880 0 : CASE(INVALID_DEVICE);
881 0 : CASE(INVALID_CONTEXT);
882 0 : CASE(INVALID_QUEUE_PROPERTIES);
883 0 : CASE(INVALID_COMMAND_QUEUE);
884 0 : CASE(INVALID_HOST_PTR);
885 0 : CASE(INVALID_MEM_OBJECT);
886 0 : CASE(INVALID_IMAGE_FORMAT_DESCRIPTOR);
887 0 : CASE(INVALID_IMAGE_SIZE);
888 0 : CASE(INVALID_SAMPLER);
889 0 : CASE(INVALID_BINARY);
890 0 : CASE(INVALID_BUILD_OPTIONS);
891 0 : CASE(INVALID_PROGRAM);
892 0 : CASE(INVALID_PROGRAM_EXECUTABLE);
893 0 : CASE(INVALID_KERNEL_NAME);
894 0 : CASE(INVALID_KERNEL_DEFINITION);
895 0 : CASE(INVALID_KERNEL);
896 0 : CASE(INVALID_ARG_INDEX);
897 0 : CASE(INVALID_ARG_VALUE);
898 0 : CASE(INVALID_ARG_SIZE);
899 0 : CASE(INVALID_KERNEL_ARGS);
900 0 : CASE(INVALID_WORK_DIMENSION);
901 0 : CASE(INVALID_WORK_GROUP_SIZE);
902 0 : CASE(INVALID_WORK_ITEM_SIZE);
903 0 : CASE(INVALID_GLOBAL_OFFSET);
904 0 : CASE(INVALID_EVENT_WAIT_LIST);
905 0 : CASE(INVALID_EVENT);
906 0 : CASE(INVALID_OPERATION);
907 0 : CASE(INVALID_GL_OBJECT);
908 0 : CASE(INVALID_BUFFER_SIZE);
909 0 : CASE(INVALID_MIP_LEVEL);
910 0 : CASE(INVALID_GLOBAL_WORK_SIZE);
911 : default:
912 0 : return "Unknown OpenCL error code";
913 : }
914 : #undef CASE
915 : }
916 :
917 156 : }
918 :
919 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|