Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : */
9 :
10 : #include <config_folders.h>
11 :
12 : #include "openclwrapper.hxx"
13 :
14 : #include <rtl/ustring.hxx>
15 : #include <rtl/strbuf.hxx>
16 : #include <rtl/digest.h>
17 : #include <rtl/bootstrap.hxx>
18 : #include <boost/scoped_array.hpp>
19 :
20 : #include "sal/config.h"
21 : #include <osl/file.hxx>
22 : #include "opencl_device.hxx"
23 :
24 : #include <stdio.h>
25 : #include <stdlib.h>
26 : #include <string.h>
27 : #include <cmath>
28 :
29 : #ifdef WIN32
30 : #include <windows.h>
31 : #define OPENCL_DLL_NAME "OpenCL.dll"
32 : #elif defined(MACOSX)
33 : #define OPENCL_DLL_NAME NULL
34 : #else
35 : #define OPENCL_DLL_NAME "libOpenCL.so"
36 : #endif
37 :
38 : #define DEVICE_NAME_LENGTH 1024
39 : #define DRIVER_VERSION_LENGTH 1024
40 : #define PLATFORM_VERSION_LENGTH 1024
41 :
42 : using namespace std;
43 :
44 : namespace sc { namespace opencl {
45 :
// Single OpenCL environment (context, command queue, device ids, programs)
// shared by all static OpenclDevice methods in this file.
46 : GPUEnv OpenclDevice::gpuEnv;
// Set to true at the end of a successful initOpenclRunEnv(int) and reset to
// false by releaseOpenclEnv().
47 : bool OpenclDevice::bIsInited = false;
48 :
49 : namespace {
50 :
51 0 : OString generateMD5(const void* pData, size_t length)
52 : {
53 : sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5];
54 : rtlDigestError aError = rtl_digest_MD5(pData, length,
55 0 : pBuffer, RTL_DIGEST_LENGTH_MD5);
56 : SAL_WARN_IF(aError != rtl_Digest_E_None, "sc", "md5 generation failed");
57 :
58 0 : OStringBuffer aBuffer;
59 0 : const char* pString = "0123456789ABCDEF";
60 0 : for(size_t i = 0; i < RTL_DIGEST_LENGTH_MD5; ++i)
61 : {
62 0 : sal_uInt8 val = pBuffer[i];
63 0 : aBuffer.append(pString[val/16]);
64 0 : aBuffer.append(pString[val%16]);
65 : }
66 0 : return aBuffer.makeStringAndClear();
67 : }
68 :
69 0 : OString getCacheFolder()
70 : {
71 0 : OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
72 0 : rtl::Bootstrap::expandMacros(url);
73 :
74 0 : osl::Directory::create(url);
75 :
76 0 : return rtl::OUStringToOString(url, RTL_TEXTENCODING_UTF8);
77 : }
78 :
/**
 * Placeholder for purging stale kernel binaries from the cache folder.
 *
 * The compiled body is empty: everything below is preprocessed out.
 */
void clearCache()
{
#if 0
    // Historical note: files whose name did not end with the hash of the
    // static kernel source string from oclkernels.hxx used to be deleted
    // here. Those static kernels were never used, so the hash never changed
    // and the clean-up was pointless. The static kernels are gone now and
    // their hash is no longer part of the .bin file names, so this function
    // has nothing to go by until there is another way to tell which cached
    // .bin files are "current".
    OUString aFolderURL(rtl::OStringToOUString(OpenclDevice::maCacheFolder, RTL_TEXTENCODING_UTF8));
    osl::Directory aFolder(aFolderURL);
    if(aFolder.open() != osl::FileBase::E_None)
        return;

    osl::DirectoryItem aEntry;
    while(osl::FileBase::E_None == aFolder.getNextItem(aEntry))
    {
        osl::FileStatus aEntryStatus(osl_FileStatus_Mask_FileName|osl_FileStatus_Mask_FileURL);
        if(aEntry.getFileStatus(aEntryStatus) != osl::FileBase::E_None)
            continue;

        if(!aEntryStatus.getFileName().endsWith(".bin"))
            continue;

        if ( file is in some way obsolete )
        {
            // delete the file
            osl::File::remove(aEntryStatus.getFileURL());
        }
    }
#endif
}
117 :
118 : }
119 :
// URL of the kernel cache directory. Initialised during static
// initialisation by calling getCacheFolder(), which expands the bootstrap
// macro and asks osl to create the directory.
120 0 : OString OpenclDevice::maCacheFolder = getCacheFolder();
121 :
122 0 : void OpenclDevice::registOpenclKernel()
123 : {
124 0 : if ( !gpuEnv.mnIsUserCreated )
125 0 : memset( &gpuEnv, 0, sizeof(gpuEnv) );
126 0 : }
127 :
128 0 : void OpenclDevice::setKernelEnv( KernelEnv *envInfo )
129 : {
130 0 : envInfo->mpkContext = gpuEnv.mpContext;
131 0 : envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue;
132 0 : envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
133 0 : }
134 :
135 : namespace {
136 :
137 0 : OString createFileName(cl_device_id deviceId, const char* clFileName)
138 : {
139 0 : OString fileName(clFileName);
140 0 : sal_Int32 nIndex = fileName.lastIndexOf(".cl");
141 0 : if(nIndex > 0)
142 0 : fileName = fileName.copy(0, nIndex);
143 :
144 0 : char deviceName[DEVICE_NAME_LENGTH] = {0};
145 : clGetDeviceInfo(deviceId, CL_DEVICE_NAME,
146 0 : sizeof(deviceName), deviceName, NULL);
147 :
148 0 : char driverVersion[DRIVER_VERSION_LENGTH] = {0};
149 : clGetDeviceInfo(deviceId, CL_DRIVER_VERSION,
150 0 : sizeof(driverVersion), driverVersion, NULL);
151 :
152 : cl_platform_id platformId;
153 : clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM,
154 0 : sizeof(platformId), &platformId, NULL);
155 :
156 0 : char platformVersion[PLATFORM_VERSION_LENGTH] = {0};
157 : clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion),
158 0 : platformVersion, NULL);
159 :
160 : // create hash for deviceName + driver version + platform version
161 0 : OString aString = OString(deviceName) + driverVersion + platformVersion;
162 0 : OString aHash = generateMD5(aString.getStr(), aString.getLength());
163 :
164 0 : return OpenclDevice::maCacheFolder + fileName + "-" +
165 0 : aHash + ".bin";
166 : }
167 :
168 : }
169 :
170 0 : std::vector<boost::shared_ptr<osl::File> > OpenclDevice::binaryGenerated( const char * clFileName, cl_context context )
171 : {
172 0 : size_t numDevices=0;
173 :
174 0 : std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles;
175 : cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
176 0 : 0, NULL, &numDevices );
177 0 : numDevices /= sizeof(numDevices);
178 :
179 0 : if(clStatus != CL_SUCCESS)
180 0 : return aGeneratedFiles;
181 :
182 :
183 : // grab the handles to all of the devices in the context.
184 0 : boost::scoped_array<cl_device_id> mpArryDevsID(new cl_device_id[numDevices]);
185 : clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
186 0 : sizeof( cl_device_id ) * numDevices, mpArryDevsID.get(), NULL );
187 :
188 0 : if(clStatus != CL_SUCCESS)
189 0 : return aGeneratedFiles;
190 :
191 0 : for ( size_t i = 0; i < numDevices; i++ )
192 : {
193 0 : if ( mpArryDevsID[i] != 0 )
194 : {
195 0 : OString fileName = createFileName(gpuEnv.mpArryDevsID[i], clFileName);
196 0 : osl::File* pNewFile = new osl::File(rtl::OStringToOUString(fileName, RTL_TEXTENCODING_UTF8));
197 0 : if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None)
198 : {
199 0 : aGeneratedFiles.push_back(boost::shared_ptr<osl::File>(pNewFile));
200 : SAL_INFO("sc.opencl", "Opening binary file '" << fileName << "' for reading: success");
201 : }
202 : else
203 : {
204 : SAL_INFO("sc.opencl", "Opening binary file '" << fileName << "' for reading: FAIL");
205 0 : delete pNewFile;
206 0 : break;
207 0 : }
208 : }
209 : }
210 :
211 0 : return aGeneratedFiles;
212 : }
213 :
214 0 : bool OpenclDevice::writeBinaryToFile( const OString& rFileName, const char* binary, size_t numBytes )
215 : {
216 0 : clearCache();
217 0 : osl::File file(rtl::OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8));
218 : osl::FileBase::RC status = file.open(
219 0 : osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
220 :
221 0 : if(status != osl::FileBase::E_None)
222 0 : return false;
223 :
224 0 : sal_uInt64 nBytesWritten = 0;
225 0 : file.write( binary, numBytes, nBytesWritten );
226 :
227 : assert(numBytes == nBytesWritten);
228 :
229 0 : return true;
230 : }
231 :
232 0 : bool OpenclDevice::generatBinFromKernelSource( cl_program program, const char * clFileName )
233 : {
234 : cl_uint numDevices;
235 :
236 : cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
237 0 : sizeof(numDevices), &numDevices, NULL );
238 0 : CHECK_OPENCL( clStatus, "clGetProgramInfo" );
239 :
240 0 : std::vector<cl_device_id> mpArryDevsID(numDevices);
241 : /* grab the handles to all of the devices in the program. */
242 : clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
243 0 : sizeof(cl_device_id) * numDevices, &mpArryDevsID[0], NULL );
244 0 : CHECK_OPENCL( clStatus, "clGetProgramInfo" );
245 :
246 : /* figure out the sizes of each of the binaries. */
247 0 : std::vector<size_t> binarySizes(numDevices);
248 :
249 : clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
250 0 : sizeof(size_t) * numDevices, &binarySizes[0], NULL );
251 0 : CHECK_OPENCL( clStatus, "clGetProgramInfo" );
252 :
253 : /* copy over all of the generated binaries. */
254 0 : boost::scoped_array<char*> binaries(new char*[numDevices]);
255 :
256 0 : for ( size_t i = 0; i < numDevices; i++ )
257 : {
258 0 : if ( binarySizes[i] != 0 )
259 : {
260 0 : binaries[i] = new char[binarySizes[i]];
261 : }
262 : else
263 : {
264 0 : binaries[i] = NULL;
265 : }
266 : }
267 :
268 : clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
269 0 : sizeof(char *) * numDevices, binaries.get(), NULL );
270 0 : CHECK_OPENCL(clStatus,"clGetProgramInfo");
271 :
272 : /* dump out each binary into its own separate file. */
273 0 : for ( size_t i = 0; i < numDevices; i++ )
274 : {
275 :
276 0 : if ( binarySizes[i] != 0 )
277 : {
278 0 : OString fileName = createFileName(mpArryDevsID[i], clFileName);
279 0 : if ( !writeBinaryToFile( fileName,
280 0 : binaries[i], binarySizes[i] ) )
281 : SAL_INFO("sc.opencl", "Writing binary file '" << fileName << "': FAIL");
282 : else
283 0 : SAL_INFO("sc.opencl", "Writing binary file '" << fileName << "': success");
284 : }
285 : }
286 :
287 : // Release all resouces and memory
288 0 : for ( size_t i = 0; i < numDevices; i++ )
289 : {
290 0 : delete[] binaries[i];
291 : }
292 :
293 0 : return true;
294 : }
295 :
296 0 : bool OpenclDevice::initOpenclAttr( OpenCLEnv * env )
297 : {
298 0 : if ( gpuEnv.mnIsUserCreated )
299 0 : return true;
300 :
301 0 : gpuEnv.mpContext = env->mpOclContext;
302 0 : gpuEnv.mpPlatformID = env->mpOclPlatformID;
303 0 : gpuEnv.mpDevID = env->mpOclDevsID;
304 0 : gpuEnv.mpCmdQueue = env->mpOclCmdQueue;
305 :
306 0 : gpuEnv.mnIsUserCreated = 1;
307 :
308 0 : return false;
309 : }
310 :
311 0 : void OpenclDevice::releaseOpenclEnv( GPUEnv *gpuInfo )
312 : {
313 0 : if ( !bIsInited )
314 : {
315 0 : return;
316 : }
317 :
318 0 : if ( gpuEnv.mpCmdQueue )
319 : {
320 0 : clReleaseCommandQueue( gpuEnv.mpCmdQueue );
321 0 : gpuEnv.mpCmdQueue = NULL;
322 : }
323 0 : if ( gpuEnv.mpContext )
324 : {
325 0 : clReleaseContext( gpuEnv.mpContext );
326 0 : gpuEnv.mpContext = NULL;
327 : }
328 0 : bIsInited = false;
329 0 : gpuInfo->mnIsUserCreated = 0;
330 0 : free( gpuInfo->mpArryDevsID );
331 :
332 0 : return;
333 : }
334 :
335 : namespace {
336 :
337 0 : bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
338 : {
339 : cl_int clStatus;
340 : //char options[512];
341 : // create a cl program executable for all the devices specified
342 0 : if (!gpuInfo->mnIsUserCreated)
343 : {
344 : clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
345 0 : buildOption, NULL, NULL);
346 : }
347 : else
348 : {
349 : clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
350 0 : buildOption, NULL, NULL);
351 : }
352 :
353 0 : if ( clStatus != CL_SUCCESS )
354 : {
355 : size_t length;
356 0 : if ( !gpuInfo->mnIsUserCreated )
357 : {
358 0 : clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
359 0 : CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
360 : }
361 : else
362 : {
363 : clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
364 0 : CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
365 : }
366 0 : if ( clStatus != CL_SUCCESS )
367 : {
368 0 : return false;
369 : }
370 :
371 0 : boost::scoped_array<char> buildLog(new char[length]);
372 0 : if ( !gpuInfo->mnIsUserCreated )
373 : {
374 0 : clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
375 0 : CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
376 : }
377 : else
378 : {
379 : clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
380 0 : CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
381 : }
382 0 : if ( clStatus != CL_SUCCESS )
383 : {
384 0 : return false;
385 : }
386 :
387 0 : OString aBuildLogFileURL = OpenclDevice::maCacheFolder + "kernel-build.log";
388 0 : osl::File aBuildLogFile(rtl::OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8));
389 : osl::FileBase::RC status = aBuildLogFile.open(
390 0 : osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
391 :
392 0 : if(status != osl::FileBase::E_None)
393 0 : return false;
394 :
395 0 : sal_uInt64 nBytesWritten = 0;
396 0 : aBuildLogFile.write( buildLog.get(), length, nBytesWritten );
397 :
398 0 : return false;
399 : }
400 :
401 0 : return true;
402 : }
403 :
404 : }
405 :
406 0 : bool OpenclDevice::buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
407 : {
408 : size_t numDevices;
409 : cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
410 0 : 0, NULL, &numDevices );
411 0 : numDevices /= sizeof(numDevices);
412 0 : CHECK_OPENCL( clStatus, "clGetContextInfo" );
413 :
414 : std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated(
415 0 : filename, gpuInfo->mpContext );
416 :
417 0 : if (aGeneratedFiles.size() == numDevices)
418 : {
419 0 : boost::scoped_array<size_t> length(new size_t[numDevices]);
420 0 : boost::scoped_array<unsigned char*> pBinary(new unsigned char*[numDevices]);
421 0 : for(size_t i = 0; i < numDevices; ++i)
422 : {
423 : sal_uInt64 nSize;
424 0 : aGeneratedFiles[i]->getSize(nSize);
425 0 : unsigned char* binary = new unsigned char[nSize];
426 : sal_uInt64 nBytesRead;
427 0 : aGeneratedFiles[i]->read(binary, nSize, nBytesRead);
428 0 : if(nSize != nBytesRead)
429 : assert(false);
430 :
431 0 : length[i] = nBytesRead;
432 :
433 0 : pBinary[i] = binary;
434 : }
435 :
436 : // grab the handles to all of the devices in the context.
437 0 : boost::scoped_array<cl_device_id> mpArryDevsID(new cl_device_id[numDevices]);
438 : clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
439 0 : sizeof( cl_device_id ) * numDevices, mpArryDevsID.get(), NULL );
440 :
441 0 : if(clStatus != CL_SUCCESS)
442 : {
443 0 : for(size_t i = 0; i < numDevices; ++i)
444 : {
445 0 : delete[] pBinary[i];
446 : }
447 0 : return false;
448 : }
449 :
450 : cl_int binary_status;
451 :
452 : gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
453 0 : mpArryDevsID.get(), length.get(), (const unsigned char**) pBinary.get(),
454 0 : &binary_status, &clStatus );
455 0 : if(clStatus != CL_SUCCESS)
456 : {
457 : // something went wrong, fall back to compiling from source
458 0 : return false;
459 : }
460 0 : for(size_t i = 0; i < numDevices; ++i)
461 : {
462 0 : delete[] pBinary[i];
463 0 : }
464 : }
465 :
466 0 : if ( !gpuInfo->mpArryPrograms[idx] )
467 : {
468 0 : return false;
469 : }
470 0 : return buildProgram(buildOption, gpuInfo, idx);
471 : }
472 :
473 0 : bool OpenclDevice::initOpenclRunEnv( int argc )
474 : {
475 : if ( MAX_CLKERNEL_NUM <= 0 )
476 : {
477 : return true;
478 : }
479 0 : if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) )
480 0 : return true;
481 :
482 0 : if ( !bIsInited )
483 : {
484 0 : registOpenclKernel();
485 : //initialize devices, context, command_queue
486 0 : bool status = initOpenclRunEnv( &gpuEnv );
487 0 : if ( status )
488 : {
489 0 : return true;
490 : }
491 : //initialize program, kernelName, kernelCount
492 0 : if( getenv( "SC_FLOAT" ) )
493 : {
494 0 : gpuEnv.mnKhrFp64Flag = false;
495 0 : gpuEnv.mnAmdFp64Flag = false;
496 : }
497 0 : if( gpuEnv.mnKhrFp64Flag )
498 : {
499 : SAL_INFO("sc.opencl", "Use Khr double");
500 : }
501 0 : else if( gpuEnv.mnAmdFp64Flag )
502 : {
503 : SAL_INFO("sc.opencl", "Use AMD double type");
504 : }
505 : else
506 : {
507 : SAL_INFO("sc.opencl", "USE float type");
508 : }
509 0 : bIsInited = true;
510 : }
511 0 : return false;
512 : }
513 :
514 : namespace {
515 :
516 0 : void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64)
517 : {
518 0 : bKhrFp64 = false;
519 0 : bAmdFp64 = false;
520 :
521 : // Check device extensions for double type
522 0 : size_t aDevExtInfoSize = 0;
523 :
524 0 : cl_uint clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize );
525 0 : if( clStatus != CL_SUCCESS )
526 0 : return;
527 :
528 0 : boost::scoped_array<char> pExtInfo(new char[aDevExtInfoSize]);
529 :
530 : clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS,
531 0 : sizeof(char) * aDevExtInfoSize, pExtInfo.get(), NULL);
532 :
533 0 : if( clStatus != CL_SUCCESS )
534 0 : return;
535 :
536 0 : if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) )
537 : {
538 0 : bKhrFp64 = true;
539 : }
540 : else
541 : {
542 : // Check if cl_amd_fp64 extension is supported
543 0 : if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) )
544 0 : bAmdFp64 = true;
545 0 : }
546 : }
547 :
548 : }
549 :
550 0 : bool OpenclDevice::initOpenclRunEnv( GPUEnv *gpuInfo )
551 : {
552 : size_t length;
553 : cl_int clStatus;
554 : cl_uint numPlatforms, numDevices;
555 : cl_platform_id *platforms;
556 :
557 : // Have a look at the available platforms.
558 :
559 0 : if ( !gpuInfo->mnIsUserCreated )
560 : {
561 0 : clStatus = clGetPlatformIDs( 0, NULL, &numPlatforms );
562 0 : CHECK_OPENCL(clStatus, "clGetPlatformIDs");
563 0 : gpuInfo->mpPlatformID = NULL;
564 :
565 0 : if ( 0 < numPlatforms )
566 : {
567 : char platformName[256];
568 0 : platforms = (cl_platform_id*) malloc( numPlatforms * sizeof( cl_platform_id ) );
569 0 : if (!platforms)
570 : {
571 0 : return true;
572 : }
573 0 : clStatus = clGetPlatformIDs( numPlatforms, platforms, NULL );
574 0 : CHECK_OPENCL(clStatus, "clGetPlatformIDs");
575 :
576 0 : for ( unsigned int i = 0; i < numPlatforms; i++ )
577 : {
578 0 : clStatus = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR,
579 0 : sizeof( platformName ), platformName, NULL );
580 :
581 0 : if ( clStatus != CL_SUCCESS )
582 : {
583 0 : break;
584 : }
585 0 : gpuInfo->mpPlatformID = platforms[i];
586 :
587 : //if (!strcmp(platformName, "Intel(R) Coporation"))
588 : //if( !strcmp( platformName, "Advanced Micro Devices, Inc." ))
589 : {
590 0 : gpuInfo->mpPlatformID = platforms[i];
591 0 : if ( getenv("SC_OPENCLCPU") )
592 : {
593 : clStatus = clGetDeviceIDs(gpuInfo->mpPlatformID, // platform
594 : CL_DEVICE_TYPE_CPU, // device_type for CPU device
595 : 0, // num_entries
596 : NULL, // devices
597 0 : &numDevices);
598 : }
599 : else
600 : {
601 : clStatus = clGetDeviceIDs(gpuInfo->mpPlatformID, // platform
602 : CL_DEVICE_TYPE_GPU, // device_type for GPU device
603 : 0, // num_entries
604 : NULL, // devices
605 0 : &numDevices);
606 : }
607 0 : if ( clStatus != CL_SUCCESS )
608 0 : continue;
609 :
610 0 : if ( numDevices )
611 0 : break;
612 : }
613 : }
614 0 : free( platforms );
615 0 : if ( clStatus != CL_SUCCESS )
616 0 : return true;
617 : }
618 0 : if ( NULL == gpuInfo->mpPlatformID )
619 0 : return true;
620 :
621 : // Use available platform.
622 : cl_context_properties cps[3];
623 0 : cps[0] = CL_CONTEXT_PLATFORM;
624 0 : cps[1] = (cl_context_properties) gpuInfo->mpPlatformID;
625 0 : cps[2] = 0;
626 : // Set device type for OpenCL
627 0 : if ( getenv("SC_OPENCLCPU") )
628 : {
629 0 : gpuInfo->mDevType = CL_DEVICE_TYPE_CPU;
630 : }
631 : else
632 : {
633 0 : gpuInfo->mDevType = CL_DEVICE_TYPE_GPU;
634 : }
635 0 : gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
636 :
637 0 : if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
638 : {
639 0 : gpuInfo->mDevType = CL_DEVICE_TYPE_CPU;
640 0 : gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
641 : }
642 0 : if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
643 : {
644 0 : gpuInfo->mDevType = CL_DEVICE_TYPE_DEFAULT;
645 0 : gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
646 : }
647 0 : if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
648 0 : return true;
649 : // Detect OpenCL devices.
650 : // First, get the size of device list data
651 0 : clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, 0, NULL, &length );
652 0 : if ( ( clStatus != CL_SUCCESS ) || ( length == 0 ) )
653 0 : return true;
654 : // Now allocate memory for device list based on the size we got earlier
655 0 : gpuInfo->mpArryDevsID = (cl_device_id*) malloc( length );
656 0 : if ( gpuInfo->mpArryDevsID == (cl_device_id*) NULL )
657 0 : return true;
658 : // Now, get the device list data
659 : clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, length,
660 0 : gpuInfo->mpArryDevsID, NULL );
661 0 : CHECK_OPENCL(clStatus, "clGetContextInfo");
662 :
663 : // Create OpenCL command queue.
664 0 : gpuInfo->mpCmdQueue = clCreateCommandQueue( gpuInfo->mpContext, gpuInfo->mpArryDevsID[0], 0, &clStatus );
665 :
666 0 : CHECK_OPENCL(clStatus, "clCreateCommandQueue");
667 : }
668 0 : bool bKhrFp64 = false;
669 0 : bool bAmdFp64 = false;
670 :
671 0 : checkDeviceForDoubleSupport(gpuInfo->mpArryDevsID[0], bKhrFp64, bAmdFp64);
672 :
673 0 : gpuInfo->mnKhrFp64Flag = bKhrFp64;
674 0 : gpuInfo->mnAmdFp64Flag = bAmdFp64;
675 :
676 0 : return false;
677 : }
678 :
679 : namespace {
680 :
681 : // based on crashes and hanging during kernel compilation
682 0 : bool checkForKnownBadCompilers(const OpenclDeviceInfo& rInfo)
683 : {
684 :
685 : struct {
686 : const char* pVendorName; const char* pDriverVersion;
687 : } aBadOpenCLCompilers[] = {
688 : { "Intel(R) Corporation", "9.17.10.2884" }
689 0 : };
690 :
691 0 : for(size_t i = 0; i < SAL_N_ELEMENTS(aBadOpenCLCompilers); ++i)
692 : {
693 0 : if(rInfo.maVendor == OUString::createFromAscii(aBadOpenCLCompilers[i].pVendorName) &&
694 0 : rInfo.maDriver == OUString::createFromAscii(aBadOpenCLCompilers[i].pDriverVersion))
695 0 : return true;
696 : }
697 :
698 0 : return false;
699 : }
700 :
701 0 : void createDeviceInfo(cl_device_id aDeviceId, OpenclPlatformInfo& rPlatformInfo)
702 : {
703 0 : OpenclDeviceInfo aDeviceInfo;
704 0 : aDeviceInfo.device = aDeviceId;
705 :
706 : char pName[DEVICE_NAME_LENGTH];
707 0 : cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, NULL);
708 0 : if(nState != CL_SUCCESS)
709 0 : return;
710 :
711 0 : aDeviceInfo.maName = OUString::createFromAscii(pName);
712 :
713 : char pVendor[DEVICE_NAME_LENGTH];
714 0 : nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, NULL);
715 0 : if(nState != CL_SUCCESS)
716 0 : return;
717 :
718 0 : aDeviceInfo.maVendor = OUString::createFromAscii(pVendor);
719 :
720 : cl_ulong nMemSize;
721 0 : nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, NULL);
722 0 : if(nState != CL_SUCCESS)
723 0 : return;
724 :
725 0 : aDeviceInfo.mnMemory = nMemSize;
726 :
727 : cl_uint nClockFrequency;
728 0 : nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, NULL);
729 0 : if(nState != CL_SUCCESS)
730 0 : return;
731 :
732 0 : aDeviceInfo.mnFrequency = nClockFrequency;
733 :
734 : cl_uint nComputeUnits;
735 0 : nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, NULL);
736 0 : if(nState != CL_SUCCESS)
737 0 : return;
738 :
739 : char pDriver[DEVICE_NAME_LENGTH];
740 0 : nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, NULL);
741 :
742 0 : if(nState != CL_SUCCESS)
743 0 : return;
744 :
745 0 : aDeviceInfo.maDriver = OUString::createFromAscii(pDriver);
746 :
747 0 : bool bKhrFp64 = false;
748 0 : bool bAmdFp64 = false;
749 0 : checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64);
750 :
751 : // only list devices that support double
752 0 : if(!bKhrFp64 && !bAmdFp64)
753 0 : return;
754 :
755 0 : aDeviceInfo.mnComputeUnits = nComputeUnits;
756 :
757 0 : if(!checkForKnownBadCompilers(aDeviceInfo))
758 0 : rPlatformInfo.maDevices.push_back(aDeviceInfo);
759 : }
760 :
761 0 : bool createPlatformInfo(cl_platform_id nPlatformId, OpenclPlatformInfo& rPlatformInfo)
762 : {
763 0 : rPlatformInfo.platform = nPlatformId;
764 : char pName[64];
765 : cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64,
766 0 : pName, NULL);
767 0 : if(nState != CL_SUCCESS)
768 0 : return false;
769 0 : rPlatformInfo.maName = OUString::createFromAscii(pName);
770 :
771 : char pVendor[64];
772 : nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64,
773 0 : pVendor, NULL);
774 0 : if(nState != CL_SUCCESS)
775 0 : return false;
776 :
777 0 : rPlatformInfo.maVendor = OUString::createFromAscii(pName);
778 :
779 : cl_uint nDevices;
780 0 : nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, NULL, &nDevices);
781 0 : if(nState != CL_SUCCESS)
782 0 : return false;
783 :
784 : // memory leak that does not matter
785 : // memory is stored in static variable that lives through the whole program
786 0 : cl_device_id* pDevices = new cl_device_id[nDevices];
787 0 : nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices, NULL);
788 0 : if(nState != CL_SUCCESS)
789 0 : return false;
790 :
791 0 : for(size_t i = 0; i < nDevices; ++i)
792 : {
793 0 : createDeviceInfo(pDevices[i], rPlatformInfo);
794 : }
795 :
796 0 : return true;
797 : }
798 :
799 : }
800 :
801 0 : size_t getOpenCLPlatformCount()
802 : {
803 0 : int status = clewInit(OPENCL_DLL_NAME);
804 0 : if (status < 0)
805 0 : return 0;
806 :
807 : cl_uint nPlatforms;
808 0 : cl_int nState = clGetPlatformIDs(0, NULL, &nPlatforms);
809 :
810 0 : if (nState != CL_SUCCESS)
811 0 : return 0;
812 :
813 0 : return nPlatforms;
814 : }
815 :
816 0 : const std::vector<OpenclPlatformInfo>& fillOpenCLInfo()
817 : {
818 0 : static std::vector<OpenclPlatformInfo> aPlatforms;
819 0 : if(!aPlatforms.empty())
820 0 : return aPlatforms;
821 :
822 0 : int status = clewInit(OPENCL_DLL_NAME);
823 0 : if (status < 0)
824 0 : return aPlatforms;
825 :
826 : cl_uint nPlatforms;
827 0 : cl_int nState = clGetPlatformIDs(0, NULL, &nPlatforms);
828 :
829 0 : if(nState != CL_SUCCESS)
830 0 : return aPlatforms;
831 :
832 : // memory leak that does not matter,
833 : // memory is stored in static instance aPlatforms
834 0 : cl_platform_id* pPlatforms = new cl_platform_id[nPlatforms];
835 0 : nState = clGetPlatformIDs(nPlatforms, pPlatforms, NULL);
836 :
837 0 : if(nState != CL_SUCCESS)
838 0 : return aPlatforms;
839 :
840 0 : for(size_t i = 0; i < nPlatforms; ++i)
841 : {
842 0 : OpenclPlatformInfo aPlatformInfo;
843 0 : if(createPlatformInfo(pPlatforms[i], aPlatformInfo))
844 0 : aPlatforms.push_back(aPlatformInfo);
845 0 : }
846 :
847 0 : return aPlatforms;
848 : }
849 :
850 : namespace {
851 :
852 0 : cl_device_id findDeviceIdByDeviceString(const OUString& rString, const std::vector<OpenclPlatformInfo>& rPlatforms)
853 : {
854 0 : std::vector<OpenclPlatformInfo>::const_iterator it = rPlatforms.begin(), itEnd = rPlatforms.end();
855 0 : for(; it != itEnd; ++it)
856 : {
857 0 : std::vector<OpenclDeviceInfo>::const_iterator itr = it->maDevices.begin(), itrEnd = it->maDevices.end();
858 0 : for(; itr != itrEnd; ++itr)
859 : {
860 0 : OUString aDeviceId = it->maVendor + " " + itr->maName;
861 0 : if(rString == aDeviceId)
862 : {
863 0 : return static_cast<cl_device_id>(itr->device);
864 : }
865 0 : }
866 : }
867 :
868 0 : return NULL;
869 : }
870 :
871 0 : void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId)
872 : {
873 : cl_platform_id platformId;
874 : cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM,
875 0 : sizeof(platformId), &platformId, NULL);
876 :
877 0 : if(nState != CL_SUCCESS)
878 0 : return;
879 :
880 0 : const std::vector<OpenclPlatformInfo>& rPlatforms = fillOpenCLInfo();
881 0 : for(size_t i = 0; i < rPlatforms.size(); ++i)
882 : {
883 0 : cl_platform_id platId = static_cast<cl_platform_id>(rPlatforms[i].platform);
884 0 : if(platId != platformId)
885 0 : continue;
886 :
887 0 : for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j)
888 : {
889 0 : cl_device_id id = static_cast<cl_device_id>(rPlatforms[i].maDevices[j].device);
890 0 : if(id == aDeviceId)
891 : {
892 0 : rDeviceId = j;
893 0 : rPlatformId = i;
894 0 : return;
895 : }
896 : }
897 : }
898 : }
899 :
900 : }
901 :
902 0 : bool switchOpenclDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation)
903 : {
904 0 : if(fillOpenCLInfo().empty())
905 0 : return false;
906 :
907 0 : cl_device_id pDeviceId = NULL;
908 0 : if(pDevice)
909 0 : pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo());
910 :
911 0 : if(!pDeviceId || bAutoSelect)
912 : {
913 0 : int status = clewInit(OPENCL_DLL_NAME);
914 0 : if (status < 0)
915 0 : return false;
916 :
917 0 : OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
918 0 : rtl::Bootstrap::expandMacros(url);
919 0 : OUString path;
920 0 : osl::FileBase::getSystemPathFromFileURL(url,path);
921 0 : OString dsFileName = rtl::OUStringToOString(path, RTL_TEXTENCODING_UTF8);
922 0 : ds_device pSelectedDevice = sc::OpenCLDevice::getDeviceSelection(dsFileName.getStr(), bForceEvaluation);
923 0 : pDeviceId = pSelectedDevice.oclDeviceID;
924 :
925 : }
926 :
927 0 : if(OpenclDevice::gpuEnv.mpDevID == pDeviceId)
928 : {
929 : // we don't need to change anything
930 : // still the same device
931 0 : return pDeviceId != NULL;
932 : }
933 :
934 : cl_platform_id platformId;
935 : cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM,
936 0 : sizeof(platformId), &platformId, NULL);
937 :
938 : cl_context_properties cps[3];
939 0 : cps[0] = CL_CONTEXT_PLATFORM;
940 0 : cps[1] = (cl_context_properties) platformId;
941 0 : cps[2] = 0;
942 0 : cl_context context = clCreateContext( cps, 1, &pDeviceId, NULL, NULL, &nState );
943 :
944 0 : if(nState != CL_SUCCESS || context == NULL)
945 : {
946 0 : if(context != NULL)
947 0 : clReleaseContext(context);
948 :
949 : SAL_WARN("sc", "failed to set/switch opencl device");
950 0 : return false;
951 : }
952 :
953 : cl_command_queue command_queue = clCreateCommandQueue(
954 0 : context, pDeviceId, 0, &nState);
955 :
956 0 : if(command_queue == NULL || nState != CL_SUCCESS)
957 : {
958 0 : if(command_queue != NULL)
959 0 : clReleaseCommandQueue(command_queue);
960 :
961 0 : clReleaseContext(context);
962 : SAL_WARN("sc", "failed to set/switch opencl device");
963 0 : return false;
964 : }
965 :
966 0 : OpenclDevice::releaseOpenclEnv(&OpenclDevice::gpuEnv);
967 : OpenCLEnv env;
968 0 : env.mpOclPlatformID = platformId;
969 0 : env.mpOclContext = context;
970 0 : env.mpOclDevsID = pDeviceId;
971 0 : env.mpOclCmdQueue = command_queue;
972 0 : OpenclDevice::initOpenclAttr(&env);
973 :
974 : // why do we need this at all?
975 0 : OpenclDevice::gpuEnv.mpArryDevsID = (cl_device_id*) malloc( sizeof(cl_device_id) );
976 0 : OpenclDevice::gpuEnv.mpArryDevsID[0] = pDeviceId;
977 0 : return !OpenclDevice::initOpenclRunEnv(0);
978 : }
979 :
980 0 : void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId)
981 : {
982 0 : int status = clewInit(OPENCL_DLL_NAME);
983 0 : if (status < 0)
984 0 : return;
985 :
986 0 : cl_device_id id = OpenclDevice::gpuEnv.mpDevID;
987 0 : findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId);
988 : }
989 :
990 0 : }}
991 :
992 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|