Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : */
9 :
10 : #include <config_folders.h>
11 :
12 : #include "openclwrapper.hxx"
13 :
14 : #include <rtl/ustring.hxx>
15 : #include <rtl/strbuf.hxx>
16 : #include <rtl/digest.h>
17 : #include <rtl/bootstrap.hxx>
18 : #include <boost/scoped_array.hpp>
19 :
20 : #include <sal/config.h>
21 : #include <osl/file.hxx>
22 : #include "opencl_device.hxx"
23 :
24 : #include <stdio.h>
25 : #include <stdlib.h>
26 : #include <string.h>
27 : #include <cmath>
28 :
29 : #ifdef _WIN32
30 : #include <prewin.h>
31 : #include <postwin.h>
32 : #define OPENCL_DLL_NAME "OpenCL.dll"
33 : #elif defined(MACOSX)
34 : #define OPENCL_DLL_NAME NULL
35 : #else
36 : #define OPENCL_DLL_NAME "libOpenCL.so"
37 : #endif
38 :
39 : #define DEVICE_NAME_LENGTH 1024
40 : #define DRIVER_VERSION_LENGTH 1024
41 : #define PLATFORM_VERSION_LENGTH 1024
42 :
43 : using namespace std;
44 :
45 : namespace sc { namespace opencl {
46 :
47 : GPUEnv OpenCLDevice::gpuEnv;
48 : bool OpenCLDevice::bIsInited = false;
49 :
50 : namespace {
51 :
52 0 : OString generateMD5(const void* pData, size_t length)
53 : {
54 : sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5];
55 : rtlDigestError aError = rtl_digest_MD5(pData, length,
56 0 : pBuffer, RTL_DIGEST_LENGTH_MD5);
57 : SAL_WARN_IF(aError != rtl_Digest_E_None, "sc", "md5 generation failed");
58 :
59 0 : OStringBuffer aBuffer;
60 0 : const char* pString = "0123456789ABCDEF";
61 0 : for(size_t i = 0; i < RTL_DIGEST_LENGTH_MD5; ++i)
62 : {
63 0 : sal_uInt8 val = pBuffer[i];
64 0 : aBuffer.append(pString[val/16]);
65 0 : aBuffer.append(pString[val%16]);
66 : }
67 0 : return aBuffer.makeStringAndClear();
68 : }
69 :
70 76 : OString getCacheFolder()
71 : {
72 76 : OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
73 76 : rtl::Bootstrap::expandMacros(url);
74 :
75 76 : osl::Directory::create(url);
76 :
77 76 : return rtl::OUStringToOString(url, RTL_TEXTENCODING_UTF8);
78 : }
79 :
80 : }
81 :
82 76 : OString OpenCLDevice::maCacheFolder = getCacheFolder();
83 :
84 0 : void OpenCLDevice::registerOpenCLKernel()
85 : {
86 0 : if ( !gpuEnv.mnIsUserCreated )
87 0 : memset( &gpuEnv, 0, sizeof(gpuEnv) );
88 0 : }
89 :
90 0 : void OpenCLDevice::setKernelEnv( KernelEnv *envInfo )
91 : {
92 0 : envInfo->mpkContext = gpuEnv.mpContext;
93 0 : envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue;
94 0 : envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
95 0 : }
96 :
97 : namespace {
98 :
99 0 : OString createFileName(cl_device_id deviceId, const char* clFileName)
100 : {
101 0 : OString fileName(clFileName);
102 0 : sal_Int32 nIndex = fileName.lastIndexOf(".cl");
103 0 : if(nIndex > 0)
104 0 : fileName = fileName.copy(0, nIndex);
105 :
106 0 : char deviceName[DEVICE_NAME_LENGTH] = {0};
107 : clGetDeviceInfo(deviceId, CL_DEVICE_NAME,
108 0 : sizeof(deviceName), deviceName, NULL);
109 :
110 0 : char driverVersion[DRIVER_VERSION_LENGTH] = {0};
111 : clGetDeviceInfo(deviceId, CL_DRIVER_VERSION,
112 0 : sizeof(driverVersion), driverVersion, NULL);
113 :
114 : cl_platform_id platformId;
115 : clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM,
116 0 : sizeof(platformId), &platformId, NULL);
117 :
118 0 : char platformVersion[PLATFORM_VERSION_LENGTH] = {0};
119 : clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion),
120 0 : platformVersion, NULL);
121 :
122 : // create hash for deviceName + driver version + platform version
123 0 : OString aString = OString(deviceName) + driverVersion + platformVersion;
124 0 : OString aHash = generateMD5(aString.getStr(), aString.getLength());
125 :
126 0 : return OpenCLDevice::maCacheFolder + fileName + "-" +
127 0 : aHash + ".bin";
128 : }
129 :
130 : }
131 :
132 0 : std::vector<boost::shared_ptr<osl::File> > OpenCLDevice::binaryGenerated( const char * clFileName, cl_context context )
133 : {
134 0 : size_t numDevices=0;
135 :
136 0 : std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles;
137 : cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
138 0 : 0, NULL, &numDevices );
139 0 : numDevices /= sizeof(numDevices);
140 :
141 0 : if(clStatus != CL_SUCCESS)
142 0 : return aGeneratedFiles;
143 :
144 : // grab the handles to all of the devices in the context.
145 0 : boost::scoped_array<cl_device_id> mpArryDevsID(new cl_device_id[numDevices]);
146 : clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
147 0 : sizeof( cl_device_id ) * numDevices, mpArryDevsID.get(), NULL );
148 :
149 0 : if(clStatus != CL_SUCCESS)
150 0 : return aGeneratedFiles;
151 :
152 0 : for ( size_t i = 0; i < numDevices; i++ )
153 : {
154 0 : if ( mpArryDevsID[i] != 0 )
155 : {
156 0 : OString fileName = createFileName(gpuEnv.mpArryDevsID[i], clFileName);
157 0 : osl::File* pNewFile = new osl::File(rtl::OStringToOUString(fileName, RTL_TEXTENCODING_UTF8));
158 0 : if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None)
159 : {
160 0 : aGeneratedFiles.push_back(boost::shared_ptr<osl::File>(pNewFile));
161 : SAL_INFO("sc.opencl.file", "Opening binary file '" << fileName << "' for reading: success");
162 : }
163 : else
164 : {
165 : SAL_INFO("sc.opencl.file", "Opening binary file '" << fileName << "' for reading: FAIL");
166 0 : delete pNewFile;
167 0 : break;
168 0 : }
169 : }
170 : }
171 :
172 0 : return aGeneratedFiles;
173 : }
174 :
175 0 : bool OpenCLDevice::writeBinaryToFile( const OString& rFileName, const char* binary, size_t numBytes )
176 : {
177 0 : osl::File file(rtl::OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8));
178 : osl::FileBase::RC status = file.open(
179 0 : osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
180 :
181 0 : if(status != osl::FileBase::E_None)
182 0 : return false;
183 :
184 0 : sal_uInt64 nBytesWritten = 0;
185 0 : file.write( binary, numBytes, nBytesWritten );
186 :
187 : assert(numBytes == nBytesWritten);
188 :
189 0 : return true;
190 : }
191 :
192 0 : bool OpenCLDevice::generatBinFromKernelSource( cl_program program, const char * clFileName )
193 : {
194 : cl_uint numDevices;
195 :
196 : cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
197 0 : sizeof(numDevices), &numDevices, NULL );
198 0 : CHECK_OPENCL( clStatus, "clGetProgramInfo" );
199 :
200 0 : std::vector<cl_device_id> mpArryDevsID(numDevices);
201 : /* grab the handles to all of the devices in the program. */
202 : clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
203 0 : sizeof(cl_device_id) * numDevices, &mpArryDevsID[0], NULL );
204 0 : CHECK_OPENCL( clStatus, "clGetProgramInfo" );
205 :
206 : /* figure out the sizes of each of the binaries. */
207 0 : std::vector<size_t> binarySizes(numDevices);
208 :
209 : clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
210 0 : sizeof(size_t) * numDevices, &binarySizes[0], NULL );
211 0 : CHECK_OPENCL( clStatus, "clGetProgramInfo" );
212 :
213 : /* copy over all of the generated binaries. */
214 0 : boost::scoped_array<char*> binaries(new char*[numDevices]);
215 :
216 0 : for ( size_t i = 0; i < numDevices; i++ )
217 : {
218 0 : if ( binarySizes[i] != 0 )
219 : {
220 0 : binaries[i] = new char[binarySizes[i]];
221 : }
222 : else
223 : {
224 0 : binaries[i] = NULL;
225 : }
226 : }
227 :
228 : clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
229 0 : sizeof(char *) * numDevices, binaries.get(), NULL );
230 0 : CHECK_OPENCL(clStatus,"clGetProgramInfo");
231 :
232 : /* dump out each binary into its own separate file. */
233 0 : for ( size_t i = 0; i < numDevices; i++ )
234 : {
235 :
236 0 : if ( binarySizes[i] != 0 )
237 : {
238 0 : OString fileName = createFileName(mpArryDevsID[i], clFileName);
239 0 : if ( !writeBinaryToFile( fileName,
240 0 : binaries[i], binarySizes[i] ) )
241 : SAL_INFO("sc.opencl.file", "Writing binary file '" << fileName << "': FAIL");
242 : else
243 0 : SAL_INFO("sc.opencl.file", "Writing binary file '" << fileName << "': success");
244 : }
245 : }
246 :
247 : // Release all resouces and memory
248 0 : for ( size_t i = 0; i < numDevices; i++ )
249 : {
250 0 : delete[] binaries[i];
251 : }
252 :
253 0 : return true;
254 : }
255 :
256 0 : bool OpenCLDevice::initOpenCLAttr( OpenCLEnv * env )
257 : {
258 0 : if ( gpuEnv.mnIsUserCreated )
259 0 : return true;
260 :
261 0 : gpuEnv.mpContext = env->mpOclContext;
262 0 : gpuEnv.mpPlatformID = env->mpOclPlatformID;
263 0 : gpuEnv.mpDevID = env->mpOclDevsID;
264 0 : gpuEnv.mpCmdQueue = env->mpOclCmdQueue;
265 :
266 0 : gpuEnv.mnIsUserCreated = 1;
267 :
268 0 : return false;
269 : }
270 :
271 0 : void OpenCLDevice::releaseOpenCLEnv( GPUEnv *gpuInfo )
272 : {
273 0 : if ( !bIsInited )
274 : {
275 0 : return;
276 : }
277 :
278 0 : if ( gpuEnv.mpCmdQueue )
279 : {
280 0 : clReleaseCommandQueue( gpuEnv.mpCmdQueue );
281 0 : gpuEnv.mpCmdQueue = NULL;
282 : }
283 0 : if ( gpuEnv.mpContext )
284 : {
285 0 : clReleaseContext( gpuEnv.mpContext );
286 0 : gpuEnv.mpContext = NULL;
287 : }
288 0 : bIsInited = false;
289 0 : gpuInfo->mnIsUserCreated = 0;
290 0 : free( gpuInfo->mpArryDevsID );
291 :
292 0 : return;
293 : }
294 :
295 : namespace {
296 :
297 0 : bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
298 : {
299 : cl_int clStatus;
300 : //char options[512];
301 : // create a cl program executable for all the devices specified
302 0 : if (!gpuInfo->mnIsUserCreated)
303 : {
304 : clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
305 0 : buildOption, NULL, NULL);
306 : }
307 : else
308 : {
309 : clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
310 0 : buildOption, NULL, NULL);
311 : }
312 :
313 0 : if ( clStatus != CL_SUCCESS )
314 : {
315 : size_t length;
316 0 : if ( !gpuInfo->mnIsUserCreated )
317 : {
318 0 : clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
319 0 : CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
320 : }
321 : else
322 : {
323 : clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
324 0 : CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
325 : }
326 0 : if ( clStatus != CL_SUCCESS )
327 : {
328 0 : return false;
329 : }
330 :
331 0 : boost::scoped_array<char> buildLog(new char[length]);
332 0 : if ( !gpuInfo->mnIsUserCreated )
333 : {
334 0 : clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
335 0 : CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
336 : }
337 : else
338 : {
339 : clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
340 0 : CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
341 : }
342 0 : if ( clStatus != CL_SUCCESS )
343 : {
344 0 : return false;
345 : }
346 :
347 0 : OString aBuildLogFileURL = OpenCLDevice::maCacheFolder + "kernel-build.log";
348 0 : osl::File aBuildLogFile(rtl::OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8));
349 : osl::FileBase::RC status = aBuildLogFile.open(
350 0 : osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
351 :
352 0 : if(status != osl::FileBase::E_None)
353 0 : return false;
354 :
355 0 : sal_uInt64 nBytesWritten = 0;
356 0 : aBuildLogFile.write( buildLog.get(), length, nBytesWritten );
357 :
358 0 : return false;
359 : }
360 :
361 0 : return true;
362 : }
363 :
364 : }
365 :
366 0 : bool OpenCLDevice::buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
367 : {
368 : size_t numDevices;
369 : cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
370 0 : 0, NULL, &numDevices );
371 0 : numDevices /= sizeof(numDevices);
372 0 : CHECK_OPENCL( clStatus, "clGetContextInfo" );
373 :
374 : std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated(
375 0 : filename, gpuInfo->mpContext );
376 :
377 0 : if (aGeneratedFiles.size() == numDevices)
378 : {
379 0 : boost::scoped_array<size_t> length(new size_t[numDevices]);
380 0 : boost::scoped_array<unsigned char*> pBinary(new unsigned char*[numDevices]);
381 0 : for(size_t i = 0; i < numDevices; ++i)
382 : {
383 : sal_uInt64 nSize;
384 0 : aGeneratedFiles[i]->getSize(nSize);
385 0 : unsigned char* binary = new unsigned char[nSize];
386 : sal_uInt64 nBytesRead;
387 0 : aGeneratedFiles[i]->read(binary, nSize, nBytesRead);
388 0 : if(nSize != nBytesRead)
389 : assert(false);
390 :
391 0 : length[i] = nBytesRead;
392 :
393 0 : pBinary[i] = binary;
394 : }
395 :
396 : // grab the handles to all of the devices in the context.
397 0 : boost::scoped_array<cl_device_id> mpArryDevsID(new cl_device_id[numDevices]);
398 : clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
399 0 : sizeof( cl_device_id ) * numDevices, mpArryDevsID.get(), NULL );
400 :
401 0 : if(clStatus != CL_SUCCESS)
402 : {
403 0 : for(size_t i = 0; i < numDevices; ++i)
404 : {
405 0 : delete[] pBinary[i];
406 : }
407 0 : return false;
408 : }
409 :
410 : cl_int binary_status;
411 :
412 : gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
413 0 : mpArryDevsID.get(), length.get(), (const unsigned char**) pBinary.get(),
414 0 : &binary_status, &clStatus );
415 0 : if(clStatus != CL_SUCCESS)
416 : {
417 : // something went wrong, fall back to compiling from source
418 0 : return false;
419 : }
420 0 : for(size_t i = 0; i < numDevices; ++i)
421 : {
422 0 : delete[] pBinary[i];
423 0 : }
424 : }
425 :
426 0 : if ( !gpuInfo->mpArryPrograms[idx] )
427 : {
428 0 : return false;
429 : }
430 0 : return buildProgram(buildOption, gpuInfo, idx);
431 : }
432 :
433 0 : bool OpenCLDevice::initOpenCLRunEnv( int argc )
434 : {
435 0 : if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) )
436 0 : return true;
437 :
438 0 : if ( !bIsInited )
439 : {
440 0 : registerOpenCLKernel();
441 : //initialize devices, context, command_queue
442 0 : bool status = initOpenCLRunEnv( &gpuEnv );
443 0 : if ( status )
444 : {
445 0 : return true;
446 : }
447 : //initialize program, kernelName, kernelCount
448 0 : if( getenv( "SC_FLOAT" ) )
449 : {
450 0 : gpuEnv.mnKhrFp64Flag = false;
451 0 : gpuEnv.mnAmdFp64Flag = false;
452 : }
453 0 : if( gpuEnv.mnKhrFp64Flag )
454 : {
455 : SAL_INFO("sc.opencl", "Use Khr double");
456 : }
457 0 : else if( gpuEnv.mnAmdFp64Flag )
458 : {
459 : SAL_INFO("sc.opencl", "Use AMD double type");
460 : }
461 : else
462 : {
463 : SAL_INFO("sc.opencl", "USE float type");
464 : }
465 0 : bIsInited = true;
466 : }
467 0 : return false;
468 : }
469 :
470 : namespace {
471 :
472 0 : void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64)
473 : {
474 0 : bKhrFp64 = false;
475 0 : bAmdFp64 = false;
476 :
477 : // Check device extensions for double type
478 0 : size_t aDevExtInfoSize = 0;
479 :
480 0 : cl_uint clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize );
481 0 : if( clStatus != CL_SUCCESS )
482 0 : return;
483 :
484 0 : boost::scoped_array<char> pExtInfo(new char[aDevExtInfoSize]);
485 :
486 : clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS,
487 0 : sizeof(char) * aDevExtInfoSize, pExtInfo.get(), NULL);
488 :
489 0 : if( clStatus != CL_SUCCESS )
490 0 : return;
491 :
492 0 : if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) )
493 : {
494 0 : bKhrFp64 = true;
495 : }
496 : else
497 : {
498 : // Check if cl_amd_fp64 extension is supported
499 0 : if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) )
500 0 : bAmdFp64 = true;
501 0 : }
502 : }
503 :
504 : }
505 :
506 0 : bool OpenCLDevice::initOpenCLRunEnv( GPUEnv *gpuInfo )
507 : {
508 : size_t length;
509 : cl_int clStatus;
510 : cl_uint numPlatforms, numDevices;
511 : cl_platform_id *platforms;
512 :
513 : // Have a look at the available platforms.
514 :
515 0 : if ( !gpuInfo->mnIsUserCreated )
516 : {
517 0 : clStatus = clGetPlatformIDs( 0, NULL, &numPlatforms );
518 0 : CHECK_OPENCL(clStatus, "clGetPlatformIDs");
519 0 : gpuInfo->mpPlatformID = NULL;
520 :
521 0 : if ( 0 < numPlatforms )
522 : {
523 : char platformName[256];
524 0 : platforms = (cl_platform_id*) malloc( numPlatforms * sizeof( cl_platform_id ) );
525 0 : if (!platforms)
526 : {
527 0 : return true;
528 : }
529 0 : clStatus = clGetPlatformIDs( numPlatforms, platforms, NULL );
530 0 : CHECK_OPENCL(clStatus, "clGetPlatformIDs");
531 :
532 0 : for ( unsigned int i = 0; i < numPlatforms; i++ )
533 : {
534 0 : clStatus = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR,
535 0 : sizeof( platformName ), platformName, NULL );
536 :
537 0 : if ( clStatus != CL_SUCCESS )
538 : {
539 0 : break;
540 : }
541 0 : gpuInfo->mpPlatformID = platforms[i];
542 :
543 : //if (!strcmp(platformName, "Intel(R) Coporation"))
544 : //if( !strcmp( platformName, "Advanced Micro Devices, Inc." ))
545 : {
546 0 : gpuInfo->mpPlatformID = platforms[i];
547 0 : if ( getenv("SC_OPENCLCPU") )
548 : {
549 : clStatus = clGetDeviceIDs(gpuInfo->mpPlatformID, // platform
550 : CL_DEVICE_TYPE_CPU, // device_type for CPU device
551 : 0, // num_entries
552 : NULL, // devices
553 0 : &numDevices);
554 : }
555 : else
556 : {
557 : clStatus = clGetDeviceIDs(gpuInfo->mpPlatformID, // platform
558 : CL_DEVICE_TYPE_GPU, // device_type for GPU device
559 : 0, // num_entries
560 : NULL, // devices
561 0 : &numDevices);
562 : }
563 0 : if ( clStatus != CL_SUCCESS )
564 0 : continue;
565 :
566 0 : if ( numDevices )
567 0 : break;
568 : }
569 : }
570 0 : free( platforms );
571 0 : if ( clStatus != CL_SUCCESS )
572 0 : return true;
573 : }
574 0 : if ( NULL == gpuInfo->mpPlatformID )
575 0 : return true;
576 :
577 : // Use available platform.
578 : cl_context_properties cps[3];
579 0 : cps[0] = CL_CONTEXT_PLATFORM;
580 0 : cps[1] = reinterpret_cast<cl_context_properties>(gpuInfo->mpPlatformID);
581 0 : cps[2] = 0;
582 : // Set device type for OpenCL
583 0 : if ( getenv("SC_OPENCLCPU") )
584 : {
585 0 : gpuInfo->mDevType = CL_DEVICE_TYPE_CPU;
586 : }
587 : else
588 : {
589 0 : gpuInfo->mDevType = CL_DEVICE_TYPE_GPU;
590 : }
591 0 : gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
592 :
593 0 : if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
594 : {
595 0 : gpuInfo->mDevType = CL_DEVICE_TYPE_CPU;
596 0 : gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
597 : }
598 0 : if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
599 : {
600 0 : gpuInfo->mDevType = CL_DEVICE_TYPE_DEFAULT;
601 0 : gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
602 : }
603 0 : if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
604 0 : return true;
605 : // Detect OpenCL devices.
606 : // First, get the size of device list data
607 0 : clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, 0, NULL, &length );
608 0 : if ( ( clStatus != CL_SUCCESS ) || ( length == 0 ) )
609 0 : return true;
610 : // Now allocate memory for device list based on the size we got earlier
611 0 : gpuInfo->mpArryDevsID = (cl_device_id*) malloc( length );
612 0 : if ( gpuInfo->mpArryDevsID == (cl_device_id*) NULL )
613 0 : return true;
614 : // Now, get the device list data
615 : clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, length,
616 0 : gpuInfo->mpArryDevsID, NULL );
617 0 : CHECK_OPENCL(clStatus, "clGetContextInfo");
618 :
619 : // Create OpenCL command queue.
620 0 : gpuInfo->mpCmdQueue = clCreateCommandQueue( gpuInfo->mpContext, gpuInfo->mpArryDevsID[0], 0, &clStatus );
621 :
622 0 : CHECK_OPENCL(clStatus, "clCreateCommandQueue");
623 : }
624 0 : bool bKhrFp64 = false;
625 0 : bool bAmdFp64 = false;
626 :
627 0 : checkDeviceForDoubleSupport(gpuInfo->mpArryDevsID[0], bKhrFp64, bAmdFp64);
628 :
629 0 : gpuInfo->mnKhrFp64Flag = bKhrFp64;
630 0 : gpuInfo->mnAmdFp64Flag = bAmdFp64;
631 :
632 0 : return false;
633 : }
634 :
635 : namespace {
636 :
637 : // based on crashes and hanging during kernel compilation
638 0 : bool checkForKnownBadCompilers(const OpenCLDeviceInfo& rInfo)
639 : {
640 :
641 : struct {
642 : const char* pVendorName; const char* pDriverVersion;
643 : } aBadOpenCLCompilers[] = {
644 : { "Intel(R) Corporation", "9.17.10.2884" }
645 0 : };
646 :
647 0 : for(size_t i = 0; i < SAL_N_ELEMENTS(aBadOpenCLCompilers); ++i)
648 : {
649 0 : if(rInfo.maVendor == OUString::createFromAscii(aBadOpenCLCompilers[i].pVendorName) &&
650 0 : rInfo.maDriver == OUString::createFromAscii(aBadOpenCLCompilers[i].pDriverVersion))
651 0 : return true;
652 : }
653 :
654 0 : return false;
655 : }
656 :
657 0 : void createDeviceInfo(cl_device_id aDeviceId, OpenCLPlatformInfo& rPlatformInfo)
658 : {
659 0 : OpenCLDeviceInfo aDeviceInfo;
660 0 : aDeviceInfo.device = aDeviceId;
661 :
662 : char pName[DEVICE_NAME_LENGTH];
663 0 : cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, NULL);
664 0 : if(nState != CL_SUCCESS)
665 0 : return;
666 :
667 0 : aDeviceInfo.maName = OUString::createFromAscii(pName);
668 :
669 : char pVendor[DEVICE_NAME_LENGTH];
670 0 : nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, NULL);
671 0 : if(nState != CL_SUCCESS)
672 0 : return;
673 :
674 0 : aDeviceInfo.maVendor = OUString::createFromAscii(pVendor);
675 :
676 : cl_ulong nMemSize;
677 0 : nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, NULL);
678 0 : if(nState != CL_SUCCESS)
679 0 : return;
680 :
681 0 : aDeviceInfo.mnMemory = nMemSize;
682 :
683 : cl_uint nClockFrequency;
684 0 : nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, NULL);
685 0 : if(nState != CL_SUCCESS)
686 0 : return;
687 :
688 0 : aDeviceInfo.mnFrequency = nClockFrequency;
689 :
690 : cl_uint nComputeUnits;
691 0 : nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, NULL);
692 0 : if(nState != CL_SUCCESS)
693 0 : return;
694 :
695 : char pDriver[DEVICE_NAME_LENGTH];
696 0 : nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, NULL);
697 :
698 0 : if(nState != CL_SUCCESS)
699 0 : return;
700 :
701 0 : aDeviceInfo.maDriver = OUString::createFromAscii(pDriver);
702 :
703 0 : bool bKhrFp64 = false;
704 0 : bool bAmdFp64 = false;
705 0 : checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64);
706 :
707 : // only list devices that support double
708 0 : if(!bKhrFp64 && !bAmdFp64)
709 0 : return;
710 :
711 0 : aDeviceInfo.mnComputeUnits = nComputeUnits;
712 :
713 0 : if(!checkForKnownBadCompilers(aDeviceInfo))
714 0 : rPlatformInfo.maDevices.push_back(aDeviceInfo);
715 : }
716 :
717 0 : bool createPlatformInfo(cl_platform_id nPlatformId, OpenCLPlatformInfo& rPlatformInfo)
718 : {
719 0 : rPlatformInfo.platform = nPlatformId;
720 : char pName[64];
721 : cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64,
722 0 : pName, NULL);
723 0 : if(nState != CL_SUCCESS)
724 0 : return false;
725 0 : rPlatformInfo.maName = OUString::createFromAscii(pName);
726 :
727 : char pVendor[64];
728 : nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64,
729 0 : pVendor, NULL);
730 0 : if(nState != CL_SUCCESS)
731 0 : return false;
732 :
733 0 : rPlatformInfo.maVendor = OUString::createFromAscii(pName);
734 :
735 : cl_uint nDevices;
736 0 : nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, NULL, &nDevices);
737 0 : if(nState != CL_SUCCESS)
738 0 : return false;
739 :
740 : // memory leak that does not matter
741 : // memory is stored in static variable that lives through the whole program
742 0 : cl_device_id* pDevices = new cl_device_id[nDevices];
743 0 : nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices, NULL);
744 0 : if(nState != CL_SUCCESS)
745 0 : return false;
746 :
747 0 : for(size_t i = 0; i < nDevices; ++i)
748 : {
749 0 : createDeviceInfo(pDevices[i], rPlatformInfo);
750 : }
751 :
752 0 : return true;
753 : }
754 :
755 : }
756 :
757 446 : const std::vector<OpenCLPlatformInfo>& fillOpenCLInfo()
758 : {
759 446 : static std::vector<OpenCLPlatformInfo> aPlatforms;
760 446 : if(!aPlatforms.empty())
761 0 : return aPlatforms;
762 :
763 446 : int status = clewInit(OPENCL_DLL_NAME);
764 446 : if (status < 0)
765 446 : return aPlatforms;
766 :
767 : cl_uint nPlatforms;
768 0 : cl_int nState = clGetPlatformIDs(0, NULL, &nPlatforms);
769 :
770 0 : if(nState != CL_SUCCESS)
771 0 : return aPlatforms;
772 :
773 : // memory leak that does not matter,
774 : // memory is stored in static instance aPlatforms
775 0 : cl_platform_id* pPlatforms = new cl_platform_id[nPlatforms];
776 0 : nState = clGetPlatformIDs(nPlatforms, pPlatforms, NULL);
777 :
778 0 : if(nState != CL_SUCCESS)
779 0 : return aPlatforms;
780 :
781 0 : for(size_t i = 0; i < nPlatforms; ++i)
782 : {
783 0 : OpenCLPlatformInfo aPlatformInfo;
784 0 : if(createPlatformInfo(pPlatforms[i], aPlatformInfo))
785 0 : aPlatforms.push_back(aPlatformInfo);
786 0 : }
787 :
788 0 : return aPlatforms;
789 : }
790 :
791 : namespace {
792 :
793 0 : cl_device_id findDeviceIdByDeviceString(const OUString& rString, const std::vector<OpenCLPlatformInfo>& rPlatforms)
794 : {
795 0 : std::vector<OpenCLPlatformInfo>::const_iterator it = rPlatforms.begin(), itEnd = rPlatforms.end();
796 0 : for(; it != itEnd; ++it)
797 : {
798 0 : std::vector<OpenCLDeviceInfo>::const_iterator itr = it->maDevices.begin(), itrEnd = it->maDevices.end();
799 0 : for(; itr != itrEnd; ++itr)
800 : {
801 0 : OUString aDeviceId = it->maVendor + " " + itr->maName;
802 0 : if(rString == aDeviceId)
803 : {
804 0 : return static_cast<cl_device_id>(itr->device);
805 : }
806 0 : }
807 : }
808 :
809 0 : return NULL;
810 : }
811 :
812 0 : void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId)
813 : {
814 : cl_platform_id platformId;
815 : cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM,
816 0 : sizeof(platformId), &platformId, NULL);
817 :
818 0 : if(nState != CL_SUCCESS)
819 0 : return;
820 :
821 0 : const std::vector<OpenCLPlatformInfo>& rPlatforms = fillOpenCLInfo();
822 0 : for(size_t i = 0; i < rPlatforms.size(); ++i)
823 : {
824 0 : cl_platform_id platId = static_cast<cl_platform_id>(rPlatforms[i].platform);
825 0 : if(platId != platformId)
826 0 : continue;
827 :
828 0 : for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j)
829 : {
830 0 : cl_device_id id = static_cast<cl_device_id>(rPlatforms[i].maDevices[j].device);
831 0 : if(id == aDeviceId)
832 : {
833 0 : rDeviceId = j;
834 0 : rPlatformId = i;
835 0 : return;
836 : }
837 : }
838 : }
839 : }
840 :
841 : }
842 :
843 446 : bool switchOpenCLDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation)
844 : {
845 446 : if(fillOpenCLInfo().empty())
846 446 : return false;
847 :
848 0 : cl_device_id pDeviceId = NULL;
849 0 : if(pDevice)
850 0 : pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo());
851 :
852 0 : if(!pDeviceId || bAutoSelect)
853 : {
854 0 : int status = clewInit(OPENCL_DLL_NAME);
855 0 : if (status < 0)
856 0 : return false;
857 :
858 0 : OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
859 0 : rtl::Bootstrap::expandMacros(url);
860 0 : OUString path;
861 0 : osl::FileBase::getSystemPathFromFileURL(url,path);
862 0 : OString dsFileName = rtl::OUStringToOString(path, RTL_TEXTENCODING_UTF8);
863 0 : ds_device pSelectedDevice = sc::OpenCLDevice::getDeviceSelection(dsFileName.getStr(), bForceEvaluation);
864 0 : pDeviceId = pSelectedDevice.oclDeviceID;
865 :
866 : }
867 :
868 0 : if(OpenCLDevice::gpuEnv.mpDevID == pDeviceId)
869 : {
870 : // we don't need to change anything
871 : // still the same device
872 0 : return pDeviceId != NULL;
873 : }
874 :
875 : cl_platform_id platformId;
876 : cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM,
877 0 : sizeof(platformId), &platformId, NULL);
878 :
879 : cl_context_properties cps[3];
880 0 : cps[0] = CL_CONTEXT_PLATFORM;
881 0 : cps[1] = reinterpret_cast<cl_context_properties>(platformId);
882 0 : cps[2] = 0;
883 0 : cl_context context = clCreateContext( cps, 1, &pDeviceId, NULL, NULL, &nState );
884 :
885 0 : if(nState != CL_SUCCESS || context == NULL)
886 : {
887 0 : if(context != NULL)
888 0 : clReleaseContext(context);
889 :
890 : SAL_WARN("sc", "failed to set/switch opencl device");
891 0 : return false;
892 : }
893 :
894 : cl_command_queue command_queue = clCreateCommandQueue(
895 0 : context, pDeviceId, 0, &nState);
896 :
897 0 : if(command_queue == NULL || nState != CL_SUCCESS)
898 : {
899 0 : if(command_queue != NULL)
900 0 : clReleaseCommandQueue(command_queue);
901 :
902 0 : clReleaseContext(context);
903 : SAL_WARN("sc", "failed to set/switch opencl device");
904 0 : return false;
905 : }
906 :
907 0 : OpenCLDevice::releaseOpenCLEnv(&OpenCLDevice::gpuEnv);
908 : OpenCLEnv env;
909 0 : env.mpOclPlatformID = platformId;
910 0 : env.mpOclContext = context;
911 0 : env.mpOclDevsID = pDeviceId;
912 0 : env.mpOclCmdQueue = command_queue;
913 0 : OpenCLDevice::initOpenCLAttr(&env);
914 :
915 : // why do we need this at all?
916 0 : OpenCLDevice::gpuEnv.mpArryDevsID = (cl_device_id*) malloc( sizeof(cl_device_id) );
917 0 : OpenCLDevice::gpuEnv.mpArryDevsID[0] = pDeviceId;
918 0 : return !OpenCLDevice::initOpenCLRunEnv(0);
919 : }
920 :
921 0 : void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId)
922 : {
923 0 : int status = clewInit(OPENCL_DLL_NAME);
924 0 : if (status < 0)
925 0 : return;
926 :
927 0 : cl_device_id id = OpenCLDevice::gpuEnv.mpDevID;
928 0 : findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId);
929 : }
930 :
931 228 : }}
932 :
933 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|