LCOV - code coverage report
Current view: top level - sc/source/core/opencl - openclwrapper.cxx (source / functions) Hit Total Coverage
Test: commit 0e63ca4fde4e446f346e35849c756a30ca294aab Lines: 16 433 3.7 %
Date: 2014-04-11 Functions: 5 27 18.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #include <config_folders.h>
      11             : 
      12             : #include "openclwrapper.hxx"
      13             : 
      14             : #include <rtl/ustring.hxx>
      15             : #include <rtl/strbuf.hxx>
      16             : #include <rtl/digest.h>
      17             : #include <rtl/bootstrap.hxx>
      18             : #include <boost/scoped_array.hpp>
      19             : 
      20             : #include "sal/config.h"
      21             : #include <osl/file.hxx>
      22             : #include "opencl_device.hxx"
      23             : 
      24             : #include <stdio.h>
      25             : #include <stdlib.h>
      26             : #include <string.h>
      27             : #include <cmath>
      28             : 
      29             : #ifdef WIN32
      30             : #include <windows.h>
      31             : #define OPENCL_DLL_NAME "OpenCL.dll"
      32             : #elif defined(MACOSX)
      33             : #define OPENCL_DLL_NAME NULL
      34             : #else
      35             : #define OPENCL_DLL_NAME "libOpenCL.so"
      36             : #endif
      37             : 
      38             : #define DEVICE_NAME_LENGTH 1024
      39             : #define DRIVER_VERSION_LENGTH 1024
      40             : #define PLATFORM_VERSION_LENGTH 1024
      41             : 
      42             : using namespace std;
      43             : 
      44             : namespace sc { namespace opencl {
      45             : 
      46             : GPUEnv OpenclDevice::gpuEnv;
      47             : bool OpenclDevice::bIsInited = false;
      48             : 
      49             : namespace {
      50             : 
      51           0 : OString generateMD5(const void* pData, size_t length)
      52             : {
      53             :     sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5];
      54             :     rtlDigestError aError = rtl_digest_MD5(pData, length,
      55           0 :             pBuffer, RTL_DIGEST_LENGTH_MD5);
      56             :     SAL_WARN_IF(aError != rtl_Digest_E_None, "sc", "md5 generation failed");
      57             : 
      58           0 :     OStringBuffer aBuffer;
      59           0 :     const char* pString = "0123456789ABCDEF";
      60           0 :     for(size_t i = 0; i < RTL_DIGEST_LENGTH_MD5; ++i)
      61             :     {
      62           0 :         sal_uInt8 val = pBuffer[i];
      63           0 :         aBuffer.append(pString[val/16]);
      64           0 :         aBuffer.append(pString[val%16]);
      65             :     }
      66           0 :     return aBuffer.makeStringAndClear();
      67             : }
      68             : 
      69           1 : OString getCacheFolder()
      70             : {
      71           1 :     OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
      72           1 :     rtl::Bootstrap::expandMacros(url);
      73             : 
      74           1 :     osl::Directory::create(url);
      75             : 
      76           1 :     return rtl::OUStringToOString(url, RTL_TEXTENCODING_UTF8);
      77             : }
      78             : 
      79           0 : void clearCache()
      80             : {
      81             : #if 0
      82             :     // We used to delete all files that did not end with the hash of
      83             :     // the static kernel source string from oclkernels.hxx. But as
      84             :     // those static kernels were not used for anything, it was
      85             :     // pointless, that hash never changed. The static kernels are now
      86             :     // removed, their hash is not part of the .bin file names any
      87             :     // more.  So there is little this function can do until we come up
      88             :     // with some other way to figure out which cached .bin files are
      89             :     // "current".
      90             :     OUString aCacheDirURL(rtl::OStringToOUString(OpenclDevice::maCacheFolder, RTL_TEXTENCODING_UTF8));
      91             :     osl::Directory aCacheDir(aCacheDirURL);
      92             :     osl::FileBase::RC status = aCacheDir.open();
      93             :     if(status != osl::FileBase::E_None)
      94             :         return;
      95             : 
      96             :     osl::DirectoryItem aItem;
      97             :     while(osl::FileBase::E_None == aCacheDir.getNextItem(aItem))
      98             :     {
      99             :         osl::FileStatus aFileStatus(osl_FileStatus_Mask_FileName|osl_FileStatus_Mask_FileURL);
     100             :         status = aItem.getFileStatus(aFileStatus);
     101             :         if(status != osl::FileBase::E_None)
     102             :             continue;
     103             : 
     104             :         OUString aFileName = aFileStatus.getFileName();
     105             :         if(aFileName.endsWith(".bin"))
     106             :         {
     107             :             if ( file is in some way obsolete )
     108             :             {
     109             :                 // delete the file
     110             :                 OUString aFileUrl = aFileStatus.getFileURL();
     111             :                 osl::File::remove(aFileUrl);
     112             :             }
     113             :         }
     114             :     }
     115             : #endif
     116           0 : }
     117             : 
     118             : }
     119             : 
     120           1 : OString OpenclDevice::maCacheFolder = getCacheFolder();
     121             : 
     122           0 : void OpenclDevice::registOpenclKernel()
     123             : {
     124           0 :     if ( !gpuEnv.mnIsUserCreated )
     125           0 :         memset( &gpuEnv, 0, sizeof(gpuEnv) );
     126           0 : }
     127             : 
     128           0 : void OpenclDevice::setKernelEnv( KernelEnv *envInfo )
     129             : {
     130           0 :     envInfo->mpkContext = gpuEnv.mpContext;
     131           0 :     envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue;
     132           0 :     envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
     133           0 : }
     134             : 
     135             : namespace {
     136             : 
     137           0 : OString createFileName(cl_device_id deviceId, const char* clFileName)
     138             : {
     139           0 :     OString fileName(clFileName);
     140           0 :     sal_Int32 nIndex = fileName.lastIndexOf(".cl");
     141           0 :     if(nIndex > 0)
     142           0 :         fileName = fileName.copy(0, nIndex);
     143             : 
     144           0 :     char deviceName[DEVICE_NAME_LENGTH] = {0};
     145             :     clGetDeviceInfo(deviceId, CL_DEVICE_NAME,
     146           0 :             sizeof(deviceName), deviceName, NULL);
     147             : 
     148           0 :     char driverVersion[DRIVER_VERSION_LENGTH] = {0};
     149             :     clGetDeviceInfo(deviceId, CL_DRIVER_VERSION,
     150           0 :             sizeof(driverVersion), driverVersion, NULL);
     151             : 
     152             :     cl_platform_id platformId;
     153             :     clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM,
     154           0 :             sizeof(platformId), &platformId, NULL);
     155             : 
     156           0 :     char platformVersion[PLATFORM_VERSION_LENGTH] = {0};
     157             :     clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion),
     158           0 :             platformVersion, NULL);
     159             : 
     160             :     // create hash for deviceName + driver version + platform version
     161           0 :     OString aString = OString(deviceName) + driverVersion + platformVersion;
     162           0 :     OString aHash = generateMD5(aString.getStr(), aString.getLength());
     163             : 
     164           0 :     return OpenclDevice::maCacheFolder + fileName + "-" +
     165           0 :         aHash + ".bin";
     166             : }
     167             : 
     168             : }
     169             : 
     170           0 : std::vector<boost::shared_ptr<osl::File> > OpenclDevice::binaryGenerated( const char * clFileName, cl_context context )
     171             : {
     172           0 :     size_t numDevices=0;
     173             : 
     174           0 :     std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles;
     175             :     cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
     176           0 :             0, NULL, &numDevices );
     177           0 :     numDevices /= sizeof(numDevices);
     178             : 
     179           0 :     if(clStatus != CL_SUCCESS)
     180           0 :         return aGeneratedFiles;
     181             : 
     182             : 
     183             :     // grab the handles to all of the devices in the context.
     184           0 :     boost::scoped_array<cl_device_id> mpArryDevsID(new cl_device_id[numDevices]);
     185             :     clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
     186           0 :             sizeof( cl_device_id ) * numDevices, mpArryDevsID.get(), NULL );
     187             : 
     188           0 :     if(clStatus != CL_SUCCESS)
     189           0 :         return aGeneratedFiles;
     190             : 
     191           0 :     for ( size_t i = 0; i < numDevices; i++ )
     192             :     {
     193           0 :         if ( mpArryDevsID[i] != 0 )
     194             :         {
     195           0 :             OString fileName = createFileName(gpuEnv.mpArryDevsID[i], clFileName);
     196           0 :             osl::File* pNewFile = new osl::File(rtl::OStringToOUString(fileName, RTL_TEXTENCODING_UTF8));
     197           0 :             if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None)
     198             :             {
     199           0 :                 aGeneratedFiles.push_back(boost::shared_ptr<osl::File>(pNewFile));
     200             :                 SAL_INFO("sc.opencl", "Opening binary file '" << fileName << "' for reading: success");
     201             :             }
     202             :             else
     203             :             {
     204             :                 SAL_INFO("sc.opencl", "Opening binary file '" << fileName << "' for reading: FAIL");
     205           0 :                 delete pNewFile;
     206           0 :                 break;
     207           0 :             }
     208             :         }
     209             :     }
     210             : 
     211           0 :     return aGeneratedFiles;
     212             : }
     213             : 
     214           0 : bool OpenclDevice::writeBinaryToFile( const OString& rFileName, const char* binary, size_t numBytes )
     215             : {
     216           0 :     clearCache();
     217           0 :     osl::File file(rtl::OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8));
     218             :     osl::FileBase::RC status = file.open(
     219           0 :             osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
     220             : 
     221           0 :     if(status != osl::FileBase::E_None)
     222           0 :         return false;
     223             : 
     224           0 :     sal_uInt64 nBytesWritten = 0;
     225           0 :     file.write( binary, numBytes, nBytesWritten );
     226             : 
     227             :     assert(numBytes == nBytesWritten);
     228             : 
     229           0 :     return true;
     230             : }
     231             : 
     232           0 : bool OpenclDevice::generatBinFromKernelSource( cl_program program, const char * clFileName )
     233             : {
     234             :     cl_uint numDevices;
     235             : 
     236             :     cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
     237           0 :                    sizeof(numDevices), &numDevices, NULL );
     238           0 :     CHECK_OPENCL( clStatus, "clGetProgramInfo" );
     239             : 
     240           0 :     std::vector<cl_device_id> mpArryDevsID(numDevices);
     241             :     /* grab the handles to all of the devices in the program. */
     242             :     clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
     243           0 :                    sizeof(cl_device_id) * numDevices, &mpArryDevsID[0], NULL );
     244           0 :     CHECK_OPENCL( clStatus, "clGetProgramInfo" );
     245             : 
     246             :     /* figure out the sizes of each of the binaries. */
     247           0 :     std::vector<size_t> binarySizes(numDevices);
     248             : 
     249             :     clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
     250           0 :                    sizeof(size_t) * numDevices, &binarySizes[0], NULL );
     251           0 :     CHECK_OPENCL( clStatus, "clGetProgramInfo" );
     252             : 
     253             :     /* copy over all of the generated binaries. */
     254           0 :     boost::scoped_array<char*> binaries(new char*[numDevices]);
     255             : 
     256           0 :     for ( size_t i = 0; i < numDevices; i++ )
     257             :     {
     258           0 :         if ( binarySizes[i] != 0 )
     259             :         {
     260           0 :             binaries[i] = new char[binarySizes[i]];
     261             :         }
     262             :         else
     263             :         {
     264           0 :             binaries[i] = NULL;
     265             :         }
     266             :     }
     267             : 
     268             :     clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
     269           0 :                    sizeof(char *) * numDevices, binaries.get(), NULL );
     270           0 :     CHECK_OPENCL(clStatus,"clGetProgramInfo");
     271             : 
     272             :     /* dump out each binary into its own separate file. */
     273           0 :     for ( size_t i = 0; i < numDevices; i++ )
     274             :     {
     275             : 
     276           0 :         if ( binarySizes[i] != 0 )
     277             :         {
     278           0 :             OString fileName = createFileName(mpArryDevsID[i], clFileName);
     279           0 :             if ( !writeBinaryToFile( fileName,
     280           0 :                         binaries[i], binarySizes[i] ) )
     281             :                 SAL_INFO("sc.opencl", "Writing binary file '" << fileName << "': FAIL");
     282             :             else
     283           0 :                 SAL_INFO("sc.opencl", "Writing binary file '" << fileName << "': success");
     284             :         }
     285             :     }
     286             : 
     287             :     // Release all resouces and memory
     288           0 :     for ( size_t i = 0; i < numDevices; i++ )
     289             :     {
     290           0 :         delete[] binaries[i];
     291             :     }
     292             : 
     293           0 :     return true;
     294             : }
     295             : 
     296           0 : bool OpenclDevice::initOpenclAttr( OpenCLEnv * env )
     297             : {
     298           0 :     if ( gpuEnv.mnIsUserCreated )
     299           0 :         return true;
     300             : 
     301           0 :     gpuEnv.mpContext = env->mpOclContext;
     302           0 :     gpuEnv.mpPlatformID = env->mpOclPlatformID;
     303           0 :     gpuEnv.mpDevID = env->mpOclDevsID;
     304           0 :     gpuEnv.mpCmdQueue = env->mpOclCmdQueue;
     305             : 
     306           0 :     gpuEnv.mnIsUserCreated = 1;
     307             : 
     308           0 :     return false;
     309             : }
     310             : 
     311           0 : void OpenclDevice::releaseOpenclEnv( GPUEnv *gpuInfo )
     312             : {
     313           0 :     if ( !bIsInited )
     314             :     {
     315           0 :         return;
     316             :     }
     317             : 
     318           0 :     if ( gpuEnv.mpCmdQueue )
     319             :     {
     320           0 :         clReleaseCommandQueue( gpuEnv.mpCmdQueue );
     321           0 :         gpuEnv.mpCmdQueue = NULL;
     322             :     }
     323           0 :     if ( gpuEnv.mpContext )
     324             :     {
     325           0 :         clReleaseContext( gpuEnv.mpContext );
     326           0 :         gpuEnv.mpContext = NULL;
     327             :     }
     328           0 :     bIsInited = false;
     329           0 :     gpuInfo->mnIsUserCreated = 0;
     330           0 :     free( gpuInfo->mpArryDevsID );
     331             : 
     332           0 :     return;
     333             : }
     334             : 
     335             : namespace {
     336             : 
     337           0 : bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
     338             : {
     339             :     cl_int clStatus;
     340             :     //char options[512];
     341             :     // create a cl program executable for all the devices specified
     342           0 :     if (!gpuInfo->mnIsUserCreated)
     343             :     {
     344             :         clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
     345           0 :                        buildOption, NULL, NULL);
     346             :     }
     347             :     else
     348             :     {
     349             :         clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
     350           0 :                        buildOption, NULL, NULL);
     351             :     }
     352             : 
     353           0 :     if ( clStatus != CL_SUCCESS )
     354             :     {
     355             :         size_t length;
     356           0 :         if ( !gpuInfo->mnIsUserCreated )
     357             :         {
     358           0 :             clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
     359           0 :                            CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
     360             :         }
     361             :         else
     362             :         {
     363             :             clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
     364           0 :                            CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
     365             :         }
     366           0 :         if ( clStatus != CL_SUCCESS )
     367             :         {
     368           0 :             return false;
     369             :         }
     370             : 
     371           0 :         boost::scoped_array<char> buildLog(new char[length]);
     372           0 :         if ( !gpuInfo->mnIsUserCreated )
     373             :         {
     374           0 :             clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
     375           0 :                            CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
     376             :         }
     377             :         else
     378             :         {
     379             :             clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
     380           0 :                            CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
     381             :         }
     382           0 :         if ( clStatus != CL_SUCCESS )
     383             :         {
     384           0 :             return false;
     385             :         }
     386             : 
     387           0 :         OString aBuildLogFileURL = OpenclDevice::maCacheFolder + "kernel-build.log";
     388           0 :         osl::File aBuildLogFile(rtl::OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8));
     389             :         osl::FileBase::RC status = aBuildLogFile.open(
     390           0 :                 osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
     391             : 
     392           0 :         if(status != osl::FileBase::E_None)
     393           0 :             return false;
     394             : 
     395           0 :         sal_uInt64 nBytesWritten = 0;
     396           0 :         aBuildLogFile.write( buildLog.get(), length, nBytesWritten );
     397             : 
     398           0 :         return false;
     399             :     }
     400             : 
     401           0 :     return true;
     402             : }
     403             : 
     404             : }
     405             : 
     406           0 : bool OpenclDevice::buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
     407             : {
     408             :     size_t numDevices;
     409             :     cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
     410           0 :             0, NULL, &numDevices );
     411           0 :     numDevices /= sizeof(numDevices);
     412           0 :     CHECK_OPENCL( clStatus, "clGetContextInfo" );
     413             : 
     414             :     std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated(
     415           0 :             filename, gpuInfo->mpContext );
     416             : 
     417           0 :     if (aGeneratedFiles.size() == numDevices)
     418             :     {
     419           0 :         boost::scoped_array<size_t> length(new size_t[numDevices]);
     420           0 :         boost::scoped_array<unsigned char*> pBinary(new unsigned char*[numDevices]);
     421           0 :         for(size_t i = 0; i < numDevices; ++i)
     422             :         {
     423             :             sal_uInt64 nSize;
     424           0 :             aGeneratedFiles[i]->getSize(nSize);
     425           0 :             unsigned char* binary = new unsigned char[nSize];
     426             :             sal_uInt64 nBytesRead;
     427           0 :             aGeneratedFiles[i]->read(binary, nSize, nBytesRead);
     428           0 :             if(nSize != nBytesRead)
     429             :                 assert(false);
     430             : 
     431           0 :             length[i] = nBytesRead;
     432             : 
     433           0 :             pBinary[i] = binary;
     434             :         }
     435             : 
     436             :         // grab the handles to all of the devices in the context.
     437           0 :         boost::scoped_array<cl_device_id> mpArryDevsID(new cl_device_id[numDevices]);
     438             :         clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
     439           0 :                        sizeof( cl_device_id ) * numDevices, mpArryDevsID.get(), NULL );
     440             : 
     441           0 :         if(clStatus != CL_SUCCESS)
     442             :         {
     443           0 :             for(size_t i = 0; i < numDevices; ++i)
     444             :             {
     445           0 :                 delete[] pBinary[i];
     446             :             }
     447           0 :             return false;
     448             :         }
     449             : 
     450             :         cl_int binary_status;
     451             : 
     452             :         gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
     453           0 :                                            mpArryDevsID.get(), length.get(), (const unsigned char**) pBinary.get(),
     454           0 :                                            &binary_status, &clStatus );
     455           0 :         if(clStatus != CL_SUCCESS)
     456             :         {
     457             :             // something went wrong, fall back to compiling from source
     458           0 :             return false;
     459             :         }
     460           0 :         for(size_t i = 0; i < numDevices; ++i)
     461             :         {
     462           0 :             delete[] pBinary[i];
     463           0 :         }
     464             :     }
     465             : 
     466           0 :     if ( !gpuInfo->mpArryPrograms[idx] )
     467             :     {
     468           0 :         return false;
     469             :     }
     470           0 :     return buildProgram(buildOption, gpuInfo, idx);
     471             : }
     472             : 
     473           0 : bool OpenclDevice::initOpenclRunEnv( int argc )
     474             : {
     475             :     if ( MAX_CLKERNEL_NUM <= 0 )
     476             :     {
     477             :         return true;
     478             :     }
     479           0 :     if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) )
     480           0 :         return true;
     481             : 
     482           0 :     if ( !bIsInited )
     483             :     {
     484           0 :         registOpenclKernel();
     485             :         //initialize devices, context, command_queue
     486           0 :         bool status = initOpenclRunEnv( &gpuEnv );
     487           0 :         if ( status )
     488             :         {
     489           0 :             return true;
     490             :         }
     491             :         //initialize program, kernelName, kernelCount
     492           0 :         if( getenv( "SC_FLOAT" ) )
     493             :         {
     494           0 :             gpuEnv.mnKhrFp64Flag = false;
     495           0 :             gpuEnv.mnAmdFp64Flag = false;
     496             :         }
     497           0 :         if( gpuEnv.mnKhrFp64Flag )
     498             :         {
     499             :             SAL_INFO("sc.opencl", "Use Khr double");
     500             :         }
     501           0 :         else if( gpuEnv.mnAmdFp64Flag )
     502             :         {
     503             :             SAL_INFO("sc.opencl", "Use AMD double type");
     504             :         }
     505             :         else
     506             :         {
     507             :             SAL_INFO("sc.opencl", "USE float type");
     508             :         }
     509           0 :         bIsInited = true;
     510             :     }
     511           0 :     return false;
     512             : }
     513             : 
     514             : namespace {
     515             : 
     516           0 : void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64)
     517             : {
     518           0 :     bKhrFp64 = false;
     519           0 :     bAmdFp64 = false;
     520             : 
     521             :     // Check device extensions for double type
     522           0 :     size_t aDevExtInfoSize = 0;
     523             : 
     524           0 :     cl_uint clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize );
     525           0 :     if( clStatus != CL_SUCCESS )
     526           0 :         return;
     527             : 
     528           0 :     boost::scoped_array<char> pExtInfo(new char[aDevExtInfoSize]);
     529             : 
     530             :     clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS,
     531           0 :                    sizeof(char) * aDevExtInfoSize, pExtInfo.get(), NULL);
     532             : 
     533           0 :     if( clStatus != CL_SUCCESS )
     534           0 :         return;
     535             : 
     536           0 :     if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) )
     537             :     {
     538           0 :         bKhrFp64 = true;
     539             :     }
     540             :     else
     541             :     {
     542             :         // Check if cl_amd_fp64 extension is supported
     543           0 :         if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) )
     544           0 :             bAmdFp64 = true;
     545           0 :     }
     546             : }
     547             : 
     548             : }
     549             : 
     550           0 : bool OpenclDevice::initOpenclRunEnv( GPUEnv *gpuInfo )
     551             : {
     552             :     size_t length;
     553             :     cl_int clStatus;
     554             :     cl_uint numPlatforms, numDevices;
     555             :     cl_platform_id *platforms;
     556             : 
     557             :     // Have a look at the available platforms.
     558             : 
     559           0 :     if ( !gpuInfo->mnIsUserCreated )
     560             :     {
     561           0 :         clStatus = clGetPlatformIDs( 0, NULL, &numPlatforms );
     562           0 :         CHECK_OPENCL(clStatus, "clGetPlatformIDs");
     563           0 :         gpuInfo->mpPlatformID = NULL;
     564             : 
     565           0 :         if ( 0 < numPlatforms )
     566             :         {
     567             :             char platformName[256];
     568           0 :             platforms = (cl_platform_id*) malloc( numPlatforms * sizeof( cl_platform_id ) );
     569           0 :             if (!platforms)
     570             :             {
     571           0 :                 return true;
     572             :             }
     573           0 :             clStatus = clGetPlatformIDs( numPlatforms, platforms, NULL );
     574           0 :             CHECK_OPENCL(clStatus, "clGetPlatformIDs");
     575             : 
     576           0 :             for ( unsigned int i = 0; i < numPlatforms; i++ )
     577             :             {
     578           0 :                 clStatus = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR,
     579           0 :                     sizeof( platformName ), platformName, NULL );
     580             : 
     581           0 :                 if ( clStatus != CL_SUCCESS )
     582             :                 {
     583           0 :                     break;
     584             :                 }
     585           0 :                 gpuInfo->mpPlatformID = platforms[i];
     586             : 
     587             :                 //if (!strcmp(platformName, "Intel(R) Coporation"))
     588             :                 //if( !strcmp( platformName, "Advanced Micro Devices, Inc." ))
     589             :                 {
     590           0 :                     gpuInfo->mpPlatformID = platforms[i];
     591           0 :                     if ( getenv("SC_OPENCLCPU") )
     592             :                     {
     593             :                         clStatus = clGetDeviceIDs(gpuInfo->mpPlatformID, // platform
     594             :                                                   CL_DEVICE_TYPE_CPU,    // device_type for CPU device
     595             :                                                   0,                     // num_entries
     596             :                                                   NULL,                  // devices
     597           0 :                                                   &numDevices);
     598             :                     }
     599             :                     else
     600             :                     {
     601             :                           clStatus = clGetDeviceIDs(gpuInfo->mpPlatformID, // platform
     602             :                                                   CL_DEVICE_TYPE_GPU,      // device_type for GPU device
     603             :                                                   0,                       // num_entries
     604             :                                                   NULL,                    // devices
     605           0 :                                                   &numDevices);
     606             :                     }
     607           0 :                     if ( clStatus != CL_SUCCESS )
     608           0 :                         continue;
     609             : 
     610           0 :                     if ( numDevices )
     611           0 :                         break;
     612             :                 }
     613             :             }
     614           0 :             free( platforms );
     615           0 :             if ( clStatus != CL_SUCCESS )
     616           0 :                 return true;
     617             :         }
     618           0 :         if ( NULL == gpuInfo->mpPlatformID )
     619           0 :             return true;
     620             : 
     621             :         // Use available platform.
     622             :         cl_context_properties cps[3];
     623           0 :         cps[0] = CL_CONTEXT_PLATFORM;
     624           0 :         cps[1] = (cl_context_properties) gpuInfo->mpPlatformID;
     625           0 :         cps[2] = 0;
     626             :         // Set device type for OpenCL
     627           0 :         if ( getenv("SC_OPENCLCPU") )
     628             :         {
     629           0 :             gpuInfo->mDevType = CL_DEVICE_TYPE_CPU;
     630             :         }
     631             :         else
     632             :         {
     633           0 :             gpuInfo->mDevType = CL_DEVICE_TYPE_GPU;
     634             :         }
     635           0 :         gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
     636             : 
     637           0 :         if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
     638             :         {
     639           0 :             gpuInfo->mDevType = CL_DEVICE_TYPE_CPU;
     640           0 :             gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
     641             :         }
     642           0 :         if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
     643             :         {
     644           0 :             gpuInfo->mDevType = CL_DEVICE_TYPE_DEFAULT;
     645           0 :             gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
     646             :         }
     647           0 :         if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
     648           0 :             return true;
     649             :         // Detect OpenCL devices.
     650             :         // First, get the size of device list data
     651           0 :         clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, 0, NULL, &length );
     652           0 :         if ( ( clStatus != CL_SUCCESS ) || ( length == 0 ) )
     653           0 :             return true;
     654             :         // Now allocate memory for device list based on the size we got earlier
     655           0 :         gpuInfo->mpArryDevsID = (cl_device_id*) malloc( length );
     656           0 :         if ( gpuInfo->mpArryDevsID == (cl_device_id*) NULL )
     657           0 :             return true;
     658             :         // Now, get the device list data
     659             :         clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, length,
     660           0 :                        gpuInfo->mpArryDevsID, NULL );
     661           0 :         CHECK_OPENCL(clStatus, "clGetContextInfo");
     662             : 
     663             :         // Create OpenCL command queue.
     664           0 :         gpuInfo->mpCmdQueue = clCreateCommandQueue( gpuInfo->mpContext, gpuInfo->mpArryDevsID[0], 0, &clStatus );
     665             : 
     666           0 :         CHECK_OPENCL(clStatus, "clCreateCommandQueue");
     667             :     }
     668           0 :     bool bKhrFp64 = false;
     669           0 :     bool bAmdFp64 = false;
     670             : 
     671           0 :     checkDeviceForDoubleSupport(gpuInfo->mpArryDevsID[0], bKhrFp64, bAmdFp64);
     672             : 
     673           0 :     gpuInfo->mnKhrFp64Flag = bKhrFp64;
     674           0 :     gpuInfo->mnAmdFp64Flag = bAmdFp64;
     675             : 
     676           0 :     return false;
     677             : }
     678             : 
     679             : namespace {
     680             : 
     681             : // Returns true if rInfo matches an entry of the blacklist below; entries are based on crashes and hanging during kernel compilation.
     682           0 : bool checkForKnownBadCompilers(const OpenclDeviceInfo& rInfo)
     683             : {
     684             :     // Blacklist of (vendor, driver version) pairs; a device is rejected only when both strings compare equal.
     685             :     struct {
     686             :         const char* pVendorName; const char* pDriverVersion;
     687             :     } aBadOpenCLCompilers[] = {
     688             :         { "Intel(R) Corporation", "9.17.10.2884" }
     689           0 :     };
     690             : 
     691           0 :     for(size_t i = 0; i < SAL_N_ELEMENTS(aBadOpenCLCompilers); ++i)
     692             :     {
     693           0 :         if(rInfo.maVendor == OUString::createFromAscii(aBadOpenCLCompilers[i].pVendorName) &&
     694           0 :                 rInfo.maDriver == OUString::createFromAscii(aBadOpenCLCompilers[i].pDriverVersion))
     695           0 :             return true;
     696             :     }
     697             :     // No blacklist entry matched rInfo.
     698           0 :     return false;
     699             : }
     700             : // Queries aDeviceId via clGetDeviceInfo and appends an OpenclDeviceInfo to rPlatformInfo.maDevices; the device is silently skipped if a query fails, neither double flag is set, or its compiler is blacklisted.
     701           0 : void createDeviceInfo(cl_device_id aDeviceId, OpenclPlatformInfo& rPlatformInfo)
     702             : {
     703           0 :     OpenclDeviceInfo aDeviceInfo;
     704           0 :     aDeviceInfo.device = aDeviceId;
     705             :     // Device name; the query is limited to DEVICE_NAME_LENGTH bytes.
     706             :     char pName[DEVICE_NAME_LENGTH];
     707           0 :     cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, NULL);
     708           0 :     if(nState != CL_SUCCESS)
     709           0 :         return;
     710             : 
     711           0 :     aDeviceInfo.maName = OUString::createFromAscii(pName);
     712             :     // Device vendor string.
     713             :     char pVendor[DEVICE_NAME_LENGTH];
     714           0 :     nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, NULL);
     715           0 :     if(nState != CL_SUCCESS)
     716           0 :         return;
     717             : 
     718           0 :     aDeviceInfo.maVendor = OUString::createFromAscii(pVendor);
     719             :     // Global memory size as reported for CL_DEVICE_GLOBAL_MEM_SIZE.
     720             :     cl_ulong nMemSize;
     721           0 :     nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, NULL);
     722           0 :     if(nState != CL_SUCCESS)
     723           0 :         return;
     724             : 
     725           0 :     aDeviceInfo.mnMemory = nMemSize;
     726             :     // Maximum clock frequency as reported for CL_DEVICE_MAX_CLOCK_FREQUENCY.
     727             :     cl_uint nClockFrequency;
     728           0 :     nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, NULL);
     729           0 :     if(nState != CL_SUCCESS)
     730           0 :         return;
     731             : 
     732           0 :     aDeviceInfo.mnFrequency = nClockFrequency;
     733             :     // Compute unit count; it is only copied into aDeviceInfo after the double-support check below.
     734             :     cl_uint nComputeUnits;
     735           0 :     nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, NULL);
     736           0 :     if(nState != CL_SUCCESS)
     737           0 :         return;
     738             :     // Driver version string; checkForKnownBadCompilers() compares it against its blacklist.
     739             :     char pDriver[DEVICE_NAME_LENGTH];
     740           0 :     nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, NULL);
     741             : 
     742           0 :     if(nState != CL_SUCCESS)
     743           0 :         return;
     744             : 
     745           0 :     aDeviceInfo.maDriver = OUString::createFromAscii(pDriver);
     746             :     // checkDeviceForDoubleSupport() fills both flags; presumably they mirror the cl_khr_fp64 / cl_amd_fp64 extensions - TODO confirm.
     747           0 :     bool bKhrFp64 = false;
     748           0 :     bool bAmdFp64 = false;
     749           0 :     checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64);
     750             : 
     751             :     // only list devices that support double
     752           0 :     if(!bKhrFp64 && !bAmdFp64)
     753           0 :         return;
     754             : 
     755           0 :     aDeviceInfo.mnComputeUnits = nComputeUnits;
     756             :     // Devices whose vendor/driver pair is blacklisted are dropped as well.
     757           0 :     if(!checkForKnownBadCompilers(aDeviceInfo))
     758           0 :         rPlatformInfo.maDevices.push_back(aDeviceInfo);
     759             : }
     760             : // Fills rPlatformInfo with the platform's id, name, vendor and usable devices; returns false as soon as any OpenCL query fails.
     761           0 : bool createPlatformInfo(cl_platform_id nPlatformId, OpenclPlatformInfo& rPlatformInfo)
     762             : {
     763           0 :     rPlatformInfo.platform = nPlatformId;
     764             :     char pName[64];
     765             :     cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64,
     766           0 :              pName, NULL);
     767           0 :     if(nState != CL_SUCCESS)
     768           0 :         return false;
     769           0 :     rPlatformInfo.maName = OUString::createFromAscii(pName);
     770             : 
     771             :     char pVendor[64];
     772             :     nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64,
     773           0 :              pVendor, NULL);
     774           0 :     if(nState != CL_SUCCESS)
     775           0 :         return false;
     776             :     // Store the vendor string just queried; this used to copy pName and left pVendor unused.
     777           0 :     rPlatformInfo.maVendor = OUString::createFromAscii(pVendor);
     778             :     // NOTE(review): device strings are built from maVendor, so strings saved with the old value will no longer match.
     779             :     cl_uint nDevices;
     780           0 :     nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, NULL, &nDevices);
     781           0 :     if(nState != CL_SUCCESS)
     782           0 :         return false;
     783             : 
     784             :     // The id array is only scratch space: createDeviceInfo() copies each id by value,
     785             :     // so scoped_array releases it on every exit path instead of leaking it.
     786           0 :     boost::scoped_array<cl_device_id> pDevices(new cl_device_id[nDevices]);
     787           0 :     nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices.get(), NULL);
     788           0 :     if(nState != CL_SUCCESS)
     789           0 :         return false;
     790             :     // Devices that createDeviceInfo() rejects are simply not added; the platform itself is still reported.
     791           0 :     for(size_t i = 0; i < nDevices; ++i)
     792             :     {
     793           0 :         createDeviceInfo(pDevices[i], rPlatformInfo);
     794             :     }
     795             : 
     796           0 :     return true;
     797             : }
     798             : 
     799             : }
     800             : // Returns the number of OpenCL platforms reported by clGetPlatformIDs, or 0 if clewInit() or the query fails.
     801           0 : size_t getOpenCLPlatformCount()
     802             : {
     803           0 :     int status = clewInit(OPENCL_DLL_NAME);
     804           0 :     if (status < 0)
     805           0 :         return 0;
     806             :     // Zero entries and a NULL array: only the platform count is requested.
     807             :     cl_uint nPlatforms;
     808           0 :     cl_int nState = clGetPlatformIDs(0, NULL, &nPlatforms);
     809             : 
     810           0 :     if (nState != CL_SUCCESS)
     811           0 :         return 0;
     812             : 
     813           0 :     return nPlatforms;
     814             : }
     815             : // Returns the function-static list of OpenCL platforms, enumerating them on first use; the list stays empty if clewInit() or the enumeration fails.
     816         222 : const std::vector<OpenclPlatformInfo>& fillOpenCLInfo()
     817             : {
     818         222 :     static std::vector<OpenclPlatformInfo> aPlatforms;
     819         222 :     if(!aPlatforms.empty())
     820           0 :         return aPlatforms;
     821             :     // An empty list is not treated as cached: enumeration is retried on every call until it yields something.
     822         222 :     int status = clewInit(OPENCL_DLL_NAME);
     823         222 :     if (status < 0)
     824         222 :         return aPlatforms;
     825             : 
     826             :     cl_uint nPlatforms;
     827           0 :     cl_int nState = clGetPlatformIDs(0, NULL, &nPlatforms);
     828             :     // Nothing to enumerate when the query fails or reports no platform at all.
     829           0 :     if(nState != CL_SUCCESS || nPlatforms == 0)
     830           0 :         return aPlatforms;
     831             : 
     832             :     // The id array is only scratch space (createPlatformInfo() copies each id by value);
     833             :     // scoped_array frees it on every exit path, which matters because of the retry noted above.
     834           0 :     boost::scoped_array<cl_platform_id> pPlatforms(new cl_platform_id[nPlatforms]);
     835           0 :     nState = clGetPlatformIDs(nPlatforms, pPlatforms.get(), NULL);
     836             : 
     837           0 :     if(nState != CL_SUCCESS)
     838           0 :         return aPlatforms;
     839             :     // Platforms for which createPlatformInfo() fails are left out of the list.
     840           0 :     for(size_t i = 0; i < nPlatforms; ++i)
     841             :     {
     842           0 :         OpenclPlatformInfo aPlatformInfo;
     843           0 :         if(createPlatformInfo(pPlatforms[i], aPlatformInfo))
     844           0 :             aPlatforms.push_back(aPlatformInfo);
     845           0 :     }
     846             : 
     847           0 :     return aPlatforms;
     848             : }
     849             : 
     850             : namespace {
     851             : // Returns the first device in rPlatforms whose "<platform maVendor> <device maName>" string equals rString, or NULL if there is none.
     852           0 : cl_device_id findDeviceIdByDeviceString(const OUString& rString, const std::vector<OpenclPlatformInfo>& rPlatforms)
     853             : {
     854           0 :     std::vector<OpenclPlatformInfo>::const_iterator it = rPlatforms.begin(), itEnd = rPlatforms.end();
     855           0 :     for(; it != itEnd; ++it)
     856             :     {
     857           0 :         std::vector<OpenclDeviceInfo>::const_iterator itr = it->maDevices.begin(), itrEnd = it->maDevices.end();
     858           0 :         for(; itr != itrEnd; ++itr)
     859             :         {
     860           0 :             OUString aDeviceId = it->maVendor + " " + itr->maName; // key: platform vendor, one space, device name
     861           0 :             if(rString == aDeviceId)
     862             :             {
     863           0 :                 return static_cast<cl_device_id>(itr->device);
     864             :             }
     865           0 :         }
     866             :     }
     867             :     // No platform/device pair produced a matching string.
     868           0 :     return NULL;
     869             : }
     870             : // Looks up aDeviceId in fillOpenCLInfo() and writes its indices to rDeviceId / rPlatformId; both outputs are left untouched if the platform query fails or the device is not listed.
     871           0 : void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId)
     872             : {
     873             :     cl_platform_id platformId;
     874             :     cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM,
     875           0 :             sizeof(platformId), &platformId, NULL);
     876             : 
     877           0 :     if(nState != CL_SUCCESS)
     878           0 :         return;
     879             :     // Only the platform that owns the device is searched; all others are skipped.
     880           0 :     const std::vector<OpenclPlatformInfo>& rPlatforms = fillOpenCLInfo();
     881           0 :     for(size_t i = 0; i < rPlatforms.size(); ++i)
     882             :     {
     883           0 :         cl_platform_id platId = static_cast<cl_platform_id>(rPlatforms[i].platform);
     884           0 :         if(platId != platformId)
     885           0 :             continue;
     886             :         // rDeviceId is the index within this platform's maDevices, rPlatformId the index into rPlatforms.
     887           0 :         for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j)
     888             :         {
     889           0 :             cl_device_id id = static_cast<cl_device_id>(rPlatforms[i].maDevices[j].device);
     890           0 :             if(id == aDeviceId)
     891             :             {
     892           0 :                 rDeviceId = j;
     893           0 :                 rPlatformId = i;
     894           0 :                 return;
     895             :             }
     896             :         }
     897             :     }
     898             : }
     899             : 
     900             : }
     901             : // Activates the device named by *pDevice (pDevice may be NULL) or, if none matches or bAutoSelect is set, the one returned by getDeviceSelection(); returns false if no platform is known or any setup step fails.
     902         222 : bool switchOpenclDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation)
     903             : {
     904         222 :     if(fillOpenCLInfo().empty())
     905         222 :         return false;
     906             :     // Resolve the requested device string, if one was given, against the known platforms.
     907           0 :     cl_device_id pDeviceId = NULL;
     908           0 :     if(pDevice)
     909           0 :         pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo());
     910             :     // Nothing matched, or the caller asked for auto-selection: take the device from getDeviceSelection().
     911           0 :     if(!pDeviceId || bAutoSelect)
     912             :     {
     913           0 :         int status = clewInit(OPENCL_DLL_NAME);
     914           0 :         if (status < 0)
     915           0 :             return false;
     916             :         // The user installation's cache/ directory, as a system path, is what getDeviceSelection() receives as file name.
     917           0 :         OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
     918           0 :         rtl::Bootstrap::expandMacros(url);
     919           0 :         OUString path;
     920           0 :         osl::FileBase::getSystemPathFromFileURL(url,path);
     921           0 :         OString dsFileName = rtl::OUStringToOString(path, RTL_TEXTENCODING_UTF8);
     922           0 :         ds_device pSelectedDevice = sc::OpenCLDevice::getDeviceSelection(dsFileName.getStr(), bForceEvaluation);
     923           0 :         pDeviceId = pSelectedDevice.oclDeviceID;
     924             : 
     925             :     }
     926             : 
     927           0 :     if(OpenclDevice::gpuEnv.mpDevID == pDeviceId)
     928             :     {
     929             :         // we don't need to change anything
     930             :         // still the same device
     931           0 :         return pDeviceId != NULL;
     932             :     }
     933             :     // Initialised so that a failed query can never leave an indeterminate handle behind.
     934           0 :     cl_platform_id platformId = NULL;
     935             :     cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM,
     936           0 :             sizeof(platformId), &platformId, NULL);
     937           0 :     if(nState != CL_SUCCESS) { SAL_WARN("sc", "failed to set/switch opencl device"); return false; } // e.g. pDeviceId is NULL
     938             :     cl_context_properties cps[3];
     939           0 :     cps[0] = CL_CONTEXT_PLATFORM;
     940           0 :     cps[1] = (cl_context_properties) platformId;
     941           0 :     cps[2] = 0;
     942           0 :     cl_context context = clCreateContext( cps, 1, &pDeviceId, NULL, NULL, &nState );
     943             : 
     944           0 :     if(nState != CL_SUCCESS || context == NULL)
     945             :     {
     946           0 :         if(context != NULL)
     947           0 :             clReleaseContext(context);
     948             : 
     949             :         SAL_WARN("sc", "failed to set/switch opencl device");
     950           0 :         return false;
     951             :     }
     952             : 
     953             :     cl_command_queue command_queue = clCreateCommandQueue(
     954           0 :             context, pDeviceId, 0, &nState);
     955             : 
     956           0 :     if(command_queue == NULL || nState != CL_SUCCESS)
     957             :     {
     958           0 :         if(command_queue != NULL)
     959           0 :             clReleaseCommandQueue(command_queue);
     960             : 
     961           0 :         clReleaseContext(context);
     962             :         SAL_WARN("sc", "failed to set/switch opencl device");
     963           0 :         return false;
     964             :     }
     965             :     // The current environment is only torn down once the replacement context and queue both exist.
     966           0 :     OpenclDevice::releaseOpenclEnv(&OpenclDevice::gpuEnv);
     967             :     OpenCLEnv env;
     968           0 :     env.mpOclPlatformID = platformId;
     969           0 :     env.mpOclContext = context;
     970           0 :     env.mpOclDevsID = pDeviceId;
     971           0 :     env.mpOclCmdQueue = command_queue;
     972           0 :     OpenclDevice::initOpenclAttr(&env);
     973             :     // why do we need this at all?
     974           0 :     OpenclDevice::gpuEnv.mpArryDevsID = (cl_device_id*) malloc( sizeof(cl_device_id) );
     975           0 :     if(!OpenclDevice::gpuEnv.mpArryDevsID) return false; // never write through a failed allocation
     976           0 :     OpenclDevice::gpuEnv.mpArryDevsID[0] = pDeviceId;
     977           0 :     return !OpenclDevice::initOpenclRunEnv(0);
     978             : }
     979             : // Reports the indices of the device stored in OpenclDevice::gpuEnv.mpDevID via findDeviceInfoFromDeviceId(); the outputs are not written if clewInit() fails.
     980           0 : void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId)
     981             : {
     982           0 :     int status = clewInit(OPENCL_DLL_NAME);
     983           0 :     if (status < 0)
     984           0 :         return;
     985             :     // The lookup helper leaves both outputs unchanged when the device is not in the known platform list.
     986           0 :     cl_device_id id = OpenclDevice::gpuEnv.mpDevID;
     987           0 :     findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId);
     988             : }
     989             : 
     990           3 : }}
     991             : 
     992             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10