LCOV - code coverage report
Current view: top level - opencl/source - openclwrapper.cxx (source / functions) Hit Total Coverage
Test: commit c8344322a7af75b84dd3ca8f78b05543a976dfd5 Lines: 18 423 4.3 %
Date: 2015-06-13 12:38:46 Functions: 5 25 20.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #include <config_folders.h>
      11             : 
      12             : #include "opencl_device.hxx"
      13             : 
      14             : #include <comphelper/string.hxx>
      15             : #include <opencl/openclconfig.hxx>
      16             : #include <opencl/openclwrapper.hxx>
      17             : #include <osl/file.hxx>
      18             : #include <rtl/bootstrap.hxx>
      19             : #include <rtl/digest.h>
      20             : #include <rtl/strbuf.hxx>
      21             : #include <rtl/ustring.hxx>
      22             : #include <sal/config.h>
      23             : #include <sal/log.hxx>
      24             : 
      25             : #include <boost/scoped_array.hpp>
      26             : #include <unicode/regex.h>
      27             : 
      28             : #include <stdio.h>
      29             : #include <stdlib.h>
      30             : #include <string.h>
      31             : 
      32             : #include <cmath>
      33             : 
      34             : #ifdef _WIN32
      35             : #include <prewin.h>
      36             : #include <postwin.h>
      37             : #define OPENCL_DLL_NAME "OpenCL.dll"
      38             : #elif defined(MACOSX)
      39             : #define OPENCL_DLL_NAME NULL
      40             : #else
      41             : #define OPENCL_DLL_NAME "libOpenCL.so"
      42             : #endif
      43             : 
      44             : #define DEVICE_NAME_LENGTH 1024
      45             : #define DRIVER_VERSION_LENGTH 1024
      46             : #define PLATFORM_VERSION_LENGTH 1024
      47             : // Logs a warning and returns false from the enclosing function when status is not CL_SUCCESS.
      48             : #define CHECK_OPENCL(status,name) \
      49             : if( status != CL_SUCCESS )  \
      50             : { \
      51             :     SAL_WARN( "opencl", "OpenCL error code " << status << " at " SAL_DETAIL_WHERE " from " name ); \
      52             :     return false; \
      53             : }
      54             : 
      55             : using namespace std;
      56             : 
      57             : namespace opencl {
      58             : 
      59             : GPUEnv gpuEnv;
      60             : 
      61             : namespace {
      62             : 
      63             : bool bIsInited = false;
      64             : // Returns the MD5 digest of the length bytes at pData, encoded as two uppercase hex characters per digest byte.
      65           0 : OString generateMD5(const void* pData, size_t length)
      66             : {
      67             :     sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5];
      68             :     rtlDigestError aError = rtl_digest_MD5(pData, length,
      69           0 :             pBuffer, RTL_DIGEST_LENGTH_MD5);
      70             :     SAL_WARN_IF(aError != rtl_Digest_E_None, "opencl", "md5 generation failed");
      71             :     // A digest failure is only logged; pBuffer is hex-encoded below regardless.
      72           0 :     OStringBuffer aBuffer;
      73           0 :     const char* pString = "0123456789ABCDEF";
      74           0 :     for(size_t i = 0; i < RTL_DIGEST_LENGTH_MD5; ++i)
      75             :     {
      76           0 :         sal_uInt8 val = pBuffer[i];
      77           0 :         aBuffer.append(pString[val/16]); // high nibble
      78           0 :         aBuffer.append(pString[val%16]); // low nibble
      79             :     }
      80           0 :     return aBuffer.makeStringAndClear();
      81             : }
      82             : // Expands the bootstrap macro for the user installation's cache/ folder, creates that directory and returns its URL as UTF-8.
      83          52 : OString getCacheFolder()
      84             : {
      85          52 :     OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
      86          52 :     rtl::Bootstrap::expandMacros(url);
      87             :     // The result of create() is not checked.
      88          52 :     osl::Directory::create(url);
      89             : 
      90          52 :     return rtl::OUStringToOString(url, RTL_TEXTENCODING_UTF8);
      91             : }
      92             : // Cache folder URL, computed once when this file's namespace-scope objects are initialized.
      93          52 : OString maCacheFolder = getCacheFolder();
      94             : 
      95             : }
      96             : // Copies the global context, the first program and the currently selected command queue from gpuEnv into *envInfo.
      97           0 : void setKernelEnv( KernelEnv *envInfo )
      98             : {
      99           0 :     envInfo->mpkContext = gpuEnv.mpContext;
     100           0 :     envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
     101             :     // The queue index is only range-checked by the assert below.
     102             :     assert(gpuEnv.mnCmdQueuePos < OPENCL_CMDQUEUE_SIZE);
     103           0 :     envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue[gpuEnv.mnCmdQueuePos];
     104           0 : }
     105             : 
     106             : namespace {
     107             : // Returns maCacheFolder + clFileName (cut at its last ".cl") + "-" + MD5 of device name, driver version and platform version + ".bin".
     108           0 : OString createFileName(cl_device_id deviceId, const char* clFileName)
     109             : {
     110           0 :     OString fileName(clFileName);
     111           0 :     sal_Int32 nIndex = fileName.lastIndexOf(".cl");
     112           0 :     if(nIndex > 0)
     113           0 :         fileName = fileName.copy(0, nIndex);
     114             :     // None of the OpenCL query results below are checked; the text buffers start out zeroed.
     115           0 :     char deviceName[DEVICE_NAME_LENGTH] = {0};
     116             :     clGetDeviceInfo(deviceId, CL_DEVICE_NAME,
     117           0 :             sizeof(deviceName), deviceName, NULL);
     118             : 
     119           0 :     char driverVersion[DRIVER_VERSION_LENGTH] = {0};
     120             :     clGetDeviceInfo(deviceId, CL_DRIVER_VERSION,
     121           0 :             sizeof(driverVersion), driverVersion, NULL);
     122             :     // The platform handle is only needed to look up the platform version string.
     123             :     cl_platform_id platformId;
     124             :     clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM,
     125           0 :             sizeof(platformId), &platformId, NULL);
     126             : 
     127           0 :     char platformVersion[PLATFORM_VERSION_LENGTH] = {0};
     128             :     clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion),
     129           0 :             platformVersion, NULL);
     130             : 
     131             :     // create hash for deviceName + driver version + platform version
     132           0 :     OString aString = OString(deviceName) + driverVersion + platformVersion;
     133           0 :     OString aHash = generateMD5(aString.getStr(), aString.getLength());
     134             : 
     135           0 :     return maCacheFolder + fileName + "-" +
     136           0 :         aHash + ".bin";
     137             : }
     138             : // Opens the cached kernel binary of clFileName for reading, one file per device of context, in device order.
     139           0 : std::vector<boost::shared_ptr<osl::File> > binaryGenerated( const char * clFileName, cl_context context )
     140             : {
     141           0 :     size_t numDevices=0;
     142             :     // The result is empty if a context query fails and stops short at the first binary that cannot be opened.
     143           0 :     std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles;
     144             :     cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
     145           0 :             0, NULL, &numDevices );
     146           0 :     numDevices /= sizeof(cl_device_id); // the size query reports bytes, the loop needs a device count
     147             : 
     148           0 :     if(clStatus != CL_SUCCESS)
     149           0 :         return aGeneratedFiles;
     150             : 
     151             :     // grab the handles to all of the devices in the context.
     152           0 :     boost::scoped_array<cl_device_id> pArryDevsID(new cl_device_id[numDevices]);
     153             :     clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
     154           0 :             sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), NULL );
     155             : 
     156           0 :     if(clStatus != CL_SUCCESS)
     157           0 :         return aGeneratedFiles;
     158             :     // Each file name is derived from the device handle just queried from this context.
     159           0 :     for ( size_t i = 0; i < numDevices; i++ )
     160             :     {
     161           0 :         if ( pArryDevsID[i] != 0 )
     162             :         {
     163           0 :             OString fileName = createFileName(pArryDevsID[i], clFileName);
     164           0 :             osl::File* pNewFile = new osl::File(rtl::OStringToOUString(fileName, RTL_TEXTENCODING_UTF8));
     165           0 :             if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None)
     166             :             {
     167           0 :                 aGeneratedFiles.push_back(boost::shared_ptr<osl::File>(pNewFile));
     168             :                 SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: success");
     169             :             }
     170             :             else
     171             :             {
     172             :                 SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: FAIL");
     173           0 :                 delete pNewFile;
     174           0 :                 break;
     175           0 :             }
     176             :         }
     177             :     }
     178             : 
     179           0 :     return aGeneratedFiles;
     180             : }
     181             : // Opens rFileName (a file URL) with the Write|Create flags and writes numBytes of binary; returns false if opening or writing fails.
     182           0 : bool writeBinaryToFile( const OString& rFileName, const char* binary, size_t numBytes )
     183             : {
     184           0 :     osl::File file(rtl::OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8));
     185             :     osl::FileBase::RC status = file.open(
     186           0 :             osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
     187             : 
     188           0 :     if(status != osl::FileBase::E_None)
     189           0 :         return false;
     190             : 
     191           0 :     sal_uInt64 nBytesWritten = 0;
     192           0 :     status = file.write( binary, numBytes, nBytesWritten );
     193             : 
     194             :     assert(numBytes == nBytesWritten);
     195             :     // Report a failed or short write instead of claiming success when asserts are compiled out.
     196           0 :     return status == osl::FileBase::E_None && numBytes == nBytesWritten;
     197             : }
     198             : 
     199             : }
     200             : // Writes the compiled binary of program for each of its devices to the cache file named by createFileName().
     201           0 : bool generatBinFromKernelSource( cl_program program, const char * clFileName )
     202             : {
     203             :     cl_uint numDevices;
     204             :     // Returns false if a clGetProgramInfo query fails; individual file write failures are only logged.
     205             :     cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
     206           0 :                    sizeof(numDevices), &numDevices, NULL );
     207           0 :     CHECK_OPENCL( clStatus, "clGetProgramInfo" );
     208             : 
     209           0 :     std::vector<cl_device_id> pArryDevsID(numDevices);
     210             :     /* grab the handles to all of the devices in the program. */
     211             :     clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
     212           0 :                    sizeof(cl_device_id) * numDevices, &pArryDevsID[0], NULL );
     213           0 :     CHECK_OPENCL( clStatus, "clGetProgramInfo" );
     214             : 
     215             :     /* figure out the sizes of each of the binaries. */
     216           0 :     std::vector<size_t> binarySizes(numDevices);
     217             : 
     218             :     clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
     219           0 :                    sizeof(size_t) * numDevices, &binarySizes[0], NULL );
     220           0 :     CHECK_OPENCL( clStatus, "clGetProgramInfo" );
     221             : 
     222             :     /* copy over all of the generated binaries. */
     223           0 :     boost::scoped_array<char*> binaries(new char*[numDevices]);
     224             : 
     225           0 :     for ( size_t i = 0; i < numDevices; i++ )
     226             :     {
     227           0 :         if ( binarySizes[i] != 0 )
     228             :         {
     229           0 :             binaries[i] = new char[binarySizes[i]];
     230             :         }
     231             :         else
     232             :         {
     233           0 :             binaries[i] = NULL;
     234             :         }
     235             :     }
     236             :     // From here on there is no early return, so the buffers allocated above always reach the release loop.
     237             :     clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
     238           0 :                    sizeof(char *) * numDevices, binaries.get(), NULL );
     239           0 :     const bool bGotBinaries = ( clStatus == CL_SUCCESS );
     240             :     SAL_WARN_IF( !bGotBinaries, "opencl", "OpenCL error code " << clStatus << " at " SAL_DETAIL_WHERE " from " "clGetProgramInfo" );
     241             :     /* dump out each binary into its own separate file. */
     242           0 :     for ( size_t i = 0; bGotBinaries && i < numDevices; i++ )
     243             :     {
     244             : 
     245           0 :         if ( binarySizes[i] != 0 )
     246             :         {
     247           0 :             OString fileName = createFileName(pArryDevsID[i], clFileName);
     248           0 :             if ( !writeBinaryToFile( fileName,
     249           0 :                         binaries[i], binarySizes[i] ) )
     250             :                 SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': FAIL");
     251             :             else
     252           0 :                 SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': success");
     253             :         }
     254             :     }
     255             : 
     256             :     // Release all resources and memory
     257           0 :     for ( size_t i = 0; i < numDevices; i++ )
     258             :     {
     259           0 :         delete[] binaries[i];
     260             :     }
     261             : 
     262           0 :     return bGotBinaries;
     263             : }
     264             : 
     265             : namespace {
     266             : // Bundle of OpenCL handles (platform, context, device, command queues) that initOpenCLAttr() copies into gpuEnv.
     267             : struct OpenCLEnv
     268             : {
     269             :     cl_platform_id mpOclPlatformID;
     270             :     cl_context mpOclContext;
     271             :     cl_device_id mpOclDevsID;
     272             :     cl_command_queue mpOclCmdQueue[OPENCL_CMDQUEUE_SIZE];
     273             : };
     274             : // Copies the handles in *env into the global gpuEnv and marks it user-created; returns true only if that had already been done.
     275           0 : bool initOpenCLAttr( OpenCLEnv * env )
     276             : {
     277           0 :     if ( gpuEnv.mnIsUserCreated )
     278           0 :         return true;
     279             :     // First call: take over the context, platform and device handles.
     280           0 :     gpuEnv.mpContext = env->mpOclContext;
     281           0 :     gpuEnv.mpPlatformID = env->mpOclPlatformID;
     282           0 :     gpuEnv.mpDevID = env->mpOclDevsID;
     283             : 
     284           0 :     gpuEnv.mnIsUserCreated = 1;
     285             : 
     286           0 :     for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
     287           0 :         gpuEnv.mpCmdQueue[i] = env->mpOclCmdQueue[i];
     288             : 
     289           0 :     gpuEnv.mnCmdQueuePos = 0; // default to 0.
     290             :     // Note the convention: false is returned after the handles were copied.
     291           0 :     return false;
     292             : }
     293             : // Releases the command queues and context held in the global gpuEnv and frees gpuInfo's device array; does nothing unless bIsInited is set.
     294           0 : void releaseOpenCLEnv( GPUEnv *gpuInfo )
     295             : {
     296           0 :     if ( !bIsInited )
     297             :     {
     298           0 :         return;
     299             :     }
     300             :     // NOTE(review): the handles below are released on the global gpuEnv, not on gpuInfo - presumably callers pass &gpuEnv; confirm.
     301           0 :     for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
     302             :     {
     303           0 :         if (gpuEnv.mpCmdQueue[i])
     304             :         {
     305           0 :             clReleaseCommandQueue(gpuEnv.mpCmdQueue[i]);
     306           0 :             gpuEnv.mpCmdQueue[i] = NULL;
     307             :         }
     308             :     }
     309           0 :     gpuEnv.mnCmdQueuePos = 0;
     310             : 
     311           0 :     if ( gpuEnv.mpContext )
     312             :     {
     313           0 :         clReleaseContext( gpuEnv.mpContext );
     314           0 :         gpuEnv.mpContext = NULL;
     315             :     }
     316           0 :     bIsInited = false;
     317           0 :     gpuInfo->mnIsUserCreated = 0;
     318           0 :     free( gpuInfo->mpArryDevsID );
     319             :     gpuInfo->mpArryDevsID = NULL; // do not leave a dangling pointer behind, like the handles above
     320           0 :     return;
     321             : }
     322             : // Builds program slot idx with buildOption, passing a device count of 1; on failure writes the build log to maCacheFolder + "kernel-build.log" and returns false.
     323           0 : bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
     324             : {
     325             :     cl_int clStatus;
     326             :     //char options[512];
     327             :     // create a cl program executable for all the devices specified
     328           0 :     if (!gpuInfo->mnIsUserCreated)
     329             :     {
     330             :         clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
     331           0 :                        buildOption, NULL, NULL);
     332             :     }
     333             :     else
     334             :     {
     335             :         clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
     336           0 :                        buildOption, NULL, NULL);
     337             :     }
     338             :     // Failure path: query the build log size, then the log text, for the same device the build used.
     339           0 :     if ( clStatus != CL_SUCCESS )
     340             :     {
     341             :         size_t length;
     342           0 :         if ( !gpuInfo->mnIsUserCreated )
     343             :         {
     344           0 :             clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
     345           0 :                            CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
     346             :         }
     347             :         else
     348             :         {
     349             :             clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
     350           0 :                            CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
     351             :         }
     352           0 :         if ( clStatus != CL_SUCCESS )
     353             :         {
     354           0 :             return false;
     355             :         }
     356             : 
     357           0 :         boost::scoped_array<char> buildLog(new char[length]);
     358           0 :         if ( !gpuInfo->mnIsUserCreated )
     359             :         {
     360           0 :             clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
     361           0 :                            CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
     362             :         }
     363             :         else
     364             :         {
     365             :             clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
     366           0 :                            CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
     367             :         }
     368           0 :         if ( clStatus != CL_SUCCESS )
     369             :         {
     370           0 :             return false;
     371             :         }
     372             :         // Persist the log next to the cached binaries.
     373           0 :         OString aBuildLogFileURL = maCacheFolder + "kernel-build.log";
     374           0 :         osl::File aBuildLogFile(rtl::OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8));
     375             :         osl::FileBase::RC status = aBuildLogFile.open(
     376           0 :                 osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
     377             : 
     378           0 :         if(status != osl::FileBase::E_None)
     379           0 :             return false;
     380             :         // The result of write() is not checked; the function reports failure either way.
     381           0 :         sal_uInt64 nBytesWritten = 0;
     382           0 :         aBuildLogFile.write( buildLog.get(), length, nBytesWritten );
     383             : 
     384           0 :         return false;
     385             :     }
     386             : 
     387           0 :     return true;
     388             : }
     389             : 
     390             : }
     391             : // Creates program slot idx from the cached binaries of filename when there is one per context device, then builds it with buildOption.
     392           0 : bool buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
     393             : {
     394             :     size_t numDevices = 0;
     395             :     cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
     396           0 :             0, NULL, &numDevices );
     397           0 :     numDevices /= sizeof(cl_device_id); // the size query reports bytes
     398           0 :     CHECK_OPENCL( clStatus, "clGetContextInfo" );
     399             : 
     400             :     std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated(
     401           0 :             filename, gpuInfo->mpContext );
     402             :     // Without a complete set of cached binaries, only the existing content of the program slot is checked and built.
     403           0 :     if (aGeneratedFiles.size() == numDevices)
     404             :     {
     405           0 :         boost::scoped_array<size_t> length(new size_t[numDevices]);
     406           0 :         boost::scoped_array<unsigned char*> pBinary(new unsigned char*[numDevices]);
     407           0 :         for(size_t i = 0; i < numDevices; ++i)
     408             :         {
     409             :             sal_uInt64 nSize;
     410           0 :             aGeneratedFiles[i]->getSize(nSize);
     411           0 :             unsigned char* binary = new unsigned char[nSize];
     412             :             sal_uInt64 nBytesRead;
     413           0 :             aGeneratedFiles[i]->read(binary, nSize, nBytesRead);
     414           0 :             if(nSize != nBytesRead)
     415             :                 assert(false);
     416             : 
     417           0 :             length[i] = nBytesRead;
     418             : 
     419           0 :             pBinary[i] = binary;
     420             :         }
     421             : 
     422             :         // grab the handles to all of the devices in the context.
     423           0 :         boost::scoped_array<cl_device_id> pArryDevsID(new cl_device_id[numDevices]);
     424             :         clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
     425           0 :                        sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), NULL );
     426             : 
     427           0 :         if(clStatus != CL_SUCCESS)
     428             :         {
     429           0 :             for(size_t i = 0; i < numDevices; ++i)
     430             :             {
     431           0 :                 delete[] pBinary[i];
     432             :             }
     433           0 :             return false;
     434             :         }
     435             :         // clCreateProgramWithBinary reports one load status per device, so provide numDevices slots.
     436             :         boost::scoped_array<cl_int> binary_status(new cl_int[numDevices]);
     437             : 
     438             :         gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
     439           0 :                                            pArryDevsID.get(), length.get(), const_cast<const unsigned char**>(pBinary.get()),
     440           0 :                                            binary_status.get(), &clStatus );
     441           0 :         for(size_t i = 0; i < numDevices; ++i) // free the host copies before the status check so the failure path does not leak them
     442             :         {
     443           0 :             delete[] pBinary[i];
     444           0 :         }
     445           0 :         if(clStatus != CL_SUCCESS)
     446             :         {
     447             :             // something went wrong, fall back to compiling from source
     448           0 :             return false;
     449             :         }
     450             :         SAL_INFO("opencl", "Created program " << gpuInfo->mpArryPrograms[idx] << " from binary");
     451             :     }
     452             : 
     453           0 :     if ( !gpuInfo->mpArryPrograms[idx] )
     454             :     {
     455           0 :         return false;
     456             :     }
     457           0 :     return buildProgram(buildOption, gpuInfo, idx);
     458             : }
     459             : 
     460             : namespace {
     461             : // Sets bKhrFp64 or bAmdFp64 when the device's extension string contains "cl_khr_fp64" or, failing that, "cl_amd_fp64".
     462           0 : void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64)
     463             : {
     464           0 :     bKhrFp64 = false;
     465           0 :     bAmdFp64 = false;
     466             :     // Both flags stay false if either extension query fails.
     467             :     // Check device extensions for double type
     468           0 :     size_t aDevExtInfoSize = 0;
     469             : 
     470           0 :     cl_int clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize );
     471           0 :     if( clStatus != CL_SUCCESS )
     472           0 :         return;
     473             : 
     474           0 :     boost::scoped_array<char> pExtInfo(new char[aDevExtInfoSize]);
     475             : 
     476             :     clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS,
     477           0 :                    sizeof(char) * aDevExtInfoSize, pExtInfo.get(), NULL);
     478             : 
     479           0 :     if( clStatus != CL_SUCCESS )
     480           0 :         return;
     481             :     // The KHR extension takes precedence; the AMD one is only looked for when KHR is absent.
     482           0 :     if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) )
     483             :     {
     484           0 :         bKhrFp64 = true;
     485             :     }
     486             :     else
     487             :     {
     488             :         // Check if cl_amd_fp64 extension is supported
     489           0 :         if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) )
     490           0 :             bAmdFp64 = true;
     491           0 :     }
     492             : }
     493             : // Stores the double-precision support flags of the first device in gpuInfo->mpArryDevsID into *gpuInfo; always returns false.
     494           0 : bool initOpenCLRunEnv( GPUEnv *gpuInfo )
     495             : {
     496           0 :     bool bKhrFp64 = false;
     497           0 :     bool bAmdFp64 = false;
     498             : 
     499           0 :     checkDeviceForDoubleSupport(gpuInfo->mpArryDevsID[0], bKhrFp64, bAmdFp64);
     500             : 
     501           0 :     gpuInfo->mnKhrFp64Flag = bKhrFp64;
     502           0 :     gpuInfo->mnAmdFp64Flag = bAmdFp64;
     503             : 
     504           0 :     return false;
     505             : }
     506             : // One-time setup of the global gpuEnv; returns true on error (argc out of range or device setup failed) and false otherwise.
     507           0 : bool initOpenCLRunEnv( int argc )
     508             : {
     509           0 :     if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) )
     510           0 :         return true;
     511             :     // Everything below runs only until bIsInited has been set.
     512           0 :     if ( !bIsInited )
     513             :     {
     514           0 :         if ( !gpuEnv.mnIsUserCreated )
     515           0 :             memset( &gpuEnv, 0, sizeof(gpuEnv) );
     516             :         // NOTE(review): after that memset, initOpenCLRunEnv(&gpuEnv) reads gpuEnv.mpArryDevsID[0] - confirm this path is never taken with a null array.
     517             :         //initialize devices, context, command_queue
     518           0 :         bool status = initOpenCLRunEnv( &gpuEnv );
     519           0 :         if ( status )
     520             :         {
     521           0 :             return true;
     522             :         }
     523             :         //initialize program, kernelName, kernelCount
     524           0 :         if( getenv( "SC_FLOAT" ) ) // setting SC_FLOAT in the environment clears both double-support flags
     525             :         {
     526           0 :             gpuEnv.mnKhrFp64Flag = false;
     527           0 :             gpuEnv.mnAmdFp64Flag = false;
     528             :         }
     529           0 :         if( gpuEnv.mnKhrFp64Flag )
     530             :         {
     531             :             SAL_INFO("opencl", "Use Khr double");
     532             :         }
     533           0 :         else if( gpuEnv.mnAmdFp64Flag )
     534             :         {
     535             :             SAL_INFO("opencl", "Use AMD double type");
     536             :         }
     537             :         else
     538             :         {
     539             :             SAL_INFO("opencl", "USE float type");
     540             :         }
     541           0 :         bIsInited = true;
     542             :     }
     543           0 :     return false;
     544             : }
     545             : // Queries name, vendor, memory size, clock frequency, compute units and driver of aDeviceId and may append the device to rPlatformInfo.maDevices.
     546             : // based on crashes and hanging during kernel compilation
     547           0 : void createDeviceInfo(cl_device_id aDeviceId, OpenCLPlatformInfo& rPlatformInfo)
     548             : {
     549           0 :     OpenCLDeviceInfo aDeviceInfo;
     550           0 :     aDeviceInfo.device = aDeviceId;
     551             :     // Any failed query below makes the function return without adding the device.
     552             :     char pName[DEVICE_NAME_LENGTH];
     553           0 :     cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, NULL);
     554           0 :     if(nState != CL_SUCCESS)
     555           0 :         return;
     556             : 
     557           0 :     aDeviceInfo.maName = OUString::createFromAscii(pName);
     558             : 
     559             :     char pVendor[DEVICE_NAME_LENGTH];
     560           0 :     nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, NULL);
     561           0 :     if(nState != CL_SUCCESS)
     562           0 :         return;
     563             : 
     564           0 :     aDeviceInfo.maVendor = OUString::createFromAscii(pVendor);
     565             : 
     566             :     cl_ulong nMemSize;
     567           0 :     nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, NULL);
     568           0 :     if(nState != CL_SUCCESS)
     569           0 :         return;
     570             : 
     571           0 :     aDeviceInfo.mnMemory = nMemSize;
     572             : 
     573             :     cl_uint nClockFrequency;
     574           0 :     nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, NULL);
     575           0 :     if(nState != CL_SUCCESS)
     576           0 :         return;
     577             : 
     578           0 :     aDeviceInfo.mnFrequency = nClockFrequency;
     579             :     // The compute unit count is stored into aDeviceInfo further down, after the double-support check.
     580             :     cl_uint nComputeUnits;
     581           0 :     nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, NULL);
     582           0 :     if(nState != CL_SUCCESS)
     583           0 :         return;
     584             : 
     585             :     char pDriver[DEVICE_NAME_LENGTH];
     586           0 :     nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, NULL);
     587             : 
     588           0 :     if(nState != CL_SUCCESS)
     589           0 :         return;
     590             : 
     591           0 :     aDeviceInfo.maDriver = OUString::createFromAscii(pDriver);
     592             : 
     593           0 :     bool bKhrFp64 = false;
     594           0 :     bool bAmdFp64 = false;
     595           0 :     checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64);
     596             : 
     597             :     // only list devices that support double
     598           0 :     if(!bKhrFp64 && !bAmdFp64)
     599           0 :         return;
     600             : 
     601           0 :     aDeviceInfo.mnComputeUnits = nComputeUnits;
     602             :     // The device is added only when checkImplementation() returns false for it.
     603           0 :     if(!OpenCLConfig::get().checkImplementation(rPlatformInfo, aDeviceInfo))
     604           0 :         rPlatformInfo.maDevices.push_back(aDeviceInfo);
     605             : }
     606             : 
     607           0 : bool createPlatformInfo(cl_platform_id nPlatformId, OpenCLPlatformInfo& rPlatformInfo)
     608             : {
     609           0 :     rPlatformInfo.platform = nPlatformId;
     610             :     char pName[64];
     611             :     cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64,
     612           0 :              pName, NULL);
     613           0 :     if(nState != CL_SUCCESS)
     614           0 :         return false;
     615           0 :     rPlatformInfo.maName = OUString::createFromAscii(pName);
     616             : 
     617             :     char pVendor[64];
     618             :     nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64,
     619           0 :              pVendor, NULL);
     620           0 :     if(nState != CL_SUCCESS)
     621           0 :         return false;
     622             : 
     623           0 :     rPlatformInfo.maVendor = OUString::createFromAscii(pVendor);
     624             : 
     625             :     cl_uint nDevices;
     626           0 :     nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, NULL, &nDevices);
     627           0 :     if(nState != CL_SUCCESS)
     628           0 :         return false;
     629             : 
     630             :     // memory leak that does not matter
     631             :     // memory is stored in static variable that lives through the whole program
     632           0 :     cl_device_id* pDevices = new cl_device_id[nDevices];
     633           0 :     nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices, NULL);
     634           0 :     if(nState != CL_SUCCESS)
     635           0 :         return false;
     636             : 
     637           0 :     for(size_t i = 0; i < nDevices; ++i)
     638             :     {
     639           0 :         createDeviceInfo(pDevices[i], rPlatformInfo);
     640             :     }
     641             : 
     642           0 :     return true;
     643             : }
     644             : 
     645             : }
     646             : 
     647         224 : const std::vector<OpenCLPlatformInfo>& fillOpenCLInfo()
     648             : {
     649         224 :     static std::vector<OpenCLPlatformInfo> aPlatforms;
     650         224 :     if(!aPlatforms.empty())
     651           0 :         return aPlatforms;
     652             : 
     653         224 :     int status = clewInit(OPENCL_DLL_NAME);
     654         224 :     if (status < 0)
     655           0 :         return aPlatforms;
     656             : 
     657             :     cl_uint nPlatforms;
     658         224 :     cl_int nState = clGetPlatformIDs(0, NULL, &nPlatforms);
     659             : 
     660         224 :     if(nState != CL_SUCCESS)
     661         224 :         return aPlatforms;
     662             : 
     663             :     // memory leak that does not matter,
     664             :     // memory is stored in static instance aPlatforms
     665           0 :     cl_platform_id* pPlatforms = new cl_platform_id[nPlatforms];
     666           0 :     nState = clGetPlatformIDs(nPlatforms, pPlatforms, NULL);
     667             : 
     668           0 :     if(nState != CL_SUCCESS)
     669           0 :         return aPlatforms;
     670             : 
     671           0 :     for(size_t i = 0; i < nPlatforms; ++i)
     672             :     {
     673           0 :         OpenCLPlatformInfo aPlatformInfo;
     674           0 :         if(createPlatformInfo(pPlatforms[i], aPlatformInfo))
     675           0 :             aPlatforms.push_back(aPlatformInfo);
     676           0 :     }
     677             : 
     678           0 :     return aPlatforms;
     679             : }
     680             : 
     681             : namespace {
     682             : 
     683           0 : cl_device_id findDeviceIdByDeviceString(const OUString& rString, const std::vector<OpenCLPlatformInfo>& rPlatforms)
     684             : {
     685           0 :     std::vector<OpenCLPlatformInfo>::const_iterator it = rPlatforms.begin(), itEnd = rPlatforms.end();
     686           0 :     for(; it != itEnd; ++it)
     687             :     {
     688           0 :         std::vector<OpenCLDeviceInfo>::const_iterator itr = it->maDevices.begin(), itrEnd = it->maDevices.end();
     689           0 :         for(; itr != itrEnd; ++itr)
     690             :         {
     691           0 :             OUString aDeviceId = it->maVendor + " " + itr->maName;
     692           0 :             if(rString == aDeviceId)
     693             :             {
     694           0 :                 return static_cast<cl_device_id>(itr->device);
     695             :             }
     696           0 :         }
     697             :     }
     698             : 
     699           0 :     return NULL;
     700             : }
     701             : 
     702           0 : void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId)
     703             : {
     704             :     cl_platform_id platformId;
     705             :     cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM,
     706           0 :             sizeof(platformId), &platformId, NULL);
     707             : 
     708           0 :     if(nState != CL_SUCCESS)
     709           0 :         return;
     710             : 
     711           0 :     const std::vector<OpenCLPlatformInfo>& rPlatforms = fillOpenCLInfo();
     712           0 :     for(size_t i = 0; i < rPlatforms.size(); ++i)
     713             :     {
     714           0 :         cl_platform_id platId = static_cast<cl_platform_id>(rPlatforms[i].platform);
     715           0 :         if(platId != platformId)
     716           0 :             continue;
     717             : 
     718           0 :         for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j)
     719             :         {
     720           0 :             cl_device_id id = static_cast<cl_device_id>(rPlatforms[i].maDevices[j].device);
     721           0 :             if(id == aDeviceId)
     722             :             {
     723           0 :                 rDeviceId = j;
     724           0 :                 rPlatformId = i;
     725           0 :                 return;
     726             :             }
     727             :         }
     728             :     }
     729             : }
     730             : 
     731             : }
     732             : 
     733         224 : bool switchOpenCLDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation)
     734             : {
     735         224 :     if(fillOpenCLInfo().empty())
     736         224 :         return false;
     737             : 
     738           0 :     cl_device_id pDeviceId = NULL;
     739           0 :     if(pDevice)
     740           0 :         pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo());
     741             : 
     742           0 :     if(!pDeviceId || bAutoSelect)
     743             :     {
     744           0 :         int status = clewInit(OPENCL_DLL_NAME);
     745           0 :         if (status < 0)
     746           0 :             return false;
     747             : 
     748           0 :         OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
     749           0 :         rtl::Bootstrap::expandMacros(url);
     750           0 :         OUString path;
     751           0 :         osl::FileBase::getSystemPathFromFileURL(url,path);
     752           0 :         OString dsFileName = rtl::OUStringToOString(path, RTL_TEXTENCODING_UTF8);
     753           0 :         ds_device pSelectedDevice = getDeviceSelection(dsFileName.getStr(), bForceEvaluation);
     754           0 :         pDeviceId = pSelectedDevice.oclDeviceID;
     755             : 
     756             :     }
     757             : 
     758           0 :     if(gpuEnv.mpDevID == pDeviceId)
     759             :     {
     760             :         // we don't need to change anything
     761             :         // still the same device
     762           0 :         return pDeviceId != NULL;
     763             :     }
     764             : 
     765             :     cl_platform_id platformId;
     766             :     cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM,
     767           0 :             sizeof(platformId), &platformId, NULL);
     768             : 
     769             :     cl_context_properties cps[3];
     770           0 :     cps[0] = CL_CONTEXT_PLATFORM;
     771           0 :     cps[1] = reinterpret_cast<cl_context_properties>(platformId);
     772           0 :     cps[2] = 0;
     773           0 :     cl_context context = clCreateContext( cps, 1, &pDeviceId, NULL, NULL, &nState );
     774           0 :     if (nState != CL_SUCCESS)
     775             :         SAL_WARN("opencl", "clCreateContext failed: " << nState);
     776             : 
     777           0 :     if(nState != CL_SUCCESS || context == NULL)
     778             :     {
     779           0 :         if(context != NULL)
     780           0 :             clReleaseContext(context);
     781             : 
     782             :         SAL_WARN("opencl", "failed to set/switch opencl device");
     783           0 :         return false;
     784             :     }
     785             :     SAL_INFO("opencl", "Created context " << context << " for platform " << platformId << ", device " << pDeviceId);
     786             : 
     787             :     cl_command_queue command_queue[OPENCL_CMDQUEUE_SIZE];
     788           0 :     for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
     789             :     {
     790             :         command_queue[i] = clCreateCommandQueue(
     791           0 :             context, pDeviceId, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &nState);
     792           0 :         if (nState != CL_SUCCESS)
     793             :             SAL_WARN("opencl", "clCreateCommandQueue failed: " << nState);
     794             : 
     795           0 :         if (command_queue[i] == NULL || nState != CL_SUCCESS)
     796             :         {
     797             :             // Release all command queues created so far.
     798           0 :             for (int j = 0; j <= i; ++j)
     799             :             {
     800           0 :                 if (command_queue[j])
     801             :                 {
     802           0 :                     clReleaseCommandQueue(command_queue[j]);
     803           0 :                     command_queue[j] = NULL;
     804             :                 }
     805             :             }
     806             : 
     807           0 :             clReleaseContext(context);
     808             :             SAL_WARN("opencl", "failed to set/switch opencl device");
     809           0 :             return false;
     810             :         }
     811             : 
     812             :         SAL_INFO("opencl", "Created command queue " << command_queue[i] << " for context " << context);
     813             :     }
     814             : 
     815           0 :     setOpenCLCmdQueuePosition(0); // Call this just to avoid the method being deleted from unused function deleter.
     816             : 
     817           0 :     releaseOpenCLEnv(&gpuEnv);
     818             :     OpenCLEnv env;
     819           0 :     env.mpOclPlatformID = platformId;
     820           0 :     env.mpOclContext = context;
     821           0 :     env.mpOclDevsID = pDeviceId;
     822             : 
     823           0 :     for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
     824           0 :         env.mpOclCmdQueue[i] = command_queue[i];
     825             : 
     826           0 :     initOpenCLAttr(&env);
     827             : 
     828             :     // why do we need this at all?
     829             : 
     830             :     // (Assuming the above question refers to the mpArryDevsID
     831             :     // initialisation below.) Because otherwise the code crashes in
     832             :     // initOpenCLRunEnv(). Confused? You should be.
     833             : 
     834           0 :     gpuEnv.mpArryDevsID = static_cast<cl_device_id*>(malloc( sizeof(cl_device_id) ));
     835           0 :     gpuEnv.mpArryDevsID[0] = pDeviceId;
     836             : 
     837           0 :     return !initOpenCLRunEnv(0);
     838             : }
     839             : 
     840           0 : void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId)
     841             : {
     842           0 :     int status = clewInit(OPENCL_DLL_NAME);
     843           0 :     if (status < 0)
     844           0 :         return;
     845             : 
     846           0 :     cl_device_id id = gpuEnv.mpDevID;
     847           0 :     findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId);
     848             : }
     849             : 
     850           0 : void setOpenCLCmdQueuePosition( int nPos )
     851             : {
     852           0 :     if (nPos < 0 || nPos >= OPENCL_CMDQUEUE_SIZE)
     853             :         // Out of range. Ignore this.
     854           0 :         return;
     855             : 
     856           0 :     gpuEnv.mnCmdQueuePos = nPos;
     857             : }
     858             : 
     859           0 : const char* errorString(cl_int nError)
     860             : {
     861             : #define CASE(val) case CL_##val: return #val
     862           0 :     switch (nError)
     863             :     {
     864           0 :         CASE(SUCCESS);
     865           0 :         CASE(DEVICE_NOT_FOUND);
     866           0 :         CASE(DEVICE_NOT_AVAILABLE);
     867           0 :         CASE(COMPILER_NOT_AVAILABLE);
     868           0 :         CASE(MEM_OBJECT_ALLOCATION_FAILURE);
     869           0 :         CASE(OUT_OF_RESOURCES);
     870           0 :         CASE(OUT_OF_HOST_MEMORY);
     871           0 :         CASE(PROFILING_INFO_NOT_AVAILABLE);
     872           0 :         CASE(MEM_COPY_OVERLAP);
     873           0 :         CASE(IMAGE_FORMAT_MISMATCH);
     874           0 :         CASE(IMAGE_FORMAT_NOT_SUPPORTED);
     875           0 :         CASE(BUILD_PROGRAM_FAILURE);
     876           0 :         CASE(MAP_FAILURE);
     877           0 :         CASE(INVALID_VALUE);
     878           0 :         CASE(INVALID_DEVICE_TYPE);
     879           0 :         CASE(INVALID_PLATFORM);
     880           0 :         CASE(INVALID_DEVICE);
     881           0 :         CASE(INVALID_CONTEXT);
     882           0 :         CASE(INVALID_QUEUE_PROPERTIES);
     883           0 :         CASE(INVALID_COMMAND_QUEUE);
     884           0 :         CASE(INVALID_HOST_PTR);
     885           0 :         CASE(INVALID_MEM_OBJECT);
     886           0 :         CASE(INVALID_IMAGE_FORMAT_DESCRIPTOR);
     887           0 :         CASE(INVALID_IMAGE_SIZE);
     888           0 :         CASE(INVALID_SAMPLER);
     889           0 :         CASE(INVALID_BINARY);
     890           0 :         CASE(INVALID_BUILD_OPTIONS);
     891           0 :         CASE(INVALID_PROGRAM);
     892           0 :         CASE(INVALID_PROGRAM_EXECUTABLE);
     893           0 :         CASE(INVALID_KERNEL_NAME);
     894           0 :         CASE(INVALID_KERNEL_DEFINITION);
     895           0 :         CASE(INVALID_KERNEL);
     896           0 :         CASE(INVALID_ARG_INDEX);
     897           0 :         CASE(INVALID_ARG_VALUE);
     898           0 :         CASE(INVALID_ARG_SIZE);
     899           0 :         CASE(INVALID_KERNEL_ARGS);
     900           0 :         CASE(INVALID_WORK_DIMENSION);
     901           0 :         CASE(INVALID_WORK_GROUP_SIZE);
     902           0 :         CASE(INVALID_WORK_ITEM_SIZE);
     903           0 :         CASE(INVALID_GLOBAL_OFFSET);
     904           0 :         CASE(INVALID_EVENT_WAIT_LIST);
     905           0 :         CASE(INVALID_EVENT);
     906           0 :         CASE(INVALID_OPERATION);
     907           0 :         CASE(INVALID_GL_OBJECT);
     908           0 :         CASE(INVALID_BUFFER_SIZE);
     909           0 :         CASE(INVALID_MIP_LEVEL);
     910           0 :         CASE(INVALID_GLOBAL_WORK_SIZE);
     911             :         default:
     912           0 :             return "Unknown OpenCL error code";
     913             :     }
     914             : #undef CASE
     915             : }
     916             : 
     917         156 : }
     918             : 
     919             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11