LCOV - code coverage report
Current view: top level - opencl/source - opencl_device.cxx (source / functions) Hit Total Coverage
Test: commit c8344322a7af75b84dd3ca8f78b05543a976dfd5 Lines: 2 215 0.9 %
Date: 2015-06-13 12:38:46 Functions: 2 15 13.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #ifdef _WIN32
      11             : #include <prewin.h>
      12             : #include <postwin.h>
      13             : #elif defined __MACH__
      14             : #include <mach/mach_time.h>
      15             : #else
      16             : #include <sys/time.h>
      17             : #endif
      18             : 
      19             : #include <time.h>
      20             : #include <math.h>
      21             : #include <float.h>
      22             : #include <iostream>
      23             : #include <sstream>
      24             : #include <vector>
      25             : 
      26             : #include <boost/scoped_ptr.hpp>
      27             : 
      28             : #include <comphelper/random.hxx>
      29             : #include <opencl/openclconfig.hxx>
      30             : #include <opencl/openclwrapper.hxx>
      31             : #include <opencl/platforminfo.hxx>
      32             : #include <sal/log.hxx>
      33             : 
      34             : #include "opencl_device.hxx"
      35             : 
      36             : #define INPUTSIZE  15360
      37             : #define OUTPUTSIZE 15360
      38             : 
      39             : #define STRINGIFY(...) #__VA_ARGS__"\n"
      40             : 
      41             : #define DS_CHECK_STATUS(status, name) \
      42             :     if (CL_SUCCESS != status) \
      43             :     { \
      44             :     SAL_INFO("opencl.device", "Error code is " << status << " at " name); \
      45             :     }
      46             : 
      47             : namespace opencl {
      48             : 
      49             : bool bIsDeviceSelected = false; // checked by getDeviceSelection() to decide whether device selection has to run
      50             : ds_device selectedDevice; // device record that getDeviceSelection() fills in and returns
      51             : 
      52             : struct LibreOfficeDeviceScore // benchmark result stored behind ds_device::score; copied bytewise by serializeScore()/deserializeScore()
      53             : {
      54             :     double fTime;     // elapsed benchmark time from timerCurrent() (scaled by 10 on the native CPU path), or DBL_MAX for a device that must not be picked; smaller means faster
      55             :     bool bNoCLErrors; // set to false by evaluateScoreForDevice() when the OpenCL benchmark program could not be built
      56             : };
      57             : 
      58           0 : struct LibreOfficeDeviceEvaluationIO // host-side data of the micro-benchmark; handed to evaluateScoreForDevice() through its void* evalData argument
      59             : {
      60             :     std::vector<double> input0; // the benchmark takes the average of these values
      61             :     std::vector<double> input1; // the benchmark takes the minimum of these values
      62             :     std::vector<double> input2; // multiplied element by element with input3 and summed up
      63             :     std::vector<double> input3; // second factor of the sum of products
      64             :     std::vector<double> output; // receives one benchmark result per element
      65             :     unsigned long inputSize; // number of elements read from each input vector
      66             :     unsigned long outputSize; // number of elements written to output
      67             : };
      68             : 
      69             : struct timer // start timestamp written by timerStart() and read back by timerCurrent()
      70             : {
      71             : #ifdef _WIN32
      72             :     LARGE_INTEGER start; // raw QueryPerformanceCounter() value
      73             : #else
      74             :     long long start; // mach_absolute_time() ticks on __MACH__, otherwise the CLOCK_MONOTONIC reading in microseconds
      75             : #endif
      76             : };
      77             : 
      78             : const char* source = STRINGIFY( /* OpenCL C text of the benchmark program; fp_t and INPUTSIZE are supplied through the build options assembled in evaluateScoreForDevice() */
      79             : \n#if defined(KHR_DP_EXTENSION)
      80             : \n#pragma OPENCL EXTENSION cl_khr_fp64 : enable
      81             : \n#elif defined(AMD_DP_EXTENSION)
      82             : \n#pragma OPENCL EXTENSION cl_amd_fp64 : enable
      83             : \n#endif
      84             :     \n
      85             :     int isNan(fp_t a) { return a != a; } /* true only for NaN, the one value that compares unequal to itself */
      86             :     fp_t fsum(fp_t a, fp_t b) { return a + b; }
      87             : 
      88             :     fp_t fAverage(__global fp_t* input) /* mean of the non-NaN values among the first INPUTSIZE elements */
      89             : {
      90             :     fp_t sum = 0;
      91             :     int count = 0;
      92             :     for (int i = 0; i < INPUTSIZE; i++)
      93             :     {
      94             :         if (!isNan(input[i]))
      95             :         {
      96             :             sum = fsum(input[i], sum);
      97             :             count += 1;
      98             :         }
      99             :     }
     100             :     return sum / (fp_t)count;
     101             : }
     102             :     fp_t fMin(__global fp_t* input) /* smallest non-NaN value among the first INPUTSIZE elements, starting from MAXFLOAT */
     103             : {
     104             :     fp_t min = MAXFLOAT;
     105             :     for (int i = 0; i < INPUTSIZE; i++)
     106             :     {
     107             :         if (!isNan(input[i]))
     108             :         {
     109             :             min = fmin(input[i], min);
     110             :         }
     111             :     }
     112             :     return min;
     113             : }
     114             :     fp_t fSoP(__global fp_t* input0, __global fp_t* input1) /* sum of input0[i] * input1[i] over INPUTSIZE elements, with NaN operands replaced by 0 */
     115             : {
     116             :     fp_t sop = 0.0;
     117             :     for (int i = 0; i < INPUTSIZE; i++)
     118             :     {
     119             :         sop += (isNan(input0[i]) ? 0 : input0[i]) * (isNan(input1[i]) ? 0 : input1[i]);
     120             :     }
     121             :     return sop;
     122             : }
     123             :     __kernel void DynamicKernel( /* each work item computes fAverage(input0) + fMin(input1) * fSoP(input2, input3) and stores it at its own global id */
     124             :         __global fp_t* result, __global fp_t* input0, __global fp_t* input1, __global fp_t* input2, __global fp_t* input3)
     125             : {
     126             :     int gid0 = get_global_id(0);
     127             :     fp_t tmp0 = fAverage(input0);
     128             :     fp_t tmp1 = fMin(input1) * fSoP(input2, input3);
     129             :     result[gid0] = fsum(tmp0, tmp1);
     130             : }
     131             :     );
     132             : 
     133          52 : size_t sourceSize[] = { strlen(source) }; // length of the program text, passed to clCreateProgramWithSource() next to source
     134             : 
     135             : /*************************************************************************/
     136             : /* INTERNAL FUNCTIONS                                                    */
     137             : /*************************************************************************/
     138             : /* Timer functions - start timer */
     139           0 : void timerStart(timer* mytimer)
     140             : {
     141             : #ifdef _WIN32
     142             :     QueryPerformanceCounter(&mytimer->start);
     143             : #elif defined __MACH__
     144             :     mytimer->start = mach_absolute_time();
     145             : #else
     146             :     struct timespec s;
     147           0 :     clock_gettime(CLOCK_MONOTONIC, &s);
     148           0 :     mytimer->start = (long long)s.tv_sec * (long long)1.0E6 + (long long)s.tv_nsec / (long long)1.0E3;
     149             : #endif
     150           0 : }
     151             : 
     152             : /* Timer functions - get current value */
     153           0 : double timerCurrent(timer* mytimer)
     154             : {
     155             : #ifdef _WIN32
     156             :     LARGE_INTEGER stop, frequency;
     157             :     QueryPerformanceCounter(&stop);
     158             :     QueryPerformanceFrequency(&frequency);
     159             :     double time = ((double)(stop.QuadPart - mytimer->start.QuadPart) / frequency.QuadPart);
     160             : #elif defined __MACH__
     161             :     static mach_timebase_info_data_t info = { 0, 0 };
     162             :     if (info.numer == 0)
     163             :         mach_timebase_info(&info);
     164             :     long long stop = mach_absolute_time();
     165             :     double time = ((stop - mytimer->start) * (double) info.numer / info.denom) / 1.0E9;
     166             : #else
     167             :     struct timespec s;
     168             :     long long stop;
     169           0 :     clock_gettime(CLOCK_MONOTONIC, &s);
     170           0 :     stop = (long long)s.tv_sec * (long long)1.0E6 + (long long)s.tv_nsec / (long long)1.0E3;
     171           0 :     double time = ((double)(stop - mytimer->start) / 1.0E6);
     172             : #endif
     173           0 :     return time;
     174             : }
     175             : 
     176             : /* Random number generator */
     177           0 : double random(double min, double max)
     178             : {
     179           0 :     if (min == max)
     180           0 :         return min;
     181           0 :     return comphelper::rng::uniform_real_distribution(min, max);
     182             : }
     183             : 
     184             : /* Populate input */
     185           0 : void populateInput(LibreOfficeDeviceEvaluationIO* testData)
     186             : {
     187           0 :     double* input0 = &testData->input0[0];
     188           0 :     double* input1 = &testData->input1[0];
     189           0 :     double* input2 = &testData->input2[0];
     190           0 :     double* input3 = &testData->input3[0];
     191           0 :     for (unsigned long i = 0; i < testData->inputSize; i++)
     192             :     {
     193           0 :         input0[i] = random(0, i);
     194           0 :         input1[i] = random(0, i);
     195           0 :         input2[i] = random(0, i);
     196           0 :         input3[i] = random(0, i);
     197             :     }
     198           0 : }
     199             : /* Encode score object as byte string */
     200           0 : ds_status serializeScore(ds_device* device, void** serializedScore, unsigned int* serializedScoreSize)
     201             : {
     202           0 :     *serializedScoreSize = sizeof(LibreOfficeDeviceScore);
     203           0 :     *serializedScore = static_cast<void*>(new unsigned char[*serializedScoreSize]);
     204           0 :     memcpy(*serializedScore, device->score, *serializedScoreSize);
     205           0 :     return DS_SUCCESS;
     206             : }
     207             : 
     208             : /* Parses byte string and stores in score object */
     209           0 : ds_status deserializeScore(ds_device* device, const unsigned char* serializedScore, unsigned int serializedScoreSize)
     210             : {
     211             :     // check that serializedScoreSize == sizeof(LibreOfficeDeviceScore);
     212           0 :     device->score = new LibreOfficeDeviceScore;
     213           0 :     memcpy(device->score, serializedScore, serializedScoreSize);
     214           0 :     return DS_SUCCESS;
     215             : }
     216             : 
     217             : /* Releases memory held by score */
     218           0 : ds_status releaseScore(void* score)
     219             : {
     220           0 :     if (NULL != score)
     221             :     {
     222           0 :         delete static_cast<LibreOfficeDeviceScore*>(score);
     223             :     }
     224           0 :     return DS_SUCCESS;
     225             : }
     226             : 
     227             : /* Evaluate devices */
     228           0 : ds_status evaluateScoreForDevice(ds_device* device, void* evalData)
     229             : {
     230           0 :     if (DS_DEVICE_OPENCL_DEVICE == device->type)
     231             :     {
     232             :         /* Evaluating an OpenCL device */
     233             :         SAL_INFO("opencl.device", "Device: \"" << device->oclDeviceName << "\" (OpenCL) evaluation...");
     234             :         cl_int clStatus;
     235             :         /* Check for 64-bit float extensions */
     236           0 :         size_t aDevExtInfoSize = 0;
     237           0 :         clStatus = clGetDeviceInfo(device->oclDeviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize);
     238           0 :         DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo");
     239             : 
     240           0 :         char* aExtInfo = new char[aDevExtInfoSize];
     241           0 :         clStatus = clGetDeviceInfo(device->oclDeviceID, CL_DEVICE_EXTENSIONS, sizeof(char) * aDevExtInfoSize, aExtInfo, NULL);
     242           0 :         DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo");
     243           0 :         bool bKhrFp64Flag = false;
     244           0 :         bool bAmdFp64Flag = false;
     245           0 :         const char* buildOption = NULL;
     246           0 :         std::string tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE=");
     247           0 :         std::ostringstream tmpOStrStr;
     248           0 :         tmpOStrStr << std::dec << INPUTSIZE;
     249           0 :         tmpStr.append(tmpOStrStr.str());
     250             : 
     251           0 :         if ((std::string(aExtInfo)).find("cl_khr_fp64") != std::string::npos)
     252             :         {
     253           0 :             bKhrFp64Flag = true;
     254             :             //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
     255           0 :             tmpStr.append(" -DKHR_DP_EXTENSION");
     256           0 :             buildOption = tmpStr.c_str();
     257             :             SAL_INFO("opencl.device", "... has cl_khr_fp64");
     258             :         }
     259           0 :         else if ((std::string(aExtInfo)).find("cl_amd_fp64") != std::string::npos)
     260             :         {
     261           0 :             bAmdFp64Flag = true;
     262             :             //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
     263           0 :             tmpStr.append(" -DAMD_DP_EXTENSION");
     264           0 :             buildOption = tmpStr.c_str();
     265             :             SAL_INFO("opencl.device", "... has cl_amd_fp64");
     266             :         }
     267           0 :         delete[] aExtInfo;
     268             : 
     269           0 :         if (!bKhrFp64Flag && !bAmdFp64Flag)
     270             :         {
     271             :             /* No 64-bit float support */
     272           0 :             device->score = static_cast<void*>(new LibreOfficeDeviceScore);
     273           0 :             static_cast<LibreOfficeDeviceScore*>(device->score)->fTime = DBL_MAX;
     274           0 :             static_cast<LibreOfficeDeviceScore*>(device->score)->bNoCLErrors = true;
     275           0 :             SAL_INFO("opencl.device", "... no fp64 support");
     276             :         }
     277             :         else
     278             :         {
     279             :             /* 64-bit float support present */
     280             : 
     281             :             /* Create context and command queue */
     282           0 :             cl_context  clContext = clCreateContext(NULL, 1, &device->oclDeviceID, NULL, NULL, &clStatus);
     283           0 :             DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateContext");
     284           0 :             cl_command_queue clQueue = clCreateCommandQueue(clContext, device->oclDeviceID, 0, &clStatus);
     285           0 :             DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateCommandQueue");
     286             : 
     287             :             /* Build program */
     288           0 :             cl_program clProgram = clCreateProgramWithSource(clContext, 1, &source, sourceSize, &clStatus);
     289           0 :             DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateProgramWithSource");
     290           0 :             clStatus = clBuildProgram(clProgram, 1, &device->oclDeviceID, buildOption, NULL, NULL);
     291           0 :             DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clBuildProgram");
     292           0 :             if (CL_SUCCESS != clStatus)
     293             :             {
     294             :                 /* Build program failed */
     295             :                 size_t length;
     296             :                 char* buildLog;
     297           0 :                 clStatus = clGetProgramBuildInfo(clProgram, device->oclDeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
     298           0 :                 buildLog = static_cast<char*>(malloc(length));
     299           0 :                 clGetProgramBuildInfo(clProgram, device->oclDeviceID, CL_PROGRAM_BUILD_LOG, length, buildLog, &length);
     300             :                 SAL_INFO("opencl.device", "Build Errors:\n" << buildLog);
     301           0 :                 free(buildLog);
     302             : 
     303           0 :                 device->score = static_cast<void*>(new LibreOfficeDeviceScore);
     304           0 :                 static_cast<LibreOfficeDeviceScore*>(device->score)->fTime = DBL_MAX;
     305           0 :                 static_cast<LibreOfficeDeviceScore*>(device->score)->bNoCLErrors = false;
     306             :             }
     307             :             else
     308             :             {
     309             :                 /* Build program succeeded */
     310             :                 timer kernelTime;
     311           0 :                 timerStart(&kernelTime);
     312             : 
     313             :                 /* Run kernel */
     314           0 :                 LibreOfficeDeviceEvaluationIO* testData = static_cast<LibreOfficeDeviceEvaluationIO*>(evalData);
     315           0 :                 cl_kernel clKernel = clCreateKernel(clProgram, "DynamicKernel", &clStatus);
     316           0 :                 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateKernel");
     317           0 :                 cl_mem clResult = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->outputSize, &testData->output[0], &clStatus);
     318           0 :                 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clResult");
     319           0 :                 cl_mem clInput0 = clCreateBuffer(clContext, CL_MEM_READ_ONLY  | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize,  &testData->input0[0], &clStatus);
     320           0 :                 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput0");
     321           0 :                 cl_mem clInput1 = clCreateBuffer(clContext, CL_MEM_READ_ONLY  | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize,  &testData->input1[0], &clStatus);
     322           0 :                 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput1");
     323           0 :                 cl_mem clInput2 = clCreateBuffer(clContext, CL_MEM_READ_ONLY  | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize,  &testData->input2[0], &clStatus);
     324           0 :                 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput2");
     325           0 :                 cl_mem clInput3 = clCreateBuffer(clContext, CL_MEM_READ_ONLY  | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize,  &testData->input3[0], &clStatus);
     326           0 :                 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput3");
     327           0 :                 clStatus = clSetKernelArg(clKernel, 0, sizeof(cl_mem), static_cast<void*>(&clResult));
     328           0 :                 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clResult");
     329           0 :                 clStatus = clSetKernelArg(clKernel, 1, sizeof(cl_mem), static_cast<void*>(&clInput0));
     330           0 :                 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput0");
     331           0 :                 clStatus = clSetKernelArg(clKernel, 2, sizeof(cl_mem), static_cast<void*>(&clInput1));
     332           0 :                 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput1");
     333           0 :                 clStatus = clSetKernelArg(clKernel, 3, sizeof(cl_mem), static_cast<void*>(&clInput2));
     334           0 :                 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput2");
     335           0 :                 clStatus = clSetKernelArg(clKernel, 4, sizeof(cl_mem), static_cast<void*>(&clInput3));
     336           0 :                 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput3");
     337           0 :                 size_t globalWS[1] = { testData->outputSize };
     338           0 :                 size_t localSize[1] = { 64 };
     339           0 :                 clStatus = clEnqueueNDRangeKernel(clQueue, clKernel, 1, 0, globalWS, localSize, 0, NULL, NULL);
     340           0 :                 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clEnqueueNDRangeKernel");
     341           0 :                 clFinish(clQueue);
     342           0 :                 clReleaseMemObject(clInput3);
     343           0 :                 clReleaseMemObject(clInput2);
     344           0 :                 clReleaseMemObject(clInput1);
     345           0 :                 clReleaseMemObject(clInput0);
     346           0 :                 clReleaseMemObject(clResult);
     347           0 :                 clReleaseKernel(clKernel);
     348             : 
     349           0 :                 device->score = static_cast<void*>(new LibreOfficeDeviceScore);
     350           0 :                 static_cast<LibreOfficeDeviceScore*>(device->score)->fTime = timerCurrent(&kernelTime);
     351           0 :                 static_cast<LibreOfficeDeviceScore*>(device->score)->bNoCLErrors = true;
     352             :             }
     353             : 
     354           0 :             clReleaseProgram(clProgram);
     355           0 :             clReleaseCommandQueue(clQueue);
     356           0 :             clReleaseContext(clContext);
     357           0 :         }
     358             :     }
     359             :     else
     360             :     {
     361             :         /* Evaluating an Native CPU device */
     362             :         SAL_INFO("opencl.device", "Device: \"CPU\" (Native) evaluation...");
     363             :         timer kernelTime;
     364           0 :         timerStart(&kernelTime);
     365             : 
     366           0 :         LibreOfficeDeviceEvaluationIO* testData = static_cast<LibreOfficeDeviceEvaluationIO*>(evalData);
     367           0 :         for (unsigned long j = 0; j < testData->outputSize; j++)
     368             :         {
     369           0 :             double fAverage = 0.0f;
     370           0 :             double fMin = DBL_MAX;
     371           0 :             double fSoP = 0.0f;
     372           0 :             for (unsigned long i = 0; i < testData->inputSize; i++)
     373             :             {
     374           0 :                 fAverage += testData->input0[i];
     375           0 :                 fMin = ((fMin < testData->input1[i]) ? fMin : testData->input1[i]);
     376           0 :                 fSoP += testData->input2[i] * testData->input3[i];
     377             :             }
     378           0 :             fAverage /= testData->inputSize;
     379           0 :             testData->output[j] = fAverage + (fMin * fSoP);
     380             :         }
     381             : 
     382             :         // InterpretTail - the S/W fallback is nothing like as efficient
     383             :         // as any good openCL implementation: no SIMD, tons of branching
     384             :         // in the inner loops etc. Generously characterise it as only 10x
     385             :         // slower than the above.
     386           0 :         float fInterpretTailFactor = 10.0;
     387             : 
     388           0 :         device->score = static_cast<void*>(new LibreOfficeDeviceScore);
     389           0 :         static_cast<LibreOfficeDeviceScore*>(device->score)->fTime = timerCurrent(&kernelTime);
     390           0 :         static_cast<LibreOfficeDeviceScore*>(device->score)->bNoCLErrors = true;
     391             : 
     392           0 :         static_cast<LibreOfficeDeviceScore*>(device->score)->fTime *= fInterpretTailFactor;
     393             :     }
     394           0 :     return DS_SUCCESS;
     395             : }
     396             : 
     397             : /* Pick best device */
     398           0 : ds_status pickBestDevice(ds_profile* profile, int* bestDeviceIdx)
     399             : {
     400           0 :     double bestScore = DBL_MAX;
     401           0 :     *bestDeviceIdx = -1;
     402             : 
     403           0 :     for (unsigned int d = 0; d < profile->numDevices; d++)
     404             :     {
     405           0 :         ds_device device = profile->devices[d];
     406           0 :         LibreOfficeDeviceScore *pScore = static_cast<LibreOfficeDeviceScore*>(device.score);
     407             : 
     408             :         // Check blacklist and whitelist for actual devices
     409           0 :         if (device.type == DS_DEVICE_OPENCL_DEVICE)
     410             :         {
     411             :             // There is a silly impedance mismatch here. Why do we
     412             :             // need two different ways to describe an OpenCL platform
     413             :             // and an OpenCL device driver?
     414             : 
     415           0 :             OpenCLPlatformInfo aPlatform;
     416           0 :             OpenCLDeviceInfo aDevice;
     417             : 
     418             :             // We know that only the below fields are used by checkForKnownBadCompilers()
     419           0 :             aPlatform.maVendor = OUString(device.oclPlatformVendor, strlen(device.oclPlatformVendor), RTL_TEXTENCODING_UTF8);
     420           0 :             aDevice.maName = OUString(device.oclDeviceName, strlen(device.oclDeviceName), RTL_TEXTENCODING_UTF8);
     421           0 :             aDevice.maDriver = OUString(device.oclDriverVersion, strlen(device.oclDriverVersion), RTL_TEXTENCODING_UTF8);
     422             : 
     423             :             // If blacklisted or not whitelisted, ignore it
     424           0 :             if (OpenCLConfig::get().checkImplementation(aPlatform, aDevice))
     425             :             {
     426             :                 SAL_INFO("opencl.device", "Device[" << d << "] " << device.oclDeviceName << " is blacklisted or not whitelisted");
     427           0 :                 pScore->fTime = DBL_MAX;
     428           0 :                 pScore->bNoCLErrors = true;
     429           0 :             }
     430             :         }
     431             : 
     432           0 :         double fScore = DBL_MAX;
     433           0 :         if (pScore)
     434             :         {
     435           0 :             fScore = pScore->fTime;
     436             :         }
     437             :         else
     438             :         {
     439             :             SAL_INFO("opencl.device", "Unusual null score");
     440             :         }
     441             : 
     442           0 :         if (DS_DEVICE_OPENCL_DEVICE == device.type)
     443             :         {
     444             :             SAL_INFO("opencl.device", "Device[" << d << "] " << device.oclDeviceName << " (OpenCL) score is " << fScore);
     445             :         }
     446             :         else
     447             :         {
     448             :             SAL_INFO("opencl.device", "Device[" << d << "] CPU (Native) score is " << fScore);
     449             :         }
     450           0 :         if (fScore < bestScore)
     451             :         {
     452           0 :             bestScore = fScore;
     453           0 :             *bestDeviceIdx = d;
     454             :         }
     455             :     }
     456           0 :     if (DS_DEVICE_OPENCL_DEVICE == profile->devices[*bestDeviceIdx].type)
     457             :     {
     458             :         SAL_INFO("opencl.device", "Selected Device[" << *bestDeviceIdx << "]: " << profile->devices[*bestDeviceIdx].oclDeviceName << "(OpenCL).");
     459             :     }
     460             :     else
     461             :     {
     462             :         SAL_INFO("opencl.device", "Selected Device[" << *bestDeviceIdx << "]: CPU (Native).");
     463             :     }
     464             : 
     465           0 :     return DS_SUCCESS;
     466             : }
     467             : 
     468             : /* Return device ID for matching device name */
     469           0 : int matchDevice(ds_profile* profile, char* deviceName)
     470             : {
     471           0 :     int deviceMatch = -1;
     472           0 :     for (unsigned int d = 0; d < profile->numDevices - 1; d++)
     473             :     {
     474           0 :         if ((std::string(profile->devices[d].oclDeviceName)).find(deviceName) != std::string::npos) deviceMatch = d;
     475             :     }
     476           0 :     if (std::string("NATIVE_CPU").find(deviceName) != std::string::npos) deviceMatch = profile->numDevices - 1;
     477           0 :     return deviceMatch;
     478             : }
     479             : 
     480             : /*************************************************************************/
     481             : /* EXTERNAL FUNCTIONS                                                    */
     482             : /*************************************************************************/
     483           0 : ds_device getDeviceSelection(const char* sProfilePath, bool bForceSelection)
     484             : {
     485             :     /* Run only if device is not yet selected */
     486           0 :     if (!bIsDeviceSelected || bForceSelection)
     487             :     {
     488             :         /* Setup */
     489             :         ds_status status;
     490           0 :         ds_profile* profile = NULL;
     491           0 :         status = initDSProfile(&profile, "LibreOffice v0.1");
     492             : 
     493           0 :         if (!profile)
     494             :         {
     495             :             // failed to initialize profile.
     496           0 :             selectedDevice.type = DS_DEVICE_NATIVE_CPU;
     497           0 :             return selectedDevice;
     498             :         }
     499             : 
     500             :         /* Try reading scores from file */
     501           0 :         std::string tmpStr(sProfilePath);
     502           0 :         const char* fileName = tmpStr.append("sc_opencl_device_profile.dat").c_str();
     503           0 :         if (!bForceSelection)
     504             :         {
     505           0 :             status = readProfileFromFile(profile, deserializeScore, fileName);
     506             :         }
     507             :         else
     508             :         {
     509           0 :             status = DS_INVALID_PROFILE;
     510             :             SAL_INFO("opencl.device", "Performing forced profiling.");
     511             :         }
     512           0 :         if (DS_SUCCESS != status)
     513             :         {
     514           0 :             if (!bForceSelection)
     515             :             {
     516             :                 SAL_INFO("opencl.device", "Profile file not available (" << fileName << "); performing profiling.");
     517             :             }
     518             : 
     519             :             /* Populate input data for micro-benchmark */
     520           0 :             boost::scoped_ptr<LibreOfficeDeviceEvaluationIO> testData(new LibreOfficeDeviceEvaluationIO);
     521           0 :             testData->inputSize  = INPUTSIZE;
     522           0 :             testData->outputSize = OUTPUTSIZE;
     523           0 :             testData->input0.resize(testData->inputSize);
     524           0 :             testData->input1.resize(testData->inputSize);
     525           0 :             testData->input2.resize(testData->inputSize);
     526           0 :             testData->input3.resize(testData->inputSize);
     527           0 :             testData->output.resize(testData->outputSize);
     528           0 :             populateInput(testData.get());
     529             : 
     530             :             /* Perform evaluations */
     531             :             unsigned int numUpdates;
     532           0 :             status = profileDevices(profile, DS_EVALUATE_ALL, evaluateScoreForDevice, static_cast<void*>(testData.get()), &numUpdates);
     533             : 
     534           0 :             if (DS_SUCCESS == status)
     535             :             {
     536             :                 /* Write scores to file */
     537           0 :                 status = writeProfileToFile(profile, serializeScore, fileName);
     538           0 :                 if (DS_SUCCESS == status)
     539             :                 {
     540             :                     SAL_INFO("opencl.device", "Scores written to file (" << fileName << ").");
     541             :                 }
     542             :                 else
     543             :                 {
     544             :                     SAL_INFO("opencl.device", "Error saving scores to file (" << fileName << "); scores not written to file.");
     545             :                 }
     546             :             }
     547             :             else
     548             :             {
     549             :                 SAL_INFO("opencl.device", "Unable to evaluate performance; scores not written to file.");
     550           0 :             }
     551             :         }
     552             :         else
     553             :         {
     554             :             SAL_INFO("opencl.device", "Profile read from file (" << fileName << ").");
     555             :         }
     556             : 
     557             :         /* Pick best device */
     558             :         int bestDeviceIdx;
     559           0 :         pickBestDevice(profile, &bestDeviceIdx);
     560             : 
     561             :         /* Override if necessary */
     562           0 :         char* overrideDeviceStr = getenv("SC_OPENCL_DEVICE_OVERRIDE");
     563           0 :         if (NULL != overrideDeviceStr)
     564             :         {
     565           0 :             int overrideDeviceIdx = matchDevice(profile, overrideDeviceStr);
     566           0 :             if (-1 != overrideDeviceIdx)
     567             :             {
     568             :                 SAL_INFO("opencl.device", "Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ").");
     569           0 :                 bestDeviceIdx = overrideDeviceIdx;
     570           0 :                 if (DS_DEVICE_OPENCL_DEVICE == profile->devices[bestDeviceIdx].type)
     571             :                 {
     572             :                     SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: " << profile->devices[bestDeviceIdx].oclDeviceName << " (OpenCL).");
     573             :                 }
     574             :                 else
     575             :                 {
     576             :                     SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: CPU (Native).");
     577             :                 }
     578             :             }
     579             :             else
     580             :             {
     581             :                 SAL_INFO("opencl.device", "Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ").");
     582             :             }
     583             :         }
     584             : 
     585             :         /* Final device selection */
     586           0 :         selectedDevice = profile->devices[bestDeviceIdx];
     587           0 :         bIsDeviceSelected = true;
     588             : 
     589             :         /* Release profile */
     590           0 :         releaseDSProfile(profile, releaseScore);
     591             :     }
     592           0 :     return selectedDevice;
     593             : }
     594             : 
     595         156 : }
     596             : 
     597             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11