Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : */
9 :
10 : #ifdef _WIN32
11 : #include <prewin.h>
12 : #include <postwin.h>
13 : #elif defined __MACH__
14 : #include <mach/mach_time.h>
15 : #else
16 : #include <sys/time.h>
17 : #endif
18 :
19 : #include <time.h>
20 : #include <math.h>
21 : #include <float.h>
22 : #include <iostream>
23 : #include <sstream>
24 : #include <vector>
25 :
26 : #include <boost/scoped_ptr.hpp>
27 :
28 : #include <comphelper/random.hxx>
29 : #include <opencl/openclconfig.hxx>
30 : #include <opencl/openclwrapper.hxx>
31 : #include <opencl/platforminfo.hxx>
32 : #include <sal/log.hxx>
33 :
34 : #include "opencl_device.hxx"
35 :
/* Number of elements in each benchmark input vector and in the output vector. */
#define INPUTSIZE 15360
#define OUTPUTSIZE 15360

/* Turns its unquoted arguments into one string literal terminated by a newline.
   Used below to embed the OpenCL kernel text directly in this file. */
#define STRINGIFY(...) #__VA_ARGS__"\n"

/* Logs the OpenCL error code when `status` differs from CL_SUCCESS.
   `name` must be a string literal: it is concatenated onto the message.
   The do/while(false) wrapper makes the expansion a single statement, so the
   macro is safe in an unbraced if/else. Note that `status` is evaluated twice. */
#define DS_CHECK_STATUS(status, name) \
    do \
    { \
        if (CL_SUCCESS != (status)) \
        { \
            SAL_INFO("opencl.device", "Error code is " << (status) << " at " name); \
        } \
    } while (false)
46 :
47 : namespace opencl {
48 :
49 : bool bIsDeviceSelected = false;
50 : ds_device selectedDevice;
51 :
/* Benchmark result stored behind a ds_device's `score` pointer.
   The object is copied byte-for-byte when scores are serialized, so the
   member order and types define the stored layout. */
struct LibreOfficeDeviceScore
{
    /* Measured benchmark time; a smaller value means a faster device. */
    double fTime;
    /* False when an OpenCL error was recorded for the device, true otherwise. */
    bool bNoCLErrors;
};
57 :
/* Host-side buffers handed to the micro-benchmark. */
struct LibreOfficeDeviceEvaluationIO
{
    /* The four input vectors read by the benchmark. */
    std::vector<double> input0, input1, input2, input3;
    /* Receives one result per output element. */
    std::vector<double> output;
    /* Number of elements processed from each input vector. */
    unsigned long inputSize;
    /* Number of elements written to `output`. */
    unsigned long outputSize;
};
68 :
/* Start point recorded by timerStart() and read back by timerCurrent().
   The representation depends on the platform's clock API. */
struct timer
{
#ifndef _WIN32
    long long start;
#else
    LARGE_INTEGER start;
#endif
};
77 :
78 : const char* source = STRINGIFY(
79 : \n#if defined(KHR_DP_EXTENSION)
80 : \n#pragma OPENCL EXTENSION cl_khr_fp64 : enable
81 : \n#elif defined(AMD_DP_EXTENSION)
82 : \n#pragma OPENCL EXTENSION cl_amd_fp64 : enable
83 : \n#endif
84 : \n
85 : int isNan(fp_t a) { return a != a; }
86 : fp_t fsum(fp_t a, fp_t b) { return a + b; }
87 :
88 : fp_t fAverage(__global fp_t* input)
89 : {
90 : fp_t sum = 0;
91 : int count = 0;
92 : for (int i = 0; i < INPUTSIZE; i++)
93 : {
94 : if (!isNan(input[i]))
95 : {
96 : sum = fsum(input[i], sum);
97 : count += 1;
98 : }
99 : }
100 : return sum / (fp_t)count;
101 : }
102 : fp_t fMin(__global fp_t* input)
103 : {
104 : fp_t min = MAXFLOAT;
105 : for (int i = 0; i < INPUTSIZE; i++)
106 : {
107 : if (!isNan(input[i]))
108 : {
109 : min = fmin(input[i], min);
110 : }
111 : }
112 : return min;
113 : }
114 : fp_t fSoP(__global fp_t* input0, __global fp_t* input1)
115 : {
116 : fp_t sop = 0.0;
117 : for (int i = 0; i < INPUTSIZE; i++)
118 : {
119 : sop += (isNan(input0[i]) ? 0 : input0[i]) * (isNan(input1[i]) ? 0 : input1[i]);
120 : }
121 : return sop;
122 : }
123 : __kernel void DynamicKernel(
124 : __global fp_t* result, __global fp_t* input0, __global fp_t* input1, __global fp_t* input2, __global fp_t* input3)
125 : {
126 : int gid0 = get_global_id(0);
127 : fp_t tmp0 = fAverage(input0);
128 : fp_t tmp1 = fMin(input1) * fSoP(input2, input3);
129 : result[gid0] = fsum(tmp0, tmp1);
130 : }
131 : );
132 :
133 52 : size_t sourceSize[] = { strlen(source) };
134 :
135 : /*************************************************************************/
136 : /* INTERNAL FUNCTIONS */
137 : /*************************************************************************/
138 : /* Timer functions - start timer */
139 0 : void timerStart(timer* mytimer)
140 : {
141 : #ifdef _WIN32
142 : QueryPerformanceCounter(&mytimer->start);
143 : #elif defined __MACH__
144 : mytimer->start = mach_absolute_time();
145 : #else
146 : struct timespec s;
147 0 : clock_gettime(CLOCK_MONOTONIC, &s);
148 0 : mytimer->start = (long long)s.tv_sec * (long long)1.0E6 + (long long)s.tv_nsec / (long long)1.0E3;
149 : #endif
150 0 : }
151 :
152 : /* Timer functions - get current value */
153 0 : double timerCurrent(timer* mytimer)
154 : {
155 : #ifdef _WIN32
156 : LARGE_INTEGER stop, frequency;
157 : QueryPerformanceCounter(&stop);
158 : QueryPerformanceFrequency(&frequency);
159 : double time = ((double)(stop.QuadPart - mytimer->start.QuadPart) / frequency.QuadPart);
160 : #elif defined __MACH__
161 : static mach_timebase_info_data_t info = { 0, 0 };
162 : if (info.numer == 0)
163 : mach_timebase_info(&info);
164 : long long stop = mach_absolute_time();
165 : double time = ((stop - mytimer->start) * (double) info.numer / info.denom) / 1.0E9;
166 : #else
167 : struct timespec s;
168 : long long stop;
169 0 : clock_gettime(CLOCK_MONOTONIC, &s);
170 0 : stop = (long long)s.tv_sec * (long long)1.0E6 + (long long)s.tv_nsec / (long long)1.0E3;
171 0 : double time = ((double)(stop - mytimer->start) / 1.0E6);
172 : #endif
173 0 : return time;
174 : }
175 :
176 : /* Random number generator */
177 0 : double random(double min, double max)
178 : {
179 0 : if (min == max)
180 0 : return min;
181 0 : return comphelper::rng::uniform_real_distribution(min, max);
182 : }
183 :
184 : /* Populate input */
185 0 : void populateInput(LibreOfficeDeviceEvaluationIO* testData)
186 : {
187 0 : double* input0 = &testData->input0[0];
188 0 : double* input1 = &testData->input1[0];
189 0 : double* input2 = &testData->input2[0];
190 0 : double* input3 = &testData->input3[0];
191 0 : for (unsigned long i = 0; i < testData->inputSize; i++)
192 : {
193 0 : input0[i] = random(0, i);
194 0 : input1[i] = random(0, i);
195 0 : input2[i] = random(0, i);
196 0 : input3[i] = random(0, i);
197 : }
198 0 : }
199 : /* Encode score object as byte string */
200 0 : ds_status serializeScore(ds_device* device, void** serializedScore, unsigned int* serializedScoreSize)
201 : {
202 0 : *serializedScoreSize = sizeof(LibreOfficeDeviceScore);
203 0 : *serializedScore = static_cast<void*>(new unsigned char[*serializedScoreSize]);
204 0 : memcpy(*serializedScore, device->score, *serializedScoreSize);
205 0 : return DS_SUCCESS;
206 : }
207 :
208 : /* Parses byte string and stores in score object */
209 0 : ds_status deserializeScore(ds_device* device, const unsigned char* serializedScore, unsigned int serializedScoreSize)
210 : {
211 : // check that serializedScoreSize == sizeof(LibreOfficeDeviceScore);
212 0 : device->score = new LibreOfficeDeviceScore;
213 0 : memcpy(device->score, serializedScore, serializedScoreSize);
214 0 : return DS_SUCCESS;
215 : }
216 :
217 : /* Releases memory held by score */
218 0 : ds_status releaseScore(void* score)
219 : {
220 0 : if (NULL != score)
221 : {
222 0 : delete static_cast<LibreOfficeDeviceScore*>(score);
223 : }
224 0 : return DS_SUCCESS;
225 : }
226 :
227 : /* Evaluate devices */
228 0 : ds_status evaluateScoreForDevice(ds_device* device, void* evalData)
229 : {
230 0 : if (DS_DEVICE_OPENCL_DEVICE == device->type)
231 : {
232 : /* Evaluating an OpenCL device */
233 : SAL_INFO("opencl.device", "Device: \"" << device->oclDeviceName << "\" (OpenCL) evaluation...");
234 : cl_int clStatus;
235 : /* Check for 64-bit float extensions */
236 0 : size_t aDevExtInfoSize = 0;
237 0 : clStatus = clGetDeviceInfo(device->oclDeviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize);
238 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo");
239 :
240 0 : char* aExtInfo = new char[aDevExtInfoSize];
241 0 : clStatus = clGetDeviceInfo(device->oclDeviceID, CL_DEVICE_EXTENSIONS, sizeof(char) * aDevExtInfoSize, aExtInfo, NULL);
242 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo");
243 0 : bool bKhrFp64Flag = false;
244 0 : bool bAmdFp64Flag = false;
245 0 : const char* buildOption = NULL;
246 0 : std::string tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE=");
247 0 : std::ostringstream tmpOStrStr;
248 0 : tmpOStrStr << std::dec << INPUTSIZE;
249 0 : tmpStr.append(tmpOStrStr.str());
250 :
251 0 : if ((std::string(aExtInfo)).find("cl_khr_fp64") != std::string::npos)
252 : {
253 0 : bKhrFp64Flag = true;
254 : //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
255 0 : tmpStr.append(" -DKHR_DP_EXTENSION");
256 0 : buildOption = tmpStr.c_str();
257 : SAL_INFO("opencl.device", "... has cl_khr_fp64");
258 : }
259 0 : else if ((std::string(aExtInfo)).find("cl_amd_fp64") != std::string::npos)
260 : {
261 0 : bAmdFp64Flag = true;
262 : //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
263 0 : tmpStr.append(" -DAMD_DP_EXTENSION");
264 0 : buildOption = tmpStr.c_str();
265 : SAL_INFO("opencl.device", "... has cl_amd_fp64");
266 : }
267 0 : delete[] aExtInfo;
268 :
269 0 : if (!bKhrFp64Flag && !bAmdFp64Flag)
270 : {
271 : /* No 64-bit float support */
272 0 : device->score = static_cast<void*>(new LibreOfficeDeviceScore);
273 0 : static_cast<LibreOfficeDeviceScore*>(device->score)->fTime = DBL_MAX;
274 0 : static_cast<LibreOfficeDeviceScore*>(device->score)->bNoCLErrors = true;
275 0 : SAL_INFO("opencl.device", "... no fp64 support");
276 : }
277 : else
278 : {
279 : /* 64-bit float support present */
280 :
281 : /* Create context and command queue */
282 0 : cl_context clContext = clCreateContext(NULL, 1, &device->oclDeviceID, NULL, NULL, &clStatus);
283 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateContext");
284 0 : cl_command_queue clQueue = clCreateCommandQueue(clContext, device->oclDeviceID, 0, &clStatus);
285 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateCommandQueue");
286 :
287 : /* Build program */
288 0 : cl_program clProgram = clCreateProgramWithSource(clContext, 1, &source, sourceSize, &clStatus);
289 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateProgramWithSource");
290 0 : clStatus = clBuildProgram(clProgram, 1, &device->oclDeviceID, buildOption, NULL, NULL);
291 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clBuildProgram");
292 0 : if (CL_SUCCESS != clStatus)
293 : {
294 : /* Build program failed */
295 : size_t length;
296 : char* buildLog;
297 0 : clStatus = clGetProgramBuildInfo(clProgram, device->oclDeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
298 0 : buildLog = static_cast<char*>(malloc(length));
299 0 : clGetProgramBuildInfo(clProgram, device->oclDeviceID, CL_PROGRAM_BUILD_LOG, length, buildLog, &length);
300 : SAL_INFO("opencl.device", "Build Errors:\n" << buildLog);
301 0 : free(buildLog);
302 :
303 0 : device->score = static_cast<void*>(new LibreOfficeDeviceScore);
304 0 : static_cast<LibreOfficeDeviceScore*>(device->score)->fTime = DBL_MAX;
305 0 : static_cast<LibreOfficeDeviceScore*>(device->score)->bNoCLErrors = false;
306 : }
307 : else
308 : {
309 : /* Build program succeeded */
310 : timer kernelTime;
311 0 : timerStart(&kernelTime);
312 :
313 : /* Run kernel */
314 0 : LibreOfficeDeviceEvaluationIO* testData = static_cast<LibreOfficeDeviceEvaluationIO*>(evalData);
315 0 : cl_kernel clKernel = clCreateKernel(clProgram, "DynamicKernel", &clStatus);
316 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateKernel");
317 0 : cl_mem clResult = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->outputSize, &testData->output[0], &clStatus);
318 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clResult");
319 0 : cl_mem clInput0 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input0[0], &clStatus);
320 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput0");
321 0 : cl_mem clInput1 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input1[0], &clStatus);
322 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput1");
323 0 : cl_mem clInput2 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input2[0], &clStatus);
324 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput2");
325 0 : cl_mem clInput3 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input3[0], &clStatus);
326 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput3");
327 0 : clStatus = clSetKernelArg(clKernel, 0, sizeof(cl_mem), static_cast<void*>(&clResult));
328 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clResult");
329 0 : clStatus = clSetKernelArg(clKernel, 1, sizeof(cl_mem), static_cast<void*>(&clInput0));
330 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput0");
331 0 : clStatus = clSetKernelArg(clKernel, 2, sizeof(cl_mem), static_cast<void*>(&clInput1));
332 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput1");
333 0 : clStatus = clSetKernelArg(clKernel, 3, sizeof(cl_mem), static_cast<void*>(&clInput2));
334 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput2");
335 0 : clStatus = clSetKernelArg(clKernel, 4, sizeof(cl_mem), static_cast<void*>(&clInput3));
336 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput3");
337 0 : size_t globalWS[1] = { testData->outputSize };
338 0 : size_t localSize[1] = { 64 };
339 0 : clStatus = clEnqueueNDRangeKernel(clQueue, clKernel, 1, 0, globalWS, localSize, 0, NULL, NULL);
340 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clEnqueueNDRangeKernel");
341 0 : clFinish(clQueue);
342 0 : clReleaseMemObject(clInput3);
343 0 : clReleaseMemObject(clInput2);
344 0 : clReleaseMemObject(clInput1);
345 0 : clReleaseMemObject(clInput0);
346 0 : clReleaseMemObject(clResult);
347 0 : clReleaseKernel(clKernel);
348 :
349 0 : device->score = static_cast<void*>(new LibreOfficeDeviceScore);
350 0 : static_cast<LibreOfficeDeviceScore*>(device->score)->fTime = timerCurrent(&kernelTime);
351 0 : static_cast<LibreOfficeDeviceScore*>(device->score)->bNoCLErrors = true;
352 : }
353 :
354 0 : clReleaseProgram(clProgram);
355 0 : clReleaseCommandQueue(clQueue);
356 0 : clReleaseContext(clContext);
357 0 : }
358 : }
359 : else
360 : {
361 : /* Evaluating an Native CPU device */
362 : SAL_INFO("opencl.device", "Device: \"CPU\" (Native) evaluation...");
363 : timer kernelTime;
364 0 : timerStart(&kernelTime);
365 :
366 0 : LibreOfficeDeviceEvaluationIO* testData = static_cast<LibreOfficeDeviceEvaluationIO*>(evalData);
367 0 : for (unsigned long j = 0; j < testData->outputSize; j++)
368 : {
369 0 : double fAverage = 0.0f;
370 0 : double fMin = DBL_MAX;
371 0 : double fSoP = 0.0f;
372 0 : for (unsigned long i = 0; i < testData->inputSize; i++)
373 : {
374 0 : fAverage += testData->input0[i];
375 0 : fMin = ((fMin < testData->input1[i]) ? fMin : testData->input1[i]);
376 0 : fSoP += testData->input2[i] * testData->input3[i];
377 : }
378 0 : fAverage /= testData->inputSize;
379 0 : testData->output[j] = fAverage + (fMin * fSoP);
380 : }
381 :
382 : // InterpretTail - the S/W fallback is nothing like as efficient
383 : // as any good openCL implementation: no SIMD, tons of branching
384 : // in the inner loops etc. Generously characterise it as only 10x
385 : // slower than the above.
386 0 : float fInterpretTailFactor = 10.0;
387 :
388 0 : device->score = static_cast<void*>(new LibreOfficeDeviceScore);
389 0 : static_cast<LibreOfficeDeviceScore*>(device->score)->fTime = timerCurrent(&kernelTime);
390 0 : static_cast<LibreOfficeDeviceScore*>(device->score)->bNoCLErrors = true;
391 :
392 0 : static_cast<LibreOfficeDeviceScore*>(device->score)->fTime *= fInterpretTailFactor;
393 : }
394 0 : return DS_SUCCESS;
395 : }
396 :
397 : /* Pick best device */
398 0 : ds_status pickBestDevice(ds_profile* profile, int* bestDeviceIdx)
399 : {
400 0 : double bestScore = DBL_MAX;
401 0 : *bestDeviceIdx = -1;
402 :
403 0 : for (unsigned int d = 0; d < profile->numDevices; d++)
404 : {
405 0 : ds_device device = profile->devices[d];
406 0 : LibreOfficeDeviceScore *pScore = static_cast<LibreOfficeDeviceScore*>(device.score);
407 :
408 : // Check blacklist and whitelist for actual devices
409 0 : if (device.type == DS_DEVICE_OPENCL_DEVICE)
410 : {
411 : // There is a silly impedance mismatch here. Why do we
412 : // need two different ways to describe an OpenCL platform
413 : // and an OpenCL device driver?
414 :
415 0 : OpenCLPlatformInfo aPlatform;
416 0 : OpenCLDeviceInfo aDevice;
417 :
418 : // We know that only the below fields are used by checkForKnownBadCompilers()
419 0 : aPlatform.maVendor = OUString(device.oclPlatformVendor, strlen(device.oclPlatformVendor), RTL_TEXTENCODING_UTF8);
420 0 : aDevice.maName = OUString(device.oclDeviceName, strlen(device.oclDeviceName), RTL_TEXTENCODING_UTF8);
421 0 : aDevice.maDriver = OUString(device.oclDriverVersion, strlen(device.oclDriverVersion), RTL_TEXTENCODING_UTF8);
422 :
423 : // If blacklisted or not whitelisted, ignore it
424 0 : if (OpenCLConfig::get().checkImplementation(aPlatform, aDevice))
425 : {
426 : SAL_INFO("opencl.device", "Device[" << d << "] " << device.oclDeviceName << " is blacklisted or not whitelisted");
427 0 : pScore->fTime = DBL_MAX;
428 0 : pScore->bNoCLErrors = true;
429 0 : }
430 : }
431 :
432 0 : double fScore = DBL_MAX;
433 0 : if (pScore)
434 : {
435 0 : fScore = pScore->fTime;
436 : }
437 : else
438 : {
439 : SAL_INFO("opencl.device", "Unusual null score");
440 : }
441 :
442 0 : if (DS_DEVICE_OPENCL_DEVICE == device.type)
443 : {
444 : SAL_INFO("opencl.device", "Device[" << d << "] " << device.oclDeviceName << " (OpenCL) score is " << fScore);
445 : }
446 : else
447 : {
448 : SAL_INFO("opencl.device", "Device[" << d << "] CPU (Native) score is " << fScore);
449 : }
450 0 : if (fScore < bestScore)
451 : {
452 0 : bestScore = fScore;
453 0 : *bestDeviceIdx = d;
454 : }
455 : }
456 0 : if (DS_DEVICE_OPENCL_DEVICE == profile->devices[*bestDeviceIdx].type)
457 : {
458 : SAL_INFO("opencl.device", "Selected Device[" << *bestDeviceIdx << "]: " << profile->devices[*bestDeviceIdx].oclDeviceName << "(OpenCL).");
459 : }
460 : else
461 : {
462 : SAL_INFO("opencl.device", "Selected Device[" << *bestDeviceIdx << "]: CPU (Native).");
463 : }
464 :
465 0 : return DS_SUCCESS;
466 : }
467 :
468 : /* Return device ID for matching device name */
469 0 : int matchDevice(ds_profile* profile, char* deviceName)
470 : {
471 0 : int deviceMatch = -1;
472 0 : for (unsigned int d = 0; d < profile->numDevices - 1; d++)
473 : {
474 0 : if ((std::string(profile->devices[d].oclDeviceName)).find(deviceName) != std::string::npos) deviceMatch = d;
475 : }
476 0 : if (std::string("NATIVE_CPU").find(deviceName) != std::string::npos) deviceMatch = profile->numDevices - 1;
477 0 : return deviceMatch;
478 : }
479 :
480 : /*************************************************************************/
481 : /* EXTERNAL FUNCTIONS */
482 : /*************************************************************************/
483 0 : ds_device getDeviceSelection(const char* sProfilePath, bool bForceSelection)
484 : {
485 : /* Run only if device is not yet selected */
486 0 : if (!bIsDeviceSelected || bForceSelection)
487 : {
488 : /* Setup */
489 : ds_status status;
490 0 : ds_profile* profile = NULL;
491 0 : status = initDSProfile(&profile, "LibreOffice v0.1");
492 :
493 0 : if (!profile)
494 : {
495 : // failed to initialize profile.
496 0 : selectedDevice.type = DS_DEVICE_NATIVE_CPU;
497 0 : return selectedDevice;
498 : }
499 :
500 : /* Try reading scores from file */
501 0 : std::string tmpStr(sProfilePath);
502 0 : const char* fileName = tmpStr.append("sc_opencl_device_profile.dat").c_str();
503 0 : if (!bForceSelection)
504 : {
505 0 : status = readProfileFromFile(profile, deserializeScore, fileName);
506 : }
507 : else
508 : {
509 0 : status = DS_INVALID_PROFILE;
510 : SAL_INFO("opencl.device", "Performing forced profiling.");
511 : }
512 0 : if (DS_SUCCESS != status)
513 : {
514 0 : if (!bForceSelection)
515 : {
516 : SAL_INFO("opencl.device", "Profile file not available (" << fileName << "); performing profiling.");
517 : }
518 :
519 : /* Populate input data for micro-benchmark */
520 0 : boost::scoped_ptr<LibreOfficeDeviceEvaluationIO> testData(new LibreOfficeDeviceEvaluationIO);
521 0 : testData->inputSize = INPUTSIZE;
522 0 : testData->outputSize = OUTPUTSIZE;
523 0 : testData->input0.resize(testData->inputSize);
524 0 : testData->input1.resize(testData->inputSize);
525 0 : testData->input2.resize(testData->inputSize);
526 0 : testData->input3.resize(testData->inputSize);
527 0 : testData->output.resize(testData->outputSize);
528 0 : populateInput(testData.get());
529 :
530 : /* Perform evaluations */
531 : unsigned int numUpdates;
532 0 : status = profileDevices(profile, DS_EVALUATE_ALL, evaluateScoreForDevice, static_cast<void*>(testData.get()), &numUpdates);
533 :
534 0 : if (DS_SUCCESS == status)
535 : {
536 : /* Write scores to file */
537 0 : status = writeProfileToFile(profile, serializeScore, fileName);
538 0 : if (DS_SUCCESS == status)
539 : {
540 : SAL_INFO("opencl.device", "Scores written to file (" << fileName << ").");
541 : }
542 : else
543 : {
544 : SAL_INFO("opencl.device", "Error saving scores to file (" << fileName << "); scores not written to file.");
545 : }
546 : }
547 : else
548 : {
549 : SAL_INFO("opencl.device", "Unable to evaluate performance; scores not written to file.");
550 0 : }
551 : }
552 : else
553 : {
554 : SAL_INFO("opencl.device", "Profile read from file (" << fileName << ").");
555 : }
556 :
557 : /* Pick best device */
558 : int bestDeviceIdx;
559 0 : pickBestDevice(profile, &bestDeviceIdx);
560 :
561 : /* Override if necessary */
562 0 : char* overrideDeviceStr = getenv("SC_OPENCL_DEVICE_OVERRIDE");
563 0 : if (NULL != overrideDeviceStr)
564 : {
565 0 : int overrideDeviceIdx = matchDevice(profile, overrideDeviceStr);
566 0 : if (-1 != overrideDeviceIdx)
567 : {
568 : SAL_INFO("opencl.device", "Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ").");
569 0 : bestDeviceIdx = overrideDeviceIdx;
570 0 : if (DS_DEVICE_OPENCL_DEVICE == profile->devices[bestDeviceIdx].type)
571 : {
572 : SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: " << profile->devices[bestDeviceIdx].oclDeviceName << " (OpenCL).");
573 : }
574 : else
575 : {
576 : SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: CPU (Native).");
577 : }
578 : }
579 : else
580 : {
581 : SAL_INFO("opencl.device", "Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ").");
582 : }
583 : }
584 :
585 : /* Final device selection */
586 0 : selectedDevice = profile->devices[bestDeviceIdx];
587 0 : bIsDeviceSelected = true;
588 :
589 : /* Release profile */
590 0 : releaseDSProfile(profile, releaseScore);
591 : }
592 0 : return selectedDevice;
593 : }
594 :
595 156 : }
596 :
597 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|