Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : */
9 :
10 : #ifdef _WIN32
11 : #include <prewin.h>
12 : #include <postwin.h>
13 : #elif defined __MACH__
14 : #include <mach/mach_time.h>
15 : #else
16 : #include <sys/time.h>
17 : #endif
18 : #include <time.h>
19 : #include <math.h>
20 : #include <float.h>
21 : #include <iostream>
22 : #include <sstream>
23 : #include <vector>
24 : #include <sal/log.hxx>
25 : #include <comphelper/random.hxx>
26 : #include <boost/scoped_ptr.hpp>
27 :
28 : #include "opencl_device.hxx"
29 :
30 : #define INPUTSIZE 15360
31 : #define OUTPUTSIZE 15360
32 :
33 : #define STRINGIFY(...) #__VA_ARGS__"\n"
34 :
/* Logs the OpenCL error code and the call-site name when status is not
 * CL_SUCCESS. `name` must be a string literal because it is concatenated
 * with the message text at compile time.
 *
 * The body is wrapped in do { } while (false) so that the macro expands to
 * exactly one statement; a bare `if { }` followed by the caller's `;` breaks
 * when the macro is used as the unbraced branch of an if/else. The status
 * argument is parenthesised so that arbitrary expressions can be passed. */
#define DS_CHECK_STATUS(status, name) \
    do \
    { \
        if (CL_SUCCESS != (status)) \
        { \
            SAL_INFO("sc.opencl.device", "Error code is " << (status) << " at " name); \
        } \
    } while (false)
40 :
41 : namespace sc { namespace OpenCLDevice {
42 :
/* Not referenced anywhere in the visible part of this file. */
43 : bool bIsInited = false;
/* Set to true by getDeviceSelection() once it has stored a result in selectedDevice. */
44 : bool bIsDeviceSelected = false;
/* Cached selection result; this is the value getDeviceSelection() returns. */
45 : ds_device selectedDevice;
46 :
/* Benchmark result attached to a device. Objects of this type are copied
 * byte-wise by serializeScore()/deserializeScore(), so the member order and
 * types define the stored format. */
struct LibreOfficeDeviceScore
{
    /* Measured benchmark time; a smaller value means a faster device. */
    double fTime;
    /* Records whether the evaluation ran without OpenCL errors. */
    bool bNoCLErrors;
};
52 :
/* Input and output buffers shared by the OpenCL and the native benchmark. */
struct LibreOfficeDeviceEvaluationIO
{
    /* Operand of the average computation. */
    std::vector<double> input0;
    /* Operand of the minimum computation. */
    std::vector<double> input1;
    /* First operand of the sum-of-products computation. */
    std::vector<double> input2;
    /* Second operand of the sum-of-products computation. */
    std::vector<double> input3;
    /* One result value per work item / outer loop iteration. */
    std::vector<double> output;
    /* Number of elements in each input vector. */
    unsigned long inputSize;
    /* Number of elements in the output vector. */
    unsigned long outputSize;
};
63 :
/* Start stamp written by timerStart() and read by timerCurrent(). The
 * representation is platform specific: a performance-counter value on
 * Windows, a plain integer everywhere else. */
struct timer
{
#ifndef _WIN32
    long long start;
#else
    LARGE_INTEGER start;
#endif
};
72 :
/* OpenCL C source of the benchmark kernel, turned into a string literal by
 * STRINGIFY. fp_t and INPUTSIZE are not defined here; they are supplied as
 * -D build options by evaluateScoreForDevice(), together with
 * KHR_DP_EXTENSION or AMD_DP_EXTENSION to enable the matching fp64 pragma.
 * The `\n` tokens in front of the preprocessor lines are presumably there so
 * that each directive starts on its own line inside the resulting string.
 *
 * Every work item of DynamicKernel computes the same value,
 *   average(input0) + min(input1) * sum(input2[i] * input3[i]),
 * skipping NaN elements, and stores it at its global id in `result`.
 * NOTE(review): fMin starts from MAXFLOAT although fp_t is built as double
 * -- confirm this is intended. */
73 : const char* source = STRINGIFY(
74 : \n#if defined(KHR_DP_EXTENSION)
75 : \n#pragma OPENCL EXTENSION cl_khr_fp64 : enable
76 : \n#elif defined(AMD_DP_EXTENSION)
77 : \n#pragma OPENCL EXTENSION cl_amd_fp64 : enable
78 : \n#endif
79 : \n
80 : int isNan(fp_t a) { return a != a; }
81 : fp_t fsum(fp_t a, fp_t b) { return a + b; }
82 :
83 : fp_t fAverage(__global fp_t* input)
84 : {
85 : fp_t sum = 0;
86 : int count = 0;
87 : for (int i = 0; i < INPUTSIZE; i++)
88 : {
89 : if (!isNan(input[i]))
90 : {
91 : sum = fsum(input[i], sum);
92 : count += 1;
93 : }
94 : }
95 : return sum / (fp_t)count;
96 : }
97 : fp_t fMin(__global fp_t* input)
98 : {
99 : fp_t min = MAXFLOAT;
100 : for (int i = 0; i < INPUTSIZE; i++)
101 : {
102 : if (!isNan(input[i]))
103 : {
104 : min = fmin(input[i], min);
105 : }
106 : }
107 : return min;
108 : }
109 : fp_t fSoP(__global fp_t* input0, __global fp_t* input1)
110 : {
111 : fp_t sop = 0.0;
112 : for (int i = 0; i < INPUTSIZE; i++)
113 : {
114 : sop += (isNan(input0[i]) ? 0 : input0[i]) * (isNan(input1[i]) ? 0 : input1[i]);
115 : }
116 : return sop;
117 : }
118 : __kernel void DynamicKernel(
119 : __global fp_t* result, __global fp_t* input0, __global fp_t* input1, __global fp_t* input2, __global fp_t* input3)
120 : {
121 : int gid0 = get_global_id(0);
122 : fp_t tmp0 = fAverage(input0);
123 : fp_t tmp1 = fMin(input1) * fSoP(input2, input3);
124 : result[gid0] = fsum(tmp0, tmp1);
125 : }
126 : );
127 :
/* Length of the kernel source in characters, excluding the terminating NUL.
 * Kept as a one-element array because it is passed as the `lengths` argument
 * of clCreateProgramWithSource() in evaluateScoreForDevice(). */
128 76 : size_t sourceSize[] = { strlen(source) };
129 :
130 : /*************************************************************************/
131 : /* INTERNAL FUNCTIONS */
132 : /*************************************************************************/
133 : /* Timer functions - start timer */
134 0 : void timerStart(timer* mytimer)
135 : {
136 : #ifdef _WIN32
137 : QueryPerformanceCounter(&mytimer->start);
138 : #elif defined __MACH__
139 : mytimer->start = mach_absolute_time();
140 : #else
141 : struct timespec s;
142 0 : clock_gettime(CLOCK_MONOTONIC, &s);
143 0 : mytimer->start = (long long)s.tv_sec * (long long)1.0E6 + (long long)s.tv_nsec / (long long)1.0E3;
144 : #endif
145 0 : }
146 :
147 : /* Timer functions - get current value */
148 0 : double timerCurrent(timer* mytimer)
149 : {
150 : #ifdef _WIN32
151 : LARGE_INTEGER stop, frequency;
152 : QueryPerformanceCounter(&stop);
153 : QueryPerformanceFrequency(&frequency);
154 : double time = ((double)(stop.QuadPart - mytimer->start.QuadPart) / frequency.QuadPart);
155 : #elif defined __MACH__
156 : static mach_timebase_info_data_t info = { 0, 0 };
157 : if (info.numer == 0)
158 : mach_timebase_info(&info);
159 : long long stop = mach_absolute_time();
160 : double time = ((stop - mytimer->start) * (double) info.numer / info.denom) / 1.0E9;
161 : #else
162 : struct timespec s;
163 : long long stop;
164 0 : clock_gettime(CLOCK_MONOTONIC, &s);
165 0 : stop = (long long)s.tv_sec * (long long)1.0E6 + (long long)s.tv_nsec / (long long)1.0E3;
166 0 : double time = ((double)(stop - mytimer->start) / 1.0E6);
167 : #endif
168 0 : return time;
169 : }
170 :
171 : /* Random number generator */
172 0 : double random(double min, double max)
173 : {
174 0 : if (min == max)
175 0 : return min;
176 0 : return comphelper::rng::uniform_real_distribution(min, max);
177 : }
178 :
179 : /* Populate input */
180 0 : void populateInput(LibreOfficeDeviceEvaluationIO* testData)
181 : {
182 0 : double* input0 = &testData->input0[0];
183 0 : double* input1 = &testData->input1[0];
184 0 : double* input2 = &testData->input2[0];
185 0 : double* input3 = &testData->input3[0];
186 0 : for (unsigned long i = 0; i < testData->inputSize; i++)
187 : {
188 0 : input0[i] = random(0, i);
189 0 : input1[i] = random(0, i);
190 0 : input2[i] = random(0, i);
191 0 : input3[i] = random(0, i);
192 : }
193 0 : }
194 : /* Encode score object as byte string */
195 0 : ds_status serializeScore(ds_device* device, void** serializedScore, unsigned int* serializedScoreSize)
196 : {
197 0 : *serializedScoreSize = sizeof(LibreOfficeDeviceScore);
198 0 : *serializedScore = (void*)new unsigned char[*serializedScoreSize];
199 0 : memcpy(*serializedScore, device->score, *serializedScoreSize);
200 0 : return DS_SUCCESS;
201 : }
202 :
203 : /* Parses byte string and stores in score object */
204 0 : ds_status deserializeScore(ds_device* device, const unsigned char* serializedScore, unsigned int serializedScoreSize)
205 : {
206 : // check that serializedScoreSize == sizeof(LibreOfficeDeviceScore);
207 0 : device->score = new LibreOfficeDeviceScore;
208 0 : memcpy(device->score, serializedScore, serializedScoreSize);
209 0 : return DS_SUCCESS;
210 : }
211 :
212 : /* Releases memory held by score */
213 0 : ds_status releaseScore(void* score)
214 : {
215 0 : if (NULL != score)
216 : {
217 0 : delete (LibreOfficeDeviceScore*)score;
218 : }
219 0 : return DS_SUCCESS;
220 : }
221 :
222 : /* Evaluate devices */
223 0 : ds_status evaluateScoreForDevice(ds_device* device, void* evalData)
224 : {
225 0 : if (DS_DEVICE_OPENCL_DEVICE == device->type)
226 : {
227 : /* Evaluating an OpenCL device */
228 : SAL_INFO("sc.opencl.device", "Device: \"" << device->oclDeviceName << "\" (OpenCL) evaluation...");
229 : cl_int clStatus;
230 : /* Check for 64-bit float extensions */
231 0 : size_t aDevExtInfoSize = 0;
232 0 : clStatus = clGetDeviceInfo(device->oclDeviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize);
233 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo");
234 :
235 0 : char* aExtInfo = new char[aDevExtInfoSize];
236 0 : clStatus = clGetDeviceInfo(device->oclDeviceID, CL_DEVICE_EXTENSIONS, sizeof(char) * aDevExtInfoSize, aExtInfo, NULL);
237 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo");
238 0 : bool bKhrFp64Flag = false;
239 0 : bool bAmdFp64Flag = false;
240 0 : const char* buildOption = NULL;
241 0 : std::string tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE=");
242 0 : std::ostringstream tmpOStrStr;
243 0 : tmpOStrStr << std::dec << INPUTSIZE;
244 0 : tmpStr.append(tmpOStrStr.str());
245 :
246 0 : if ((std::string(aExtInfo)).find("cl_khr_fp64") != std::string::npos)
247 : {
248 0 : bKhrFp64Flag = true;
249 : //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
250 0 : tmpStr.append(" -DKHR_DP_EXTENSION");
251 0 : buildOption = tmpStr.c_str();
252 : SAL_INFO("sc.opencl.device", "... has cl_khr_fp64");
253 : }
254 0 : else if ((std::string(aExtInfo)).find("cl_amd_fp64") != std::string::npos)
255 : {
256 0 : bAmdFp64Flag = true;
257 : //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
258 0 : tmpStr.append(" -DAMD_DP_EXTENSION");
259 0 : buildOption = tmpStr.c_str();
260 : SAL_INFO("sc.opencl.device", "... has cl_amd_fp64");
261 : }
262 0 : delete[] aExtInfo;
263 :
264 0 : if (!bKhrFp64Flag && !bAmdFp64Flag)
265 : {
266 : /* No 64-bit float support */
267 0 : device->score = (void*)new LibreOfficeDeviceScore;
268 0 : ((LibreOfficeDeviceScore*)device->score)->fTime = DBL_MAX;
269 0 : ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true;
270 0 : SAL_INFO("sc.opencl.device", "... no fp64 support");
271 : }
272 : else
273 : {
274 : /* 64-bit float support present */
275 :
276 : /* Create context and command queue */
277 0 : cl_context clContext = clCreateContext(NULL, 1, &device->oclDeviceID, NULL, NULL, &clStatus);
278 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateContext");
279 0 : cl_command_queue clQueue = clCreateCommandQueue(clContext, device->oclDeviceID, 0, &clStatus);
280 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateCommandQueue");
281 :
282 : /* Build program */
283 0 : cl_program clProgram = clCreateProgramWithSource(clContext, 1, &source, sourceSize, &clStatus);
284 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateProgramWithSource");
285 0 : clStatus = clBuildProgram(clProgram, 1, &device->oclDeviceID, buildOption, NULL, NULL);
286 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clBuildProgram");
287 0 : if (CL_SUCCESS != clStatus)
288 : {
289 : /* Build program failed */
290 : size_t length;
291 : char* buildLog;
292 0 : clStatus = clGetProgramBuildInfo(clProgram, device->oclDeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
293 0 : buildLog = (char*)malloc(length);
294 0 : clGetProgramBuildInfo(clProgram, device->oclDeviceID, CL_PROGRAM_BUILD_LOG, length, buildLog, &length);
295 : SAL_INFO("sc.opencl.device", "Build Errors:\n" << buildLog);
296 0 : free(buildLog);
297 :
298 0 : device->score = (void*)new LibreOfficeDeviceScore;
299 0 : ((LibreOfficeDeviceScore*)device->score)->fTime = DBL_MAX;
300 0 : ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = false;
301 : }
302 : else
303 : {
304 : /* Build program succeeded */
305 : timer kernelTime;
306 0 : timerStart(&kernelTime);
307 :
308 : /* Run kernel */
309 0 : LibreOfficeDeviceEvaluationIO* testData = (LibreOfficeDeviceEvaluationIO*)evalData;
310 0 : cl_kernel clKernel = clCreateKernel(clProgram, "DynamicKernel", &clStatus);
311 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateKernel");
312 0 : cl_mem clResult = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->outputSize, &testData->output[0], &clStatus);
313 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clResult");
314 0 : cl_mem clInput0 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input0[0], &clStatus);
315 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput0");
316 0 : cl_mem clInput1 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input1[0], &clStatus);
317 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput1");
318 0 : cl_mem clInput2 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input2[0], &clStatus);
319 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput2");
320 0 : cl_mem clInput3 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input3[0], &clStatus);
321 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput3");
322 0 : clStatus = clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void*)&clResult);
323 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clResult");
324 0 : clStatus = clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void*)&clInput0);
325 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput0");
326 0 : clStatus = clSetKernelArg(clKernel, 2, sizeof(cl_mem), (void*)&clInput1);
327 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput1");
328 0 : clStatus = clSetKernelArg(clKernel, 3, sizeof(cl_mem), (void*)&clInput2);
329 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput2");
330 0 : clStatus = clSetKernelArg(clKernel, 4, sizeof(cl_mem), (void*)&clInput3);
331 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput3");
332 0 : size_t globalWS[1] = { testData->outputSize };
333 0 : size_t localSize[1] = { 64 };
334 0 : clStatus = clEnqueueNDRangeKernel(clQueue, clKernel, 1, 0, globalWS, localSize, 0, NULL, NULL);
335 0 : DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clEnqueueNDRangeKernel");
336 0 : clFinish(clQueue);
337 0 : clReleaseMemObject(clInput3);
338 0 : clReleaseMemObject(clInput2);
339 0 : clReleaseMemObject(clInput1);
340 0 : clReleaseMemObject(clInput0);
341 0 : clReleaseMemObject(clResult);
342 0 : clReleaseKernel(clKernel);
343 :
344 0 : device->score = (void*)new LibreOfficeDeviceScore;
345 0 : ((LibreOfficeDeviceScore*)device->score)->fTime = timerCurrent(&kernelTime);
346 0 : ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true;
347 : }
348 :
349 0 : clReleaseProgram(clProgram);
350 0 : clReleaseCommandQueue(clQueue);
351 0 : clReleaseContext(clContext);
352 0 : }
353 : }
354 : else
355 : {
356 : /* Evaluating an Native CPU device */
357 : SAL_INFO("sc.opencl.device", "Device: \"CPU\" (Native) evaluation...");
358 : timer kernelTime;
359 0 : timerStart(&kernelTime);
360 :
361 0 : LibreOfficeDeviceEvaluationIO* testData = (LibreOfficeDeviceEvaluationIO*)evalData;
362 0 : for (unsigned long j = 0; j < testData->outputSize; j++)
363 : {
364 0 : double fAverage = 0.0f;
365 0 : double fMin = DBL_MAX;
366 0 : double fSoP = 0.0f;
367 0 : for (unsigned long i = 0; i < testData->inputSize; i++)
368 : {
369 0 : fAverage += testData->input0[i];
370 0 : fMin = ((fMin < testData->input1[i]) ? fMin : testData->input1[i]);
371 0 : fSoP += testData->input2[i] * testData->input3[i];
372 : }
373 0 : fAverage /= testData->inputSize;
374 0 : testData->output[j] = fAverage + (fMin * fSoP);
375 : }
376 :
377 : // InterpretTail - the S/W fallback is nothing like as efficient
378 : // as any good openCL implementation: no SIMD, tons of branching
379 : // in the inner loops etc. Generously characterise it as only 10x
380 : // slower than the above.
381 0 : float fInterpretTailFactor = 10.0;
382 :
383 0 : device->score = (void*)new LibreOfficeDeviceScore;
384 0 : ((LibreOfficeDeviceScore*)device->score)->fTime = timerCurrent(&kernelTime);
385 0 : ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true;
386 :
387 0 : ((LibreOfficeDeviceScore*)device->score)->fTime *= fInterpretTailFactor;
388 : }
389 0 : return DS_SUCCESS;
390 : }
391 :
392 : /* Pick best device */
393 0 : ds_status pickBestDevice(ds_profile* profile, int* bestDeviceIdx)
394 : {
395 0 : double bestScore = DBL_MAX;
396 0 : *bestDeviceIdx = -1;
397 :
398 0 : for (unsigned int d = 0; d < profile->numDevices; d++)
399 : {
400 0 : ds_device device = profile->devices[d];
401 0 : LibreOfficeDeviceScore *pScore = (LibreOfficeDeviceScore*)device.score;
402 :
403 0 : double fScore = DBL_MAX;
404 0 : if (pScore)
405 : {
406 0 : fScore = pScore->fTime;
407 : }
408 : else
409 : {
410 : SAL_INFO("sc.opencl.device", "Unusual null score");
411 : }
412 :
413 0 : if (DS_DEVICE_OPENCL_DEVICE == device.type)
414 : {
415 : SAL_INFO("sc.opencl.device", "Device[" << d << "] " << device.oclDeviceName << " (OpenCL) score is " << fScore);
416 : }
417 : else
418 : {
419 : SAL_INFO("sc.opencl.device", "Device[" << d << "] CPU (Native) score is " << fScore);
420 : }
421 0 : if (fScore < bestScore)
422 : {
423 0 : bestScore = fScore;
424 0 : *bestDeviceIdx = d;
425 : }
426 : }
427 0 : if (DS_DEVICE_OPENCL_DEVICE == profile->devices[*bestDeviceIdx].type)
428 : {
429 : SAL_INFO("sc.opencl.device", "Selected Device[" << *bestDeviceIdx << "]: " << profile->devices[*bestDeviceIdx].oclDeviceName << "(OpenCL).");
430 : }
431 : else
432 : {
433 : SAL_INFO("sc.opencl.device", "Selected Device[" << *bestDeviceIdx << "]: CPU (Native).");
434 : }
435 :
436 0 : return DS_SUCCESS;
437 : }
438 :
439 : /* Return device ID for matching device name */
440 0 : int matchDevice(ds_profile* profile, char* deviceName)
441 : {
442 0 : int deviceMatch = -1;
443 0 : for (unsigned int d = 0; d < profile->numDevices - 1; d++)
444 : {
445 0 : if ((std::string(profile->devices[d].oclDeviceName)).find(deviceName) != std::string::npos) deviceMatch = d;
446 : }
447 0 : if (std::string("NATIVE_CPU").find(deviceName) != std::string::npos) deviceMatch = profile->numDevices - 1;
448 0 : return deviceMatch;
449 : }
450 :
451 : /*************************************************************************/
452 : /* EXTERNAL FUNCTIONS */
453 : /*************************************************************************/
454 0 : ds_device getDeviceSelection(const char* sProfilePath, bool bForceSelection)
455 : {
456 : /* Run only if device is not yet selected */
457 0 : if (!bIsDeviceSelected || bForceSelection)
458 : {
459 : /* Setup */
460 : ds_status status;
461 0 : ds_profile* profile = NULL;
462 0 : status = initDSProfile(&profile, "LibreOffice v0.1");
463 :
464 0 : if (!profile)
465 : {
466 : // failed to initialize profile.
467 0 : selectedDevice.type = DS_DEVICE_NATIVE_CPU;
468 0 : return selectedDevice;
469 : }
470 :
471 : /* Try reading scores from file */
472 0 : std::string tmpStr(sProfilePath);
473 0 : const char* fileName = tmpStr.append("sc_opencl_device_profile.dat").c_str();
474 0 : if (!bForceSelection)
475 : {
476 0 : status = readProfileFromFile(profile, deserializeScore, fileName);
477 : }
478 : else
479 : {
480 0 : status = DS_INVALID_PROFILE;
481 : SAL_INFO("sc.opencl.device", "Performing forced profiling.");
482 : }
483 0 : if (DS_SUCCESS != status)
484 : {
485 0 : if (!bForceSelection)
486 : {
487 : SAL_INFO("sc.opencl.device", "Profile file not available (" << fileName << "); performing profiling.");
488 : }
489 :
490 : /* Populate input data for micro-benchmark */
491 0 : boost::scoped_ptr<LibreOfficeDeviceEvaluationIO> testData(new LibreOfficeDeviceEvaluationIO);
492 0 : testData->inputSize = INPUTSIZE;
493 0 : testData->outputSize = OUTPUTSIZE;
494 0 : testData->input0.resize(testData->inputSize);
495 0 : testData->input1.resize(testData->inputSize);
496 0 : testData->input2.resize(testData->inputSize);
497 0 : testData->input3.resize(testData->inputSize);
498 0 : testData->output.resize(testData->outputSize);
499 0 : populateInput(testData.get());
500 :
501 : /* Perform evaluations */
502 : unsigned int numUpdates;
503 0 : status = profileDevices(profile, DS_EVALUATE_ALL, evaluateScoreForDevice, (void*)testData.get(), &numUpdates);
504 :
505 0 : if (DS_SUCCESS == status)
506 : {
507 : /* Write scores to file */
508 0 : status = writeProfileToFile(profile, serializeScore, fileName);
509 0 : if (DS_SUCCESS == status)
510 : {
511 : SAL_INFO("sc.opencl.device", "Scores written to file (" << fileName << ").");
512 : }
513 : else
514 : {
515 : SAL_INFO("sc.opencl.device", "Error saving scores to file (" << fileName << "); scores not written to file.");
516 : }
517 : }
518 : else
519 : {
520 : SAL_INFO("sc.opencl.device", "Unable to evaluate performance; scores not written to file.");
521 0 : }
522 : }
523 : else
524 : {
525 : SAL_INFO("sc.opencl.device", "Profile read from file (" << fileName << ").");
526 : }
527 :
528 : /* Pick best device */
529 : int bestDeviceIdx;
530 0 : pickBestDevice(profile, &bestDeviceIdx);
531 :
532 : /* Overide if necessary */
533 0 : char* overrideDeviceStr = getenv("SC_OPENCL_DEVICE_OVERRIDE");
534 0 : if (NULL != overrideDeviceStr)
535 : {
536 0 : int overrideDeviceIdx = matchDevice(profile, overrideDeviceStr);
537 0 : if (-1 != overrideDeviceIdx)
538 : {
539 : SAL_INFO("sc.opencl.device", "Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ").");
540 0 : bestDeviceIdx = overrideDeviceIdx;
541 0 : if (DS_DEVICE_OPENCL_DEVICE == profile->devices[bestDeviceIdx].type)
542 : {
543 : SAL_INFO("sc.opencl.device", "Selected Device[" << bestDeviceIdx << "]: " << profile->devices[bestDeviceIdx].oclDeviceName << " (OpenCL).");
544 : }
545 : else
546 : {
547 : SAL_INFO("sc.opencl.device", "Selected Device[" << bestDeviceIdx << "]: CPU (Native).");
548 : }
549 : }
550 : else
551 : {
552 : SAL_INFO("sc.opencl.device", "Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ").");
553 : }
554 : }
555 :
556 : /* Final device selection */
557 0 : selectedDevice = profile->devices[bestDeviceIdx];
558 0 : bIsDeviceSelected = true;
559 :
560 : /* Release profile */
561 0 : releaseDSProfile(profile, releaseScore);
562 : }
563 0 : return selectedDevice;
564 : }
565 :
566 228 : }}
567 :
568 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|