Révision 191
Pi/C/OpenCL/PiOpenCL.c (revision 191) | ||
---|---|---|
1 |
// Pi Dart Dash in OpenCL in C, illustrative example |
|
2 |
// |
|
3 |
// Emmanuel Quemener <emmanuel.quemener@gmail.com> |
|
4 |
// |
|
5 |
// CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com> |
|
6 |
// Copyleft Cecill v2 |
|
7 |
// |
|
8 |
// -h : print the documentation and detect devices as (platform,device) |
|
9 |
// |
|
10 |
// classical use: |
|
11 |
// #1 OpenCL Platform ID: get this information with -h option |
|
12 |
// #2 OpenCL Device ID: get this information with -h option |
|
13 |
// #3 Minimal number of iterations: |
|
14 |
// #4 Parallel Rate: number of parts the global work is split into, executed in parallel |
|
15 |
// #5 Loops (to improve statistics) |
|
16 |
// #6 Type of variables INT32, INT64, FP32, FP64 |
|
17 |
// |
|
18 |
// To compile : gcc -o PiOpenCL PiOpenCL.c -lOpenCL |
|
19 |
|
|
20 |
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS |
|
21 |
#include <stdio.h> |
|
22 |
#include <stdlib.h> |
|
23 |
#include <string.h> |
|
24 |
#include <CL/cl.h> |
|
25 |
#include <stdint.h> |
|
26 |
#include <sys/time.h> |
|
27 |
|
|
28 |
// Selectors for the arithmetic type used by the Monte Carlo loop.
// The same four names and values are defined again inside the OpenCL
// program text (OpenCLSource), and the selected value is passed to the
// kernel as its MyType argument, so the two sets must stay identical.
#define TINT32 0 |
|
29 |
#define TINT64 1 |
|
30 |
#define TFP32 2 |
|
31 |
#define TFP64 3 |
|
32 |
|
|
33 |
int DetectOpenCLDevices(void) |
|
34 |
{ |
|
35 |
int i, j; |
|
36 |
char* value; |
|
37 |
size_t valueSize; |
|
38 |
cl_uint platformCount; |
|
39 |
cl_platform_id* platforms; |
|
40 |
cl_uint deviceCount; |
|
41 |
cl_device_id* devices; |
|
42 |
cl_uint maxComputeUnits; |
|
43 |
cl_int maxWorkGroupSize; |
|
44 |
cl_int maxWorkItemSizes; |
|
45 |
cl_device_type dev_type; |
|
46 |
|
|
47 |
// get all platforms |
|
48 |
clGetPlatformIDs(0, NULL, &platformCount); |
|
49 |
platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount); |
|
50 |
clGetPlatformIDs(platformCount, platforms, NULL); |
|
51 |
|
|
52 |
printf("OpenCL statistics: %d platform(s) detected\n\n",platformCount); |
|
53 |
|
|
54 |
for (i = 0; i < platformCount; i++) { |
|
55 |
|
|
56 |
// get all devices |
|
57 |
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount); |
|
58 |
devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount); |
|
59 |
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL); |
|
60 |
|
|
61 |
// for each device print critical attributes |
|
62 |
for (j = 0; j < deviceCount; j++) { |
|
63 |
|
|
64 |
// print device name |
|
65 |
clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 0, NULL, &valueSize); |
|
66 |
value = (char*) malloc(valueSize); |
|
67 |
clGetDeviceInfo(devices[j], CL_DEVICE_NAME, valueSize, value, NULL); |
|
68 |
printf("Device (%d,%d): %s\n",i, j, value); |
|
69 |
free(value); |
|
70 |
|
|
71 |
// print type device CPU/GPU/ACCELERATOR |
|
72 |
clGetDeviceInfo(devices[j], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, NULL); |
|
73 |
printf("\tDevice Type: "); |
|
74 |
if(dev_type & CL_DEVICE_TYPE_GPU) |
|
75 |
printf("CL_DEVICE_TYPE_GPU "); |
|
76 |
if(dev_type & CL_DEVICE_TYPE_CPU) |
|
77 |
printf("CL_DEVICE_TYPE_CPU "); |
|
78 |
if(dev_type & CL_DEVICE_TYPE_ACCELERATOR) |
|
79 |
printf("CL_DEVICE_TYPE_ACCELERATOR "); |
|
80 |
if(dev_type & CL_DEVICE_TYPE_DEFAULT) |
|
81 |
printf("CL_DEVICE_TYPE_DEFAULT "); |
|
82 |
printf("\n"); |
|
83 |
|
|
84 |
// print device vendor |
|
85 |
clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, 0, NULL, &valueSize); |
|
86 |
value = (char*) malloc(valueSize); |
|
87 |
clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, valueSize, value, NULL); |
|
88 |
printf("\tDevice vendor: %s\n", value); |
|
89 |
free(value); |
|
90 |
|
|
91 |
// print hardware device version |
|
92 |
clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, 0, NULL, &valueSize); |
|
93 |
value = (char*) malloc(valueSize); |
|
94 |
clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, valueSize, value, NULL); |
|
95 |
printf("\tHardware version: %s\n", value); |
|
96 |
free(value); |
|
97 |
|
|
98 |
// print software driver version |
|
99 |
clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, 0, NULL, &valueSize); |
|
100 |
value = (char*) malloc(valueSize); |
|
101 |
clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, valueSize, value, NULL); |
|
102 |
printf("\tSoftware version: %s\n", value); |
|
103 |
free(value); |
|
104 |
|
|
105 |
// print c version supported by compiler for device |
|
106 |
clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &valueSize); |
|
107 |
value = (char*) malloc(valueSize); |
|
108 |
clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, valueSize, value, NULL); |
|
109 |
printf("\tOpenCL C version: %s\n", value); |
|
110 |
free(value); |
|
111 |
|
|
112 |
// print parallel compute units |
|
113 |
clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, |
|
114 |
sizeof(maxComputeUnits), &maxComputeUnits, NULL); |
|
115 |
printf("\tParallel compute units: %d\n", maxComputeUnits); |
|
116 |
|
|
117 |
// print max work group size |
|
118 |
clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, |
|
119 |
sizeof(maxWorkGroupSize), &maxWorkGroupSize, NULL); |
|
120 |
printf("\tMaximum Work Group Size: %d\n", maxWorkGroupSize); |
|
121 |
|
|
122 |
// print max work items size |
|
123 |
clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, |
|
124 |
sizeof(maxWorkItemSizes), &maxWorkItemSizes, NULL); |
|
125 |
printf("\tMaximum Work Item Sizes: %d\n", maxWorkItemSizes); |
|
126 |
|
|
127 |
} |
|
128 |
printf("\n"); |
|
129 |
free(devices); |
|
130 |
} |
|
131 |
|
|
132 |
free(platforms); |
|
133 |
return 0; |
|
134 |
|
|
135 |
} |
|
136 |
|
|
137 |
/*
 * OpenCL C program text, stored as an array of string fragments that the
 * OpenCL runtime concatenates in order.
 *
 * Layout:
 *   - macros for a Marsaglia-style random number generator (MWC, SHR3,
 *     CONG, KISS) and their floating-point variants;
 *   - the TINT32/TINT64/TFP32/TFP64 selectors, which mirror the host-side
 *     macros of the same names;
 *   - four device functions MainLoop32I/32F/64I/64F that draw `iterations`
 *     (x,y) pairs and count how many satisfy x*x+y*y <= "one" in the
 *     chosen arithmetic type, with seeds offset by the `work` index;
 *   - three kernels (MainLoopGlobal, MainLoopLocal, MainLoopHybrid) that
 *     dispatch on MyType and store the per-work-item count in s[].
 *
 * Only the preprocessor lines and the comment line end with '\n'; the other
 * fragments rely on C's whitespace insensitivity when concatenated, so a
 * "//" comment must never be added to a fragment that lacks a trailing '\n'.
 *
 * The number of fragments (130) is passed to clCreateProgramWithSource by
 * main(); keep the two in agreement when adding or removing fragments.
 *
 * Fixes relative to the previous text:
 *   - MainLoopHybrid combined its fence flags with the logical OR "||",
 *     which evaluates to 1 instead of the union of both flags; it now uses
 *     the bitwise OR "|".
 *   - `total` is initialised to 0 in every kernel so an unknown MyType
 *     stores 0 rather than an indeterminate value.
 */
const char *OpenCLSource[] = {
  "#pragma OPENCL EXTENSION cl_khr_fp64: enable \n",
  "// Marsaglia RNG very simple implementation \n",
  "#define znew ((z=36969*(z&65535)+(z>>16))<<16) \n",
  "#define wnew ((w=18000*(w&65535)+(w>>16))&65535) \n",
  "#define MWC (znew+wnew) \n",
  "#define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5)) \n",
  "#define CONG (jcong=69069*jcong+1234567) \n",
  "#define KISS ((MWC^CONG)+SHR3) \n",
  "#define MWCfp MWC * 2.328306435454494e-10f \n",
  "#define KISSfp KISS * 2.328306435454494e-10f \n",
  "#define CONGfp CONG * 2.328306435454494e-10f \n",
  "#define SHR3fp SHR3 * 2.328306435454494e-10f \n",
  "#define TINT32 0 \n",
  "#define TINT64 1 \n",
  "#define TFP32 2 \n",
  "#define TFP64 3 \n",
  "#define THEONE32I 1073741824 \n",
  "#define THEONE32F 1.e0f \n",
  "#define THEONE64I 4611686018427387904 \n",
  "#define THEONE64F (double)1.e0f \n",
  /* 32-bit integer variant */
  "ulong MainLoop32I(ulong iterations,uint seed_z,uint seed_w,size_t work)",
  "{",
  " uint z=seed_z+work;",
  " uint w=seed_w+work;",
  " ulong total=0;",
  " for (ulong i=0;i<iterations;i++)",
  " {",
  " uint x= MWC>>17;",
  " uint y= MWC>>17;",
  " ulong inside=((x*x+y*y) <= THEONE32I) ? 1:0;",
  " total+=inside;",
  " }",
  " return(total);",
  "}",
  /* 32-bit floating-point variant */
  "ulong MainLoop32F(ulong iterations,uint seed_z,uint seed_w,size_t work)",
  "{",
  " uint z=seed_z+work;",
  " uint w=seed_w+work;",
  " ulong total=0;",
  " for (ulong i=0;i<iterations;i++)",
  " {",
  " float x=(float)MWCfp ;",
  " float y=(float)MWCfp ;",
  " ulong inside=((x*x+y*y) <= THEONE32F) ? 1:0;",
  " total+=inside;",
  " }",
  " return(total);",
  "}",
  /* 64-bit integer variant */
  "ulong MainLoop64I(ulong iterations,uint seed_z,uint seed_w,size_t work)",
  "{",
  " uint z=seed_z+work;",
  " uint w=seed_w+work;",
  " ulong total=0;",
  " for (ulong i=0;i<iterations;i++)",
  " {",
  " ulong x=(ulong)(MWC>>1);",
  " ulong y=(ulong)(MWC>>1);",
  " ulong inside=((x*x+y*y) <= THEONE64I) ? 1:0;",
  " total+=inside;",
  " }",
  " return(total);",
  "}",
  /* 64-bit floating-point variant */
  "ulong MainLoop64F(ulong iterations,uint seed_z,uint seed_w,size_t work)",
  "{",
  " uint z=seed_z+work;",
  " uint w=seed_w+work;",
  " ulong total=0;",
  " for (ulong i=0;i<iterations;i++)",
  "{",
  " double x=(double)MWCfp ;",
  " double y=(double)MWCfp ;",
  " ulong inside=((x*x+y*y) <= THEONE64F) ? 1:0;",
  " total+=inside;",
  "}",
  " return(total);",
  "}",
  /* Kernel indexed and seeded by the global work-item id */
  "__kernel void MainLoopGlobal(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)",
  "{",
  " ulong total=0;",
  " if (MyType==TFP32) {",
  " total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_global_id(0));",
  " }",
  " else if (MyType==TFP64) {",
  " total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_global_id(0));",
  " }",
  " else if (MyType==TINT32) {",
  " total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_global_id(0));",
  " }",
  " else if (MyType==TINT64) {",
  " total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_global_id(0));",
  " }",
  " barrier(CLK_GLOBAL_MEM_FENCE);",
  " s[get_global_id(0)]=(ulong)total;",
  "}",
  /* Kernel indexed and seeded by the local work-item id */
  "__kernel void MainLoopLocal(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)",
  "{",
  " ulong total=0;",
  " if (MyType==TFP32) {",
  " total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_local_id(0));",
  " }",
  " else if (MyType==TFP64) {",
  " total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_local_id(0));",
  " }",
  " else if (MyType==TINT32) {",
  " total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_local_id(0));",
  " }",
  " else if (MyType==TINT64) {",
  " total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_local_id(0));",
  " }",
  " barrier(CLK_LOCAL_MEM_FENCE);",
  " s[get_local_id(0)]=(ulong)total;",
  "}",
  /* Kernel indexed by the global id, fencing both memory spaces */
  "__kernel void MainLoopHybrid(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)",
  "{",
  " ulong total=0;",
  " if (MyType==TFP32) {",
  " total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_global_id(0));",
  " }",
  " else if (MyType==TFP64) {",
  " total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_global_id(0));",
  " }",
  " else if (MyType==TINT32) {",
  " total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_global_id(0));",
  " }",
  " else if (MyType==TINT64) {",
  " total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_global_id(0));",
  " }",
  " barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);",
  " s[get_global_id(0)]=total;",
  "}"
};
|
269 |
|
|
270 |
int main(int argc, char **argv) |
|
271 |
{ |
|
272 |
if ((argc==1)|| |
|
273 |
(strcmp(argv[1],"-h")==0)|| |
|
274 |
(strcmp(argv[1],"--help")==0)) |
|
275 |
{ |
|
276 |
printf("\nPerforms a Pi estimation by Dart Dash:\n\n" |
|
277 |
"\t#1 OpenCL Plateform ID (default 0)\n" |
|
278 |
"\t#2 OpenCL Device ID (default 0)\n" |
|
279 |
"\t#3 Minimal number of iterations (default 1000000)\n" |
|
280 |
"\t#4 Parallel Rate (default 1024)\n" |
|
281 |
"\t#5 Loops (default 1)\n" |
|
282 |
"\t#6 Type of variable: INT32, INT64, FP32, FP64 (default FP32)\n\n"); |
|
283 |
DetectOpenCLDevices(); |
|
284 |
} |
|
285 |
else |
|
286 |
{ |
|
287 |
|
|
288 |
int MyPlatform=atoi(argv[1]); |
|
289 |
int MyDevice=atoi(argv[2]); |
|
290 |
|
|
291 |
struct timeval tv1,tv2; |
|
292 |
struct timezone tz; |
|
293 |
|
|
294 |
uint64_t Iterations=1000000; |
|
295 |
if (argc>3) { |
|
296 |
Iterations=(uint64_t)atoll(argv[3]); |
|
297 |
} |
|
298 |
|
|
299 |
uint32_t ParallelRate=64; |
|
300 |
if (argc>4) { |
|
301 |
ParallelRate=(uint32_t)atoi(argv[4]); |
|
302 |
} |
|
303 |
|
|
304 |
uint32_t Loops=1; |
|
305 |
if (argc>5) { |
|
306 |
Loops=(uint32_t)atoi(argv[5]); |
|
307 |
} |
|
308 |
|
|
309 |
uint32_t MyType=TFP32; |
|
310 |
if (argc>6) { |
|
311 |
if (strcmp(argv[6],"INT32")==0) { |
|
312 |
MyType=(uint32_t)TINT32; |
|
313 |
} |
|
314 |
else if (strcmp(argv[6],"INT64")==0) { |
|
315 |
MyType=(uint32_t)TINT64; |
|
316 |
} |
|
317 |
else if (strcmp(argv[6],"FP32")==0) { |
|
318 |
MyType=(uint32_t)TFP32; |
|
319 |
} |
|
320 |
else if (strcmp(argv[6],"FP64")==0) { |
|
321 |
MyType=(uint32_t)TFP64; |
|
322 |
} |
|
323 |
} |
|
324 |
|
|
325 |
printf("MyType %d\n",MyType); |
|
326 |
|
|
327 |
cl_int err; |
|
328 |
cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 }; |
|
329 |
|
|
330 |
// Detect, scan, get & initialize platform and device |
|
331 |
cl_uint platformCount; |
|
332 |
cl_platform_id* platforms; |
|
333 |
cl_uint deviceCount; |
|
334 |
cl_device_id* devices; |
|
335 |
size_t valueSize; |
|
336 |
|
|
337 |
/* Setup OpenCL environment. */ |
|
338 |
|
|
339 |
// Get all platforms |
|
340 |
err = clGetPlatformIDs(0, NULL, &platformCount); |
|
341 |
platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount); |
|
342 |
err = clGetPlatformIDs(platformCount, platforms, NULL); |
|
343 |
|
|
344 |
// Get Device defined |
|
345 |
err = clGetDeviceIDs(platforms[MyPlatform], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount); |
|
346 |
devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount); |
|
347 |
err = clGetDeviceIDs(platforms[MyPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL); |
|
348 |
|
|
349 |
// print device name |
|
350 |
err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_NAME, 0, NULL, &valueSize); |
|
351 |
char* deviceName=(char*)malloc(valueSize); |
|
352 |
err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_NAME, valueSize, deviceName, NULL); |
|
353 |
err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_VENDOR, 0, NULL, &valueSize); |
|
354 |
char* vendorName=(char*)malloc(valueSize); |
|
355 |
err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_VENDOR, valueSize, vendorName, NULL); |
|
356 |
printf("\nDevice (%d,%d):\n\t- vendor: %s\n\t- device: %s\n\n",MyPlatform,MyDevice, vendorName,deviceName); |
|
357 |
free(deviceName); |
|
358 |
free(vendorName); |
|
359 |
|
|
360 |
props[1] = (cl_context_properties)platforms[MyPlatform]; |
|
361 |
|
|
362 |
cl_context GPUContext = clCreateContext(props, 1, &devices[MyDevice], NULL, NULL, &err); |
|
363 |
cl_command_queue cqCommandQueue = clCreateCommandQueue(GPUContext,devices[MyDevice], 0, &err); |
|
364 |
|
|
365 |
cl_mem GPUInside = clCreateBuffer(GPUContext, CL_MEM_WRITE_ONLY, |
|
366 |
sizeof(uint64_t) * ParallelRate, NULL, NULL); |
|
367 |
|
|
368 |
// 51 is the number of line for OpenCL code |
|
369 |
// 66, sans test |
|
370 |
cl_program OpenCLProgram = clCreateProgramWithSource(GPUContext, 130 ,OpenCLSource,NULL,NULL); |
|
371 |
clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL); |
|
372 |
cl_kernel OpenCLMainLoopGlobal = clCreateKernel(OpenCLProgram, "MainLoopGlobal", NULL); |
|
373 |
cl_kernel OpenCLMainLoopLocal = clCreateKernel(OpenCLProgram, "MainLoopLocal", NULL); |
|
374 |
cl_kernel OpenCLMainLoopHybrid = clCreateKernel(OpenCLProgram, "MainLoopHybrid", NULL); |
|
375 |
|
|
376 |
// Divide the total number of iterations by the parallel rate |
|
377 |
// Add +1 to the number of per work iterations if division not integer |
|
378 |
uint64_t IterationsEach=((Iterations%ParallelRate)==0)?Iterations/ParallelRate:Iterations/ParallelRate+1; |
|
379 |
// Initialize seeds for MWC RNG generator from Marsaglia |
|
380 |
uint32_t seed_w=110271; |
|
381 |
uint32_t seed_z=101008; |
|
382 |
|
|
383 |
// Set the values of arguments for OpenCL function call |
|
384 |
clSetKernelArg(OpenCLMainLoopGlobal, 0, sizeof(cl_mem),&GPUInside); |
|
385 |
clSetKernelArg(OpenCLMainLoopGlobal, 1, sizeof(uint64_t),&IterationsEach); |
|
386 |
clSetKernelArg(OpenCLMainLoopGlobal, 2, sizeof(uint32_t),&seed_w); |
|
387 |
clSetKernelArg(OpenCLMainLoopGlobal, 3, sizeof(uint32_t),&seed_z); |
|
388 |
clSetKernelArg(OpenCLMainLoopGlobal, 4, sizeof(uint32_t),&MyType); |
|
389 |
|
|
390 |
size_t WorkSize[1] = {ParallelRate}; // one dimensional Range |
|
391 |
|
|
392 |
uint64_t HostInside[ParallelRate]; |
|
393 |
|
|
394 |
for (uint32_t loop=0;loop<Loops;loop++) { |
|
395 |
// Set start timer |
|
396 |
gettimeofday(&tv1, &tz); |
|
397 |
|
|
398 |
// Execute the OpenCL kernel with datas |
|
399 |
clEnqueueNDRangeKernel(cqCommandQueue, OpenCLMainLoopGlobal, 1, NULL, |
|
400 |
WorkSize, NULL, 0, NULL, NULL); |
|
401 |
// Copy each result for each PR from Device to Host |
|
402 |
clEnqueueReadBuffer(cqCommandQueue, GPUInside, CL_TRUE, 0, |
|
403 |
ParallelRate * sizeof(uint64_t), HostInside, 0, NULL, NULL); |
|
404 |
uint64_t inside=0; |
|
405 |
|
|
406 |
for (int i= 0; i < ParallelRate; i++) { |
|
407 |
inside+=HostInside[i]; |
|
408 |
} |
|
409 |
|
|
410 |
// Set stop timer |
|
411 |
gettimeofday(&tv2, &tz); |
|
412 |
|
|
413 |
double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
414 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
415 |
|
|
416 |
double itops=(double)(ParallelRate*IterationsEach)/elapsed; |
|
417 |
|
|
418 |
printf("Inside/Total %ld %ld\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nPi estimation %f\n\n",inside,ParallelRate*IterationsEach,ParallelRate,elapsed,itops,(4.*(float)inside/((float)(ParallelRate)*(float)(IterationsEach)))); |
|
419 |
} |
|
420 |
printf("\n\n"); |
|
421 |
|
|
422 |
clReleaseKernel(OpenCLMainLoopGlobal); |
|
423 |
clReleaseProgram(OpenCLProgram); |
|
424 |
clReleaseCommandQueue(cqCommandQueue); |
|
425 |
clReleaseContext(GPUContext); |
|
426 |
clReleaseMemObject(GPUInside); |
|
427 |
|
|
428 |
|
|
429 |
return 0; |
|
430 |
} |
|
431 |
} |
|
432 |
|
Formats disponibles : Unified diff