Statistiques
| Révision :

root / Pi / OpenCL / PiOpenCL.c @ 156

Historique | Voir | Annoter | Télécharger (14,84 ko)

1 152 equemene
// Pi Dart Dash in OpenCL in C, illustrative example
2 152 equemene
//
3 152 equemene
// Emmanuel Quemener <emmanuel.quemener@ens-lyon.fr>
4 152 equemene
// Copyleft Cecill v2
5 152 equemene
//
6 152 equemene
// -h : print the documentation and detect devices as (platform,device)
7 152 equemene
//
8 152 equemene
// classical use:
9 152 equemene
// #1 OpenCL Platform ID: get this information with -h option
10 152 equemene
// #2 OpenCL Device ID: get this information with -h option
11 152 equemene
// #3 Minimal number of iterations:
12 152 equemene
// #4 Parallel Rate: number of parts the global work is split into, executed in parallel
13 152 equemene
// #5 Loops (to improve statistics)
14 152 equemene
//
15 152 equemene
16 152 equemene
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
17 152 equemene
#include <stdio.h>
18 152 equemene
#include <stdlib.h>
19 152 equemene
#include <string.h>
20 152 equemene
#include <CL/cl.h>
21 152 equemene
#include <stdint.h>
22 152 equemene
#include <sys/time.h>
23 152 equemene
24 156 equemene
// Selectors for the arithmetic flavour of the dart test. The host passes one
// of them to the kernel as its last argument; the OpenCL source re-declares
// the same four values as macros, so both sides must stay in sync.
#define TINT32 0  // 32-bit integer arithmetic
#define TINT64 1  // 64-bit integer arithmetic
#define TFP32 2   // single-precision floating point
#define TFP64 3   // double-precision floating point
28 156 equemene
29 152 equemene
int DetectOpenCLDevices(void)
30 152 equemene
{
31 152 equemene
  int i, j;
32 152 equemene
  char* value;
33 152 equemene
  size_t valueSize;
34 152 equemene
  cl_uint platformCount;
35 152 equemene
  cl_platform_id* platforms;
36 152 equemene
  cl_uint deviceCount;
37 152 equemene
  cl_device_id* devices;
38 152 equemene
  cl_uint maxComputeUnits;
39 152 equemene
  cl_int maxWorkGroupSize;
40 152 equemene
  cl_int maxWorkItemSizes;
41 152 equemene
  cl_device_type dev_type;
42 152 equemene
43 152 equemene
  // get all platforms
44 152 equemene
  clGetPlatformIDs(0, NULL, &platformCount);
45 152 equemene
  platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
46 152 equemene
  clGetPlatformIDs(platformCount, platforms, NULL);
47 152 equemene
48 152 equemene
  printf("OpenCL statistics: %d platform(s) detected\n\n",platformCount);
49 152 equemene
50 152 equemene
  for (i = 0; i < platformCount; i++) {
51 152 equemene
52 152 equemene
    // get all devices
53 152 equemene
    clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount);
54 152 equemene
    devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount);
55 152 equemene
    clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL);
56 152 equemene
57 152 equemene
    // for each device print critical attributes
58 152 equemene
    for (j = 0; j < deviceCount; j++) {
59 152 equemene
60 152 equemene
      // print device name
61 152 equemene
      clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 0, NULL, &valueSize);
62 152 equemene
      value = (char*) malloc(valueSize);
63 152 equemene
      clGetDeviceInfo(devices[j], CL_DEVICE_NAME, valueSize, value, NULL);
64 152 equemene
      printf("Device (%d,%d): %s\n",i, j, value);
65 152 equemene
      free(value);
66 152 equemene
67 152 equemene
      // print type device CPU/GPU/ACCELERATOR
68 152 equemene
      clGetDeviceInfo(devices[j], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, NULL);
69 152 equemene
      printf("\tDevice Type: ");
70 152 equemene
      if(dev_type & CL_DEVICE_TYPE_GPU)
71 152 equemene
        printf("CL_DEVICE_TYPE_GPU ");
72 152 equemene
      if(dev_type & CL_DEVICE_TYPE_CPU)
73 152 equemene
        printf("CL_DEVICE_TYPE_CPU ");
74 152 equemene
      if(dev_type & CL_DEVICE_TYPE_ACCELERATOR)
75 152 equemene
        printf("CL_DEVICE_TYPE_ACCELERATOR ");
76 152 equemene
      if(dev_type & CL_DEVICE_TYPE_DEFAULT)
77 152 equemene
        printf("CL_DEVICE_TYPE_DEFAULT ");
78 152 equemene
      printf("\n");
79 152 equemene
80 152 equemene
      // print device vendor
81 152 equemene
      clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, 0, NULL, &valueSize);
82 152 equemene
      value = (char*) malloc(valueSize);
83 152 equemene
      clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, valueSize, value, NULL);
84 152 equemene
      printf("\tDevice vendor: %s\n", value);
85 152 equemene
      free(value);
86 152 equemene
87 152 equemene
      // print hardware device version
88 152 equemene
      clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, 0, NULL, &valueSize);
89 152 equemene
      value = (char*) malloc(valueSize);
90 152 equemene
      clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, valueSize, value, NULL);
91 152 equemene
      printf("\tHardware version: %s\n", value);
92 152 equemene
      free(value);
93 152 equemene
94 152 equemene
      // print software driver version
95 152 equemene
      clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, 0, NULL, &valueSize);
96 152 equemene
      value = (char*) malloc(valueSize);
97 152 equemene
      clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, valueSize, value, NULL);
98 152 equemene
      printf("\tSoftware version: %s\n", value);
99 152 equemene
      free(value);
100 152 equemene
101 152 equemene
      // print c version supported by compiler for device
102 152 equemene
      clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &valueSize);
103 152 equemene
      value = (char*) malloc(valueSize);
104 152 equemene
      clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, valueSize, value, NULL);
105 152 equemene
      printf("\tOpenCL C version: %s\n", value);
106 152 equemene
      free(value);
107 152 equemene
108 152 equemene
      // print parallel compute units
109 152 equemene
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS,
110 152 equemene
                      sizeof(maxComputeUnits), &maxComputeUnits, NULL);
111 152 equemene
      printf("\tParallel compute units: %d\n", maxComputeUnits);
112 152 equemene
113 152 equemene
      // print max work group size
114 152 equemene
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE,
115 152 equemene
                      sizeof(maxWorkGroupSize), &maxWorkGroupSize, NULL);
116 152 equemene
      printf("\tMaximum Work Group Size: %d\n", maxWorkGroupSize);
117 152 equemene
118 152 equemene
      // print max work items size
119 152 equemene
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_ITEM_SIZES,
120 152 equemene
                      sizeof(maxWorkItemSizes), &maxWorkItemSizes, NULL);
121 152 equemene
      printf("\tMaximum Work Item Sizes: %d\n", maxWorkItemSizes);
122 152 equemene
123 152 equemene
    }
124 152 equemene
    printf("\n");
125 152 equemene
    free(devices);
126 152 equemene
  }
127 152 equemene
128 152 equemene
  free(platforms);
129 152 equemene
  return 0;
130 152 equemene
131 152 equemene
}
132 152 equemene
133 152 equemene
// OpenCL C program, one string per source line (130 entries: the host passes
// this count to clCreateProgramWithSource, keep both in sync).
//
// Contents:
//  - Marsaglia RNG macros (MWC, SHR3, CONG, KISS) and their scaled variants
//  - MainLoop32I / MainLoop32F / MainLoop64I / MainLoop64F: draw `iterations`
//    (x,y) pairs and count those with x*x+y*y <= "one" in the given type
//  - MainLoopGlobal / MainLoopLocal / MainLoopHybrid: kernels dispatching on
//    MyType and storing the per-work-item count into s[]
//
// Every entry ends with a newline so that build-log line numbers are usable.
const char* OpenCLSource[] = {
  "#pragma OPENCL EXTENSION cl_khr_fp64: enable \n",
  "// Marsaglia RNG very simple implementation \n",
  "#define znew  ((z=36969*(z&65535)+(z>>16))<<16) \n",
  "#define wnew  ((w=18000*(w&65535)+(w>>16))&65535) \n",
  "#define MWC   (znew+wnew) \n",
  "#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5)) \n",
  "#define CONG  (jcong=69069*jcong+1234567) \n",
  "#define KISS  ((MWC^CONG)+SHR3) \n",
  "#define MWCfp MWC * 2.328306435454494e-10f \n",
  "#define KISSfp KISS * 2.328306435454494e-10f \n",
  "#define CONGfp CONG * 2.328306435454494e-10f \n",
  "#define SHR3fp SHR3 * 2.328306435454494e-10f \n",
  "#define TINT32 0 \n",
  "#define TINT64 1 \n",
  "#define TFP32 2 \n",
  "#define TFP64 3 \n",
  "#define THEONE32I 1073741824 \n",
  "#define THEONE32F 1.e0f \n",
  "#define THEONE64I 4611686018427387904 \n",
  "#define THEONE64F (double)1.e0f \n",
  "ulong MainLoop32I(ulong iterations,uint seed_z,uint seed_w,size_t work)\n",
  "{\n",
  "   uint z=seed_z+work;\n",
  "   uint w=seed_w+work;\n",
  "   ulong total=0;\n",
  "   for (ulong i=0;i<iterations;i++)\n",
  "   {\n",
  "      uint x= MWC>>17;\n",
  "      uint y= MWC>>17;\n",
  "      ulong inside=((x*x+y*y) <= THEONE32I) ? 1:0;\n",
  "      total+=inside;\n",
  "   }\n",
  "   return(total);\n",
  "}\n",
  "ulong MainLoop32F(ulong iterations,uint seed_z,uint seed_w,size_t work)\n",
  "{\n",
  "   uint z=seed_z+work;\n",
  "   uint w=seed_w+work;\n",
  "   ulong total=0;\n",
  "   for (ulong i=0;i<iterations;i++)\n",
  "   {\n",
  "      float x=(float)MWCfp ;\n",
  "      float y=(float)MWCfp ;\n",
  "      ulong inside=((x*x+y*y) <= THEONE32F) ? 1:0;\n",
  "      total+=inside;\n",
  "   }\n",
  "   return(total);\n",
  "}\n",
  "ulong MainLoop64I(ulong iterations,uint seed_z,uint seed_w,size_t work)\n",
  "{\n",
  "   uint z=seed_z+work;\n",
  "   uint w=seed_w+work;\n",
  "   ulong total=0;\n",
  "   for (ulong i=0;i<iterations;i++)\n",
  "   {\n",
  "      ulong x=(ulong)(MWC>>1);\n",
  "      ulong y=(ulong)(MWC>>1);\n",
  "      ulong inside=((x*x+y*y) <= THEONE64I) ? 1:0;\n",
  "      total+=inside;\n",
  "   }\n",
  "   return(total);\n",
  "}\n",
  "ulong MainLoop64F(ulong iterations,uint seed_z,uint seed_w,size_t work)\n",
  "{\n",
  "   uint z=seed_z+work;\n",
  "   uint w=seed_w+work;\n",
  "   ulong total=0;\n",
  "   for (ulong i=0;i<iterations;i++)\n",
  "   {\n",
  "      double x=(double)MWCfp ;\n",
  "      double y=(double)MWCfp ;\n",
  "      ulong inside=((x*x+y*y) <= THEONE64F) ? 1:0;\n",
  "      total+=inside;\n",
  "   }\n",
  "   return(total);\n",
  "}\n",
  "__kernel void MainLoopGlobal(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)\n",
  "{\n",
  "   ulong total=0;\n",
  "   if (MyType==TFP32) {\n",
  "      total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_global_id(0));\n",
  "   }\n",
  "   else if (MyType==TFP64) {\n",
  "      total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_global_id(0));\n",
  "   }\n",
  "   else if (MyType==TINT32) {\n",
  "      total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_global_id(0));\n",
  "   }\n",
  "   else if (MyType==TINT64) {\n",
  "      total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_global_id(0));\n",
  "   }\n",
  "   barrier(CLK_GLOBAL_MEM_FENCE);\n",
  "   s[get_global_id(0)]=(ulong)total;\n",
  "}\n",
  "__kernel void MainLoopLocal(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)\n",
  "{\n",
  "   ulong total=0;\n",
  "   if (MyType==TFP32) {\n",
  "      total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_local_id(0));\n",
  "   }\n",
  "   else if (MyType==TFP64) {\n",
  "      total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_local_id(0));\n",
  "   }\n",
  "   else if (MyType==TINT32) {\n",
  "      total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_local_id(0));\n",
  "   }\n",
  "   else if (MyType==TINT64) {\n",
  "      total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_local_id(0));\n",
  "   }\n",
  "   barrier(CLK_LOCAL_MEM_FENCE);\n",
  "   s[get_local_id(0)]=(ulong)total;\n",
  "}\n",
  "__kernel void MainLoopHybrid(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)\n",
  "{\n",
  "   ulong total=0;\n",
  "   if (MyType==TFP32) {\n",
  "      total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_global_id(0));\n",
  "   }\n",
  "   else if (MyType==TFP64) {\n",
  "      total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_global_id(0));\n",
  "   }\n",
  "   else if (MyType==TINT32) {\n",
  "      total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_global_id(0));\n",
  "   }\n",
  "   else if (MyType==TINT64) {\n",
  "      total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_global_id(0));\n",
  "   }\n",
  "   barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);\n",
  "   s[get_global_id(0)]=total;\n",
  "}\n"
};
265 152 equemene
266 152 equemene
int main(int argc, char **argv)
267 152 equemene
{
268 152 equemene
  if ((argc==1)||
269 152 equemene
      (strcmp(argv[1],"-h")==0)||
270 152 equemene
      (strcmp(argv[1],"--help")==0))
271 152 equemene
    {
272 152 equemene
      printf("\nPerforms a Pi estimation by Dart Dash:\n\n"
273 156 equemene
             "\t#1 OpenCL Plateform ID (default 0)\n"
274 156 equemene
             "\t#2 OpenCL Device ID (default 0)\n"
275 156 equemene
             "\t#3 Minimal number of iterations (default 1000000)\n"
276 156 equemene
             "\t#4 Parallel Rate (default 1024)\n"
277 156 equemene
             "\t#5 Loops (default 1)\n"
278 156 equemene
             "\t#6 Type of variable: INT32, INT64, FP32, FP64 (default FP32)\n\n");
279 152 equemene
      DetectOpenCLDevices();
280 152 equemene
    }
281 152 equemene
  else
282 152 equemene
    {
283 152 equemene
284 152 equemene
      int MyPlatform=atoi(argv[1]);
285 152 equemene
      int MyDevice=atoi(argv[2]);
286 152 equemene
287 152 equemene
      struct timeval tv1,tv2;
288 152 equemene
      struct timezone tz;
289 152 equemene
290 152 equemene
      uint64_t Iterations=1000000;
291 152 equemene
      if (argc>3) {
292 152 equemene
        Iterations=(uint64_t)atoll(argv[3]);
293 152 equemene
      }
294 152 equemene
295 152 equemene
      uint32_t ParallelRate=64;
296 152 equemene
      if (argc>4) {
297 152 equemene
        ParallelRate=(uint32_t)atoi(argv[4]);
298 152 equemene
      }
299 152 equemene
300 152 equemene
      uint32_t Loops=1;
301 152 equemene
      if (argc>5) {
302 152 equemene
        Loops=(uint32_t)atoi(argv[5]);
303 152 equemene
      }
304 152 equemene
305 156 equemene
      uint32_t MyType=TFP32;
306 156 equemene
      if (argc>6) {
307 156 equemene
        if (strcmp(argv[6],"INT32")==0) {
308 156 equemene
          MyType=(uint32_t)TINT32;
309 156 equemene
        }
310 156 equemene
        else if (strcmp(argv[6],"INT64")==0) {
311 156 equemene
          MyType=(uint32_t)TINT64;
312 156 equemene
        }
313 156 equemene
        else if (strcmp(argv[6],"FP32")==0) {
314 156 equemene
          MyType=(uint32_t)TFP32;
315 156 equemene
        }
316 156 equemene
        else if (strcmp(argv[6],"FP64")==0) {
317 156 equemene
          MyType=(uint32_t)TFP64;
318 156 equemene
        }
319 156 equemene
      }
320 156 equemene
321 156 equemene
      printf("MyType %d\n",MyType);
322 156 equemene
323 152 equemene
      cl_int err;
324 152 equemene
      cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
325 152 equemene
326 152 equemene
      // Detect, scan, get & initialize platform and device
327 152 equemene
      cl_uint platformCount;
328 152 equemene
      cl_platform_id* platforms;
329 152 equemene
      cl_uint deviceCount;
330 152 equemene
      cl_device_id* devices;
331 152 equemene
      size_t valueSize;
332 152 equemene
333 152 equemene
      /* Setup OpenCL environment. */
334 152 equemene
335 152 equemene
      // Get all platforms
336 152 equemene
      err = clGetPlatformIDs(0, NULL, &platformCount);
337 152 equemene
      platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
338 152 equemene
      err = clGetPlatformIDs(platformCount, platforms, NULL);
339 152 equemene
340 152 equemene
      // Get Device defined
341 152 equemene
      err = clGetDeviceIDs(platforms[MyPlatform], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount);
342 152 equemene
      devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount);
343 152 equemene
      err = clGetDeviceIDs(platforms[MyPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL);
344 152 equemene
345 152 equemene
      // print device name
346 152 equemene
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_NAME, 0, NULL, &valueSize);
347 152 equemene
      char* deviceName=(char*)malloc(valueSize);
348 152 equemene
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_NAME, valueSize, deviceName, NULL);
349 152 equemene
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_VENDOR, 0, NULL, &valueSize);
350 152 equemene
      char* vendorName=(char*)malloc(valueSize);
351 152 equemene
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_VENDOR, valueSize, vendorName, NULL);
352 152 equemene
      printf("\nDevice (%d,%d):\n\t- vendor: %s\n\t- device: %s\n\n",MyPlatform,MyDevice, vendorName,deviceName);
353 152 equemene
      free(deviceName);
354 152 equemene
      free(vendorName);
355 152 equemene
356 152 equemene
      props[1] = (cl_context_properties)platforms[MyPlatform];
357 152 equemene
358 152 equemene
      cl_context GPUContext = clCreateContext(props, 1, &devices[MyDevice], NULL, NULL, &err);
359 152 equemene
      cl_command_queue cqCommandQueue = clCreateCommandQueue(GPUContext,devices[MyDevice], 0, &err);
360 152 equemene
361 152 equemene
      cl_mem GPUInside = clCreateBuffer(GPUContext, CL_MEM_WRITE_ONLY,
362 152 equemene
                                              sizeof(uint64_t) * ParallelRate, NULL, NULL);
363 152 equemene
364 152 equemene
      // 51 is the number of line for OpenCL code
365 156 equemene
      // 66, sans test
366 156 equemene
      cl_program OpenCLProgram = clCreateProgramWithSource(GPUContext, 130 ,OpenCLSource,NULL,NULL);
367 152 equemene
      clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL);
368 152 equemene
      cl_kernel OpenCLMainLoopGlobal = clCreateKernel(OpenCLProgram, "MainLoopGlobal", NULL);
369 152 equemene
      cl_kernel OpenCLMainLoopLocal = clCreateKernel(OpenCLProgram, "MainLoopLocal", NULL);
370 152 equemene
      cl_kernel OpenCLMainLoopHybrid = clCreateKernel(OpenCLProgram, "MainLoopHybrid", NULL);
371 152 equemene
372 152 equemene
      // Divide the total number of iterations by the parallel rate
373 152 equemene
      // Add +1 to the number of per work iterations if division not integer
374 152 equemene
      uint64_t IterationsEach=((Iterations%ParallelRate)==0)?Iterations/ParallelRate:Iterations/ParallelRate+1;
375 152 equemene
      // Initialize seeds for MWC RNG generator from Marsaglia
376 152 equemene
      uint32_t seed_w=110271;
377 152 equemene
      uint32_t seed_z=101008;
378 152 equemene
379 152 equemene
      // Set the values of arguments for OpenCL function call
380 152 equemene
      clSetKernelArg(OpenCLMainLoopGlobal, 0, sizeof(cl_mem),&GPUInside);
381 152 equemene
      clSetKernelArg(OpenCLMainLoopGlobal, 1, sizeof(uint64_t),&IterationsEach);
382 152 equemene
      clSetKernelArg(OpenCLMainLoopGlobal, 2, sizeof(uint32_t),&seed_w);
383 152 equemene
      clSetKernelArg(OpenCLMainLoopGlobal, 3, sizeof(uint32_t),&seed_z);
384 156 equemene
      clSetKernelArg(OpenCLMainLoopGlobal, 4, sizeof(uint32_t),&MyType);
385 152 equemene
386 152 equemene
      size_t WorkSize[1] = {ParallelRate}; // one dimensional Range
387 152 equemene
388 152 equemene
      uint64_t HostInside[ParallelRate];
389 152 equemene
390 152 equemene
      for (uint32_t loop=0;loop<Loops;loop++) {
391 153 equemene
        // Set start timer
392 153 equemene
        gettimeofday(&tv1, &tz);
393 153 equemene
394 153 equemene
            // Execute the OpenCL kernel with datas
395 152 equemene
        clEnqueueNDRangeKernel(cqCommandQueue, OpenCLMainLoopGlobal, 1, NULL,
396 152 equemene
                               WorkSize, NULL, 0, NULL, NULL);
397 153 equemene
        // Copy each result for each PR from Device to Host
398 152 equemene
        clEnqueueReadBuffer(cqCommandQueue, GPUInside, CL_TRUE, 0,
399 152 equemene
                            ParallelRate * sizeof(uint64_t), HostInside, 0, NULL, NULL);
400 153 equemene
        uint64_t inside=0;
401 153 equemene
402 153 equemene
        for (int i= 0; i < ParallelRate; i++) {
403 153 equemene
          inside+=HostInside[i];
404 153 equemene
        }
405 153 equemene
406 153 equemene
        // Set stop timer
407 153 equemene
        gettimeofday(&tv2, &tz);
408 153 equemene
409 153 equemene
        double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
410 153 equemene
                                (tv2.tv_usec-tv1.tv_usec))/1000000;
411 153 equemene
412 153 equemene
        double itops=(double)(ParallelRate*IterationsEach)/elapsed;
413 153 equemene
414 153 equemene
        printf("Inside/Total %ld %ld\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nPi estimation %f\n\n",inside,ParallelRate*IterationsEach,ParallelRate,elapsed,itops,(4.*(float)inside/((float)(ParallelRate)*(float)(IterationsEach))));
415 152 equemene
      }
416 152 equemene
      printf("\n\n");
417 152 equemene
418 152 equemene
      clReleaseKernel(OpenCLMainLoopGlobal);
419 152 equemene
      clReleaseProgram(OpenCLProgram);
420 152 equemene
      clReleaseCommandQueue(cqCommandQueue);
421 152 equemene
      clReleaseContext(GPUContext);
422 152 equemene
      clReleaseMemObject(GPUInside);
423 152 equemene
424 152 equemene
425 152 equemene
      return 0;
426 152 equemene
    }
427 152 equemene
}