Révision 255

Pi/C/OpenCL/Makefile (revision 255)
1
SOURCE=PiOpenCL.c PiOpenCL_CONG.c PiOpenCL_SHR3.c PiOpenCL_MWC.c PiOpenCL_KISS.c
2

  
3
CC=gcc
4
CFLAGS=-Wall -Wno-unused-variable -Wno-sequence-point -O3 -std=c99
5
LIBRARY=-lm -lOpenCL
6

  
7
all: $(SOURCE)
8

  
9
	$(foreach SRC,$(SOURCE),$(CC) $(CFLAGS) -o $(SRC:.c=) $(SRC) $(LIBRARY); )
10

  
11
.PHONY: clean check mrproper
12

  
13
mrproper: 
14
	rm -rf $(foreach SRC,$(SOURCE),$(SRC:.c=) )
15
	find . -name "*~" -exec rm {} \;
16

  
17
clean:
18
	find . -name "*~" -exec rm {} \;
Pi/C/OpenCL/PiOpenCL_CONG.c (revision 255)
1
// Pi Dart Dash in OpenCL in C, illustrative example
2
//
3
// Emmanuel Quemener <emmanuel.quemener@gmail.com>
4
//
5
// CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com> 
6
// Copyleft Cecill v2
7
//
8
// -h : print the documentation and detect devices as (platform,device)
9
//
10
// classical use:
11
// #1 OpenCL Plateform ID: get this information with -h option
12
// #2 OpenCL Device ID: get this information with -h option
13
// #3 Minimal number of iterations: 
14
// #4 Parallel Rate: scattering global work in parts executed //ly
15
// #5 Loops (to improve statistics)
16
// #6 Type of variables INT32, INT64, FP32, FP64
17
// 
18
// To compile : gcc -o PiOpenCL_CONG PiOpenCL_CONG.c -lOpenCL -lm
19

  
20
#define CL_TARGET_OPENCL_VERSION 220
21
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
22
#include <stdio.h>
23
#include <stdlib.h>
24
#include <string.h>
25
#include <CL/cl.h>
26
#include <stdint.h>
27
#include <math.h>
28
#include <sys/time.h>
29

  
30
#define TINT32 0
31
#define TINT64 1
32
#define TFP32 2
33
#define TFP64 3
34

  
35
int DetectOpenCLDevices(void) 
36
{
37
  int i, j;
38
  char* value;
39
  size_t valueSize;
40
  cl_uint platformCount;
41
  cl_platform_id* platforms;
42
  cl_uint deviceCount;
43
  cl_device_id* devices;
44
  cl_uint maxComputeUnits;
45
  cl_int maxWorkGroupSize;
46
  cl_int maxWorkItemSizes;
47
  cl_device_type dev_type;
48

  
49
  // get all platforms
50
  clGetPlatformIDs(0, NULL, &platformCount);
51
  platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
52
  clGetPlatformIDs(platformCount, platforms, NULL);
53

  
54
  printf("OpenCL statistics: %d platform(s) detected\n\n",platformCount);
55

  
56
  for (i = 0; i < platformCount; i++) {
57

  
58
    // get all devices
59
    clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount);
60
    devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount);
61
    clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL);
62

  
63
    // for each device print critical attributes
64
    for (j = 0; j < deviceCount; j++) {
65
      
66
      // print device name
67
      clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 0, NULL, &valueSize);
68
      value = (char*) malloc(valueSize);
69
      clGetDeviceInfo(devices[j], CL_DEVICE_NAME, valueSize, value, NULL);
70
      printf("Device (%d,%d): %s\n",i, j, value);
71
      free(value);
72

  
73
      // print type device CPU/GPU/ACCELERATOR
74
      clGetDeviceInfo(devices[j], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, NULL);
75
      printf("\tDevice Type: ");
76
      if(dev_type & CL_DEVICE_TYPE_GPU)
77
	printf("CL_DEVICE_TYPE_GPU ");
78
      if(dev_type & CL_DEVICE_TYPE_CPU)
79
	printf("CL_DEVICE_TYPE_CPU ");
80
      if(dev_type & CL_DEVICE_TYPE_ACCELERATOR)
81
	printf("CL_DEVICE_TYPE_ACCELERATOR ");
82
      if(dev_type & CL_DEVICE_TYPE_DEFAULT)
83
	printf("CL_DEVICE_TYPE_DEFAULT ");
84
      printf("\n");
85

  
86
      // print device vendor
87
      clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, 0, NULL, &valueSize);
88
      value = (char*) malloc(valueSize);
89
      clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, valueSize, value, NULL);
90
      printf("\tDevice vendor: %s\n", value);
91
      free(value);
92

  
93
      // print hardware device version
94
      clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, 0, NULL, &valueSize);
95
      value = (char*) malloc(valueSize);
96
      clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, valueSize, value, NULL);
97
      printf("\tHardware version: %s\n", value);
98
      free(value);
99

  
100
      // print software driver version
101
      clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, 0, NULL, &valueSize);
102
      value = (char*) malloc(valueSize);
103
      clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, valueSize, value, NULL);
104
      printf("\tSoftware version: %s\n", value);
105
      free(value);
106
      
107
      // print c version supported by compiler for device
108
      clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &valueSize);
109
      value = (char*) malloc(valueSize);
110
      clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, valueSize, value, NULL);
111
      printf("\tOpenCL C version: %s\n", value);
112
      free(value);
113

  
114
      // print parallel compute units
115
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS,
116
		      sizeof(maxComputeUnits), &maxComputeUnits, NULL);
117
      printf("\tParallel compute units: %d\n", maxComputeUnits);
118
      
119
      // print max work group size
120
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE,
121
		      sizeof(maxWorkGroupSize), &maxWorkGroupSize, NULL);
122
      printf("\tMaximum Work Group Size: %d\n", maxWorkGroupSize);
123
      
124
      // print max work items size
125
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_ITEM_SIZES,
126
		      sizeof(maxWorkItemSizes), &maxWorkItemSizes, NULL);
127
      printf("\tMaximum Work Item Sizes: %d\n", maxWorkItemSizes);
128
      
129
    }
130
    printf("\n");
131
    free(devices);
132
  }
133

  
134
  free(platforms);
135
  return 0;
136

  
137
}
138

  
139
const char* OpenCLSource[] = {
140
  "#pragma OPENCL EXTENSION cl_khr_fp64: enable \n",
141
  "// Marsaglia RNG very simple implementation \n",
142
  "#define znew  ((z=36969*(z&65535)+(z>>16))<<16) \n",
143
  "#define wnew  ((w=18000*(w&65535)+(w>>16))&65535) \n",
144
  "#define MWC   (znew+wnew) \n",
145
  "#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5)) \n",
146
  "#define CONG  (jcong=69069*jcong+1234567) \n",
147
  "#define KISS  ((MWC^CONG)+SHR3) \n",
148
  "#define MWCfp MWC * 2.328306435454494e-10f \n",
149
  "#define KISSfp KISS * 2.328306435454494e-10f \n",
150
  "#define CONGfp CONG * 2.328306435454494e-10f \n",
151
  "#define SHR3fp SHR3 * 2.328306435454494e-10f \n",
152
  "#define TINT32 0 \n",
153
  "#define TINT64 1 \n",
154
  "#define TFP32 2 \n",
155
  "#define TFP64 3 \n",
156
  "#define THEONE32I 1073741824 \n",
157
  "#define THEONE32F 1.e0f \n",
158
  "#define THEONE64I 4611686018427387904 \n",
159
  "#define THEONE64F (double)1.e0f \n",
160
  "ulong MainLoop32I(ulong iterations,uint seed_z,uint seed_w,size_t work)",
161
  "{",
162
  "   uint jcong=seed_z+work;",
163
  "   ulong total=0;",
164
  "   for (ulong i=0;i<iterations;i++)",
165
  "   {",
166
  "      uint x= CONG>>17;",
167
  "      uint y= CONG>>17;",
168
  "      ulong inside=((x*x+y*y) <= THEONE32I) ? 1:0;",
169
  "      total+=inside;",
170
  "   }",
171
  "   return(total);",
172
  "}",
173
  "ulong MainLoop32F(ulong iterations,uint seed_z,uint seed_w,size_t work)",
174
  "{",
175
  "   uint jcong=seed_z+work;",
176
  "   ulong total=0;",
177
  "   for (ulong i=0;i<iterations;i++)",
178
  "   {",
179
  "      float x=(float)CONGfp ;",
180
  "      float y=(float)CONGfp ;",
181
  "      ulong inside=((x*x+y*y) <= THEONE32F) ? 1:0;",
182
  "      total+=inside;",
183
  "   }",
184
  "   return(total);",
185
  "}",
186
  "ulong MainLoop64I(ulong iterations,uint seed_z,uint seed_w,size_t work)",
187
  "{",
188
  "   uint jcong=seed_z+work;",
189
  "   ulong total=0;",
190
  "   for (ulong i=0;i<iterations;i++)",
191
  "   {",
192
  "      ulong x=(ulong)(CONG>>1);",
193
  "      ulong y=(ulong)(CONG>>1);",
194
  "      ulong inside=((x*x+y*y) <= THEONE64I) ? 1:0;",
195
  "      total+=inside;",
196
  "   }",
197
  "   return(total);",
198
  "}",
199
  "ulong MainLoop64F(ulong iterations,uint seed_z,uint seed_w,size_t work)",
200
  "{",
201
  "   uint jcong=seed_z+work;",
202
  "   ulong total=0;",
203
  "   for (ulong i=0;i<iterations;i++)",
204
  "{",
205
  "        double x=(double)CONGfp ;",
206
  "        double y=(double)CONGfp ;",
207
  "      ulong inside=((x*x+y*y) <= THEONE64F) ? 1:0;",
208
  "      total+=inside;",
209
  "}",
210
  "   return(total);",
211
  "}",
212
  "__kernel void MainLoopGlobal(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)",
213
  "{",
214
  "   ulong total;",
215
  "   if (MyType==TFP32) {",
216
  "      total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_global_id(0));",
217
  "   }",
218
  "   else if (MyType==TFP64) {",
219
  "      total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_global_id(0));",
220
  "   }",  
221
  "   else if (MyType==TINT32) {",
222
  "      total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_global_id(0));",
223
  "   }",  
224
  "   else if (MyType==TINT64) {",
225
  "      total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_global_id(0));",
226
  "   }",  
227
  "   barrier(CLK_GLOBAL_MEM_FENCE);",
228
  "   s[get_global_id(0)]=(ulong)total;",
229
  "}",
230
  "__kernel void MainLoopLocal(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)",
231
  "{",
232
  "   ulong total;",
233
  "   if (MyType==TFP32) {",
234
  "      total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_local_id(0));",
235
  "   }",
236
  "   else if (MyType==TFP64) {",
237
  "      total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_local_id(0));",
238
  "   }",  
239
  "   else if (MyType==TINT32) {",
240
  "      total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_local_id(0));",
241
  "   }",  
242
  "   else if (MyType==TINT64) {",
243
  "      total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_local_id(0));",
244
  "   }",  
245
  "   barrier(CLK_LOCAL_MEM_FENCE);",
246
  "   s[get_local_id(0)]=(ulong)total;",
247
  "}",
248
  "__kernel void MainLoopHybrid(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)",
249
  "{",
250
  "   ulong total;",
251
  "   if (MyType==TFP32) {",
252
  "      total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_global_id(0));",
253
  "   }",
254
  "   else if (MyType==TFP64) {",
255
  "      total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_global_id(0));",
256
  "   }",  
257
  "   else if (MyType==TINT32) {",
258
  "      total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_global_id(0));",
259
  "   }",  
260
  "   else if (MyType==TINT64) {",
261
  "      total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_global_id(0));",
262
  "   }",  
263
  "   barrier(CLK_GLOBAL_MEM_FENCE || CLK_LOCAL_MEM_FENCE);",
264
  "   s[get_global_id(0)]=total;",
265
  "}"
266
};
267

  
268
int main(int argc, char **argv)
269
{
270
  if ((argc==1)||
271
      (strcmp(argv[1],"-h")==0)||
272
      (strcmp(argv[1],"--help")==0))
273
    {
274
      printf("\nPerforms a Pi estimation by Dart Dash:\n\n"
275
	     "\t#1 OpenCL Plateform ID (default 0)\n"
276
	     "\t#2 OpenCL Device ID (default 0)\n"
277
	     "\t#3 Minimal number of iterations (default 1000000)\n"
278
	     "\t#4 Parallel Rate (default 1024)\n"
279
	     "\t#5 Loops (default 1)\n"
280
	     "\t#6 Type of variable: INT32, INT64, FP32, FP64 (default FP32)\n\n");
281
      DetectOpenCLDevices();
282
    }
283
  else
284
    {
285
      
286
      int MyPlatform=atoi(argv[1]);
287
      int MyDevice=atoi(argv[2]);
288

  
289
      struct timeval tv1,tv2;
290
      
291
      uint64_t Iterations=1000000;
292
      if (argc>3) {
293
	Iterations=(uint64_t)atoll(argv[3]);
294
      }
295
      
296
      uint32_t ParallelRate=1024;
297
      if (argc>4) {
298
	ParallelRate=(uint32_t)atoi(argv[4]);
299
      }
300
      
301
      uint32_t Loops=1;
302
      if (argc>5) {
303
	Loops=(uint32_t)atoi(argv[5]);
304
      }
305
      
306
      uint32_t MyType=TFP32;
307
      if (argc>6) {
308
	if (strcmp(argv[6],"INT32")==0) {
309
	  MyType=(uint32_t)TINT32;
310
	}
311
	else if (strcmp(argv[6],"INT64")==0) {
312
	  MyType=(uint32_t)TINT64;
313
	}
314
	else if (strcmp(argv[6],"FP32")==0) {
315
	  MyType=(uint32_t)TFP32;
316
	}
317
	else if (strcmp(argv[6],"FP64")==0) {
318
	  MyType=(uint32_t)TFP64;
319
	}
320
      }
321

  
322
      printf("MyType %d\n",MyType);
323
      
324
      cl_int err;
325
      cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
326
      
327
      // Detect, scan, get & initialize platform and device
328
      cl_uint platformCount;
329
      cl_platform_id* platforms;
330
      cl_uint deviceCount;
331
      cl_device_id* devices;      
332
      size_t valueSize;
333
      
334
      /* Setup OpenCL environment. */
335
     
336
      // Get all platforms
337
      err = clGetPlatformIDs(0, NULL, &platformCount);
338
      platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
339
      err = clGetPlatformIDs(platformCount, platforms, NULL);
340

  
341
      // Get Device defined
342
      err = clGetDeviceIDs(platforms[MyPlatform], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount);
343
      devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount);
344
      err = clGetDeviceIDs(platforms[MyPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL);  
345

  
346
      // print device name
347
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_NAME, 0, NULL, &valueSize);
348
      char* deviceName=(char*)malloc(valueSize);
349
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_NAME, valueSize, deviceName, NULL);
350
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_VENDOR, 0, NULL, &valueSize);
351
      char* vendorName=(char*)malloc(valueSize);      
352
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_VENDOR, valueSize, vendorName, NULL);
353
      printf("\nDevice (%d,%d):\n\t- vendor: %s\n\t- device: %s\n\n",MyPlatform,MyDevice, vendorName,deviceName);
354
      free(deviceName);
355
      free(vendorName);
356
      
357
      props[1] = (cl_context_properties)platforms[MyPlatform];
358
      
359
      cl_context GPUContext = clCreateContext(props, 1, &devices[MyDevice], NULL, NULL, &err);
360
      cl_command_queue cqCommandQueue = clCreateCommandQueue(GPUContext,devices[MyDevice], 0, &err);
361

  
362
      cl_mem GPUInside = clCreateBuffer(GPUContext, CL_MEM_WRITE_ONLY,
363
					      sizeof(uint64_t) * ParallelRate, NULL, NULL);
364
      
365
      // 126 is the number of lines for OpenCL code (for kernels)
366
      cl_program OpenCLProgram = clCreateProgramWithSource(GPUContext, 126 ,OpenCLSource,NULL,NULL);
367
      clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL);
368
      cl_kernel OpenCLMainLoopGlobal = clCreateKernel(OpenCLProgram, "MainLoopGlobal", NULL);
369
      cl_kernel OpenCLMainLoopLocal = clCreateKernel(OpenCLProgram, "MainLoopLocal", NULL);
370
      cl_kernel OpenCLMainLoopHybrid = clCreateKernel(OpenCLProgram, "MainLoopHybrid", NULL);
371

  
372
      // Divide the total number of iterations by the parallel rate
373
      // Add +1 to the number of per work iterations if division not integer
374
      uint64_t IterationsEach=((Iterations%ParallelRate)==0)?Iterations/ParallelRate:Iterations/ParallelRate+1;
375
      // Initialize seeds for MWC RNG generator from Marsaglia
376
      uint32_t seed_w=110271;
377
      uint32_t seed_z=101008;
378

  
379
      // Set the values of arguments for OpenCL function call
380
      clSetKernelArg(OpenCLMainLoopGlobal, 0, sizeof(cl_mem),&GPUInside);
381
      clSetKernelArg(OpenCLMainLoopGlobal, 1, sizeof(uint64_t),&IterationsEach);
382
      clSetKernelArg(OpenCLMainLoopGlobal, 2, sizeof(uint32_t),&seed_w);
383
      clSetKernelArg(OpenCLMainLoopGlobal, 3, sizeof(uint32_t),&seed_z);
384
      clSetKernelArg(OpenCLMainLoopGlobal, 4, sizeof(uint32_t),&MyType);
385
      
386
      size_t WorkSize[1] = {ParallelRate}; // one dimensional Range
387

  
388
      uint64_t HostInside[ParallelRate];
389

  
390
      for (uint32_t loop=0;loop<Loops;loop++) {
391
	// Set start timer
392
	gettimeofday(&tv1, NULL);
393
	
394
    	// Execute the OpenCL kernel with datas
395
	clEnqueueNDRangeKernel(cqCommandQueue, OpenCLMainLoopGlobal, 1, NULL,
396
			       WorkSize, NULL, 0, NULL, NULL);
397
	// Copy each result for each PR from Device to Host
398
	clEnqueueReadBuffer(cqCommandQueue, GPUInside, CL_TRUE, 0,
399
			    ParallelRate * sizeof(uint64_t), HostInside, 0, NULL, NULL);
400
	uint64_t inside=0;
401

  
402
	for (int i= 0; i < ParallelRate; i++) {
403
	  inside+=HostInside[i];
404
	}
405
	  
406
	// Set stop timer
407
	gettimeofday(&tv2, NULL);
408

  
409
	double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
410
				(tv2.tv_usec-tv1.tv_usec))/1000000;  
411

  
412
	double itops=(double)(ParallelRate*IterationsEach)/elapsed;
413
      
414
	printf("Inside/Total %ld %ld\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nLogItops %.2f\nPi estimation %f\n\n",inside,ParallelRate*IterationsEach,ParallelRate,elapsed,itops,log10(itops),(4.*(float)inside/((float)(ParallelRate)*(float)(IterationsEach))));
415
      }
416
      printf("\n\n");
417
      
418
      clReleaseKernel(OpenCLMainLoopGlobal);
419
      clReleaseProgram(OpenCLProgram);
420
      clReleaseCommandQueue(cqCommandQueue);
421
      clReleaseContext(GPUContext);
422
      clReleaseMemObject(GPUInside);
423

  
424
      
425
      return 0;
426
    }
427
}
428

  
Pi/C/OpenCL/PiOpenCL_MWC.c (revision 255)
1
// Pi Dart Dash in OpenCL in C, illustrative example
2
//
3
// Emmanuel Quemener <emmanuel.quemener@gmail.com>
4
//
5
// CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com> 
6
// Copyleft Cecill v2
7
//
8
// -h : print the documentation and detect devices as (platform,device)
9
//
10
// classical use:
11
// #1 OpenCL Plateform ID: get this information with -h option
12
// #2 OpenCL Device ID: get this information with -h option
13
// #3 Minimal number of iterations: 
14
// #4 Parallel Rate: scattering global work in parts executed //ly
15
// #5 Loops (to improve statistics)
16
// #6 Type of variables INT32, INT64, FP32, FP64
17
// 
18
// To compile : gcc -o PiOpenCL PiOpenCL.c -lOpenCL -lm
19

  
20
#define CL_TARGET_OPENCL_VERSION 220
21
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
22
#include <stdio.h>
23
#include <stdlib.h>
24
#include <string.h>
25
#include <CL/cl.h>
26
#include <stdint.h>
27
#include <math.h>
28
#include <sys/time.h>
29

  
30
#define TINT32 0
31
#define TINT64 1
32
#define TFP32 2
33
#define TFP64 3
34

  
35
int DetectOpenCLDevices(void) 
36
{
37
  int i, j;
38
  char* value;
39
  size_t valueSize;
40
  cl_uint platformCount;
41
  cl_platform_id* platforms;
42
  cl_uint deviceCount;
43
  cl_device_id* devices;
44
  cl_uint maxComputeUnits;
45
  cl_int maxWorkGroupSize;
46
  cl_int maxWorkItemSizes;
47
  cl_device_type dev_type;
48

  
49
  // get all platforms
50
  clGetPlatformIDs(0, NULL, &platformCount);
51
  platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
52
  clGetPlatformIDs(platformCount, platforms, NULL);
53

  
54
  printf("OpenCL statistics: %d platform(s) detected\n\n",platformCount);
55

  
56
  for (i = 0; i < platformCount; i++) {
57

  
58
    // get all devices
59
    clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount);
60
    devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount);
61
    clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL);
62

  
63
    // for each device print critical attributes
64
    for (j = 0; j < deviceCount; j++) {
65
      
66
      // print device name
67
      clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 0, NULL, &valueSize);
68
      value = (char*) malloc(valueSize);
69
      clGetDeviceInfo(devices[j], CL_DEVICE_NAME, valueSize, value, NULL);
70
      printf("Device (%d,%d): %s\n",i, j, value);
71
      free(value);
72

  
73
      // print type device CPU/GPU/ACCELERATOR
74
      clGetDeviceInfo(devices[j], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, NULL);
75
      printf("\tDevice Type: ");
76
      if(dev_type & CL_DEVICE_TYPE_GPU)
77
	printf("CL_DEVICE_TYPE_GPU ");
78
      if(dev_type & CL_DEVICE_TYPE_CPU)
79
	printf("CL_DEVICE_TYPE_CPU ");
80
      if(dev_type & CL_DEVICE_TYPE_ACCELERATOR)
81
	printf("CL_DEVICE_TYPE_ACCELERATOR ");
82
      if(dev_type & CL_DEVICE_TYPE_DEFAULT)
83
	printf("CL_DEVICE_TYPE_DEFAULT ");
84
      printf("\n");
85

  
86
      // print device vendor
87
      clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, 0, NULL, &valueSize);
88
      value = (char*) malloc(valueSize);
89
      clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, valueSize, value, NULL);
90
      printf("\tDevice vendor: %s\n", value);
91
      free(value);
92

  
93
      // print hardware device version
94
      clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, 0, NULL, &valueSize);
95
      value = (char*) malloc(valueSize);
96
      clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, valueSize, value, NULL);
97
      printf("\tHardware version: %s\n", value);
98
      free(value);
99

  
100
      // print software driver version
101
      clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, 0, NULL, &valueSize);
102
      value = (char*) malloc(valueSize);
103
      clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, valueSize, value, NULL);
104
      printf("\tSoftware version: %s\n", value);
105
      free(value);
106
      
107
      // print c version supported by compiler for device
108
      clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &valueSize);
109
      value = (char*) malloc(valueSize);
110
      clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, valueSize, value, NULL);
111
      printf("\tOpenCL C version: %s\n", value);
112
      free(value);
113

  
114
      // print parallel compute units
115
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS,
116
		      sizeof(maxComputeUnits), &maxComputeUnits, NULL);
117
      printf("\tParallel compute units: %d\n", maxComputeUnits);
118
      
119
      // print max work group size
120
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE,
121
		      sizeof(maxWorkGroupSize), &maxWorkGroupSize, NULL);
122
      printf("\tMaximum Work Group Size: %d\n", maxWorkGroupSize);
123
      
124
      // print max work items size
125
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_ITEM_SIZES,
126
		      sizeof(maxWorkItemSizes), &maxWorkItemSizes, NULL);
127
      printf("\tMaximum Work Item Sizes: %d\n", maxWorkItemSizes);
128
      
129
    }
130
    printf("\n");
131
    free(devices);
132
  }
133

  
134
  free(platforms);
135
  return 0;
136

  
137
}
138

  
139
const char* OpenCLSource[] = {
140
  "#pragma OPENCL EXTENSION cl_khr_fp64: enable \n",
141
  "// Marsaglia RNG very simple implementation \n",
142
  "#define znew  ((z=36969*(z&65535)+(z>>16))<<16) \n",
143
  "#define wnew  ((w=18000*(w&65535)+(w>>16))&65535) \n",
144
  "#define MWC   (znew+wnew) \n",
145
  "#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5)) \n",
146
  "#define CONG  (jcong=69069*jcong+1234567) \n",
147
  "#define KISS  ((MWC^CONG)+SHR3) \n",
148
  "#define MWCfp MWC * 2.328306435454494e-10f \n",
149
  "#define KISSfp KISS * 2.328306435454494e-10f \n",
150
  "#define CONGfp CONG * 2.328306435454494e-10f \n",
151
  "#define SHR3fp SHR3 * 2.328306435454494e-10f \n",
152
  "#define TINT32 0 \n",
153
  "#define TINT64 1 \n",
154
  "#define TFP32 2 \n",
155
  "#define TFP64 3 \n",
156
  "#define THEONE32I 1073741824 \n",
157
  "#define THEONE32F 1.e0f \n",
158
  "#define THEONE64I 4611686018427387904 \n",
159
  "#define THEONE64F (double)1.e0f \n",
160
  "ulong MainLoop32I(ulong iterations,uint seed_z,uint seed_w,size_t work)",
161
  "{",
162
  "   uint z=seed_z+work;",
163
  "   uint w=seed_w+work;",
164
  "   ulong total=0;",
165
  "   for (ulong i=0;i<iterations;i++)",
166
  "   {",
167
  "      uint x= MWC>>17;",
168
  "      uint y= MWC>>17;",
169
  "      ulong inside=((x*x+y*y) <= THEONE32I) ? 1:0;",
170
  "      total+=inside;",
171
  "   }",
172
  "   return(total);",
173
  "}",
174
  "ulong MainLoop32F(ulong iterations,uint seed_z,uint seed_w,size_t work)",
175
  "{",
176
  "   uint z=seed_z+work;",
177
  "   uint w=seed_w+work;",
178
  "   ulong total=0;",
179
  "   for (ulong i=0;i<iterations;i++)",
180
  "   {",
181
  "      float x=(float)MWCfp ;",
182
  "      float y=(float)MWCfp ;",
183
  "      ulong inside=((x*x+y*y) <= THEONE32F) ? 1:0;",
184
  "      total+=inside;",
185
  "   }",
186
  "   return(total);",
187
  "}",
188
  "ulong MainLoop64I(ulong iterations,uint seed_z,uint seed_w,size_t work)",
189
  "{",
190
  "   uint z=seed_z+work;",
191
  "   uint w=seed_w+work;",
192
  "   ulong total=0;",
193
  "   for (ulong i=0;i<iterations;i++)",
194
  "   {",
195
  "      ulong x=(ulong)(MWC>>1);",
196
  "      ulong y=(ulong)(MWC>>1);",
197
  "      ulong inside=((x*x+y*y) <= THEONE64I) ? 1:0;",
198
  "      total+=inside;",
199
  "   }",
200
  "   return(total);",
201
  "}",
202
  "ulong MainLoop64F(ulong iterations,uint seed_z,uint seed_w,size_t work)",
203
  "{",
204
  "   uint z=seed_z+work;",
205
  "   uint w=seed_w+work;",
206
  "   ulong total=0;",
207
  "   for (ulong i=0;i<iterations;i++)",
208
  "{",
209
  "        double x=(double)MWCfp ;",
210
  "        double y=(double)MWCfp ;",
211
  "      ulong inside=((x*x+y*y) <= THEONE64F) ? 1:0;",
212
  "      total+=inside;",
213
  "}",
214
  "   return(total);",
215
  "}",
216
  "__kernel void MainLoopGlobal(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)",
217
  "{",
218
  "   ulong total;",
219
  "   if (MyType==TFP32) {",
220
  "      total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_global_id(0));",
221
  "   }",
222
  "   else if (MyType==TFP64) {",
223
  "      total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_global_id(0));",
224
  "   }",  
225
  "   else if (MyType==TINT32) {",
226
  "      total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_global_id(0));",
227
  "   }",  
228
  "   else if (MyType==TINT64) {",
229
  "      total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_global_id(0));",
230
  "   }",  
231
  "   barrier(CLK_GLOBAL_MEM_FENCE);",
232
  "   s[get_global_id(0)]=(ulong)total;",
233
  "}",
234
  "__kernel void MainLoopLocal(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)",
235
  "{",
236
  "   ulong total;",
237
  "   if (MyType==TFP32) {",
238
  "      total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_local_id(0));",
239
  "   }",
240
  "   else if (MyType==TFP64) {",
241
  "      total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_local_id(0));",
242
  "   }",  
243
  "   else if (MyType==TINT32) {",
244
  "      total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_local_id(0));",
245
  "   }",  
246
  "   else if (MyType==TINT64) {",
247
  "      total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_local_id(0));",
248
  "   }",  
249
  "   barrier(CLK_LOCAL_MEM_FENCE);",
250
  "   s[get_local_id(0)]=(ulong)total;",
251
  "}",
252
  "__kernel void MainLoopHybrid(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)",
253
  "{",
254
  "   ulong total;",
255
  "   if (MyType==TFP32) {",
256
  "      total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_global_id(0));",
257
  "   }",
258
  "   else if (MyType==TFP64) {",
259
  "      total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_global_id(0));",
260
  "   }",  
261
  "   else if (MyType==TINT32) {",
262
  "      total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_global_id(0));",
263
  "   }",  
264
  "   else if (MyType==TINT64) {",
265
  "      total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_global_id(0));",
266
  "   }",  
267
  "   barrier(CLK_GLOBAL_MEM_FENCE || CLK_LOCAL_MEM_FENCE);",
268
  "   s[get_global_id(0)]=total;",
269
  "}"
270
};
271

  
272
int main(int argc, char **argv)
273
{
274
  if ((argc==1)||
275
      (strcmp(argv[1],"-h")==0)||
276
      (strcmp(argv[1],"--help")==0))
277
    {
278
      printf("\nPerforms a Pi estimation by Dart Dash:\n\n"
279
	     "\t#1 OpenCL Plateform ID (default 0)\n"
280
	     "\t#2 OpenCL Device ID (default 0)\n"
281
	     "\t#3 Minimal number of iterations (default 1000000)\n"
282
	     "\t#4 Parallel Rate (default 1024)\n"
283
	     "\t#5 Loops (default 1)\n"
284
	     "\t#6 Type of variable: INT32, INT64, FP32, FP64 (default FP32)\n\n");
285
      DetectOpenCLDevices();
286
    }
287
  else
288
    {
289
      
290
      int MyPlatform=atoi(argv[1]);
291
      int MyDevice=atoi(argv[2]);
292

  
293
      struct timeval tv1,tv2;
294
      
295
      uint64_t Iterations=1000000;
296
      if (argc>3) {
297
	Iterations=(uint64_t)atoll(argv[3]);
298
      }
299
      
300
      uint32_t ParallelRate=1024;
301
      if (argc>4) {
302
	ParallelRate=(uint32_t)atoi(argv[4]);
303
      }
304
      
305
      uint32_t Loops=1;
306
      if (argc>5) {
307
	Loops=(uint32_t)atoi(argv[5]);
308
      }
309
      
310
      uint32_t MyType=TFP32;
311
      if (argc>6) {
312
	if (strcmp(argv[6],"INT32")==0) {
313
	  MyType=(uint32_t)TINT32;
314
	}
315
	else if (strcmp(argv[6],"INT64")==0) {
316
	  MyType=(uint32_t)TINT64;
317
	}
318
	else if (strcmp(argv[6],"FP32")==0) {
319
	  MyType=(uint32_t)TFP32;
320
	}
321
	else if (strcmp(argv[6],"FP64")==0) {
322
	  MyType=(uint32_t)TFP64;
323
	}
324
      }
325

  
326
      printf("MyType %d\n",MyType);
327
      
328
      cl_int err;
329
      cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
330
      
331
      // Detect, scan, get & initialize platform and device
332
      cl_uint platformCount;
333
      cl_platform_id* platforms;
334
      cl_uint deviceCount;
335
      cl_device_id* devices;      
336
      size_t valueSize;
337
      
338
      /* Setup OpenCL environment. */
339
     
340
      // Get all platforms
341
      err = clGetPlatformIDs(0, NULL, &platformCount);
342
      platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
343
      err = clGetPlatformIDs(platformCount, platforms, NULL);
344

  
345
      // Get Device defined
346
      err = clGetDeviceIDs(platforms[MyPlatform], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount);
347
      devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount);
348
      err = clGetDeviceIDs(platforms[MyPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL);  
349

  
350
      // print device name
351
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_NAME, 0, NULL, &valueSize);
352
      char* deviceName=(char*)malloc(valueSize);
353
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_NAME, valueSize, deviceName, NULL);
354
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_VENDOR, 0, NULL, &valueSize);
355
      char* vendorName=(char*)malloc(valueSize);      
356
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_VENDOR, valueSize, vendorName, NULL);
357
      printf("\nDevice (%d,%d):\n\t- vendor: %s\n\t- device: %s\n\n",MyPlatform,MyDevice, vendorName,deviceName);
358
      free(deviceName);
359
      free(vendorName);
360
      
361
      props[1] = (cl_context_properties)platforms[MyPlatform];
362
      
363
      cl_context GPUContext = clCreateContext(props, 1, &devices[MyDevice], NULL, NULL, &err);
364
      cl_command_queue cqCommandQueue = clCreateCommandQueue(GPUContext,devices[MyDevice], 0, &err);
365

  
366
      cl_mem GPUInside = clCreateBuffer(GPUContext, CL_MEM_WRITE_ONLY,
367
					      sizeof(uint64_t) * ParallelRate, NULL, NULL);
368
      
369
      // 130 is the number of line for OpenCL code
370
      cl_program OpenCLProgram = clCreateProgramWithSource(GPUContext, 130 ,OpenCLSource,NULL,NULL);
371
      clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL);
372
      cl_kernel OpenCLMainLoopGlobal = clCreateKernel(OpenCLProgram, "MainLoopGlobal", NULL);
373
      cl_kernel OpenCLMainLoopLocal = clCreateKernel(OpenCLProgram, "MainLoopLocal", NULL);
374
      cl_kernel OpenCLMainLoopHybrid = clCreateKernel(OpenCLProgram, "MainLoopHybrid", NULL);
375

  
376
      // Divide the total number of iterations by the parallel rate
377
      // Add +1 to the number of per work iterations if division not integer
378
      uint64_t IterationsEach=((Iterations%ParallelRate)==0)?Iterations/ParallelRate:Iterations/ParallelRate+1;
379
      // Initialize seeds for MWC RNG generator from Marsaglia
380
      uint32_t seed_w=110271;
381
      uint32_t seed_z=101008;
382

  
383
      // Set the values of arguments for OpenCL function call
384
      clSetKernelArg(OpenCLMainLoopGlobal, 0, sizeof(cl_mem),&GPUInside);
385
      clSetKernelArg(OpenCLMainLoopGlobal, 1, sizeof(uint64_t),&IterationsEach);
386
      clSetKernelArg(OpenCLMainLoopGlobal, 2, sizeof(uint32_t),&seed_w);
387
      clSetKernelArg(OpenCLMainLoopGlobal, 3, sizeof(uint32_t),&seed_z);
388
      clSetKernelArg(OpenCLMainLoopGlobal, 4, sizeof(uint32_t),&MyType);
389
      
390
      size_t WorkSize[1] = {ParallelRate}; // one dimensional Range
391

  
392
      uint64_t HostInside[ParallelRate];
393

  
394
      for (uint32_t loop=0;loop<Loops;loop++) {
395
	// Set start timer
396
	gettimeofday(&tv1, NULL);
397
	
398
    	// Execute the OpenCL kernel with datas
399
	clEnqueueNDRangeKernel(cqCommandQueue, OpenCLMainLoopGlobal, 1, NULL,
400
			       WorkSize, NULL, 0, NULL, NULL);
401
	// Copy each result for each PR from Device to Host
402
	clEnqueueReadBuffer(cqCommandQueue, GPUInside, CL_TRUE, 0,
403
			    ParallelRate * sizeof(uint64_t), HostInside, 0, NULL, NULL);
404
	uint64_t inside=0;
405

  
406
	for (int i= 0; i < ParallelRate; i++) {
407
	  inside+=HostInside[i];
408
	}
409
	  
410
	// Set stop timer
411
	gettimeofday(&tv2, NULL);
412

  
413
	double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
414
				(tv2.tv_usec-tv1.tv_usec))/1000000;  
415

  
416
	double itops=(double)(ParallelRate*IterationsEach)/elapsed;
417
      
418
	printf("Inside/Total %ld %ld\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nLogItops %.2f\nPi estimation %f\n\n",inside,ParallelRate*IterationsEach,ParallelRate,elapsed,itops,log10(itops),(4.*(float)inside/((float)(ParallelRate)*(float)(IterationsEach))));
419
      }
420
      printf("\n\n");
421
      
422
      clReleaseKernel(OpenCLMainLoopGlobal);
423
      clReleaseProgram(OpenCLProgram);
424
      clReleaseCommandQueue(cqCommandQueue);
425
      clReleaseContext(GPUContext);
426
      clReleaseMemObject(GPUInside);
427

  
428
      
429
      return 0;
430
    }
431
}
432

  
Pi/C/OpenCL/PiOpenCL_KISS.c (revision 255)
1
// Pi Dart Dash in OpenCL in C, illustrative example
2
//
3
// Emmanuel Quemener <emmanuel.quemener@gmail.com>
4
//
5
// CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com> 
6
// Copyleft Cecill v2
7
//
8
// -h : print the documentation and detect devices as (platform,device)
9
//
10
// classical use:
11
// #1 OpenCL Plateform ID: get this information with -h option
12
// #2 OpenCL Device ID: get this information with -h option
13
// #3 Minimal number of iterations: 
14
// #4 Parallel Rate: scattering global work in parts executed //ly
15
// #5 Loops (to improve statistics)
16
// #6 Type of variables INT32, INT64, FP32, FP64
17
// 
18
// To compile : gcc -o PiOpenCL_KISS PiOpenCL_KISS.c -lOpenCL -lm
19

  
20
#define CL_TARGET_OPENCL_VERSION 220
21
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
22
#include <stdio.h>
23
#include <stdlib.h>
24
#include <string.h>
25
#include <CL/cl.h>
26
#include <stdint.h>
27
#include <math.h>
28
#include <sys/time.h>
29

  
30
#define TINT32 0
31
#define TINT64 1
32
#define TFP32 2
33
#define TFP64 3
34

  
35
int DetectOpenCLDevices(void) 
36
{
37
  int i, j;
38
  char* value;
39
  size_t valueSize;
40
  cl_uint platformCount;
41
  cl_platform_id* platforms;
42
  cl_uint deviceCount;
43
  cl_device_id* devices;
44
  cl_uint maxComputeUnits;
45
  cl_int maxWorkGroupSize;
46
  cl_int maxWorkItemSizes;
47
  cl_device_type dev_type;
48

  
49
  // get all platforms
50
  clGetPlatformIDs(0, NULL, &platformCount);
51
  platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
52
  clGetPlatformIDs(platformCount, platforms, NULL);
53

  
54
  printf("OpenCL statistics: %d platform(s) detected\n\n",platformCount);
55

  
56
  for (i = 0; i < platformCount; i++) {
57

  
58
    // get all devices
59
    clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount);
60
    devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount);
61
    clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL);
62

  
63
    // for each device print critical attributes
64
    for (j = 0; j < deviceCount; j++) {
65
      
66
      // print device name
67
      clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 0, NULL, &valueSize);
68
      value = (char*) malloc(valueSize);
69
      clGetDeviceInfo(devices[j], CL_DEVICE_NAME, valueSize, value, NULL);
70
      printf("Device (%d,%d): %s\n",i, j, value);
71
      free(value);
72

  
73
      // print type device CPU/GPU/ACCELERATOR
74
      clGetDeviceInfo(devices[j], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, NULL);
75
      printf("\tDevice Type: ");
76
      if(dev_type & CL_DEVICE_TYPE_GPU)
77
	printf("CL_DEVICE_TYPE_GPU ");
78
      if(dev_type & CL_DEVICE_TYPE_CPU)
79
	printf("CL_DEVICE_TYPE_CPU ");
80
      if(dev_type & CL_DEVICE_TYPE_ACCELERATOR)
81
	printf("CL_DEVICE_TYPE_ACCELERATOR ");
82
      if(dev_type & CL_DEVICE_TYPE_DEFAULT)
83
	printf("CL_DEVICE_TYPE_DEFAULT ");
84
      printf("\n");
85

  
86
      // print device vendor
87
      clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, 0, NULL, &valueSize);
88
      value = (char*) malloc(valueSize);
89
      clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, valueSize, value, NULL);
90
      printf("\tDevice vendor: %s\n", value);
91
      free(value);
92

  
93
      // print hardware device version
94
      clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, 0, NULL, &valueSize);
95
      value = (char*) malloc(valueSize);
96
      clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, valueSize, value, NULL);
97
      printf("\tHardware version: %s\n", value);
98
      free(value);
99

  
100
      // print software driver version
101
      clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, 0, NULL, &valueSize);
102
      value = (char*) malloc(valueSize);
103
      clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, valueSize, value, NULL);
104
      printf("\tSoftware version: %s\n", value);
105
      free(value);
106
      
107
      // print c version supported by compiler for device
108
      clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &valueSize);
109
      value = (char*) malloc(valueSize);
110
      clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, valueSize, value, NULL);
111
      printf("\tOpenCL C version: %s\n", value);
112
      free(value);
113

  
114
      // print parallel compute units
115
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS,
116
		      sizeof(maxComputeUnits), &maxComputeUnits, NULL);
117
      printf("\tParallel compute units: %d\n", maxComputeUnits);
118
      
119
      // print max work group size
120
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE,
121
		      sizeof(maxWorkGroupSize), &maxWorkGroupSize, NULL);
122
      printf("\tMaximum Work Group Size: %d\n", maxWorkGroupSize);
123
      
124
      // print max work items size
125
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_ITEM_SIZES,
126
		      sizeof(maxWorkItemSizes), &maxWorkItemSizes, NULL);
127
      printf("\tMaximum Work Item Sizes: %d\n", maxWorkItemSizes);
128
      
129
    }
130
    printf("\n");
131
    free(devices);
132
  }
133

  
134
  free(platforms);
135
  return 0;
136

  
137
}
138

  
139
const char* OpenCLSource[] = {
140
  "#pragma OPENCL EXTENSION cl_khr_fp64: enable \n",
141
  "// Marsaglia RNG very simple implementation \n",
142
  "#define znew  ((z=36969*(z&65535)+(z>>16))<<16) \n",
143
  "#define wnew  ((w=18000*(w&65535)+(w>>16))&65535) \n",
144
  "#define MWC   (znew+wnew) \n",
145
  "#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5)) \n",
146
  "#define CONG  (jcong=69069*jcong+1234567) \n",
147
  "#define KISS  ((MWC^CONG)+SHR3) \n",
148
  "#define MWCfp MWC * 2.328306435454494e-10f \n",
149
  "#define KISSfp KISS * 2.328306435454494e-10f \n",
150
  "#define CONGfp CONG * 2.328306435454494e-10f \n",
151
  "#define SHR3fp SHR3 * 2.328306435454494e-10f \n",
152
  "#define TINT32 0 \n",
153
  "#define TINT64 1 \n",
154
  "#define TFP32 2 \n",
155
  "#define TFP64 3 \n",
156
  "#define THEONE32I 1073741824 \n",
157
  "#define THEONE32F 1.e0f \n",
158
  "#define THEONE64I 4611686018427387904 \n",
159
  "#define THEONE64F (double)1.e0f \n",
160
  "ulong MainLoop32I(ulong iterations,uint seed_z,uint seed_w,size_t work)",
161
  "{",
162
  "   uint z=seed_z+work;",
163
  "   uint w=seed_w+work;",
164
  "   uint jcong=seed_z+work;",
165
  "   uint jsr=seed_w+work;",
166
  "   ulong total=0;",
167
  "   for (ulong i=0;i<iterations;i++)",
168
  "   {",
169
  "      uint x= MWC>>17;",
170
  "      uint y= MWC>>17;",
171
  "      ulong inside=((x*x+y*y) <= THEONE32I) ? 1:0;",
172
  "      total+=inside;",
173
  "   }",
174
  "   return(total);",
175
  "}",
176
  "ulong MainLoop32F(ulong iterations,uint seed_z,uint seed_w,size_t work)",
177
  "{",
178
  "   uint z=seed_z+work;",
179
  "   uint w=seed_w+work;",
180
  "   uint jcong=seed_z+work;",
181
  "   uint jsr=seed_w+work;",
182
  "   ulong total=0;",
183
  "   for (ulong i=0;i<iterations;i++)",
184
  "   {",
185
  "      float x=(float)MWCfp ;",
186
  "      float y=(float)MWCfp ;",
187
  "      ulong inside=((x*x+y*y) <= THEONE32F) ? 1:0;",
188
  "      total+=inside;",
189
  "   }",
190
  "   return(total);",
191
  "}",
192
  "ulong MainLoop64I(ulong iterations,uint seed_z,uint seed_w,size_t work)",
193
  "{",
194
  "   uint z=seed_z+work;",
195
  "   uint w=seed_w+work;",
196
  "   uint jcong=seed_z+work;",
197
  "   uint jsr=seed_w+work;",
198
  "   ulong total=0;",
199
  "   for (ulong i=0;i<iterations;i++)",
200
  "   {",
201
  "      ulong x=(ulong)(MWC>>1);",
202
  "      ulong y=(ulong)(MWC>>1);",
203
  "      ulong inside=((x*x+y*y) <= THEONE64I) ? 1:0;",
204
  "      total+=inside;",
205
  "   }",
206
  "   return(total);",
207
  "}",
208
  "ulong MainLoop64F(ulong iterations,uint seed_z,uint seed_w,size_t work)",
209
  "{",
210
  "   uint z=seed_z+work;",
211
  "   uint w=seed_w+work;",
212
  "   uint jcong=seed_z+work;",
213
  "   uint jsr=seed_w+work;",
214
  "   ulong total=0;",
215
  "   for (ulong i=0;i<iterations;i++)",
216
  "{",
217
  "        double x=(double)MWCfp ;",
218
  "        double y=(double)MWCfp ;",
219
  "      ulong inside=((x*x+y*y) <= THEONE64F) ? 1:0;",
220
  "      total+=inside;",
221
  "}",
222
  "   return(total);",
223
  "}",
224
  "__kernel void MainLoopGlobal(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)",
225
  "{",
226
  "   ulong total;",
227
  "   if (MyType==TFP32) {",
228
  "      total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_global_id(0));",
229
  "   }",
230
  "   else if (MyType==TFP64) {",
231
  "      total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_global_id(0));",
232
  "   }",  
233
  "   else if (MyType==TINT32) {",
234
  "      total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_global_id(0));",
235
  "   }",  
236
  "   else if (MyType==TINT64) {",
237
  "      total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_global_id(0));",
238
  "   }",  
239
  "   barrier(CLK_GLOBAL_MEM_FENCE);",
240
  "   s[get_global_id(0)]=(ulong)total;",
241
  "}",
242
  "__kernel void MainLoopLocal(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)",
243
  "{",
244
  "   ulong total;",
245
  "   if (MyType==TFP32) {",
246
  "      total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_local_id(0));",
247
  "   }",
248
  "   else if (MyType==TFP64) {",
249
  "      total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_local_id(0));",
250
  "   }",  
251
  "   else if (MyType==TINT32) {",
252
  "      total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_local_id(0));",
253
  "   }",  
254
  "   else if (MyType==TINT64) {",
255
  "      total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_local_id(0));",
256
  "   }",  
257
  "   barrier(CLK_LOCAL_MEM_FENCE);",
258
  "   s[get_local_id(0)]=(ulong)total;",
259
  "}",
260
  "__kernel void MainLoopHybrid(__global ulong *s,ulong iterations,uint seed_w,uint seed_z,uint MyType)",
261
  "{",
262
  "   ulong total;",
263
  "   if (MyType==TFP32) {",
264
  "      total=(ulong)MainLoop32F(iterations,seed_z,seed_w,get_global_id(0));",
265
  "   }",
266
  "   else if (MyType==TFP64) {",
267
  "      total=(ulong)MainLoop64F(iterations,seed_z,seed_w,get_global_id(0));",
268
  "   }",  
269
  "   else if (MyType==TINT32) {",
270
  "      total=(ulong)MainLoop32I(iterations,seed_z,seed_w,get_global_id(0));",
271
  "   }",  
272
  "   else if (MyType==TINT64) {",
273
  "      total=(ulong)MainLoop64I(iterations,seed_z,seed_w,get_global_id(0));",
274
  "   }",  
275
  "   barrier(CLK_GLOBAL_MEM_FENCE || CLK_LOCAL_MEM_FENCE);",
276
  "   s[get_global_id(0)]=total;",
277
  "}"
278
};
279

  
280
int main(int argc, char **argv)
281
{
282
  if ((argc==1)||
283
      (strcmp(argv[1],"-h")==0)||
284
      (strcmp(argv[1],"--help")==0))
285
    {
286
      printf("\nPerforms a Pi estimation by Dart Dash:\n\n"
287
	     "\t#1 OpenCL Plateform ID (default 0)\n"
288
	     "\t#2 OpenCL Device ID (default 0)\n"
289
	     "\t#3 Minimal number of iterations (default 1000000)\n"
290
	     "\t#4 Parallel Rate (default 1024)\n"
291
	     "\t#5 Loops (default 1)\n"
292
	     "\t#6 Type of variable: INT32, INT64, FP32, FP64 (default FP32)\n\n");
293
      DetectOpenCLDevices();
294
    }
295
  else
296
    {
297
      
298
      int MyPlatform=atoi(argv[1]);
299
      int MyDevice=atoi(argv[2]);
300

  
301
      struct timeval tv1,tv2;
302
      
303
      uint64_t Iterations=1000000;
304
      if (argc>3) {
305
	Iterations=(uint64_t)atoll(argv[3]);
306
      }
307
      
308
      uint32_t ParallelRate=1024;
309
      if (argc>4) {
310
	ParallelRate=(uint32_t)atoi(argv[4]);
311
      }
312
      
313
      uint32_t Loops=1;
314
      if (argc>5) {
315
	Loops=(uint32_t)atoi(argv[5]);
316
      }
317
      
318
      uint32_t MyType=TFP32;
319
      if (argc>6) {
320
	if (strcmp(argv[6],"INT32")==0) {
321
	  MyType=(uint32_t)TINT32;
322
	}
323
	else if (strcmp(argv[6],"INT64")==0) {
324
	  MyType=(uint32_t)TINT64;
325
	}
326
	else if (strcmp(argv[6],"FP32")==0) {
327
	  MyType=(uint32_t)TFP32;
328
	}
329
	else if (strcmp(argv[6],"FP64")==0) {
330
	  MyType=(uint32_t)TFP64;
331
	}
332
      }
333

  
334
      printf("MyType %d\n",MyType);
335
      
336
      cl_int err;
337
      cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
338
      
339
      // Detect, scan, get & initialize platform and device
340
      cl_uint platformCount;
341
      cl_platform_id* platforms;
342
      cl_uint deviceCount;
343
      cl_device_id* devices;      
344
      size_t valueSize;
345
      
346
      /* Setup OpenCL environment. */
347
     
348
      // Get all platforms
349
      err = clGetPlatformIDs(0, NULL, &platformCount);
350
      platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
351
      err = clGetPlatformIDs(platformCount, platforms, NULL);
352

  
353
      // Get Device defined
354
      err = clGetDeviceIDs(platforms[MyPlatform], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount);
355
      devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount);
356
      err = clGetDeviceIDs(platforms[MyPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL);  
357

  
358
      // print device name
359
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_NAME, 0, NULL, &valueSize);
360
      char* deviceName=(char*)malloc(valueSize);
361
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_NAME, valueSize, deviceName, NULL);
362
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_VENDOR, 0, NULL, &valueSize);
363
      char* vendorName=(char*)malloc(valueSize);      
364
      err = clGetDeviceInfo(devices[MyDevice], CL_DEVICE_VENDOR, valueSize, vendorName, NULL);
365
      printf("\nDevice (%d,%d):\n\t- vendor: %s\n\t- device: %s\n\n",MyPlatform,MyDevice, vendorName,deviceName);
366
      free(deviceName);
367
      free(vendorName);
368
      
369
      props[1] = (cl_context_properties)platforms[MyPlatform];
370
      
371
      cl_context GPUContext = clCreateContext(props, 1, &devices[MyDevice], NULL, NULL, &err);
372
      cl_command_queue cqCommandQueue = clCreateCommandQueue(GPUContext,devices[MyDevice], 0, &err);
373

  
374
      cl_mem GPUInside = clCreateBuffer(GPUContext, CL_MEM_WRITE_ONLY,
375
					      sizeof(uint64_t) * ParallelRate, NULL, NULL);
376
      
377
      // 138 is the number of line for OpenCL code
378
      cl_program OpenCLProgram = clCreateProgramWithSource(GPUContext, 138 ,OpenCLSource,NULL,NULL);
379
      clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL);
380
      cl_kernel OpenCLMainLoopGlobal = clCreateKernel(OpenCLProgram, "MainLoopGlobal", NULL);
381
      cl_kernel OpenCLMainLoopLocal = clCreateKernel(OpenCLProgram, "MainLoopLocal", NULL);
382
      cl_kernel OpenCLMainLoopHybrid = clCreateKernel(OpenCLProgram, "MainLoopHybrid", NULL);
383

  
384
      // Divide the total number of iterations by the parallel rate
385
      // Add +1 to the number of per work iterations if division not integer
386
      uint64_t IterationsEach=((Iterations%ParallelRate)==0)?Iterations/ParallelRate:Iterations/ParallelRate+1;
387
      // Initialize seeds for MWC RNG generator from Marsaglia
388
      uint32_t seed_w=110271;
389
      uint32_t seed_z=101008;
390

  
391
      // Set the values of arguments for OpenCL function call
392
      clSetKernelArg(OpenCLMainLoopGlobal, 0, sizeof(cl_mem),&GPUInside);
393
      clSetKernelArg(OpenCLMainLoopGlobal, 1, sizeof(uint64_t),&IterationsEach);
394
      clSetKernelArg(OpenCLMainLoopGlobal, 2, sizeof(uint32_t),&seed_w);
395
      clSetKernelArg(OpenCLMainLoopGlobal, 3, sizeof(uint32_t),&seed_z);
396
      clSetKernelArg(OpenCLMainLoopGlobal, 4, sizeof(uint32_t),&MyType);
397
      
398
      size_t WorkSize[1] = {ParallelRate}; // one dimensional Range
399

  
400
      uint64_t HostInside[ParallelRate];
401

  
402
      for (uint32_t loop=0;loop<Loops;loop++) {
403
	// Set start timer
404
	gettimeofday(&tv1, NULL);
405
	
406
    	// Execute the OpenCL kernel with datas
407
	clEnqueueNDRangeKernel(cqCommandQueue, OpenCLMainLoopGlobal, 1, NULL,
408
			       WorkSize, NULL, 0, NULL, NULL);
409
	// Copy each result for each PR from Device to Host
410
	clEnqueueReadBuffer(cqCommandQueue, GPUInside, CL_TRUE, 0,
411
			    ParallelRate * sizeof(uint64_t), HostInside, 0, NULL, NULL);
412
	uint64_t inside=0;
413

  
414
	for (int i= 0; i < ParallelRate; i++) {
415
	  inside+=HostInside[i];
416
	}
417
	  
418
	// Set stop timer
419
	gettimeofday(&tv2, NULL);
420

  
421
	double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
422
				(tv2.tv_usec-tv1.tv_usec))/1000000;  
423

  
424
	double itops=(double)(ParallelRate*IterationsEach)/elapsed;
425
      
426
	printf("Inside/Total %ld %ld\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nLogItops %.2f\nPi estimation %f\n\n",inside,ParallelRate*IterationsEach,ParallelRate,elapsed,itops,log10(itops),(4.*(float)inside/((float)(ParallelRate)*(float)(IterationsEach))));
427
      }
428
      printf("\n\n");
429
      
430
      clReleaseKernel(OpenCLMainLoopGlobal);
431
      clReleaseProgram(OpenCLProgram);
432
      clReleaseCommandQueue(cqCommandQueue);
433
      clReleaseContext(GPUContext);
434
      clReleaseMemObject(GPUInside);
435

  
436
      
437
      return 0;
438
    }
439
}
440

  
Pi/C/OpenCL/PiOpenCL_SHR3.c (revision 255)
1
// Pi Dart Dash in OpenCL in C, illustrative example
2
//
3
// Emmanuel Quemener <emmanuel.quemener@gmail.com>
4
//
5
// CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com> 
6
// Copyleft Cecill v2
7
//
8
// -h : print the documentation and detect devices as (platform,device)
9
//
10
// classical use:
11
// #1 OpenCL Plateform ID: get this information with -h option
12
// #2 OpenCL Device ID: get this information with -h option
13
// #3 Minimal number of iterations: 
14
// #4 Parallel Rate: scattering global work in parts executed //ly
15
// #5 Loops (to improve statistics)
16
// #6 Type of variables INT32, INT64, FP32, FP64
17
// 
18
// To compile : gcc -o PiOpenCL_SHR3 PiOpenCL_SHR3.c -lOpenCL -lm
19

  
20
#define CL_TARGET_OPENCL_VERSION 220
21
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
22
#include <stdio.h>
23
#include <stdlib.h>
24
#include <string.h>
25
#include <CL/cl.h>
26
#include <stdint.h>
27
#include <math.h>
28
#include <sys/time.h>
29

  
30
#define TINT32 0
31
#define TINT64 1
32
#define TFP32 2
33
#define TFP64 3
34

  
35
int DetectOpenCLDevices(void) 
36
{
37
  int i, j;
38
  char* value;
39
  size_t valueSize;
40
  cl_uint platformCount;
41
  cl_platform_id* platforms;
42
  cl_uint deviceCount;
43
  cl_device_id* devices;
44
  cl_uint maxComputeUnits;
45
  cl_int maxWorkGroupSize;
46
  cl_int maxWorkItemSizes;
47
  cl_device_type dev_type;
48

  
49
  // get all platforms
50
  clGetPlatformIDs(0, NULL, &platformCount);
51
  platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
52
  clGetPlatformIDs(platformCount, platforms, NULL);
53

  
54
  printf("OpenCL statistics: %d platform(s) detected\n\n",platformCount);
55

  
56
  for (i = 0; i < platformCount; i++) {
57

  
58
    // get all devices
59
    clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount);
60
    devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount);
61
    clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL);
62

  
63
    // for each device print critical attributes
64
    for (j = 0; j < deviceCount; j++) {
65
      
66
      // print device name
67
      clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 0, NULL, &valueSize);
68
      value = (char*) malloc(valueSize);
69
      clGetDeviceInfo(devices[j], CL_DEVICE_NAME, valueSize, value, NULL);
70
      printf("Device (%d,%d): %s\n",i, j, value);
71
      free(value);
72

  
73
      // print type device CPU/GPU/ACCELERATOR
74
      clGetDeviceInfo(devices[j], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, NULL);
75
      printf("\tDevice Type: ");
76
      if(dev_type & CL_DEVICE_TYPE_GPU)
77
	printf("CL_DEVICE_TYPE_GPU ");
78
      if(dev_type & CL_DEVICE_TYPE_CPU)
79
	printf("CL_DEVICE_TYPE_CPU ");
80
      if(dev_type & CL_DEVICE_TYPE_ACCELERATOR)
81
	printf("CL_DEVICE_TYPE_ACCELERATOR ");
82
      if(dev_type & CL_DEVICE_TYPE_DEFAULT)
83
	printf("CL_DEVICE_TYPE_DEFAULT ");
84
      printf("\n");
85

  
86
      // print device vendor
87
      clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, 0, NULL, &valueSize);
88
      value = (char*) malloc(valueSize);
89
      clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, valueSize, value, NULL);
90
      printf("\tDevice vendor: %s\n", value);
91
      free(value);
92

  
93
      // print hardware device version
94
      clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, 0, NULL, &valueSize);
95
      value = (char*) malloc(valueSize);
96
      clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, valueSize, value, NULL);
97
      printf("\tHardware version: %s\n", value);
98
      free(value);
99

  
100
      // print software driver version
101
      clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, 0, NULL, &valueSize);
102
      value = (char*) malloc(valueSize);
103
      clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, valueSize, value, NULL);
104
      printf("\tSoftware version: %s\n", value);
105
      free(value);
106
      
107
      // print c version supported by compiler for device
108
      clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &valueSize);
109
      value = (char*) malloc(valueSize);
110
      clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, valueSize, value, NULL);
111
      printf("\tOpenCL C version: %s\n", value);
112
      free(value);
113

  
114
      // print parallel compute units
115
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS,
116
		      sizeof(maxComputeUnits), &maxComputeUnits, NULL);
117
      printf("\tParallel compute units: %d\n", maxComputeUnits);
118
      
119
      // print max work group size
120
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE,
121
		      sizeof(maxWorkGroupSize), &maxWorkGroupSize, NULL);
122
      printf("\tMaximum Work Group Size: %d\n", maxWorkGroupSize);
123
      
124
      // print max work items size
125
      clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_ITEM_SIZES,
126
		      sizeof(maxWorkItemSizes), &maxWorkItemSizes, NULL);
127
      printf("\tMaximum Work Item Sizes: %d\n", maxWorkItemSizes);
128
      
129
    }
130
    printf("\n");
131
    free(devices);
132
  }
133

  
134
  free(platforms);
135
  return 0;
136

  
137
}
138

  
139
const char* OpenCLSource[] = {
140
  "#pragma OPENCL EXTENSION cl_khr_fp64: enable \n",
141
  "// Marsaglia RNG very simple implementation \n",
142
  "#define znew  ((z=36969*(z&65535)+(z>>16))<<16) \n",
143
  "#define wnew  ((w=18000*(w&65535)+(w>>16))&65535) \n",
144
  "#define MWC   (znew+wnew) \n",
145
  "#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5)) \n",
146
  "#define CONG  (jcong=69069*jcong+1234567) \n",
147
  "#define KISS  ((MWC^CONG)+SHR3) \n",
148
  "#define MWCfp MWC * 2.328306435454494e-10f \n",
149
  "#define KISSfp KISS * 2.328306435454494e-10f \n",
150
  "#define CONGfp CONG * 2.328306435454494e-10f \n",
151
  "#define SHR3fp SHR3 * 2.328306435454494e-10f \n",
152
  "#define TINT32 0 \n",
153
  "#define TINT64 1 \n",
154
  "#define TFP32 2 \n",
155
  "#define TFP64 3 \n",
156
  "#define THEONE32I 1073741824 \n",
157
  "#define THEONE32F 1.e0f \n",
158
  "#define THEONE64I 4611686018427387904 \n",
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff