Statistiques
| Révision :

root / Ising / GPU / Ising2D-GPU.py-130527 @ 93

Historique | Voir | Annoter | Télécharger (16,78 ko)

1
#!/usr/bin/env python
2
#
3
# Ising2D model in serial mode
4
#
5
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr> 
6

    
7
import sys
8
import numpy
9
import math
10
from PIL import Image
11
from math import exp
12
from random import random
13
import time
14
import getopt
15
import matplotlib.pyplot as plt
16
from numpy.random import randint as nprnd
17

    
18
KERNEL_CODE_OPENCL="""
19

    
20
// Marsaglia RNG very simple implementation
21
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
22
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
23
#define MWC   (znew+wnew)
24
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
25
#define CONG  (jcong=69069*jcong+1234567)
26
#define KISS  ((MWC^CONG)+SHR3)
27

    
28
#define MWCfp MWC * 2.328306435454494e-10f
29
#define KISSfp KISS * 2.328306435454494e-10f
30

    
31
__kernel void MainLoopOne(__global char *s,float T,float J,
32
                          uint sizex,uint sizey,
33
                          uint iterations,uint seed_w,uint seed_z)
34

    
35
{
36
   uint z=seed_z;
37
   uint w=seed_w;
38

    
39
   for (uint i=0;i<iterations;i++) {
40

    
41
      uint x=(uint)(MWC%sizex) ;
42
      uint y=(uint)(MWC%sizey) ;
43

    
44
      int p=s[x+sizex*y];
45

    
46
      int d=s[x+sizex*((y+1)%sizey)];
47
      int u=s[x+sizex*((y-1)%sizey)];
48
      int l=s[((x-1)%sizex)+sizex*y];
49
      int r=s[((x+1)%sizex)+sizex*y];
50

    
51
      float DeltaE=2.0f*J*p*(u+d+l+r);
52

    
53
      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
54
      s[x%sizex+sizex*(y%sizey)] = (char)factor*p;
55
   }
56
   barrier(CLK_GLOBAL_MEM_FENCE);      
57
}
58

    
59
__kernel void MainLoopGlobal(__global char *s,__global float *T,float J,
60
                             uint sizex,uint sizey,
61
                             uint iterations,uint seed_w,uint seed_z)
62

    
63
{
64
   uint z=seed_z/(get_global_id(0)+1);
65
   uint w=seed_w/(get_global_id(0)+1);
66
   float t;
67
   uint ind=get_global_id(0);
68

    
69
   t=T[get_global_id(0)];
70

    
71
   for (uint i=0;i<iterations;i++) {
72

    
73
      uint x=(uint)(MWC%sizex) ;
74
      uint y=(uint)(MWC%sizey) ;
75

    
76
      int p=s[x+sizex*(y+sizey*ind)];
77

    
78
      int d=s[x+sizex*((y+1)%sizey+sizey*ind)];
79
      int u=s[x+sizex*((y-1)%sizey+sizey*ind)];
80
      int l=s[((x-1)%sizex)+sizex*(y+sizey*ind)];
81
      int r=s[((x+1)%sizex)+sizex*(y+sizey*ind)];
82

    
83
      float DeltaE=2.0f*J*p*(u+d+l+r);
84

    
85
      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/t))) ? -1:1;
86
      s[x%sizex+sizex*(y%sizey+sizey*ind)] = (char)factor*p;
87
      
88
   }
89

    
90
   barrier(CLK_GLOBAL_MEM_FENCE);
91
      
92
}
93

    
94
__kernel void MainLoopLocal(__global char *s,__global float *T,float J,
95
                            uint sizex,uint sizey,
96
                            uint iterations,uint seed_w,uint seed_z)
97
{
98
   uint z=seed_z/(get_local_id(0)+1);
99
   uint w=seed_w/(get_local_id(0)+1);
100
   //float t=T[get_local_id(0)+get_global_id(0)*get_local_size(0)];
101
   //uint ind=get_local_id(0)+get_global_id(0)*get_local_size(0);
102
   float t=T[get_local_id(0)];
103
   uint ind=get_local_id(0);
104

    
105
   for (uint i=0;i<iterations;i++) {
106

    
107
      uint x=(uint)(MWC%sizex) ;
108
      uint y=(uint)(MWC%sizey) ;
109

    
110
      int p=s[x+sizex*(y+sizey*ind)];
111

    
112
      int d=s[x+sizex*((y+1)%sizey+sizey*ind)];
113
      int u=s[x+sizex*((y-1)%sizey+sizey*ind)];
114
      int l=s[((x-1)%sizex)+sizex*(y+sizey*ind)];
115
      int r=s[((x+1)%sizex)+sizex*(y+sizey*ind)];
116

    
117
      float DeltaE=2.0f*J*p*(u+d+l+r);
118

    
119
      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/t))) ? -1:1;
120
      s[x%sizex+sizex*(y%sizey+sizey*ind)] = (char)factor*p;
121
   }
122

    
123
   barrier(CLK_LOCAL_MEM_FENCE);
124
   barrier(CLK_GLOBAL_MEM_FENCE);
125
      
126
}
127
"""
128

    
129
def ImageOutput(sigma,prefix):
130
    Max=sigma.max()
131
    Min=sigma.min()
132
    
133
    # Normalize value as 8bits Integer
134
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
135
    image = Image.fromarray(SigmaInt)
136
    image.save("%s.jpg" % prefix)
137
    
138
def Metropolis(sigma,J,B,T,iterations): 
139
    start=time.time()
140

    
141
    SizeX,SizeY=sigma.shape
142
    
143
    for p in xrange(0,iterations):
144
        # Random access coordonate
145
        X,Y=numpy.random.randint(SizeX),numpy.random.randint(SizeY)
146
        
147
        DeltaE=J*sigma[X,Y]*(2*(sigma[X,(Y+1)%SizeY]+
148
                                sigma[X,(Y-1)%SizeY]+
149
                                sigma[(X-1)%SizeX,Y]+
150
                                sigma[(X+1)%SizeX,Y])+B)
151
        
152
        if DeltaE < 0. or random() < exp(-DeltaE/T):
153
            sigma[X,Y]=-sigma[X,Y]
154
    duration=time.time()-start
155
    return(duration)
156

    
157
def MetropolisOpenCL(sigma,J,B,T,iterations,jobs,ParaStyle,Alu,Device):
158

    
159
    # Initialisation des variables en les CASTant correctement
160
    
161
    # Je detecte un peripherique GPU dans la liste des peripheriques
162
    # for platform in cl.get_platforms():
163
    #     for device in platform.get_devices():
164
    #             if cl.device_type.to_string(device.type)=='GPU':
165
    #                     GPU=device
166
    #print "GPU detected: ",device.name
167
    
168
    HasGPU=False
169
    Id=1
170
    # Device selection based on choice (default is GPU)
171
    for platform in cl.get_platforms():
172
        for device in platform.get_devices():
173
            if not HasGPU:
174
                deviceType=cl.device_type.to_string(device.type)
175
                if deviceType=="GPU" and Alu=="GPU" and Id==Device:
176
                    GPU=device
177
                    print "GPU selected: ",device.name
178
                    HasGPU=True
179
                if deviceType=="CPU" and Alu=="CPU":        
180
                    GPU=device
181
                    print "CPU selected: ",device.name
182
                    HasGPU=True
183
            Id=Id+1
184
                                
185
    # Je cree le contexte et la queue pour son execution
186
    # ctx = cl.create_some_context()
187
    ctx = cl.Context([GPU])
188
    queue = cl.CommandQueue(ctx,
189
                            properties=cl.command_queue_properties.PROFILING_ENABLE)
190
    
191
    # Je recupere les flag possibles pour les buffers
192
    mf = cl.mem_flags
193

    
194
    print sigma,sigma.shape
195
        
196
    # Attention au CAST ! C'est un int8 soit un char en OpenCL !
197
    sigmaCL = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=sigma)
198
   
199
    MetropolisCL = cl.Program(ctx,KERNEL_CODE_OPENCL).build( \
200
        options = "-cl-mad-enable -cl-fast-relaxed-math")
201

    
202
    SizeX,SizeY=sigma.shape
203

    
204
    if ParaStyle=='Blocks':
205
        # Call OpenCL kernel
206
        # (1,) is Global work size (only 1 work size)
207
        # (1,) is local work size
208
        CLLaunch=MetropolisCL.MainLoopOne(queue,(jobs,),None,
209
                                          sigmaCL,
210
                                          numpy.float32(T), 
211
                                          numpy.float32(J),
212
                                          numpy.uint32(SizeX),
213
                                          numpy.uint32(SizeY),
214
                                          numpy.uint32(iterations),
215
                                          numpy.uint32(nprnd(2**31-1)),
216
                                          numpy.uint32(nprnd(2**31-1)))
217
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)
218
    else:
219
        # en OpenCL, necessaire de mettre un Global_id identique au local_id
220
        CLLaunch=MetropolisCL.MainLoopOne(queue,(jobs,),(jobs,),
221
                                          sigmaCL,
222
                                          numpy.float32(T), 
223
                                          numpy.float32(J),
224
                                          numpy.uint32(SizeX),
225
                                          numpy.uint32(SizeY),
226
                                          numpy.uint32(iterations),
227
                                          numpy.uint32(nprnd(2**31-1)),
228
                                          numpy.uint32(nprnd(2**31-1)))
229
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)
230
        
231
    CLLaunch.wait()
232
    cl.enqueue_copy(queue, sigma, sigmaCL).wait()
233
    elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
234
    sigmaCL.release()
235
        
236
    return(elapsed)
237

    
238
def MetropolisAllOpenCL(sigmaDict,J,B,TList,iterations,jobs,ParaStyle,Alu,Device):
239

    
240
    # sigmaDict & Tlist are NOT respectively array & float
241
    # sigmaDict : dict of array for each temperatoire
242
    # TList : list of temperatures
243
          
244
    # Initialisation des variables en les CASTant correctement
245
    
246
    # Je detecte un peripherique GPU dans la liste des peripheriques
247
    # for platform in cl.get_platforms():
248
    #     for device in platform.get_devices():
249
    #             if cl.device_type.to_string(device.type)=='GPU':
250
    #                     GPU=device
251
    #print "GPU detected: ",device.name
252
    
253
    HasGPU=False
254
    Id=1
255
    # Device selection based on choice (default is GPU)
256
    for platform in cl.get_platforms():
257
        for device in platform.get_devices():
258
            if not HasGPU:
259
                deviceType=cl.device_type.to_string(device.type)
260
                if deviceType=="GPU" and Alu=="GPU" and Id==Device:
261
                    GPU=device
262
                    print "GPU selected: ",device.name
263
                    HasGPU=True
264
                if deviceType=="CPU" and Alu=="CPU":        
265
                    GPU=device
266
                    print "CPU selected: ",device.name
267
                    HasGPU=True
268
            Id=Id+1
269
                                
270
    # Je cree le contexte et la queue pour son execution
271
    # ctx = cl.create_some_context()
272
    ctx = cl.Context([GPU])
273
    queue = cl.CommandQueue(ctx,
274
                            properties=cl.command_queue_properties.PROFILING_ENABLE)
275
    
276
    # Je recupere les flag possibles pour les buffers
277
    mf = cl.mem_flags
278

    
279
    # Concatenate all sigma in single array
280
    sigma=numpy.copy(sigmaDict[TList[0]])
281
    for T in TList[1:]:
282
        sigma=numpy.concatenate((sigma,sigmaDict[T]),axis=1)
283

    
284
    print sigma,sigma.shape
285
        
286
    sigmaCL = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=sigma)
287
    TCL = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=TList)
288
   
289
    MetropolisCL = cl.Program(ctx,KERNEL_CODE_OPENCL).build( \
290
        options = "-cl-mad-enable -cl-fast-relaxed-math")
291

    
292
    SizeX,SizeY=sigmaDict[TList[0]].shape
293

    
294
    if ParaStyle=='Blocks':
295
        # Call OpenCL kernel
296
        # (1,) is Global work size (only 1 work size)
297
        # (1,) is local work size
298
        # SeedZCL is lattice translated in CL format
299
        # SeedWCL is lattice translated in CL format
300
        # step is number of iterations
301
        CLLaunch=MetropolisCL.MainLoopGlobal(queue,(jobs,),None,
302
                                             sigmaCL,
303
                                             TCL, 
304
                                             numpy.float32(J),
305
                                             numpy.uint32(SizeX),
306
                                             numpy.uint32(SizeY),
307
                                             numpy.uint32(iterations),
308
                                             numpy.uint32(nprnd(2**31-1)),
309
                                             numpy.uint32(nprnd(2**31-1)))
310
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)
311
    else:
312
        blocks=int(math.sqrt(jobs))
313
        # en OpenCL, necessaire de mettre un Global_id identique au local_id
314
        CLLaunch=MetropolisCL.MainLoopLocal(queue,(jobs,),(2,),
315
                                            sigmaCL,
316
                                            TCL,
317
                                            numpy.float32(J),
318
                                            numpy.uint32(SizeX),
319
                                            numpy.uint32(SizeY),
320
                                            numpy.uint32(iterations),
321
                                            numpy.uint32(nprnd(2**31-1)),
322
                                            numpy.uint32(nprnd(2**31-1)))
323
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)
324
        
325
    CLLaunch.wait()
326
    cl.enqueue_copy(queue, sigma, sigmaCL).wait()
327
    elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
328
    sigmaCL.release()
329

    
330
    results=numpy.split(sigma,len(TList),axis=1)
331
    for T in TList:
332
        sigmaDict[T]=numpy.copy(results[numpy.nonzero(TList == T)[0][0]])
333

    
334
    return(elapsed)
335

    
336

    
337
def Magnetization(sigma,M):
338
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))
339

    
340
def Energy(sigma,J):
341
    # Copier et caster 
342
    E=numpy.copy(sigma).astype(numpy.float32)
343
    
344
    # Appel par slice
345
    E[1:-1,1:-1]=-J*E[1:-1,1:-1]*(E[:-2,1:-1]+E[2:,1:-1]+
346
                                  E[1:-1,:-2]+E[1:-1,2:])
347
    
348
    # Bien nettoyer la peripherie
349
    E[:,0]=0
350
    E[:,-1]=0
351
    E[0,:]=0
352
    E[-1,:]=0
353
    
354
    Energy=numpy.sum(E)
355

    
356
    return(Energy/(E.shape[0]*E.shape[1]*1.0))
357

    
358
def DisplayCurves(T,E,M,J,B):
359

    
360
    plt.xlabel("Temperature")
361
    plt.ylabel("Energy")
362

    
363
    Experience,=plt.plot(T,E,label="Energy") 
364

    
365
    plt.legend()
366
    plt.show()
367

    
368

    
369
if __name__=='__main__':
370

    
371
    # Set defaults values
372
    # Alu can be CPU or GPU
373
    Alu='CPU'
374
    # Id of GPU
375
    Device=1
376
    # GPU style can be Cuda (Nvidia implementation) or OpenCL
377
    GpuStyle='OpenCL'
378
    # Parallel distribution can be on Threads or Blocks
379
    ParaStyle='Blocks'
380
    # Coupling factor
381
    J=1.
382
    # Magnetic Field
383
    B=0.
384
    # Size of Lattice
385
    Size=256
386
    # Default Temperatures (start, end, step)
387
    Tmin=0.1
388
    Tmax=5
389
    Tstep=0.1
390
    # Default Number of Iterations
391
    Iterations=Size*Size
392
    # Curves is True to print the curves
393
    Curves=False
394

    
395
    try:
396
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:a:d:g:t:",["coupling=","magneticfield=","size=","iterations=","tempstart=","tempend=","tempstep=","alu=","gpustyle=","parastyle="])
397
    except getopt.GetoptError:
398
        print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -p <Threads/Blocks> -t <ParaStyle>' % sys.argv[0]
399
        sys.exit(2)
400
    
401
 
402
    for opt, arg in opts:
403
        if opt == '-h':
404
            print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -p <Threads/Blocks> -t <ParaStyle>' % sys.argv[0]
405
            sys.exit()
406
        elif opt == '-c':
407
            Curves=True
408
        elif opt in ("-j", "--coupling"):
409
            J = float(arg)
410
        elif opt in ("-b", "--magneticfield"):
411
            B = float(arg)
412
        elif opt in ("-s", "--tempmin"):
413
            Tmin = float(arg)
414
        elif opt in ("-e", "--tempmax"):
415
            Tmax = float(arg)
416
        elif opt in ("-p", "--tempstep"):
417
            Tstep = float(arg)
418
        elif opt in ("-i", "--iterations"):
419
            Iterations = int(arg)
420
        elif opt in ("-z", "--size"):
421
            Size = int(arg)
422
        elif opt in ("-a", "--alu"):
423
            Alu = arg
424
        elif opt in ("-d", "--device"):
425
            Device = int(arg)
426
        elif opt in ("-g", "--gpustyle"):
427
            GpuStyle = arg
428
        elif opt in ("-t", "--parastyle"):
429
            ParaStyle = arg
430

    
431
    if Alu=='CPU' and GpuStyle=='CUDA':
432
        print "Alu can't be CPU for CUDA, set Alu to GPU"
433
        Alu='GPU'
434

    
435
    if ParaStyle not in ('Blocks','Threads','Hybrid'):
436
        print "%s not exists, ParaStyle set as Threads !" % ParaStyle
437
        ParaStyle='Blocks'
438
   
439
    print "Compute unit : %s" % Alu
440
    print "Device Identification : %s" % Device
441
    print "GpuStyle used : %s" % GpuStyle
442
    print "Parallel Style used : %s" % ParaStyle
443
    print "Coupling Factor : %s" % J
444
    print "Magnetic Field :  %s" % B
445
    print "Size of lattice : %s" % Size
446
    print "Iterations : %s" % Iterations
447
    print "Temperature on start : %s" % Tmin
448
    print "Temperature on end : %s" % Tmax
449
    print "Temperature step : %s" % Tstep
450

    
451
    if GpuStyle=='CUDA':
452
        # For PyCUDA import
453
        import pycuda.driver as cuda
454
        import pycuda.gpuarray as gpuarray
455
        import pycuda.autoinit
456
        from pycuda.compiler import SourceModule
457

    
458
    if GpuStyle=='OpenCL':
459
        # For PyOpenCL import
460
        import pyopencl as cl
461
        Id=1
462
        for platform in cl.get_platforms():
463
            for device in platform.get_devices():
464
                deviceType=cl.device_type.to_string(device.type)
465
                print "Device #%i of type %s : %s" % (Id,deviceType,device.name)
466
                Id=Id+1
467

    
468
    LAPIMAGE=False
469

    
470
    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype(numpy.int8)
471

    
472
    ImageOutput(sigmaIn,"Ising2D_Serial_%i_Initial" % (Size))
473

    
474
    # La temperature est passee comme parametre, attention au CAST !
475
    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep).astype(numpy.float32)
476

    
477
    E=[]
478
    M=[]
479

    
480
    print Trange,Trange.shape
481

    
482
    sigma={}
483
    for T in Trange:
484
        sigma[T]=numpy.copy(sigmaIn)
485

    
486
    # For GPU, all process are launched
487
    #MetropolisAllOpenCL(sigma,J,B,Trange,Iterations,len(Trange),
488
    #                    ParaStyle,Alu,Device)
489
    MetropolisAllOpenCL(sigma,J,B,Trange,Iterations,len(Trange),
490
                        ParaStyle,Alu,Device)
491
    
492
    for T in Trange:        
493
        ImageOutput(sigma[T],"Ising2D_Serial_%i_%1.1f_Final" % (Size,T))
494

    
495
    # if Curves:
496
    #     DisplayCurves(Trange,E,M,J,B)
497

    
498
    # # Save output
499
    # numpy.savez("Ising2D_Serial_%i_%.8i" % (Size,Iterations),(Trange,E,M))
500