Statistiques
| Révision :

root / Ising / GPU / Ising2D-GPU-OddEven.py @ 145

Historique | Voir | Annoter | Télécharger (11,99 ko)

1
#!/usr/bin/env python
2
#
3
# Ising2D model using PyOpenCL 
4
#
5
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr> 
6
#
7
# Thanks to Andreas Klockner for PyOpenCL:
8
# http://mathema.tician.de/software/pyopencl
9
# 
10
# Interesting links:
11
# http://viennacl.sourceforge.net/viennacl-documentation.html
12
# http://enja.org/2011/02/22/adventures-in-pyopencl-part-1-getting-started-with-python/
13

    
14
import pyopencl as cl
15
import numpy
16
from PIL import Image
17
import time,string
18
from numpy.random import randint as nprnd
19
import sys
20
import getopt
21
import matplotlib.pyplot as plt
22

    
23
# Size of micro blocks
24
BSZ=16
25

    
26
# 2097152 on HD5850 (with 1GB of RAM)
27
#  262144 on GT218
28
#STEP=262144
29
#STEP=1048576
30
#STEP=2097152
31
#STEP=4194304
32
#STEP=8388608
33
STEP=16777216
34
#STEP=268435456
35

    
36
# Flag to define LAPIMAGE between iteration on OpenCL kernel calls
37
#LAPIMAGE=True
38
LAPIMAGE=False
39

    
40
# Version 2 of kernel : much optimize one
41
# a string template is used to replace BSZ (named $block_size) by its value 
42
KERNEL_CODE_ORIG=string.Template("""
43
#define BSZ $block_size
44

45
/* Marsaglia RNG very simple implementation */
46
#define znew (z=36969*(z&65535)+(z>>16))
47
#define wnew (w=18000*(w&65535)+(w>>16))
48
#define MWC ((znew<<16)+wnew )
49
#define MWCfp (float)(MWC * 2.328306435454494e-10f)
50

51
__kernel void MainLoop(__global int *s,float J,float B,float T,uint size,
52
                       uint iterations,uint seed_w,uint seed_z)
53
{
54
   uint base_idx=(uint)(BSZ*get_global_id(0));
55
   uint base_idy=(uint)(BSZ*get_global_id(1));
56
   uint base_id=base_idx+base_idy*size;
57

58
   uint z=seed_z+(uint)get_global_id(0);
59
   uint w=seed_w+(uint)get_global_id(1);
60

61
   for (uint i=0;i<iterations;i++)
62
   {
63
      uint x=(uint)(MWC%BSZ);
64
      uint y=(uint)(MWC%BSZ);
65

66
      int p=s[base_id+x+size*y];
67

68
      int u=s[((base_idx+x)%size)+size*((base_idy+y-1)%size)];
69
      int d=s[((base_idx+x)%size)+size*((base_idy+y+1)%size)];
70
      int l=s[((base_idx+x-1)%size)+size*((base_idy+y)%size)];
71
      int r=s[((base_idx+x+1)%size)+size*((base_idy+y)%size)];
72

73
      float DeltaE=p*(2.0f*J*(float)(u+d+l+r)+B);
74

75
      float factor= ((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
76

77
      s[base_id+x+size*y]= factor*p; 
78
   }
79
 
80
}
81
""")
82

    
83
# Version 2 of kernel : much optimize one
84
# a string template is used to replace BSZ (named $block_size) by its value 
85
KERNEL_CODE=string.Template("""
86
#define BSZ $block_size
87

88
/* Marsaglia RNG very simple implementation */
89
#define znew (z=36969*(z&65535)+(z>>16))
90
#define wnew (w=18000*(w&65535)+(w>>16))
91
#define MWC ((znew<<16)+wnew )
92
#define MWCfp (float)(MWC * 2.328306435454494e-10f)
93

94
__kernel void MainLoop(__global int *s,float J,float B,float T,uint size,
95
                       uint iterations,uint seed_w,uint seed_z)
96
{
97
   private uint z=seed_z+(uint)get_global_id(0);
98
   private uint w=seed_w+(uint)get_global_id(1);
99

100
   private uint x,y,base,size2;
101
   private int p,u,d,l,r,factor;
102
   private float DeltaE;
103

104
   for (uint i=0;i<iterations/2;i++)
105
   {
106
      // Odd pixel
107
      base=2*(uint)get_global_id(0);
108

109
      x=base%size;
110
      y=base/size;
111

112
      p=s[base];
113

114
      u= (y==     0) ? s[x+size*(size-1)]:s[base-size];
115
      d= (y==size-1) ? s[x]:s[base+size];
116
      l= (x==     0) ? s[y*size+size-1]:s[base-1];
117
      r= (x==size-1) ? s[y*size]:s[base+1];
118

119
      DeltaE=p*(2e0f*J*(float)(u+d+l+r)+B);
120

121
      factor= ((DeltaE < 0e0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
122

123
      s[base]= factor*p; 
124
      barrier(CLK_GLOBAL_MEM_FENCE);
125

126
      // Even pixel
127

128
      base=2*(uint)get_global_id(0)+1;
129

130
      x=base%size;
131
      y=base/size;
132

133
      p=s[base];
134
      u= (y==     0) ? s[x+size*(size-1)]:s[base-size];
135
      d= (y==size-1) ? s[x]:s[base+size];
136
      l= (x==     0) ? s[y*size+size-1]:s[base-1];
137
      r= (x==size-1) ? s[y*size]:s[base+1];
138

139
      DeltaE=p*(2e0f*J*(float)(u+d+l+r)+B);
140

141
      factor= ((DeltaE < 0e0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
142

143
      s[base]= factor*p; 
144
      barrier(CLK_GLOBAL_MEM_FENCE);
145
   }
146
 
147
}
148
""")
149

    
150
def ImageOutput(sigma,prefix):
151
    Max=sigma.max()
152
    Min=sigma.min()
153
    
154
    # Normalize value as 8bits Integer
155
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
156
    image = Image.fromarray(SigmaInt)
157
    image.save("%s.jpg" % prefix)
158
    
159
def CheckLattice(sigma):
160

    
161
    over=sigma[sigma>1]
162
    under=sigma[sigma<-1]
163
    
164
    if  (over.size+under.size) > 0:
165
        print "Problem on Lattice on %i spin(s)." % (over.size+under.size)
166
    else:
167
        print "No problem on Lattice"
168

    
169
def Metropolis(sigma,J,B,T,iterations,Device,Divider):
170

    
171
    kernel_params = {'block_size':sigma.shape[0]/Divider}
172
    
173
        # Je detecte un peripherique GPU dans la liste des peripheriques
174
    Id=1
175
    HasXPU=False
176
    for platform in cl.get_platforms():
177
        for device in platform.get_devices():
178
            if Id==Device:
179
                XPU=device
180
                print "CPU/GPU selected: ",device.name.lstrip()
181
                HasXPU=True
182
            Id+=1
183

    
184
    if HasXPU==False:
185
        print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1)
186
        sys.exit()                
187
        
188
    ctx = cl.Context([XPU])
189
    queue = cl.CommandQueue(ctx,
190
                            properties=cl.command_queue_properties.PROFILING_ENABLE)
191

    
192
    # Je recupere les flag possibles pour les buffers
193
    mf = cl.mem_flags
194
    
195
    sigmaCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=sigma)
196
    #sigmaCL = cl.Buffer(ctx, mf.READ_WRITE, sigma.nbytes)
197
    # Program based on Kernel2
198
    MetropolisCL = cl.Program(ctx,KERNEL_CODE.substitute(kernel_params)).build()
199

    
200
    divide=Divider*Divider
201
    step=STEP/divide
202
    i=0
203
    duration=0.
204
    while (step*i < iterations/divide):
205
    
206
        # Call OpenCL kernel
207
        # sigmaCL is lattice translated in CL format
208
        # step is number of iterations
209
        
210
        start_time=time.time() 
211

    
212
        CLLaunch=MetropolisCL.MainLoop(queue,
213
                                       (numpy.int32(sigma.shape[0]*sigma.shape[1]/2),1),None,
214
                                       sigmaCL,
215
                                       numpy.float32(J),numpy.float32(B),
216
                                       numpy.float32(T),
217
                                       numpy.uint32(sigma.shape[0]),
218
                                       numpy.uint32(step),
219
                                       numpy.uint32(2008),
220
                                       numpy.uint32(1010))
221
                                     
222
        CLLaunch.wait()
223
            # elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
224
        elapsed = time.time()-start_time
225
        print "Iteration %i with T=%f and %i iterations in %f: " % (i,T,step,elapsed)
226
        if LAPIMAGE:
227
                cl.enqueue_copy(queue, sigma, sigmaCL).wait()
228
                checkLattice(sigma)
229
                ImageOutput(sigma,"Ising2D_GPU_OddEven_%i_%1.1f_%.3i_Lap" % (SIZE,T,i))
230
        i=i+1
231
        duration=duration+elapsed
232

    
233
    cl.enqueue_copy(queue, sigma,sigmaCL).wait()
234
    CheckLattice(sigma)
235
    sigmaCL.release()
236
    
237
    return(duration)
238

    
239
def Magnetization(sigma,M):
240
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))
241

    
242
def Energy(sigma,J,B):
243
    # Copy & Cast values
244
    E=numpy.copy(sigma).astype(numpy.float32)
245
    
246
    # Slice call to estimate Energy
247
    E[1:-1,1:-1]=E[1:-1,1:-1]*(2.0*J*(E[:-2,1:-1]+E[2:,1:-1]+
248
                                          E[1:-1,:-2]+E[1:-1,2:])+B)
249
    
250
    # Clean perimeter
251
    E[:,0]=0
252
    E[:,-1]=0
253
    E[0,:]=0
254
    E[-1,:]=0
255
    
256
    Energy=numpy.sum(E)
257

    
258
    return(Energy/(E.shape[0]*E.shape[1]*1.0))
259

    
260
def CriticalT(T,E):
261

    
262
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
263
    dEpoly=numpy.diff(Epoly(T))
264
    dEpoly=numpy.insert(dEpoly,0,0)
265
    return(T[numpy.argmin(dEpoly)])
266

    
267
def PolyFitE(T,E):
268

    
269
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
270
    return(Epoly(T))
271

    
272
def DisplayCurves(T,E,M,J,B):
273

    
274
    plt.xlabel("Temperature")
275
    plt.ylabel("Energy")
276

    
277
    Experience,=plt.plot(T,E,label="Energy") 
278

    
279
    plt.legend()
280
    plt.show()
281

    
282
if __name__=='__main__':
283
        
284
    # Set defaults values
285
    # Coupling factor
286
    J=1.
287
    # External Magnetic Field is null
288
    B=0.
289
    # Size of Lattice
290
    Size=256
291
    # Default Temperatures (start, end, step)
292
    Tmin=0.1
293
    Tmax=5
294
    Tstep=0.1
295
    # Default Number of Iterations
296
    Iterations=Size*Size
297
    # Default Device is first one
298
    Device=1
299
    # Default Divider
300
    Divider=Size/16
301

    
302
    # Curves is True to print the curves
303
    Curves=False
304

    
305
    OCL_vendor={}
306
    OCL_type={}
307
    OCL_description={}
308

    
309
    try:
310
        import pyopencl as cl
311
 
312
        print "\nHere are available OpenCL devices:"
313
        Id=1
314
        for platform in cl.get_platforms():
315
            for device in platform.get_devices():
316
                OCL_vendor[Id]=platform.vendor.lstrip().rstrip()
317
                #OCL_type[Id]=cl.device_type.to_string(device.type)
318
                OCL_type[Id]="xPU"
319
                OCL_description[Id]=device.name.lstrip().rstrip()
320
                print "* Device #%i from %s of type %s : %s" % (Id,OCL_vendor[Id],OCL_type[Id],OCL_description[Id])
321
                Id=Id+1
322
        OCL_MaxDevice=Id-1
323
        print
324
        
325
    except ImportError:
326
        print "Your platform does not seem to support OpenCL"
327
        sys.exit(0)   
328
    
329
    try:
330
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:d:v:",["coupling=","magneticfield=","size=","iterations=","tempstart=","tempend=","tempstep=","units",'device='])
331
    except getopt.GetoptError:
332
        print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
333
        sys.exit(2)
334
    
335
    for opt, arg in opts:
336
        if opt == '-h':
337
            print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
338
            sys.exit()
339
        elif opt == '-c':
340
            Curves=True
341
        elif opt in ("-d", "--device"):
342
            Device = int(arg)
343
            if Device>OCL_MaxDevice:
344
                "Device #%s seems not to be available !"
345
                sys.exit()
346
        elif opt in ("-j", "--coupling"):
347
            J = float(arg)
348
        elif opt in ("-b", "--magneticfield"):
349
            B = float(arg)
350
        elif opt in ("-s", "--tempmin"):
351
            Tmin = float(arg)
352
        elif opt in ("-e", "--tempmax"):
353
            Tmax = arg
354
        elif opt in ("-p", "--tempstep"):
355
            Tstep = numpy.uint32(arg)
356
        elif opt in ("-i", "--iterations"):
357
            Iterations = int(arg)
358
        elif opt in ("-z", "--size"):
359
            Size = int(arg)
360
        elif opt in ("-v", "--divider"):
361
            Divider = int(arg)
362
            
363
    print "Here are parameters of simulation:"
364
    print "* Device selected #%s: %s of type %s from %s" % (Device,OCL_description[Device],OCL_type[Device],OCL_vendor[Device])
365
    print "* Coupling Factor J : %s" % J
366
    print "* Magnetic Field B :  %s" % B
367
    print "* Size of lattice : %sx%s" % (Size,Size)
368
    print "* Parallel computing : %sx%s" % (Divider,Divider)
369
    print "* Iterations : %s" % Iterations
370
    print "* Temperatures from %s to %s by %s" % (Tmin,Tmax,Tstep)
371

    
372
    LAPIMAGE=False
373
        
374
    if Iterations<STEP:
375
        STEP=Iterations
376
    
377
    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype(numpy.int32)
378
        
379
    ImageOutput(sigmaIn,"Ising2D_GPU_Local_%i_Initial" % (Size))
380

    
381
        
382
    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep)
383
    
384
    E=[]
385
    M=[]
386
        
387
    for T in Trange:
388
        sigma=numpy.copy(sigmaIn)
389
        duration=Metropolis(sigma,J,B,T,Iterations,Device,Divider)
390
        E=numpy.append(E,Energy(sigma,J,B))
391
        M=numpy.append(M,Magnetization(sigma,B))
392
        ImageOutput(sigma,"Ising2D_GPU_OddEven_%i_%1.1f_Final" % (Size,T))
393
        print "GPU/CPU Time : %f" % (duration)
394
        print "Total Energy at Temperature %f : %f" % (T,E[-1])
395
        print "Total Magnetization at Temperature %f : %f" % (T,M[-1])
396
                
397
    # Save output
398
    numpy.savez("Ising2D_GPU_Global_%i_%.8i" % (Size,Iterations),(Trange,E,M))
399
    
400
    # Estimate Critical temperature
401
    print "The critical temperature on %ix%i lattice with J=%f, B=%f is %f " % (Size,Size,J,B,CriticalT(Trange,E))
402

    
403
    if Curves:
404
        DisplayCurves(Trange,E,M,J,B)
405

    
406