Statistiques
| Révision :

root / Ising / GPU / Ising2D-GPU-ChessBoard.py @ 308

Historique | Voir | Annoter | Télécharger (12,15 ko)

1
#!/usr/bin/env python
2
#
3
# Ising2D model using PyOpenCL 
4
#
5
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr> 
6
#
7
# Thanks to Andreas Klockner for PyOpenCL:
8
# http://mathema.tician.de/software/pyopencl
9
# 
10
# Interesting links:
11
# http://viennacl.sourceforge.net/viennacl-documentation.html
12
# http://enja.org/2011/02/22/adventures-in-pyopencl-part-1-getting-started-with-python/
13

    
14
import pyopencl as cl
15
import numpy
16
from PIL import Image
17
import time,string
18
from numpy.random import randint as nprnd
19
import sys
20
import getopt
21
import matplotlib.pyplot as plt
22

    
23
# Size of micro blocks
24
BSZ=16
25

    
26
# 2097152 on HD5850 (with 1GB of RAM)
27
#  262144 on GT218
28
#STEP=262144
29
#STEP=1048576
30
#STEP=2097152
31
#STEP=4194304
32
#STEP=8388608
33
STEP=16777216
34
#STEP=268435456
35

    
36
# Flag to define LAPIMAGE between iteration on OpenCL kernel calls
37
#LAPIMAGE=True
38
LAPIMAGE=False
39

    
40
# Version 2 of kernel : much optimize one
41
# a string template is used to replace BSZ (named $block_size) by its value 
42
KERNEL_CODE_ORIG=string.Template("""
43
#define BSZ $block_size
44

45
/* Marsaglia RNG very simple implementation */
46
#define znew (z=36969*(z&65535)+(z>>16))
47
#define wnew (w=18000*(w&65535)+(w>>16))
48
#define MWC ((znew<<16)+wnew )
49
#define MWCfp (float)(MWC * 2.328306435454494e-10f)
50

51
__kernel void MainLoop(__global int *s,float J,float B,float T,uint size,
52
                       uint iterations,uint seed_w,uint seed_z)
53
{
54
   uint base_idx=(uint)(BSZ*get_global_id(0));
55
   uint base_idy=(uint)(BSZ*get_global_id(1));
56
   uint base_id=base_idx+base_idy*size;
57

58
   uint z=seed_z+(uint)get_global_id(0);
59
   uint w=seed_w+(uint)get_global_id(1);
60

61
   for (uint i=0;i<iterations;i++)
62
   {
63
      uint x=(uint)(MWC%BSZ);
64
      uint y=(uint)(MWC%BSZ);
65

66
      int p=s[base_id+x+size*y];
67

68
      int u=s[((base_idx+x)%size)+size*((base_idy+y-1)%size)];
69
      int d=s[((base_idx+x)%size)+size*((base_idy+y+1)%size)];
70
      int l=s[((base_idx+x-1)%size)+size*((base_idy+y)%size)];
71
      int r=s[((base_idx+x+1)%size)+size*((base_idy+y)%size)];
72

73
      float DeltaE=p*(2.0f*J*(float)(u+d+l+r)+B);
74

75
      float factor= ((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
76

77
      s[base_id+x+size*y]= factor*p; 
78
   }
79
 
80
}
81
""")
82

    
83
# Version 2 of kernel : much optimize one
84
# a string template is used to replace BSZ (named $block_size) by its value 
85
KERNEL_CODE=string.Template("""
86

87
/* Marsaglia RNG very simple implementation */
88
#define znew (z=36969*(z&65535)+(z>>16))
89
#define wnew (w=18000*(w&65535)+(w>>16))
90
#define MWC ((znew<<16)+wnew )
91
#define MWCfp (float)(MWC * 2.328306435454494e-10f)
92

93
__kernel void MainLoop(__global int *s,float J,float B,float T,uint size,
94
                       uint iterations,uint seed_w,uint seed_z)
95
{
96
   uint z=seed_z+(uint)get_global_id(0);
97
   uint w=seed_w+(uint)get_global_id(1);
98

99
   int xo,yo,xe,ye,base,OddSize;
100
   int p,u,d,l,r,factor;
101
   float DeltaE;
102

103
   OddSize=(size%2==0)?1:0;
104

105
   base=2*get_global_id(0);
106
   yo=base/size;
107
   xo=(yo%2==0) ? base%size:(base%size+OddSize)%size ;
108
   ye=yo;
109
   xe=(xo+1)%size;
110

111
   for (uint i=0;i<iterations;i++)
112
   {
113
      // Odd pixel
114
      p=s[yo*size+xo];
115

116
      u= (yo==     0) ? s[xo+size*(size-1)]:s[xo+size*(yo-1)];
117
      d= (yo==size-1) ? s[xo]:s[xo+size*(yo+1)];
118
      l= (xo==     0) ? s[yo*size+size-1]:s[xo-1+size*yo];
119
      r= (xo==size-1) ? s[yo*size]:s[xo+1+yo*size];
120

121
      DeltaE=(float)p*(2.e0f*J*(float)(u+d+l+r)+B);
122

123
      factor= ((DeltaE < 0.e0f) || (MWCfp < (float)exp(-DeltaE/T))) ? -1:1;
124

125
      s[yo*size+xo]= factor*p;
126

127
      // Even pixel
128

129
      p=s[ye*size+xe];
130
      u= (ye==     0) ? s[xe+size*(size-1)]:s[xe+size*(ye-1)];
131
      d= (ye==size-1) ? s[xe]:s[xe+size*(ye+1)];
132
      l= (xe==     0) ? s[ye*size+size-1]:s[xe-1+size*ye];
133
      r= (xe==size-1) ? s[ye*size]:s[xe+1+ye*size];
134

135
      DeltaE=(float)p*(2.e0f*J*(float)(u+d+l+r)+B);
136

137
      factor= ((DeltaE < 0.e0f) || (MWCfp < (float)exp(-DeltaE/T))) ? -1:1;
138

139
      s[ye*size+xe]= factor*p; 
140
      barrier(CLK_LOCAL_MEM_FENCE);
141
   }
142

143
 
144
}
145
""")
146

    
147
def ImageOutput(sigma,prefix):
148
    Max=sigma.max()
149
    Min=sigma.min()
150
    
151
    # Normalize value as 8bits Integer
152
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
153
    image = Image.fromarray(SigmaInt)
154
    image.save("%s.jpg" % prefix)
155
    
156
def CheckLattice(sigma):
157

    
158
    over=sigma[sigma>1]
159
    under=sigma[sigma<-1]
160
    
161
    if  (over.size+under.size) > 0:
162
        print "Problem on Lattice on %i spin(s)." % (over.size+under.size)
163
    else:
164
        print "No problem on Lattice"
165

    
166
def Metropolis(sigma,J,B,T,iterations,Device,Divider):
167

    
168
    kernel_params = {}
169

    
170
    print iterations,Divider
171
    
172
    # Je detecte un peripherique GPU dans la liste des peripheriques
173
    Id=1
174
    HasXPU=False
175
    for platform in cl.get_platforms():
176
        for device in platform.get_devices():
177
            if Id==Device:
178
                XPU=device
179
                print "CPU/GPU selected: ",device.name.lstrip()
180
                HasXPU=True
181
            Id+=1
182

    
183
    if HasXPU==False:
184
        print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1)
185
        sys.exit()                
186
        
187
    ctx = cl.Context([XPU])
188
    queue = cl.CommandQueue(ctx,
189
                            properties=cl.command_queue_properties.PROFILING_ENABLE)
190

    
191
    MetropolisCL = cl.Program(ctx,KERNEL_CODE.substitute(kernel_params)).build()
192
    
193
    # Je recupere les flag possibles pour les buffers
194
    mf = cl.mem_flags
195

    
196
    # Program based on Kernel2
197
    sigmaCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=sigma)
198
    #sigmaCL = cl.Buffer(ctx, mf.READ_WRITE, sigma.nbytes)
199
    #cl.enqueue_copy(queue,sigmaCL,sigma).wait();
200
    
201
    i=0
202
    duration=0.
203
    if iterations%Divider == 0:
204
        steps=iterations/Divider;
205
    else:
206
        steps=iterations/Divider+1;
207

    
208
    step=0
209
    while (step*steps < iterations):
210
    
211
        # Call OpenCL kernel
212
        # sigmaCL is lattice translated in CL format
213
        # step is number of iterations
214
        
215
        start_time=time.time() 
216

    
217
        CLLaunch=MetropolisCL.MainLoop(queue,
218
                                       (int(sigma.shape[0]*sigma.shape[1]/2),1),None,
219
                                       sigmaCL,
220
                                       numpy.float32(J),numpy.float32(B),
221
                                       numpy.float32(T),
222
                                       numpy.uint32(sigma.shape[0]),
223
                                       numpy.uint32(steps),
224
                                       numpy.uint32(2008),
225
                                       numpy.uint32(1010))
226
                                     
227
        CLLaunch.wait()
228
            # elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
229
        elapsed = time.time()-start_time
230
        print "Iteration %i with T=%f and %i iterations in %f: " % (step,T,steps,elapsed)
231
        if LAPIMAGE:
232
                cl.enqueue_copy(queue, sigma, sigmaCL).wait()
233
                checkLattice(sigma)
234
                ImageOutput(sigma,"Ising2D_GPU_OddEven_%i_%1.1f_%.3i_Lap" % (SIZE,T,i))
235
        step=step+1
236
        duration=duration+elapsed
237

    
238
    cl.enqueue_copy(queue,sigma,sigmaCL).wait()
239
    CheckLattice(sigma)
240
    sigmaCL.release()
241
    
242
    return(duration)
243

    
244
def Magnetization(sigma,M):
245
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))
246

    
247
def Energy(sigma,J,B):
248
    # Copy & Cast values
249
    E=numpy.copy(sigma).astype(numpy.float32)
250
    
251
    # Slice call to estimate Energy
252
    E[1:-1,1:-1]=E[1:-1,1:-1]*(2.0*J*(E[:-2,1:-1]+E[2:,1:-1]+
253
                                          E[1:-1,:-2]+E[1:-1,2:])+B)
254
    
255
    # Clean perimeter
256
    E[:,0]=0
257
    E[:,-1]=0
258
    E[0,:]=0
259
    E[-1,:]=0
260
    
261
    Energy=numpy.sum(E)
262

    
263
    return(Energy/(E.shape[0]*E.shape[1]*1.0))
264

    
265
def CriticalT(T,E):
266

    
267
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
268
    dEpoly=numpy.diff(Epoly(T))
269
    dEpoly=numpy.insert(dEpoly,0,0)
270
    return(T[numpy.argmin(dEpoly)])
271

    
272
def PolyFitE(T,E):
273

    
274
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
275
    return(Epoly(T))
276

    
277
def DisplayCurves(T,E,M,J,B):
278

    
279
    plt.xlabel("Temperature")
280
    plt.ylabel("Energy")
281

    
282
    Experience,=plt.plot(T,E,label="Energy") 
283

    
284
    plt.legend()
285
    plt.show()
286

    
287
if __name__=='__main__':
288
        
289
    # Set defaults values
290
    # Coupling factor
291
    J=1.
292
    # External Magnetic Field is null
293
    B=0.
294
    # Size of Lattice
295
    Size=256
296
    # Default Temperatures (start, end, step)
297
    Tmin=0.1
298
    Tmax=5
299
    Tstep=0.1
300
    # Default Number of Iterations
301
    Iterations=Size*Size
302
    # Default Device is first one
303
    Device=1
304
    # Default Divider
305
    Divider=1
306

    
307
    # Curves is True to print the curves
308
    Curves=False
309

    
310
    OCL_vendor={}
311
    OCL_type={}
312
    OCL_description={}
313

    
314
    try:
315
        import pyopencl as cl
316
 
317
        print "\nHere are available OpenCL devices:"
318
        Id=1
319
        for platform in cl.get_platforms():
320
            for device in platform.get_devices():
321
                OCL_vendor[Id]=platform.vendor.lstrip().rstrip()
322
                #OCL_type[Id]=cl.device_type.to_string(device.type)
323
                OCL_type[Id]="xPU"
324
                OCL_description[Id]=device.name.lstrip().rstrip()
325
                print "* Device #%i from %s of type %s : %s" % (Id,OCL_vendor[Id],OCL_type[Id],OCL_description[Id])
326
                Id=Id+1
327
        OCL_MaxDevice=Id-1
328
        print
329
        
330
    except ImportError:
331
        print "Your platform does not seem to support OpenCL"
332
        sys.exit(0)   
333
    
334
    try:
335
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:d:v:",["coupling=","magneticfield=","size=","iterations=","tempstart=","tempend=","tempstep=","units",'device='])
336
    except getopt.GetoptError:
337
        print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
338
        sys.exit(2)
339
    
340
    for opt, arg in opts:
341
        if opt == '-h':
342
            print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
343
            sys.exit()
344
        elif opt == '-c':
345
            Curves=True
346
        elif opt in ("-d", "--device"):
347
            Device = int(arg)
348
            if Device>OCL_MaxDevice:
349
                "Device #%s seems not to be available !"
350
                sys.exit()
351
        elif opt in ("-j", "--coupling"):
352
            J = float(arg)
353
        elif opt in ("-b", "--magneticfield"):
354
            B = float(arg)
355
        elif opt in ("-s", "--tempmin"):
356
            Tmin = float(arg)
357
        elif opt in ("-e", "--tempmax"):
358
            Tmax = float(arg)
359
        elif opt in ("-p", "--tempstep"):
360
            Tstep = float(arg)
361
        elif opt in ("-i", "--iterations"):
362
            Iterations = int(arg)
363
        elif opt in ("-z", "--size"):
364
            Size = int(arg)
365
        elif opt in ("-v", "--divider"):
366
            Divider = int(arg)
367
            
368
    print "Here are parameters of simulation:"
369
    print "* Device selected #%s: %s of type %s from %s" % (Device,OCL_description[Device],OCL_type[Device],OCL_vendor[Device])
370
    print "* Coupling Factor J : %s" % J
371
    print "* Magnetic Field B :  %s" % B
372
    print "* Size of lattice : %sx%s" % (Size,Size)
373
    print "* Divider inside loop : %s" % Divider
374
    print "* Iterations : %s" % Iterations
375
    print "* Temperatures from %s to %s by %s" % (Tmin,Tmax,Tstep)
376

    
377
    LAPIMAGE=False
378
        
379
    if Iterations<STEP:
380
        STEP=Iterations
381

    
382
    numpy.random.seed(20081010)
383
    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype(numpy.int32)
384
        
385
    ImageOutput(sigmaIn,"Ising2D_GPU_Local_%i_Initial" % (Size))
386

    
387
        
388
    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep)
389
    
390
    E=[]
391
    M=[]
392
        
393
    for T in Trange:
394
        sigma=numpy.copy(sigmaIn)
395
        duration=Metropolis(sigma,J,B,T,Iterations,Device,Divider)
396
        E=numpy.append(E,Energy(sigma,J,B))
397
        M=numpy.append(M,Magnetization(sigma,B))
398
        ImageOutput(sigma,"Ising2D_GPU_OddEven_%i_%1.1f_Final" % (Size,T))
399
        print "GPU/CPU Time : %f" % (duration)
400
        print "Total Energy at Temperature %f : %f" % (T,E[-1])
401
        print "Total Magnetization at Temperature %f : %f" % (T,M[-1])
402
                
403
    # Save output
404
    numpy.savez("Ising2D_GPU_Global_%i_%.8i" % (Size,Iterations),(Trange,E,M))
405
    
406
    # Estimate Critical temperature
407
    print "The critical temperature on %ix%i lattice with J=%f, B=%f is %f " % (Size,Size,J,B,CriticalT(Trange,E))
408

    
409
    if Curves:
410
        DisplayCurves(Trange,E,M,J,B)
411

    
412