Révision 148

Ising/GPU/Ising2D-GPU-ChessBoard.py (revision 148)
1
#!/usr/bin/env python
2
#
3
# Ising2D model using PyOpenCL 
4
#
5
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr> 
6
#
7
# Thanks to Andreas Klockner for PyOpenCL:
8
# http://mathema.tician.de/software/pyopencl
9
# 
10
# Interesting links:
11
# http://viennacl.sourceforge.net/viennacl-documentation.html
12
# http://enja.org/2011/02/22/adventures-in-pyopencl-part-1-getting-started-with-python/
13

  
14
import pyopencl as cl
15
import numpy
16
from PIL import Image
17
import time,string
18
from numpy.random import randint as nprnd
19
import sys
20
import getopt
21
import matplotlib.pyplot as plt
22

  
23
# Size of micro blocks
24
BSZ=16
25

  
26
# 2097152 on HD5850 (with 1GB of RAM)
27
#  262144 on GT218
28
#STEP=262144
29
#STEP=1048576
30
#STEP=2097152
31
#STEP=4194304
32
#STEP=8388608
33
STEP=16777216
34
#STEP=268435456
35

  
36
# Flag to define LAPIMAGE between iteration on OpenCL kernel calls
37
#LAPIMAGE=True
38
LAPIMAGE=False
39

  
40
# Version 2 of kernel : much optimize one
41
# a string template is used to replace BSZ (named $block_size) by its value 
42
KERNEL_CODE_ORIG=string.Template("""
43
#define BSZ $block_size
44

  
45
/* Marsaglia RNG very simple implementation */
46
#define znew (z=36969*(z&65535)+(z>>16))
47
#define wnew (w=18000*(w&65535)+(w>>16))
48
#define MWC ((znew<<16)+wnew )
49
#define MWCfp (float)(MWC * 2.328306435454494e-10f)
50

  
51
__kernel void MainLoop(__global int *s,float J,float B,float T,uint size,
52
                       uint iterations,uint seed_w,uint seed_z)
53
{
54
   uint base_idx=(uint)(BSZ*get_global_id(0));
55
   uint base_idy=(uint)(BSZ*get_global_id(1));
56
   uint base_id=base_idx+base_idy*size;
57

  
58
   uint z=seed_z+(uint)get_global_id(0);
59
   uint w=seed_w+(uint)get_global_id(1);
60

  
61
   for (uint i=0;i<iterations;i++)
62
   {
63
      uint x=(uint)(MWC%BSZ);
64
      uint y=(uint)(MWC%BSZ);
65

  
66
      int p=s[base_id+x+size*y];
67

  
68
      int u=s[((base_idx+x)%size)+size*((base_idy+y-1)%size)];
69
      int d=s[((base_idx+x)%size)+size*((base_idy+y+1)%size)];
70
      int l=s[((base_idx+x-1)%size)+size*((base_idy+y)%size)];
71
      int r=s[((base_idx+x+1)%size)+size*((base_idy+y)%size)];
72

  
73
      float DeltaE=p*(2.0f*J*(float)(u+d+l+r)+B);
74

  
75
      float factor= ((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
76

  
77
      s[base_id+x+size*y]= factor*p; 
78
   }
79
 
80
}
81
""")
82

  
83
# Version 2 of kernel : much optimize one
84
# a string template is used to replace BSZ (named $block_size) by its value 
85
KERNEL_CODE=string.Template("""
86

  
87
/* Marsaglia RNG very simple implementation */
88
#define znew (z=36969*(z&65535)+(z>>16))
89
#define wnew (w=18000*(w&65535)+(w>>16))
90
#define MWC ((znew<<16)+wnew )
91
#define MWCfp (float)(MWC * 2.328306435454494e-10f)
92

  
93
__kernel void MainLoop(__global int *s,float J,float B,float T,uint size,
94
                       uint iterations,uint seed_w,uint seed_z)
95
{
96
   uint z=seed_z+(uint)get_global_id(0);
97
   uint w=seed_w+(uint)get_global_id(1);
98

  
99
   int xo,yo,xe,ye,base,OddSize;
100
   int p,u,d,l,r,factor;
101
   float DeltaE;
102

  
103
   OddSize=(size%2==0)?1:0;
104

  
105
   base=2*get_global_id(0);
106
   yo=base/size;
107
   xo=(yo%2==0) ? base%size:(base%size+OddSize)%size ;
108
   ye=yo;
109
   xe=(xo+1)%size;
110

  
111
   for (uint i=0;i<iterations;i++)
112
   {
113
      // Odd pixel
114
      p=s[yo*size+xo];
115

  
116
      u= (yo==     0) ? s[xo+size*(size-1)]:s[xo+size*(yo-1)];
117
      d= (yo==size-1) ? s[xo]:s[xo+size*(yo+1)];
118
      l= (xo==     0) ? s[yo*size+size-1]:s[xo-1+size*yo];
119
      r= (xo==size-1) ? s[yo*size]:s[xo+1+yo*size];
120

  
121
      DeltaE=(float)p*(2.e0f*J*(float)(u+d+l+r)+B);
122

  
123
      factor= ((DeltaE < 0.e0f) || (MWCfp < (float)exp(-DeltaE/T))) ? -1:1;
124

  
125
      s[yo*size+xo]= factor*p;
126

  
127
      // Even pixel
128

  
129
      p=s[ye*size+xe];
130
      u= (ye==     0) ? s[xe+size*(size-1)]:s[xe+size*(ye-1)];
131
      d= (ye==size-1) ? s[xe]:s[xe+size*(ye+1)];
132
      l= (xe==     0) ? s[ye*size+size-1]:s[xe-1+size*ye];
133
      r= (xe==size-1) ? s[ye*size]:s[xe+1+ye*size];
134

  
135
      DeltaE=(float)p*(2.e0f*J*(float)(u+d+l+r)+B);
136

  
137
      factor= ((DeltaE < 0.e0f) || (MWCfp < (float)exp(-DeltaE/T))) ? -1:1;
138

  
139
      s[ye*size+xe]= factor*p; 
140
      barrier(CLK_LOCAL_MEM_FENCE);
141
   }
142

  
143
 
144
}
145
""")
146

  
147
def ImageOutput(sigma,prefix):
148
    Max=sigma.max()
149
    Min=sigma.min()
150
    
151
    # Normalize value as 8bits Integer
152
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
153
    image = Image.fromarray(SigmaInt)
154
    image.save("%s.jpg" % prefix)
155
    
156
def CheckLattice(sigma):
157

  
158
    over=sigma[sigma>1]
159
    under=sigma[sigma<-1]
160
    
161
    if  (over.size+under.size) > 0:
162
	print "Problem on Lattice on %i spin(s)." % (over.size+under.size)
163
    else:
164
	print "No problem on Lattice"
165

  
166
def Metropolis(sigma,J,B,T,iterations,Device,Divider):
167

  
168
    kernel_params = {}
169

  
170
    print iterations,Divider
171
    
172
    # Je detecte un peripherique GPU dans la liste des peripheriques
173
    Id=1
174
    HasXPU=False
175
    for platform in cl.get_platforms():
176
        for device in platform.get_devices():
177
            if Id==Device:
178
                XPU=device
179
                print "CPU/GPU selected: ",device.name.lstrip()
180
                HasXPU=True
181
            Id+=1
182

  
183
    if HasXPU==False:
184
        print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1)
185
        sys.exit()		
186
	
187
    ctx = cl.Context([XPU])
188
    queue = cl.CommandQueue(ctx,
189
			    properties=cl.command_queue_properties.PROFILING_ENABLE)
190

  
191
    MetropolisCL = cl.Program(ctx,KERNEL_CODE.substitute(kernel_params)).build()
192
    
193
    # Je recupere les flag possibles pour les buffers
194
    mf = cl.mem_flags
195

  
196
    # Program based on Kernel2
197
    sigmaCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=sigma)
198
    #sigmaCL = cl.Buffer(ctx, mf.READ_WRITE, sigma.nbytes)
199
    #cl.enqueue_copy(queue,sigmaCL,sigma).wait();
200
    
201
    i=0
202
    duration=0.
203
    if iterations%Divider == 0:
204
        steps=iterations/Divider;
205
    else:
206
        steps=iterations/Divider+1;
207

  
208
    step=0
209
    while (step*steps < iterations):
210
    
211
        # Call OpenCL kernel
212
        # sigmaCL is lattice translated in CL format
213
        # step is number of iterations
214
        
215
        start_time=time.time() 
216

  
217
        CLLaunch=MetropolisCL.MainLoop(queue,
218
                                       (int(sigma.shape[0]*sigma.shape[1]/2),1),None,
219
                                       sigmaCL,
220
                                       numpy.float32(J),numpy.float32(B),
221
                                       numpy.float32(T),
222
                                       numpy.uint32(sigma.shape[0]),
223
                                       numpy.uint32(steps),
224
                                       numpy.uint32(2008),
225
                                       numpy.uint32(1010))
226
                                     
227
        CLLaunch.wait()
228
	    # elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
229
        elapsed = time.time()-start_time
230
        print "Iteration %i with T=%f and %i iterations in %f: " % (step,T,steps,elapsed)
231
        if LAPIMAGE:
232
	        cl.enqueue_copy(queue, sigma, sigmaCL).wait()
233
	        checkLattice(sigma)
234
	        ImageOutput(sigma,"Ising2D_GPU_OddEven_%i_%1.1f_%.3i_Lap" % (SIZE,T,i))
235
        step=step+1
236
        duration=duration+elapsed
237

  
238
    cl.enqueue_copy(queue,sigma,sigmaCL).wait()
239
    CheckLattice(sigma)
240
    sigmaCL.release()
241
    
242
    return(duration)
243

  
244
def Magnetization(sigma,M):
245
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))
246

  
247
def Energy(sigma,J,B):
248
    # Copy & Cast values
249
    E=numpy.copy(sigma).astype(numpy.float32)
250
    
251
    # Slice call to estimate Energy
252
    E[1:-1,1:-1]=E[1:-1,1:-1]*(2.0*J*(E[:-2,1:-1]+E[2:,1:-1]+
253
	                                  E[1:-1,:-2]+E[1:-1,2:])+B)
254
    
255
    # Clean perimeter
256
    E[:,0]=0
257
    E[:,-1]=0
258
    E[0,:]=0
259
    E[-1,:]=0
260
    
261
    Energy=numpy.sum(E)
262

  
263
    return(Energy/(E.shape[0]*E.shape[1]*1.0))
264

  
265
def CriticalT(T,E):
266

  
267
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
268
    dEpoly=numpy.diff(Epoly(T))
269
    dEpoly=numpy.insert(dEpoly,0,0)
270
    return(T[numpy.argmin(dEpoly)])
271

  
272
def PolyFitE(T,E):
273

  
274
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
275
    return(Epoly(T))
276

  
277
def DisplayCurves(T,E,M,J,B):
278

  
279
    plt.xlabel("Temperature")
280
    plt.ylabel("Energy")
281

  
282
    Experience,=plt.plot(T,E,label="Energy") 
283

  
284
    plt.legend()
285
    plt.show()
286

  
287
if __name__=='__main__':
288
	
289
    # Set defaults values
290
    # Coupling factor
291
    J=1.
292
    # External Magnetic Field is null
293
    B=0.
294
    # Size of Lattice
295
    Size=256
296
    # Default Temperatures (start, end, step)
297
    Tmin=0.1
298
    Tmax=5
299
    Tstep=0.1
300
    # Default Number of Iterations
301
    Iterations=Size*Size
302
    # Default Device is first one
303
    Device=1
304
    # Default Divider
305
    Divider=1
306

  
307
    # Curves is True to print the curves
308
    Curves=False
309

  
310
    OCL_vendor={}
311
    OCL_type={}
312
    OCL_description={}
313

  
314
    try:
315
        import pyopencl as cl
316
 
317
        print "\nHere are available OpenCL devices:"
318
        Id=1
319
        for platform in cl.get_platforms():
320
            for device in platform.get_devices():
321
                OCL_vendor[Id]=platform.vendor.lstrip().rstrip()
322
                #OCL_type[Id]=cl.device_type.to_string(device.type)
323
                OCL_type[Id]="xPU"
324
                OCL_description[Id]=device.name.lstrip().rstrip()
325
                print "* Device #%i from %s of type %s : %s" % (Id,OCL_vendor[Id],OCL_type[Id],OCL_description[Id])
326
                Id=Id+1
327
        OCL_MaxDevice=Id-1
328
        print
329
        
330
    except ImportError:
331
        print "Your platform does not seem to support OpenCL"
332
        sys.exit(0)   
333
    
334
    try:
335
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:d:v:",["coupling=","magneticfield=","size=","iterations=","tempstart=","tempend=","tempstep=","units",'device='])
336
    except getopt.GetoptError:
337
        print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
338
        sys.exit(2)
339
    
340
    for opt, arg in opts:
341
        if opt == '-h':
342
            print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
343
            sys.exit()
344
        elif opt == '-c':
345
            Curves=True
346
        elif opt in ("-d", "--device"):
347
            Device = int(arg)
348
            if Device>OCL_MaxDevice:
349
                "Device #%s seems not to be available !"
350
                sys.exit()
351
        elif opt in ("-j", "--coupling"):
352
            J = float(arg)
353
        elif opt in ("-b", "--magneticfield"):
354
            B = float(arg)
355
        elif opt in ("-s", "--tempmin"):
356
            Tmin = float(arg)
357
        elif opt in ("-e", "--tempmax"):
358
            Tmax = float(arg)
359
        elif opt in ("-p", "--tempstep"):
360
            Tstep = float(arg)
361
        elif opt in ("-i", "--iterations"):
362
            Iterations = int(arg)
363
        elif opt in ("-z", "--size"):
364
            Size = int(arg)
365
        elif opt in ("-v", "--divider"):
366
            Divider = int(arg)
367
            
368
    print "Here are parameters of simulation:"
369
    print "* Device selected #%s: %s of type %s from %s" % (Device,OCL_description[Device],OCL_type[Device],OCL_vendor[Device])
370
    print "* Coupling Factor J : %s" % J
371
    print "* Magnetic Field B :  %s" % B
372
    print "* Size of lattice : %sx%s" % (Size,Size)
373
    print "* Divider inside loop : %s" % Divider
374
    print "* Iterations : %s" % Iterations
375
    print "* Temperatures from %s to %s by %s" % (Tmin,Tmax,Tstep)
376

  
377
    LAPIMAGE=False
378
	
379
    if Iterations<STEP:
380
        STEP=Iterations
381

  
382
    numpy.random.seed(20081010)
383
    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype(numpy.int32)
384
	
385
    ImageOutput(sigmaIn,"Ising2D_GPU_Local_%i_Initial" % (Size))
386

  
387
	
388
    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep)
389
    
390
    E=[]
391
    M=[]
392
	
393
    for T in Trange:
394
        sigma=numpy.copy(sigmaIn)
395
        duration=Metropolis(sigma,J,B,T,Iterations,Device,Divider)
396
        E=numpy.append(E,Energy(sigma,J,B))
397
        M=numpy.append(M,Magnetization(sigma,B))
398
        ImageOutput(sigma,"Ising2D_GPU_OddEven_%i_%1.1f_Final" % (Size,T))
399
        print "GPU/CPU Time : %f" % (duration)
400
        print "Total Energy at Temperature %f : %f" % (T,E[-1])
401
        print "Total Magnetization at Temperature %f : %f" % (T,M[-1])
402
		
403
    # Save output
404
    numpy.savez("Ising2D_GPU_Global_%i_%.8i" % (Size,Iterations),(Trange,E,M))
405
    
406
    # Estimate Critical temperature
407
    print "The critical temperature on %ix%i lattice with J=%f, B=%f is %f " % (Size,Size,J,B,CriticalT(Trange,E))
408

  
409
    if Curves:
410
        DisplayCurves(Trange,E,M,J,B)
411

  
412

  
0 413

  

Formats disponibles : Unified diff