Statistiques
| Révision :

root / Ising / GPU / Ising2D-GPU-Local.py @ 94

Historique | Voir | Annoter | Télécharger (10,22 ko)

1
#!/usr/bin/env python
2
#
3
# Ising2D model using PyOpenCL 
4
#
5
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr> 
6
#
7
# Thanks to Andreas Klockner for PyOpenCL:
8
# http://mathema.tician.de/software/pyopencl
9
# 
10
# Interesting links:
11
# http://viennacl.sourceforge.net/viennacl-documentation.html
12
# http://enja.org/2011/02/22/adventures-in-pyopencl-part-1-getting-started-with-python/
13

    
14
import pyopencl as cl
15
import numpy
16
from PIL import Image
17
import time,string
18
from numpy.random import randint as nprnd
19
import sys
20
import getopt
21
import matplotlib.pyplot as plt
22

    
23
# Size of micro blocks
24
BSZ=16
25

    
26
# 2097152 on HD5850 (with 1GB of RAM)
27
#  262144 on GT218
28
#STEP=262144
29
#STEP=1048576
30
#STEP=2097152
31
#STEP=4194304
32
#STEP=8388608
33
STEP=16777216
34
#STEP=268435456
35

    
36
# Flag to define LAPIMAGE between iteration on OpenCL kernel calls
37
#LAPIMAGE=True
38
LAPIMAGE=False
39

    
40
# Version 2 of kernel : much optimize one
41
# a string template is used to replace BSZ (named $block_size) by its value 
42
KERNEL_CODE=string.Template("""
43
#define BSZ $block_size
44

45
/* Marsaglia RNG very simple implementation */
46
#define znew (z=36969*(z&65535)+(z>>16))
47
#define wnew (w=18000*(w&65535)+(w>>16))
48
#define MWC ((znew<<16)+wnew )
49
#define MWCfp (float)(MWC * 2.328306435454494e-10f)
50

51
__kernel void MainLoop(__global int *s,float J,float B,float T,uint size,
52
                       uint iterations,uint seed_w,uint seed_z)
53
{
54
   uint base_idx=(uint)(BSZ*get_global_id(0));
55
   uint base_idy=(uint)(BSZ*get_global_id(1));
56
   uint base_id=base_idx+base_idy*size;
57

58
   uint z=seed_z+(uint)get_global_id(0);
59
   uint w=seed_w+(uint)get_global_id(1);
60

61
   for (uint i=0;i<iterations;i++)
62
   {
63
      uint x=(uint)(MWC%BSZ);
64
      uint y=(uint)(MWC%BSZ);
65

66
      int p=s[base_id+x+size*y];
67

68
      int u=s[((base_idx+x)%size)+size*((base_idy+y-1)%size)];
69
      int d=s[((base_idx+x)%size)+size*((base_idy+y+1)%size)];
70
      int l=s[((base_idx+x-1)%size)+size*((base_idy+y)%size)];
71
      int r=s[((base_idx+x+1)%size)+size*((base_idy+y)%size)];
72

73
      float DeltaE=p*(2.0f*J*(float)(u+d+l+r)+B);
74

75
      float factor= ((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
76

77
      s[base_id+x+size*y]= factor*p; 
78
   }
79
 
80
}
81
""")
82

    
83
def ImageOutput(sigma,prefix):
84
    Max=sigma.max()
85
    Min=sigma.min()
86
    
87
    # Normalize value as 8bits Integer
88
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
89
    image = Image.fromarray(SigmaInt)
90
    image.save("%s.jpg" % prefix)
91
    
92
def CheckLattice(sigma):
93

    
94
    over=sigma[sigma>1]
95
    under=sigma[sigma<-1]
96
    
97
    if  (over.size+under.size) > 0:
98
        print "Problem on Lattice on %i spin(s)." % (over.size+under.size)
99
    else:
100
        print "No problem on Lattice"
101

    
102
def Metropolis(sigma,J,B,T,iterations,Device,Divider):
103

    
104
    kernel_params = {'block_size':sigma.shape[0]/Divider}
105
    
106
        # Je detecte un peripherique GPU dans la liste des peripheriques
107
    Id=1
108
    HasXPU=False
109
    for platform in cl.get_platforms():
110
        for device in platform.get_devices():
111
            if Id==Device:
112
                XPU=device
113
                print "CPU/GPU selected: ",device.name.lstrip()
114
                HasXPU=True
115
            Id+=1
116

    
117
    if HasXPU==False:
118
        print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1)
119
        sys.exit()                
120
        
121
    ctx = cl.Context([XPU])
122
    queue = cl.CommandQueue(ctx,
123
                            properties=cl.command_queue_properties.PROFILING_ENABLE)
124

    
125
    # Je recupere les flag possibles pour les buffers
126
    mf = cl.mem_flags
127
    
128
    sigmaCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=sigma)
129
    # Program based on Kernel2
130
    MetropolisCL = cl.Program(ctx,KERNEL_CODE.substitute(kernel_params)).build()
131

    
132
    divide=Divider*Divider
133
    step=STEP/divide
134
    i=0
135
    duration=0.
136
    while (step*i < iterations/divide):
137
    
138
        # Call OpenCL kernel
139
        # (Divider,Divider) is global work size
140
        # sigmaCL is lattice translated in CL format
141
        # step is number of iterations
142
        
143
        start_time=time.time() 
144
        CLLaunch=MetropolisCL.MainLoop(queue,
145
                                       (Divider,Divider),None ,
146
                                       sigmaCL,
147
                                       numpy.float32(J),numpy.float32(B),
148
                                       numpy.float32(T),
149
                                       numpy.uint32(sigma.shape[0]),
150
                                       numpy.uint32(step),
151
                                       numpy.uint32(nprnd(2**32)),
152
                                       numpy.uint32(nprnd(2**32)))
153
                                     
154
        CLLaunch.wait()
155
            # elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
156
        elapsed = time.time()-start_time
157
        print "Iteration %i with T=%f and %i iterations in %f: " % (i,T,step,elapsed)
158
        if LAPIMAGE:
159
                cl.enqueue_copy(queue, sigma, sigmaCL).wait()
160
                checkLattice(sigma)
161
                ImageOutput(sigma,"Ising2D_GPU_Local_%i_%1.1f_%.3i_Lap" % (SIZE,T,i))
162
        i=i+1
163
        duration=duration+elapsed
164

    
165
    cl.enqueue_copy(queue, sigma,sigmaCL).wait()
166
    CheckLattice(sigma)
167
    sigmaCL.release()
168
    
169
    return(duration)
170

    
171
def Magnetization(sigma,M):
172
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))
173

    
174
def Energy(sigma,J,B):
175
    # Copy & Cast values
176
    E=numpy.copy(sigma).astype(numpy.float32)
177
    
178
    # Slice call to estimate Energy
179
    E[1:-1,1:-1]=E[1:-1,1:-1]*(2.0*J*(E[:-2,1:-1]+E[2:,1:-1]+
180
                                          E[1:-1,:-2]+E[1:-1,2:])+B)
181
    
182
    # Clean perimeter
183
    E[:,0]=0
184
    E[:,-1]=0
185
    E[0,:]=0
186
    E[-1,:]=0
187
    
188
    Energy=numpy.sum(E)
189

    
190
    return(Energy/(E.shape[0]*E.shape[1]*1.0))
191

    
192
def CriticalT(T,E):
193

    
194
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
195
    dEpoly=numpy.diff(Epoly(T))
196
    dEpoly=numpy.insert(dEpoly,0,0)
197
    return(T[numpy.argmin(dEpoly)])
198

    
199
def PolyFitE(T,E):
200

    
201
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
202
    return(Epoly(T))
203

    
204
def DisplayCurves(T,E,M,J,B):
205

    
206
    plt.xlabel("Temperature")
207
    plt.ylabel("Energy")
208

    
209
    Experience,=plt.plot(T,E,label="Energy") 
210

    
211
    plt.legend()
212
    plt.show()
213

    
214
if __name__=='__main__':
215
        
216
    # Set defaults values
217
    # Coupling factor
218
    J=1.
219
    # External Magnetic Field is null
220
    B=0.
221
    # Size of Lattice
222
    Size=256
223
    # Default Temperatures (start, end, step)
224
    Tmin=0.1
225
    Tmax=5
226
    Tstep=0.1
227
    # Default Number of Iterations
228
    Iterations=Size*Size*Size
229
    # Default Device is first one
230
    Device=1
231
    # Default Divider
232
    Divider=Size/16
233

    
234
    # Curves is True to print the curves
235
    Curves=False
236

    
237
    OCL_vendor={}
238
    OCL_type={}
239
    OCL_description={}
240

    
241
    try:
242
        import pyopencl as cl
243
 
244
        print "\nHere are available OpenCL devices:"
245
        Id=1
246
        for platform in cl.get_platforms():
247
            for device in platform.get_devices():
248
                OCL_vendor[Id]=platform.vendor.lstrip().rstrip()
249
                OCL_type[Id]=cl.device_type.to_string(device.type)
250
                OCL_description[Id]=device.name.lstrip().rstrip()
251
                print "* Device #%i from %s of type %s : %s" % (Id,OCL_vendor[Id],OCL_type[Id],OCL_description[Id])
252
                Id=Id+1
253
        OCL_MaxDevice=Id-1
254
        print
255
        
256
    except ImportError:
257
        print "Your platform does not seem to support OpenCL"
258
        sys.exit(0)   
259
    
260
    try:
261
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:d:v:",["coupling=","magneticfield=","size=","iterations=","tempstart=","tempend=","tempstep=","units",'device='])
262
    except getopt.GetoptError:
263
        print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
264
        sys.exit(2)
265
    
266
    for opt, arg in opts:
267
        if opt == '-h':
268
            print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
269
            sys.exit()
270
        elif opt == '-c':
271
            Curves=True
272
        elif opt in ("-d", "--device"):
273
            Device = int(arg)
274
            if Device>OCL_MaxDevice:
275
                "Device #%s seems not to be available !"
276
                sys.exit()
277
        elif opt in ("-j", "--coupling"):
278
            J = float(arg)
279
        elif opt in ("-b", "--magneticfield"):
280
            B = float(arg)
281
        elif opt in ("-s", "--tempmin"):
282
            Tmin = float(arg)
283
        elif opt in ("-e", "--tempmax"):
284
            Tmax = arg
285
        elif opt in ("-p", "--tempstep"):
286
            Tstep = numpy.uint32(arg)
287
        elif opt in ("-i", "--iterations"):
288
            Iterations = int(arg)
289
        elif opt in ("-z", "--size"):
290
            Size = int(arg)
291
        elif opt in ("-v", "--divider"):
292
            Divider = int(arg)
293
            
294
    print "Here are parameters of simulation:"
295
    print "* Device selected #%s: %s of type %s from %s" % (Device,OCL_description[Device],OCL_type[Device],OCL_vendor[Device])
296
    print "* Coupling Factor J : %s" % J
297
    print "* Magnetic Field B :  %s" % B
298
    print "* Size of lattice : %sx%s" % (Size,Size)
299
    print "* Parallel computing : %sx%s" % (Divider,Divider)
300
    print "* Iterations : %s" % Iterations
301
    print "* Temperatures from %s to %s by %s" % (Tmin,Tmax,Tstep)
302

    
303
    LAPIMAGE=False
304
        
305
    if Iterations<STEP:
306
        STEP=Iterations
307
    
308
    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype        (numpy.int32)
309
        
310
    ImageOutput(sigmaIn,"Ising2D_GPU_Local_%i_Initial" % (Size))
311

    
312
        
313
    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep)
314
    
315
    E=[]
316
    M=[]
317
        
318
    for T in Trange:
319
        sigma=numpy.copy(sigmaIn)
320
        duration=Metropolis(sigma,J,B,T,Iterations,Device,Divider)
321
        E=numpy.append(E,Energy(sigma,J,B))
322
        M=numpy.append(M,Magnetization(sigma,B))
323
        ImageOutput(sigma,"Ising2D_GPU_Local_%i_%1.1f_Final" % (Size,T))
324
        print "GPU/CPU Time : %f" % (duration)
325
        print "Total Energy at Temperature %f : %f" % (T,E[-1])
326
        print "Total Magnetization at Temperature %f : %f" % (T,M[-1])
327
                
328
    # Save output
329
    numpy.savez("Ising2D_GPU_Global_%i_%.8i" % (Size,Iterations),(Trange,E,M))
330
    
331
    # Estimate Critical temperature
332
    print "The critical temperature on %ix%i lattice with J=%f, B=%f is %f " % (Size,Size,J,B,CriticalT(Trange,E))
333

    
334
    if Curves:
335
        DisplayCurves(Trange,E,M,J,B)
336

    
337