Statistiques
| Révision :

root / Ising / GPU / Ising2D-GPU-Local.py @ 144

Historique | Voir | Annoter | Télécharger (10,25 ko)

1
#!/usr/bin/env python
2
#
3
# Ising2D model using PyOpenCL 
4
#
5
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr> 
6
#
7
# Thanks to Andreas Klockner for PyOpenCL:
8
# http://mathema.tician.de/software/pyopencl
9
# 
10
# Interesting links:
11
# http://viennacl.sourceforge.net/viennacl-documentation.html
12
# http://enja.org/2011/02/22/adventures-in-pyopencl-part-1-getting-started-with-python/
13

    
14
import pyopencl as cl
15
import numpy
16
from PIL import Image
17
import time,string
18
from numpy.random import randint as nprnd
19
import sys
20
import getopt
21
import matplotlib.pyplot as plt
22

    
23
# Size of micro blocks
24
BSZ=16
25

    
26
# 2097152 on HD5850 (with 1GB of RAM)
27
#  262144 on GT218
28
#STEP=262144
29
#STEP=1048576
30
#STEP=2097152
31
#STEP=4194304
32
#STEP=8388608
33
STEP=16777216
34
#STEP=268435456
35

    
36
# Flag to define LAPIMAGE between iteration on OpenCL kernel calls
37
#LAPIMAGE=True
38
LAPIMAGE=False
39

    
40
# Version 2 of kernel : much optimize one
41
# a string template is used to replace BSZ (named $block_size) by its value 
42
KERNEL_CODE=string.Template("""
43
#define BSZ $block_size
44

45
/* Marsaglia RNG very simple implementation */
46
#define znew (z=36969*(z&65535)+(z>>16))
47
#define wnew (w=18000*(w&65535)+(w>>16))
48
#define MWC ((znew<<16)+wnew )
49
#define MWCfp (float)(MWC * 2.328306435454494e-10f)
50

51
__kernel void MainLoop(__global int *s,float J,float B,float T,uint size,
52
                       uint iterations,uint seed_w,uint seed_z)
53
{
54
   uint base_idx=(uint)(BSZ*get_global_id(0));
55
   uint base_idy=(uint)(BSZ*get_global_id(1));
56
   uint base_id=base_idx+base_idy*size;
57

58
   uint z=seed_z+(uint)get_global_id(0);
59
   uint w=seed_w+(uint)get_global_id(1);
60

61
   for (uint i=0;i<iterations;i++)
62
   {
63
      uint x=(uint)(MWC%BSZ);
64
      uint y=(uint)(MWC%BSZ);
65

66
      int p=s[base_id+x+size*y];
67

68
      int u=s[((base_idx+x)%size)+size*((base_idy+y-1)%size)];
69
      int d=s[((base_idx+x)%size)+size*((base_idy+y+1)%size)];
70
      int l=s[((base_idx+x-1)%size)+size*((base_idy+y)%size)];
71
      int r=s[((base_idx+x+1)%size)+size*((base_idy+y)%size)];
72

73
      float DeltaE=p*(2.0f*J*(float)(u+d+l+r)+B);
74

75
      float factor= ((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
76

77
      s[base_id+x+size*y]= factor*p; 
78
   }
79
 
80
}
81
""")
82

    
83
def ImageOutput(sigma,prefix):
84
    Max=sigma.max()
85
    Min=sigma.min()
86
    
87
    # Normalize value as 8bits Integer
88
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
89
    image = Image.fromarray(SigmaInt)
90
    image.save("%s.jpg" % prefix)
91
    
92
def CheckLattice(sigma):
93

    
94
    over=sigma[sigma>1]
95
    under=sigma[sigma<-1]
96
    
97
    if  (over.size+under.size) > 0:
98
        print "Problem on Lattice on %i spin(s)." % (over.size+under.size)
99
    else:
100
        print "No problem on Lattice"
101

    
102
def Metropolis(sigma,J,B,T,iterations,Device,Divider):
103

    
104
    kernel_params = {'block_size':sigma.shape[0]/Divider}
105
    
106
        # Je detecte un peripherique GPU dans la liste des peripheriques
107
    Id=1
108
    HasXPU=False
109
    for platform in cl.get_platforms():
110
        for device in platform.get_devices():
111
            if Id==Device:
112
                XPU=device
113
                print "CPU/GPU selected: ",device.name.lstrip()
114
                HasXPU=True
115
            Id+=1
116

    
117
    if HasXPU==False:
118
        print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1)
119
        sys.exit()                
120
        
121
    ctx = cl.Context([XPU])
122
    queue = cl.CommandQueue(ctx,
123
                            properties=cl.command_queue_properties.PROFILING_ENABLE)
124

    
125
    # Je recupere les flag possibles pour les buffers
126
    mf = cl.mem_flags
127
    
128
    sigmaCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=sigma)
129
    # Program based on Kernel2
130
    MetropolisCL = cl.Program(ctx,KERNEL_CODE.substitute(kernel_params)).build()
131

    
132
    divide=Divider*Divider
133
    step=STEP/divide
134
    i=0
135
    duration=0.
136
    while (step*i < iterations/divide):
137
    
138
        # Call OpenCL kernel
139
        # (Divider,Divider) is global work size
140
        # sigmaCL is lattice translated in CL format
141
        # step is number of iterations
142
        
143
        start_time=time.time() 
144
        CLLaunch=MetropolisCL.MainLoop(queue,
145
                                       (Divider,Divider),None ,
146
                                       sigmaCL,
147
                                       numpy.float32(J),numpy.float32(B),
148
                                       numpy.float32(T),
149
                                       numpy.uint32(sigma.shape[0]),
150
                                       numpy.uint32(step),
151
                                       numpy.uint32(nprnd(2**32)),
152
                                       numpy.uint32(nprnd(2**32)))
153
                                     
154
        CLLaunch.wait()
155
            # elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
156
        elapsed = time.time()-start_time
157
        print "Iteration %i with T=%f and %i iterations in %f: " % (i,T,step,elapsed)
158
        if LAPIMAGE:
159
                cl.enqueue_copy(queue, sigma, sigmaCL).wait()
160
                checkLattice(sigma)
161
                ImageOutput(sigma,"Ising2D_GPU_Local_%i_%1.1f_%.3i_Lap" % (SIZE,T,i))
162
        i=i+1
163
        duration=duration+elapsed
164

    
165
    cl.enqueue_copy(queue, sigma,sigmaCL).wait()
166
    CheckLattice(sigma)
167
    sigmaCL.release()
168
    
169
    return(duration)
170

    
171
def Magnetization(sigma,M):
172
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))
173

    
174
def Energy(sigma,J,B):
175
    # Copy & Cast values
176
    E=numpy.copy(sigma).astype(numpy.float32)
177
    
178
    # Slice call to estimate Energy
179
    E[1:-1,1:-1]=E[1:-1,1:-1]*(2.0*J*(E[:-2,1:-1]+E[2:,1:-1]+
180
                                          E[1:-1,:-2]+E[1:-1,2:])+B)
181
    
182
    # Clean perimeter
183
    E[:,0]=0
184
    E[:,-1]=0
185
    E[0,:]=0
186
    E[-1,:]=0
187
    
188
    Energy=numpy.sum(E)
189

    
190
    return(Energy/(E.shape[0]*E.shape[1]*1.0))
191

    
192
def CriticalT(T,E):
193

    
194
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
195
    dEpoly=numpy.diff(Epoly(T))
196
    dEpoly=numpy.insert(dEpoly,0,0)
197
    return(T[numpy.argmin(dEpoly)])
198

    
199
def PolyFitE(T,E):
200

    
201
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
202
    return(Epoly(T))
203

    
204
def DisplayCurves(T,E,M,J,B):
205

    
206
    plt.xlabel("Temperature")
207
    plt.ylabel("Energy")
208

    
209
    Experience,=plt.plot(T,E,label="Energy") 
210

    
211
    plt.legend()
212
    plt.show()
213

    
214
if __name__=='__main__':
215
        
216
    # Set defaults values
217
    # Coupling factor
218
    J=1.
219
    # External Magnetic Field is null
220
    B=0.
221
    # Size of Lattice
222
    Size=256
223
    # Default Temperatures (start, end, step)
224
    Tmin=0.1
225
    Tmax=5
226
    Tstep=0.1
227
    # Default Number of Iterations
228
    Iterations=Size*Size*Size
229
    # Default Device is first one
230
    Device=1
231
    # Default Divider
232
    Divider=Size/16
233

    
234
    # Curves is True to print the curves
235
    Curves=False
236

    
237
    OCL_vendor={}
238
    OCL_type={}
239
    OCL_description={}
240

    
241
    try:
242
        import pyopencl as cl
243
 
244
        print "\nHere are available OpenCL devices:"
245
        Id=1
246
        for platform in cl.get_platforms():
247
            for device in platform.get_devices():
248
                OCL_vendor[Id]=platform.vendor.lstrip().rstrip()
249
                #OCL_type[Id]=cl.device_type.to_string(device.type)
250
                OCL_type[Id]="xPU"
251
                OCL_description[Id]=device.name.lstrip().rstrip()
252
                print "* Device #%i from %s of type %s : %s" % (Id,OCL_vendor[Id],OCL_type[Id],OCL_description[Id])
253
                Id=Id+1
254
        OCL_MaxDevice=Id-1
255
        print
256
        
257
    except ImportError:
258
        print "Your platform does not seem to support OpenCL"
259
        sys.exit(0)   
260
    
261
    try:
262
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:d:v:",["coupling=","magneticfield=","size=","iterations=","tempstart=","tempend=","tempstep=","units",'device='])
263
    except getopt.GetoptError:
264
        print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
265
        sys.exit(2)
266
    
267
    for opt, arg in opts:
268
        if opt == '-h':
269
            print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
270
            sys.exit()
271
        elif opt == '-c':
272
            Curves=True
273
        elif opt in ("-d", "--device"):
274
            Device = int(arg)
275
            if Device>OCL_MaxDevice:
276
                "Device #%s seems not to be available !"
277
                sys.exit()
278
        elif opt in ("-j", "--coupling"):
279
            J = float(arg)
280
        elif opt in ("-b", "--magneticfield"):
281
            B = float(arg)
282
        elif opt in ("-s", "--tempmin"):
283
            Tmin = float(arg)
284
        elif opt in ("-e", "--tempmax"):
285
            Tmax = arg
286
        elif opt in ("-p", "--tempstep"):
287
            Tstep = numpy.uint32(arg)
288
        elif opt in ("-i", "--iterations"):
289
            Iterations = int(arg)
290
        elif opt in ("-z", "--size"):
291
            Size = int(arg)
292
        elif opt in ("-v", "--divider"):
293
            Divider = int(arg)
294
            
295
    print "Here are parameters of simulation:"
296
    print "* Device selected #%s: %s of type %s from %s" % (Device,OCL_description[Device],OCL_type[Device],OCL_vendor[Device])
297
    print "* Coupling Factor J : %s" % J
298
    print "* Magnetic Field B :  %s" % B
299
    print "* Size of lattice : %sx%s" % (Size,Size)
300
    print "* Parallel computing : %sx%s" % (Divider,Divider)
301
    print "* Iterations : %s" % Iterations
302
    print "* Temperatures from %s to %s by %s" % (Tmin,Tmax,Tstep)
303

    
304
    LAPIMAGE=False
305
        
306
    if Iterations<STEP:
307
        STEP=Iterations
308
    
309
    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype        (numpy.int32)
310
        
311
    ImageOutput(sigmaIn,"Ising2D_GPU_Local_%i_Initial" % (Size))
312

    
313
        
314
    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep)
315
    
316
    E=[]
317
    M=[]
318
        
319
    for T in Trange:
320
        sigma=numpy.copy(sigmaIn)
321
        duration=Metropolis(sigma,J,B,T,Iterations,Device,Divider)
322
        E=numpy.append(E,Energy(sigma,J,B))
323
        M=numpy.append(M,Magnetization(sigma,B))
324
        ImageOutput(sigma,"Ising2D_GPU_Local_%i_%1.1f_Final" % (Size,T))
325
        print "GPU/CPU Time : %f" % (duration)
326
        print "Total Energy at Temperature %f : %f" % (T,E[-1])
327
        print "Total Magnetization at Temperature %f : %f" % (T,M[-1])
328
                
329
    # Save output
330
    numpy.savez("Ising2D_GPU_Global_%i_%.8i" % (Size,Iterations),(Trange,E,M))
331
    
332
    # Estimate Critical temperature
333
    print "The critical temperature on %ix%i lattice with J=%f, B=%f is %f " % (Size,Size,J,B,CriticalT(Trange,E))
334

    
335
    if Curves:
336
        DisplayCurves(Trange,E,M,J,B)
337

    
338