Statistiques
| Révision :

root / Ising / GPU / Ising2D-GPU-OddEven.py @ 145

Historique | Voir | Annoter | Télécharger (11,99 ko)

1 145 equemene
#!/usr/bin/env python
2 145 equemene
#
3 145 equemene
# Ising2D model using PyOpenCL
4 145 equemene
#
5 145 equemene
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr>
6 145 equemene
#
7 145 equemene
# Thanks to Andreas Klockner for PyOpenCL:
8 145 equemene
# http://mathema.tician.de/software/pyopencl
9 145 equemene
#
10 145 equemene
# Interesting links:
11 145 equemene
# http://viennacl.sourceforge.net/viennacl-documentation.html
12 145 equemene
# http://enja.org/2011/02/22/adventures-in-pyopencl-part-1-getting-started-with-python/
13 145 equemene
14 145 equemene
import pyopencl as cl
15 145 equemene
import numpy
16 145 equemene
from PIL import Image
17 145 equemene
import time,string
18 145 equemene
from numpy.random import randint as nprnd
19 145 equemene
import sys
20 145 equemene
import getopt
21 145 equemene
import matplotlib.pyplot as plt
22 145 equemene
23 145 equemene
# Size of micro blocks
24 145 equemene
BSZ=16
25 145 equemene
26 145 equemene
# 2097152 on HD5850 (with 1GB of RAM)
27 145 equemene
#  262144 on GT218
28 145 equemene
#STEP=262144
29 145 equemene
#STEP=1048576
30 145 equemene
#STEP=2097152
31 145 equemene
#STEP=4194304
32 145 equemene
#STEP=8388608
33 145 equemene
STEP=16777216
34 145 equemene
#STEP=268435456
35 145 equemene
36 145 equemene
# Flag to define LAPIMAGE between iteration on OpenCL kernel calls
37 145 equemene
#LAPIMAGE=True
38 145 equemene
LAPIMAGE=False
39 145 equemene
40 145 equemene
# Version 2 of kernel : much optimize one
41 145 equemene
# a string template is used to replace BSZ (named $block_size) by its value
42 145 equemene
KERNEL_CODE_ORIG=string.Template("""
43 145 equemene
#define BSZ $block_size
44 145 equemene

45 145 equemene
/* Marsaglia RNG very simple implementation */
46 145 equemene
#define znew (z=36969*(z&65535)+(z>>16))
47 145 equemene
#define wnew (w=18000*(w&65535)+(w>>16))
48 145 equemene
#define MWC ((znew<<16)+wnew )
49 145 equemene
#define MWCfp (float)(MWC * 2.328306435454494e-10f)
50 145 equemene

51 145 equemene
__kernel void MainLoop(__global int *s,float J,float B,float T,uint size,
52 145 equemene
                       uint iterations,uint seed_w,uint seed_z)
53 145 equemene
{
54 145 equemene
   uint base_idx=(uint)(BSZ*get_global_id(0));
55 145 equemene
   uint base_idy=(uint)(BSZ*get_global_id(1));
56 145 equemene
   uint base_id=base_idx+base_idy*size;
57 145 equemene

58 145 equemene
   uint z=seed_z+(uint)get_global_id(0);
59 145 equemene
   uint w=seed_w+(uint)get_global_id(1);
60 145 equemene

61 145 equemene
   for (uint i=0;i<iterations;i++)
62 145 equemene
   {
63 145 equemene
      uint x=(uint)(MWC%BSZ);
64 145 equemene
      uint y=(uint)(MWC%BSZ);
65 145 equemene

66 145 equemene
      int p=s[base_id+x+size*y];
67 145 equemene

68 145 equemene
      int u=s[((base_idx+x)%size)+size*((base_idy+y-1)%size)];
69 145 equemene
      int d=s[((base_idx+x)%size)+size*((base_idy+y+1)%size)];
70 145 equemene
      int l=s[((base_idx+x-1)%size)+size*((base_idy+y)%size)];
71 145 equemene
      int r=s[((base_idx+x+1)%size)+size*((base_idy+y)%size)];
72 145 equemene

73 145 equemene
      float DeltaE=p*(2.0f*J*(float)(u+d+l+r)+B);
74 145 equemene

75 145 equemene
      float factor= ((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
76 145 equemene

77 145 equemene
      s[base_id+x+size*y]= factor*p;
78 145 equemene
   }
79 145 equemene

80 145 equemene
}
81 145 equemene
""")
82 145 equemene
83 145 equemene
# Version 2 of kernel : much optimize one
84 145 equemene
# a string template is used to replace BSZ (named $block_size) by its value
85 145 equemene
KERNEL_CODE=string.Template("""
86 145 equemene
#define BSZ $block_size
87 145 equemene

88 145 equemene
/* Marsaglia RNG very simple implementation */
89 145 equemene
#define znew (z=36969*(z&65535)+(z>>16))
90 145 equemene
#define wnew (w=18000*(w&65535)+(w>>16))
91 145 equemene
#define MWC ((znew<<16)+wnew )
92 145 equemene
#define MWCfp (float)(MWC * 2.328306435454494e-10f)
93 145 equemene

94 145 equemene
__kernel void MainLoop(__global int *s,float J,float B,float T,uint size,
95 145 equemene
                       uint iterations,uint seed_w,uint seed_z)
96 145 equemene
{
97 145 equemene
   private uint z=seed_z+(uint)get_global_id(0);
98 145 equemene
   private uint w=seed_w+(uint)get_global_id(1);
99 145 equemene

100 145 equemene
   private uint x,y,base,size2;
101 145 equemene
   private int p,u,d,l,r,factor;
102 145 equemene
   private float DeltaE;
103 145 equemene

104 145 equemene
   for (uint i=0;i<iterations/2;i++)
105 145 equemene
   {
106 145 equemene
      // Odd pixel
107 145 equemene
      base=2*(uint)get_global_id(0);
108 145 equemene

109 145 equemene
      x=base%size;
110 145 equemene
      y=base/size;
111 145 equemene

112 145 equemene
      p=s[base];
113 145 equemene

114 145 equemene
      u= (y==     0) ? s[x+size*(size-1)]:s[base-size];
115 145 equemene
      d= (y==size-1) ? s[x]:s[base+size];
116 145 equemene
      l= (x==     0) ? s[y*size+size-1]:s[base-1];
117 145 equemene
      r= (x==size-1) ? s[y*size]:s[base+1];
118 145 equemene

119 145 equemene
      DeltaE=p*(2e0f*J*(float)(u+d+l+r)+B);
120 145 equemene

121 145 equemene
      factor= ((DeltaE < 0e0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
122 145 equemene

123 145 equemene
      s[base]= factor*p;
124 145 equemene
      barrier(CLK_GLOBAL_MEM_FENCE);
125 145 equemene

126 145 equemene
      // Even pixel
127 145 equemene

128 145 equemene
      base=2*(uint)get_global_id(0)+1;
129 145 equemene

130 145 equemene
      x=base%size;
131 145 equemene
      y=base/size;
132 145 equemene

133 145 equemene
      p=s[base];
134 145 equemene
      u= (y==     0) ? s[x+size*(size-1)]:s[base-size];
135 145 equemene
      d= (y==size-1) ? s[x]:s[base+size];
136 145 equemene
      l= (x==     0) ? s[y*size+size-1]:s[base-1];
137 145 equemene
      r= (x==size-1) ? s[y*size]:s[base+1];
138 145 equemene

139 145 equemene
      DeltaE=p*(2e0f*J*(float)(u+d+l+r)+B);
140 145 equemene

141 145 equemene
      factor= ((DeltaE < 0e0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
142 145 equemene

143 145 equemene
      s[base]= factor*p;
144 145 equemene
      barrier(CLK_GLOBAL_MEM_FENCE);
145 145 equemene
   }
146 145 equemene

147 145 equemene
}
148 145 equemene
""")
149 145 equemene
150 145 equemene
def ImageOutput(sigma,prefix):
151 145 equemene
    Max=sigma.max()
152 145 equemene
    Min=sigma.min()
153 145 equemene
154 145 equemene
    # Normalize value as 8bits Integer
155 145 equemene
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
156 145 equemene
    image = Image.fromarray(SigmaInt)
157 145 equemene
    image.save("%s.jpg" % prefix)
158 145 equemene
159 145 equemene
def CheckLattice(sigma):
160 145 equemene
161 145 equemene
    over=sigma[sigma>1]
162 145 equemene
    under=sigma[sigma<-1]
163 145 equemene
164 145 equemene
    if  (over.size+under.size) > 0:
165 145 equemene
        print "Problem on Lattice on %i spin(s)." % (over.size+under.size)
166 145 equemene
    else:
167 145 equemene
        print "No problem on Lattice"
168 145 equemene
169 145 equemene
def Metropolis(sigma,J,B,T,iterations,Device,Divider):
170 145 equemene
171 145 equemene
    kernel_params = {'block_size':sigma.shape[0]/Divider}
172 145 equemene
173 145 equemene
        # Je detecte un peripherique GPU dans la liste des peripheriques
174 145 equemene
    Id=1
175 145 equemene
    HasXPU=False
176 145 equemene
    for platform in cl.get_platforms():
177 145 equemene
        for device in platform.get_devices():
178 145 equemene
            if Id==Device:
179 145 equemene
                XPU=device
180 145 equemene
                print "CPU/GPU selected: ",device.name.lstrip()
181 145 equemene
                HasXPU=True
182 145 equemene
            Id+=1
183 145 equemene
184 145 equemene
    if HasXPU==False:
185 145 equemene
        print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1)
186 145 equemene
        sys.exit()
187 145 equemene
188 145 equemene
    ctx = cl.Context([XPU])
189 145 equemene
    queue = cl.CommandQueue(ctx,
190 145 equemene
                            properties=cl.command_queue_properties.PROFILING_ENABLE)
191 145 equemene
192 145 equemene
    # Je recupere les flag possibles pour les buffers
193 145 equemene
    mf = cl.mem_flags
194 145 equemene
195 145 equemene
    sigmaCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=sigma)
196 145 equemene
    #sigmaCL = cl.Buffer(ctx, mf.READ_WRITE, sigma.nbytes)
197 145 equemene
    # Program based on Kernel2
198 145 equemene
    MetropolisCL = cl.Program(ctx,KERNEL_CODE.substitute(kernel_params)).build()
199 145 equemene
200 145 equemene
    divide=Divider*Divider
201 145 equemene
    step=STEP/divide
202 145 equemene
    i=0
203 145 equemene
    duration=0.
204 145 equemene
    while (step*i < iterations/divide):
205 145 equemene
206 145 equemene
        # Call OpenCL kernel
207 145 equemene
        # sigmaCL is lattice translated in CL format
208 145 equemene
        # step is number of iterations
209 145 equemene
210 145 equemene
        start_time=time.time()
211 145 equemene
212 145 equemene
        CLLaunch=MetropolisCL.MainLoop(queue,
213 145 equemene
                                       (numpy.int32(sigma.shape[0]*sigma.shape[1]/2),1),None,
214 145 equemene
                                       sigmaCL,
215 145 equemene
                                       numpy.float32(J),numpy.float32(B),
216 145 equemene
                                       numpy.float32(T),
217 145 equemene
                                       numpy.uint32(sigma.shape[0]),
218 145 equemene
                                       numpy.uint32(step),
219 145 equemene
                                       numpy.uint32(2008),
220 145 equemene
                                       numpy.uint32(1010))
221 145 equemene
222 145 equemene
        CLLaunch.wait()
223 145 equemene
            # elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
224 145 equemene
        elapsed = time.time()-start_time
225 145 equemene
        print "Iteration %i with T=%f and %i iterations in %f: " % (i,T,step,elapsed)
226 145 equemene
        if LAPIMAGE:
227 145 equemene
                cl.enqueue_copy(queue, sigma, sigmaCL).wait()
228 145 equemene
                checkLattice(sigma)
229 145 equemene
                ImageOutput(sigma,"Ising2D_GPU_OddEven_%i_%1.1f_%.3i_Lap" % (SIZE,T,i))
230 145 equemene
        i=i+1
231 145 equemene
        duration=duration+elapsed
232 145 equemene
233 145 equemene
    cl.enqueue_copy(queue, sigma,sigmaCL).wait()
234 145 equemene
    CheckLattice(sigma)
235 145 equemene
    sigmaCL.release()
236 145 equemene
237 145 equemene
    return(duration)
238 145 equemene
239 145 equemene
def Magnetization(sigma,M):
240 145 equemene
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))
241 145 equemene
242 145 equemene
def Energy(sigma,J,B):
243 145 equemene
    # Copy & Cast values
244 145 equemene
    E=numpy.copy(sigma).astype(numpy.float32)
245 145 equemene
246 145 equemene
    # Slice call to estimate Energy
247 145 equemene
    E[1:-1,1:-1]=E[1:-1,1:-1]*(2.0*J*(E[:-2,1:-1]+E[2:,1:-1]+
248 145 equemene
                                          E[1:-1,:-2]+E[1:-1,2:])+B)
249 145 equemene
250 145 equemene
    # Clean perimeter
251 145 equemene
    E[:,0]=0
252 145 equemene
    E[:,-1]=0
253 145 equemene
    E[0,:]=0
254 145 equemene
    E[-1,:]=0
255 145 equemene
256 145 equemene
    Energy=numpy.sum(E)
257 145 equemene
258 145 equemene
    return(Energy/(E.shape[0]*E.shape[1]*1.0))
259 145 equemene
260 145 equemene
def CriticalT(T,E):
261 145 equemene
262 145 equemene
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
263 145 equemene
    dEpoly=numpy.diff(Epoly(T))
264 145 equemene
    dEpoly=numpy.insert(dEpoly,0,0)
265 145 equemene
    return(T[numpy.argmin(dEpoly)])
266 145 equemene
267 145 equemene
def PolyFitE(T,E):
268 145 equemene
269 145 equemene
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
270 145 equemene
    return(Epoly(T))
271 145 equemene
272 145 equemene
def DisplayCurves(T,E,M,J,B):
273 145 equemene
274 145 equemene
    plt.xlabel("Temperature")
275 145 equemene
    plt.ylabel("Energy")
276 145 equemene
277 145 equemene
    Experience,=plt.plot(T,E,label="Energy")
278 145 equemene
279 145 equemene
    plt.legend()
280 145 equemene
    plt.show()
281 145 equemene
282 145 equemene
if __name__=='__main__':
283 145 equemene
284 145 equemene
    # Set defaults values
285 145 equemene
    # Coupling factor
286 145 equemene
    J=1.
287 145 equemene
    # External Magnetic Field is null
288 145 equemene
    B=0.
289 145 equemene
    # Size of Lattice
290 145 equemene
    Size=256
291 145 equemene
    # Default Temperatures (start, end, step)
292 145 equemene
    Tmin=0.1
293 145 equemene
    Tmax=5
294 145 equemene
    Tstep=0.1
295 145 equemene
    # Default Number of Iterations
296 145 equemene
    Iterations=Size*Size
297 145 equemene
    # Default Device is first one
298 145 equemene
    Device=1
299 145 equemene
    # Default Divider
300 145 equemene
    Divider=Size/16
301 145 equemene
302 145 equemene
    # Curves is True to print the curves
303 145 equemene
    Curves=False
304 145 equemene
305 145 equemene
    OCL_vendor={}
306 145 equemene
    OCL_type={}
307 145 equemene
    OCL_description={}
308 145 equemene
309 145 equemene
    try:
310 145 equemene
        import pyopencl as cl
311 145 equemene
312 145 equemene
        print "\nHere are available OpenCL devices:"
313 145 equemene
        Id=1
314 145 equemene
        for platform in cl.get_platforms():
315 145 equemene
            for device in platform.get_devices():
316 145 equemene
                OCL_vendor[Id]=platform.vendor.lstrip().rstrip()
317 145 equemene
                #OCL_type[Id]=cl.device_type.to_string(device.type)
318 145 equemene
                OCL_type[Id]="xPU"
319 145 equemene
                OCL_description[Id]=device.name.lstrip().rstrip()
320 145 equemene
                print "* Device #%i from %s of type %s : %s" % (Id,OCL_vendor[Id],OCL_type[Id],OCL_description[Id])
321 145 equemene
                Id=Id+1
322 145 equemene
        OCL_MaxDevice=Id-1
323 145 equemene
        print
324 145 equemene
325 145 equemene
    except ImportError:
326 145 equemene
        print "Your platform does not seem to support OpenCL"
327 145 equemene
        sys.exit(0)
328 145 equemene
329 145 equemene
    try:
330 145 equemene
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:d:v:",["coupling=","magneticfield=","size=","iterations=","tempstart=","tempend=","tempstep=","units",'device='])
331 145 equemene
    except getopt.GetoptError:
332 145 equemene
        print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
333 145 equemene
        sys.exit(2)
334 145 equemene
335 145 equemene
    for opt, arg in opts:
336 145 equemene
        if opt == '-h':
337 145 equemene
            print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
338 145 equemene
            sys.exit()
339 145 equemene
        elif opt == '-c':
340 145 equemene
            Curves=True
341 145 equemene
        elif opt in ("-d", "--device"):
342 145 equemene
            Device = int(arg)
343 145 equemene
            if Device>OCL_MaxDevice:
344 145 equemene
                "Device #%s seems not to be available !"
345 145 equemene
                sys.exit()
346 145 equemene
        elif opt in ("-j", "--coupling"):
347 145 equemene
            J = float(arg)
348 145 equemene
        elif opt in ("-b", "--magneticfield"):
349 145 equemene
            B = float(arg)
350 145 equemene
        elif opt in ("-s", "--tempmin"):
351 145 equemene
            Tmin = float(arg)
352 145 equemene
        elif opt in ("-e", "--tempmax"):
353 145 equemene
            Tmax = arg
354 145 equemene
        elif opt in ("-p", "--tempstep"):
355 145 equemene
            Tstep = numpy.uint32(arg)
356 145 equemene
        elif opt in ("-i", "--iterations"):
357 145 equemene
            Iterations = int(arg)
358 145 equemene
        elif opt in ("-z", "--size"):
359 145 equemene
            Size = int(arg)
360 145 equemene
        elif opt in ("-v", "--divider"):
361 145 equemene
            Divider = int(arg)
362 145 equemene
363 145 equemene
    print "Here are parameters of simulation:"
364 145 equemene
    print "* Device selected #%s: %s of type %s from %s" % (Device,OCL_description[Device],OCL_type[Device],OCL_vendor[Device])
365 145 equemene
    print "* Coupling Factor J : %s" % J
366 145 equemene
    print "* Magnetic Field B :  %s" % B
367 145 equemene
    print "* Size of lattice : %sx%s" % (Size,Size)
368 145 equemene
    print "* Parallel computing : %sx%s" % (Divider,Divider)
369 145 equemene
    print "* Iterations : %s" % Iterations
370 145 equemene
    print "* Temperatures from %s to %s by %s" % (Tmin,Tmax,Tstep)
371 145 equemene
372 145 equemene
    LAPIMAGE=False
373 145 equemene
374 145 equemene
    if Iterations<STEP:
375 145 equemene
        STEP=Iterations
376 145 equemene
377 145 equemene
    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype(numpy.int32)
378 145 equemene
379 145 equemene
    ImageOutput(sigmaIn,"Ising2D_GPU_Local_%i_Initial" % (Size))
380 145 equemene
381 145 equemene
382 145 equemene
    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep)
383 145 equemene
384 145 equemene
    E=[]
385 145 equemene
    M=[]
386 145 equemene
387 145 equemene
    for T in Trange:
388 145 equemene
        sigma=numpy.copy(sigmaIn)
389 145 equemene
        duration=Metropolis(sigma,J,B,T,Iterations,Device,Divider)
390 145 equemene
        E=numpy.append(E,Energy(sigma,J,B))
391 145 equemene
        M=numpy.append(M,Magnetization(sigma,B))
392 145 equemene
        ImageOutput(sigma,"Ising2D_GPU_OddEven_%i_%1.1f_Final" % (Size,T))
393 145 equemene
        print "GPU/CPU Time : %f" % (duration)
394 145 equemene
        print "Total Energy at Temperature %f : %f" % (T,E[-1])
395 145 equemene
        print "Total Magnetization at Temperature %f : %f" % (T,M[-1])
396 145 equemene
397 145 equemene
    # Save output
398 145 equemene
    numpy.savez("Ising2D_GPU_Global_%i_%.8i" % (Size,Iterations),(Trange,E,M))
399 145 equemene
400 145 equemene
    # Estimate Critical temperature
401 145 equemene
    print "The critical temperature on %ix%i lattice with J=%f, B=%f is %f " % (Size,Size,J,B,CriticalT(Trange,E))
402 145 equemene
403 145 equemene
    if Curves:
404 145 equemene
        DisplayCurves(Trange,E,M,J,B)
405 145 equemene