Statistiques
| Révision :

root / Ising / GPU / Ising2D-GPU-ChessBoard.py @ 185

Historique | Voir | Annoter | Télécharger (12,15 ko)

1 148 equemene
#!/usr/bin/env python
2 148 equemene
#
3 148 equemene
# Ising2D model using PyOpenCL
4 148 equemene
#
5 148 equemene
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr>
6 148 equemene
#
7 148 equemene
# Thanks to Andreas Klockner for PyOpenCL:
8 148 equemene
# http://mathema.tician.de/software/pyopencl
9 148 equemene
#
10 148 equemene
# Interesting links:
11 148 equemene
# http://viennacl.sourceforge.net/viennacl-documentation.html
12 148 equemene
# http://enja.org/2011/02/22/adventures-in-pyopencl-part-1-getting-started-with-python/
13 148 equemene
14 148 equemene
import pyopencl as cl
15 148 equemene
import numpy
16 148 equemene
from PIL import Image
17 148 equemene
import time,string
18 148 equemene
from numpy.random import randint as nprnd
19 148 equemene
import sys
20 148 equemene
import getopt
21 148 equemene
import matplotlib.pyplot as plt
22 148 equemene
23 148 equemene
# Size of micro blocks
24 148 equemene
BSZ=16
25 148 equemene
26 148 equemene
# 2097152 on HD5850 (with 1GB of RAM)
27 148 equemene
#  262144 on GT218
28 148 equemene
#STEP=262144
29 148 equemene
#STEP=1048576
30 148 equemene
#STEP=2097152
31 148 equemene
#STEP=4194304
32 148 equemene
#STEP=8388608
33 148 equemene
STEP=16777216
34 148 equemene
#STEP=268435456
35 148 equemene
36 148 equemene
# Flag to define LAPIMAGE between iteration on OpenCL kernel calls
37 148 equemene
#LAPIMAGE=True
38 148 equemene
LAPIMAGE=False
39 148 equemene
40 148 equemene
# Version 2 of kernel : much optimize one
41 148 equemene
# a string template is used to replace BSZ (named $block_size) by its value
42 148 equemene
KERNEL_CODE_ORIG=string.Template("""
43 148 equemene
#define BSZ $block_size
44 148 equemene

45 148 equemene
/* Marsaglia RNG very simple implementation */
46 148 equemene
#define znew (z=36969*(z&65535)+(z>>16))
47 148 equemene
#define wnew (w=18000*(w&65535)+(w>>16))
48 148 equemene
#define MWC ((znew<<16)+wnew )
49 148 equemene
#define MWCfp (float)(MWC * 2.328306435454494e-10f)
50 148 equemene

51 148 equemene
__kernel void MainLoop(__global int *s,float J,float B,float T,uint size,
52 148 equemene
                       uint iterations,uint seed_w,uint seed_z)
53 148 equemene
{
54 148 equemene
   uint base_idx=(uint)(BSZ*get_global_id(0));
55 148 equemene
   uint base_idy=(uint)(BSZ*get_global_id(1));
56 148 equemene
   uint base_id=base_idx+base_idy*size;
57 148 equemene

58 148 equemene
   uint z=seed_z+(uint)get_global_id(0);
59 148 equemene
   uint w=seed_w+(uint)get_global_id(1);
60 148 equemene

61 148 equemene
   for (uint i=0;i<iterations;i++)
62 148 equemene
   {
63 148 equemene
      uint x=(uint)(MWC%BSZ);
64 148 equemene
      uint y=(uint)(MWC%BSZ);
65 148 equemene

66 148 equemene
      int p=s[base_id+x+size*y];
67 148 equemene

68 148 equemene
      int u=s[((base_idx+x)%size)+size*((base_idy+y-1)%size)];
69 148 equemene
      int d=s[((base_idx+x)%size)+size*((base_idy+y+1)%size)];
70 148 equemene
      int l=s[((base_idx+x-1)%size)+size*((base_idy+y)%size)];
71 148 equemene
      int r=s[((base_idx+x+1)%size)+size*((base_idy+y)%size)];
72 148 equemene

73 148 equemene
      float DeltaE=p*(2.0f*J*(float)(u+d+l+r)+B);
74 148 equemene

75 148 equemene
      float factor= ((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
76 148 equemene

77 148 equemene
      s[base_id+x+size*y]= factor*p;
78 148 equemene
   }
79 148 equemene

80 148 equemene
}
81 148 equemene
""")
82 148 equemene
83 148 equemene
# Version 2 of kernel : much optimize one
84 148 equemene
# a string template is used to replace BSZ (named $block_size) by its value
85 148 equemene
KERNEL_CODE=string.Template("""
86 148 equemene

87 148 equemene
/* Marsaglia RNG very simple implementation */
88 148 equemene
#define znew (z=36969*(z&65535)+(z>>16))
89 148 equemene
#define wnew (w=18000*(w&65535)+(w>>16))
90 148 equemene
#define MWC ((znew<<16)+wnew )
91 148 equemene
#define MWCfp (float)(MWC * 2.328306435454494e-10f)
92 148 equemene

93 148 equemene
__kernel void MainLoop(__global int *s,float J,float B,float T,uint size,
94 148 equemene
                       uint iterations,uint seed_w,uint seed_z)
95 148 equemene
{
96 148 equemene
   uint z=seed_z+(uint)get_global_id(0);
97 148 equemene
   uint w=seed_w+(uint)get_global_id(1);
98 148 equemene

99 148 equemene
   int xo,yo,xe,ye,base,OddSize;
100 148 equemene
   int p,u,d,l,r,factor;
101 148 equemene
   float DeltaE;
102 148 equemene

103 148 equemene
   OddSize=(size%2==0)?1:0;
104 148 equemene

105 148 equemene
   base=2*get_global_id(0);
106 148 equemene
   yo=base/size;
107 148 equemene
   xo=(yo%2==0) ? base%size:(base%size+OddSize)%size ;
108 148 equemene
   ye=yo;
109 148 equemene
   xe=(xo+1)%size;
110 148 equemene

111 148 equemene
   for (uint i=0;i<iterations;i++)
112 148 equemene
   {
113 148 equemene
      // Odd pixel
114 148 equemene
      p=s[yo*size+xo];
115 148 equemene

116 148 equemene
      u= (yo==     0) ? s[xo+size*(size-1)]:s[xo+size*(yo-1)];
117 148 equemene
      d= (yo==size-1) ? s[xo]:s[xo+size*(yo+1)];
118 148 equemene
      l= (xo==     0) ? s[yo*size+size-1]:s[xo-1+size*yo];
119 148 equemene
      r= (xo==size-1) ? s[yo*size]:s[xo+1+yo*size];
120 148 equemene

121 148 equemene
      DeltaE=(float)p*(2.e0f*J*(float)(u+d+l+r)+B);
122 148 equemene

123 148 equemene
      factor= ((DeltaE < 0.e0f) || (MWCfp < (float)exp(-DeltaE/T))) ? -1:1;
124 148 equemene

125 148 equemene
      s[yo*size+xo]= factor*p;
126 148 equemene

127 148 equemene
      // Even pixel
128 148 equemene

129 148 equemene
      p=s[ye*size+xe];
130 148 equemene
      u= (ye==     0) ? s[xe+size*(size-1)]:s[xe+size*(ye-1)];
131 148 equemene
      d= (ye==size-1) ? s[xe]:s[xe+size*(ye+1)];
132 148 equemene
      l= (xe==     0) ? s[ye*size+size-1]:s[xe-1+size*ye];
133 148 equemene
      r= (xe==size-1) ? s[ye*size]:s[xe+1+ye*size];
134 148 equemene

135 148 equemene
      DeltaE=(float)p*(2.e0f*J*(float)(u+d+l+r)+B);
136 148 equemene

137 148 equemene
      factor= ((DeltaE < 0.e0f) || (MWCfp < (float)exp(-DeltaE/T))) ? -1:1;
138 148 equemene

139 148 equemene
      s[ye*size+xe]= factor*p;
140 148 equemene
      barrier(CLK_LOCAL_MEM_FENCE);
141 148 equemene
   }
142 148 equemene

143 148 equemene

144 148 equemene
}
145 148 equemene
""")
146 148 equemene
147 148 equemene
def ImageOutput(sigma,prefix):
148 148 equemene
    Max=sigma.max()
149 148 equemene
    Min=sigma.min()
150 148 equemene
151 148 equemene
    # Normalize value as 8bits Integer
152 148 equemene
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
153 148 equemene
    image = Image.fromarray(SigmaInt)
154 148 equemene
    image.save("%s.jpg" % prefix)
155 148 equemene
156 148 equemene
def CheckLattice(sigma):
157 148 equemene
158 148 equemene
    over=sigma[sigma>1]
159 148 equemene
    under=sigma[sigma<-1]
160 148 equemene
161 148 equemene
    if  (over.size+under.size) > 0:
162 148 equemene
        print "Problem on Lattice on %i spin(s)." % (over.size+under.size)
163 148 equemene
    else:
164 148 equemene
        print "No problem on Lattice"
165 148 equemene
166 148 equemene
def Metropolis(sigma,J,B,T,iterations,Device,Divider):
167 148 equemene
168 148 equemene
    kernel_params = {}
169 148 equemene
170 148 equemene
    print iterations,Divider
171 148 equemene
172 148 equemene
    # Je detecte un peripherique GPU dans la liste des peripheriques
173 148 equemene
    Id=1
174 148 equemene
    HasXPU=False
175 148 equemene
    for platform in cl.get_platforms():
176 148 equemene
        for device in platform.get_devices():
177 148 equemene
            if Id==Device:
178 148 equemene
                XPU=device
179 148 equemene
                print "CPU/GPU selected: ",device.name.lstrip()
180 148 equemene
                HasXPU=True
181 148 equemene
            Id+=1
182 148 equemene
183 148 equemene
    if HasXPU==False:
184 148 equemene
        print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1)
185 148 equemene
        sys.exit()
186 148 equemene
187 148 equemene
    ctx = cl.Context([XPU])
188 148 equemene
    queue = cl.CommandQueue(ctx,
189 148 equemene
                            properties=cl.command_queue_properties.PROFILING_ENABLE)
190 148 equemene
191 148 equemene
    MetropolisCL = cl.Program(ctx,KERNEL_CODE.substitute(kernel_params)).build()
192 148 equemene
193 148 equemene
    # Je recupere les flag possibles pour les buffers
194 148 equemene
    mf = cl.mem_flags
195 148 equemene
196 148 equemene
    # Program based on Kernel2
197 148 equemene
    sigmaCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=sigma)
198 148 equemene
    #sigmaCL = cl.Buffer(ctx, mf.READ_WRITE, sigma.nbytes)
199 148 equemene
    #cl.enqueue_copy(queue,sigmaCL,sigma).wait();
200 148 equemene
201 148 equemene
    i=0
202 148 equemene
    duration=0.
203 148 equemene
    if iterations%Divider == 0:
204 148 equemene
        steps=iterations/Divider;
205 148 equemene
    else:
206 148 equemene
        steps=iterations/Divider+1;
207 148 equemene
208 148 equemene
    step=0
209 148 equemene
    while (step*steps < iterations):
210 148 equemene
211 148 equemene
        # Call OpenCL kernel
212 148 equemene
        # sigmaCL is lattice translated in CL format
213 148 equemene
        # step is number of iterations
214 148 equemene
215 148 equemene
        start_time=time.time()
216 148 equemene
217 148 equemene
        CLLaunch=MetropolisCL.MainLoop(queue,
218 148 equemene
                                       (int(sigma.shape[0]*sigma.shape[1]/2),1),None,
219 148 equemene
                                       sigmaCL,
220 148 equemene
                                       numpy.float32(J),numpy.float32(B),
221 148 equemene
                                       numpy.float32(T),
222 148 equemene
                                       numpy.uint32(sigma.shape[0]),
223 148 equemene
                                       numpy.uint32(steps),
224 148 equemene
                                       numpy.uint32(2008),
225 148 equemene
                                       numpy.uint32(1010))
226 148 equemene
227 148 equemene
        CLLaunch.wait()
228 148 equemene
            # elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
229 148 equemene
        elapsed = time.time()-start_time
230 148 equemene
        print "Iteration %i with T=%f and %i iterations in %f: " % (step,T,steps,elapsed)
231 148 equemene
        if LAPIMAGE:
232 148 equemene
                cl.enqueue_copy(queue, sigma, sigmaCL).wait()
233 148 equemene
                checkLattice(sigma)
234 148 equemene
                ImageOutput(sigma,"Ising2D_GPU_OddEven_%i_%1.1f_%.3i_Lap" % (SIZE,T,i))
235 148 equemene
        step=step+1
236 148 equemene
        duration=duration+elapsed
237 148 equemene
238 148 equemene
    cl.enqueue_copy(queue,sigma,sigmaCL).wait()
239 148 equemene
    CheckLattice(sigma)
240 148 equemene
    sigmaCL.release()
241 148 equemene
242 148 equemene
    return(duration)
243 148 equemene
244 148 equemene
def Magnetization(sigma,M):
245 148 equemene
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))
246 148 equemene
247 148 equemene
def Energy(sigma,J,B):
248 148 equemene
    # Copy & Cast values
249 148 equemene
    E=numpy.copy(sigma).astype(numpy.float32)
250 148 equemene
251 148 equemene
    # Slice call to estimate Energy
252 148 equemene
    E[1:-1,1:-1]=E[1:-1,1:-1]*(2.0*J*(E[:-2,1:-1]+E[2:,1:-1]+
253 148 equemene
                                          E[1:-1,:-2]+E[1:-1,2:])+B)
254 148 equemene
255 148 equemene
    # Clean perimeter
256 148 equemene
    E[:,0]=0
257 148 equemene
    E[:,-1]=0
258 148 equemene
    E[0,:]=0
259 148 equemene
    E[-1,:]=0
260 148 equemene
261 148 equemene
    Energy=numpy.sum(E)
262 148 equemene
263 148 equemene
    return(Energy/(E.shape[0]*E.shape[1]*1.0))
264 148 equemene
265 148 equemene
def CriticalT(T,E):
266 148 equemene
267 148 equemene
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
268 148 equemene
    dEpoly=numpy.diff(Epoly(T))
269 148 equemene
    dEpoly=numpy.insert(dEpoly,0,0)
270 148 equemene
    return(T[numpy.argmin(dEpoly)])
271 148 equemene
272 148 equemene
def PolyFitE(T,E):
273 148 equemene
274 148 equemene
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
275 148 equemene
    return(Epoly(T))
276 148 equemene
277 148 equemene
def DisplayCurves(T,E,M,J,B):
278 148 equemene
279 148 equemene
    plt.xlabel("Temperature")
280 148 equemene
    plt.ylabel("Energy")
281 148 equemene
282 148 equemene
    Experience,=plt.plot(T,E,label="Energy")
283 148 equemene
284 148 equemene
    plt.legend()
285 148 equemene
    plt.show()
286 148 equemene
287 148 equemene
if __name__=='__main__':
288 148 equemene
289 148 equemene
    # Set defaults values
290 148 equemene
    # Coupling factor
291 148 equemene
    J=1.
292 148 equemene
    # External Magnetic Field is null
293 148 equemene
    B=0.
294 148 equemene
    # Size of Lattice
295 148 equemene
    Size=256
296 148 equemene
    # Default Temperatures (start, end, step)
297 148 equemene
    Tmin=0.1
298 148 equemene
    Tmax=5
299 148 equemene
    Tstep=0.1
300 148 equemene
    # Default Number of Iterations
301 148 equemene
    Iterations=Size*Size
302 148 equemene
    # Default Device is first one
303 148 equemene
    Device=1
304 148 equemene
    # Default Divider
305 148 equemene
    Divider=1
306 148 equemene
307 148 equemene
    # Curves is True to print the curves
308 148 equemene
    Curves=False
309 148 equemene
310 148 equemene
    OCL_vendor={}
311 148 equemene
    OCL_type={}
312 148 equemene
    OCL_description={}
313 148 equemene
314 148 equemene
    try:
315 148 equemene
        import pyopencl as cl
316 148 equemene
317 148 equemene
        print "\nHere are available OpenCL devices:"
318 148 equemene
        Id=1
319 148 equemene
        for platform in cl.get_platforms():
320 148 equemene
            for device in platform.get_devices():
321 148 equemene
                OCL_vendor[Id]=platform.vendor.lstrip().rstrip()
322 148 equemene
                #OCL_type[Id]=cl.device_type.to_string(device.type)
323 148 equemene
                OCL_type[Id]="xPU"
324 148 equemene
                OCL_description[Id]=device.name.lstrip().rstrip()
325 148 equemene
                print "* Device #%i from %s of type %s : %s" % (Id,OCL_vendor[Id],OCL_type[Id],OCL_description[Id])
326 148 equemene
                Id=Id+1
327 148 equemene
        OCL_MaxDevice=Id-1
328 148 equemene
        print
329 148 equemene
330 148 equemene
    except ImportError:
331 148 equemene
        print "Your platform does not seem to support OpenCL"
332 148 equemene
        sys.exit(0)
333 148 equemene
334 148 equemene
    try:
335 148 equemene
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:d:v:",["coupling=","magneticfield=","size=","iterations=","tempstart=","tempend=","tempstep=","units",'device='])
336 148 equemene
    except getopt.GetoptError:
337 148 equemene
        print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
338 148 equemene
        sys.exit(2)
339 148 equemene
340 148 equemene
    for opt, arg in opts:
341 148 equemene
        if opt == '-h':
342 148 equemene
            print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
343 148 equemene
            sys.exit()
344 148 equemene
        elif opt == '-c':
345 148 equemene
            Curves=True
346 148 equemene
        elif opt in ("-d", "--device"):
347 148 equemene
            Device = int(arg)
348 148 equemene
            if Device>OCL_MaxDevice:
349 148 equemene
                "Device #%s seems not to be available !"
350 148 equemene
                sys.exit()
351 148 equemene
        elif opt in ("-j", "--coupling"):
352 148 equemene
            J = float(arg)
353 148 equemene
        elif opt in ("-b", "--magneticfield"):
354 148 equemene
            B = float(arg)
355 148 equemene
        elif opt in ("-s", "--tempmin"):
356 148 equemene
            Tmin = float(arg)
357 148 equemene
        elif opt in ("-e", "--tempmax"):
358 148 equemene
            Tmax = float(arg)
359 148 equemene
        elif opt in ("-p", "--tempstep"):
360 148 equemene
            Tstep = float(arg)
361 148 equemene
        elif opt in ("-i", "--iterations"):
362 148 equemene
            Iterations = int(arg)
363 148 equemene
        elif opt in ("-z", "--size"):
364 148 equemene
            Size = int(arg)
365 148 equemene
        elif opt in ("-v", "--divider"):
366 148 equemene
            Divider = int(arg)
367 148 equemene
368 148 equemene
    print "Here are parameters of simulation:"
369 148 equemene
    print "* Device selected #%s: %s of type %s from %s" % (Device,OCL_description[Device],OCL_type[Device],OCL_vendor[Device])
370 148 equemene
    print "* Coupling Factor J : %s" % J
371 148 equemene
    print "* Magnetic Field B :  %s" % B
372 148 equemene
    print "* Size of lattice : %sx%s" % (Size,Size)
373 148 equemene
    print "* Divider inside loop : %s" % Divider
374 148 equemene
    print "* Iterations : %s" % Iterations
375 148 equemene
    print "* Temperatures from %s to %s by %s" % (Tmin,Tmax,Tstep)
376 148 equemene
377 148 equemene
    LAPIMAGE=False
378 148 equemene
379 148 equemene
    if Iterations<STEP:
380 148 equemene
        STEP=Iterations
381 148 equemene
382 148 equemene
    numpy.random.seed(20081010)
383 148 equemene
    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype(numpy.int32)
384 148 equemene
385 148 equemene
    ImageOutput(sigmaIn,"Ising2D_GPU_Local_%i_Initial" % (Size))
386 148 equemene
387 148 equemene
388 148 equemene
    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep)
389 148 equemene
390 148 equemene
    E=[]
391 148 equemene
    M=[]
392 148 equemene
393 148 equemene
    for T in Trange:
394 148 equemene
        sigma=numpy.copy(sigmaIn)
395 148 equemene
        duration=Metropolis(sigma,J,B,T,Iterations,Device,Divider)
396 148 equemene
        E=numpy.append(E,Energy(sigma,J,B))
397 148 equemene
        M=numpy.append(M,Magnetization(sigma,B))
398 148 equemene
        ImageOutput(sigma,"Ising2D_GPU_OddEven_%i_%1.1f_Final" % (Size,T))
399 148 equemene
        print "GPU/CPU Time : %f" % (duration)
400 148 equemene
        print "Total Energy at Temperature %f : %f" % (T,E[-1])
401 148 equemene
        print "Total Magnetization at Temperature %f : %f" % (T,M[-1])
402 148 equemene
403 148 equemene
    # Save output
404 148 equemene
    numpy.savez("Ising2D_GPU_Global_%i_%.8i" % (Size,Iterations),(Trange,E,M))
405 148 equemene
406 148 equemene
    # Estimate Critical temperature
407 148 equemene
    print "The critical temperature on %ix%i lattice with J=%f, B=%f is %f " % (Size,Size,J,B,CriticalT(Trange,E))
408 148 equemene
409 148 equemene
    if Curves:
410 148 equemene
        DisplayCurves(Trange,E,M,J,B)
411 148 equemene