Statistiques
| Révision :

root / Ising / GPU / Ising2D-GPU-Local.py @ 144

Historique | Voir | Annoter | Télécharger (10,25 ko)

1 93 equemene
#!/usr/bin/env python
2 93 equemene
#
3 93 equemene
# Ising2D model using PyOpenCL
4 93 equemene
#
5 93 equemene
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr>
6 93 equemene
#
7 93 equemene
# Thanks to Andreas Klockner for PyOpenCL:
8 93 equemene
# http://mathema.tician.de/software/pyopencl
9 93 equemene
#
10 93 equemene
# Interesting links:
11 93 equemene
# http://viennacl.sourceforge.net/viennacl-documentation.html
12 93 equemene
# http://enja.org/2011/02/22/adventures-in-pyopencl-part-1-getting-started-with-python/
13 93 equemene
14 93 equemene
import pyopencl as cl
15 93 equemene
import numpy
16 93 equemene
from PIL import Image
17 93 equemene
import time,string
18 93 equemene
from numpy.random import randint as nprnd
19 93 equemene
import sys
20 93 equemene
import getopt
21 93 equemene
import matplotlib.pyplot as plt
22 93 equemene
23 93 equemene
# Size of micro blocks
24 93 equemene
BSZ=16
25 93 equemene
26 93 equemene
# 2097152 on HD5850 (with 1GB of RAM)
27 93 equemene
#  262144 on GT218
28 93 equemene
#STEP=262144
29 93 equemene
#STEP=1048576
30 93 equemene
#STEP=2097152
31 93 equemene
#STEP=4194304
32 93 equemene
#STEP=8388608
33 93 equemene
STEP=16777216
34 93 equemene
#STEP=268435456
35 93 equemene
36 93 equemene
# Flag to define LAPIMAGE between iteration on OpenCL kernel calls
37 93 equemene
#LAPIMAGE=True
38 93 equemene
LAPIMAGE=False
39 93 equemene
40 93 equemene
# Version 2 of kernel : much optimize one
41 93 equemene
# a string template is used to replace BSZ (named $block_size) by its value
42 93 equemene
KERNEL_CODE=string.Template("""
43 93 equemene
#define BSZ $block_size
44 93 equemene

45 93 equemene
/* Marsaglia RNG very simple implementation */
46 93 equemene
#define znew (z=36969*(z&65535)+(z>>16))
47 93 equemene
#define wnew (w=18000*(w&65535)+(w>>16))
48 93 equemene
#define MWC ((znew<<16)+wnew )
49 93 equemene
#define MWCfp (float)(MWC * 2.328306435454494e-10f)
50 93 equemene

51 93 equemene
__kernel void MainLoop(__global int *s,float J,float B,float T,uint size,
52 93 equemene
                       uint iterations,uint seed_w,uint seed_z)
53 93 equemene
{
54 93 equemene
   uint base_idx=(uint)(BSZ*get_global_id(0));
55 93 equemene
   uint base_idy=(uint)(BSZ*get_global_id(1));
56 93 equemene
   uint base_id=base_idx+base_idy*size;
57 93 equemene

58 93 equemene
   uint z=seed_z+(uint)get_global_id(0);
59 93 equemene
   uint w=seed_w+(uint)get_global_id(1);
60 93 equemene

61 93 equemene
   for (uint i=0;i<iterations;i++)
62 93 equemene
   {
63 93 equemene
      uint x=(uint)(MWC%BSZ);
64 93 equemene
      uint y=(uint)(MWC%BSZ);
65 93 equemene

66 93 equemene
      int p=s[base_id+x+size*y];
67 93 equemene

68 93 equemene
      int u=s[((base_idx+x)%size)+size*((base_idy+y-1)%size)];
69 93 equemene
      int d=s[((base_idx+x)%size)+size*((base_idy+y+1)%size)];
70 93 equemene
      int l=s[((base_idx+x-1)%size)+size*((base_idy+y)%size)];
71 93 equemene
      int r=s[((base_idx+x+1)%size)+size*((base_idy+y)%size)];
72 93 equemene

73 93 equemene
      float DeltaE=p*(2.0f*J*(float)(u+d+l+r)+B);
74 93 equemene

75 93 equemene
      float factor= ((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
76 93 equemene

77 93 equemene
      s[base_id+x+size*y]= factor*p;
78 93 equemene
   }
79 93 equemene

80 93 equemene
}
81 93 equemene
""")
82 93 equemene
83 93 equemene
def ImageOutput(sigma,prefix):
84 93 equemene
    Max=sigma.max()
85 93 equemene
    Min=sigma.min()
86 93 equemene
87 93 equemene
    # Normalize value as 8bits Integer
88 93 equemene
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
89 93 equemene
    image = Image.fromarray(SigmaInt)
90 93 equemene
    image.save("%s.jpg" % prefix)
91 93 equemene
92 93 equemene
def CheckLattice(sigma):
93 93 equemene
94 93 equemene
    over=sigma[sigma>1]
95 93 equemene
    under=sigma[sigma<-1]
96 93 equemene
97 93 equemene
    if  (over.size+under.size) > 0:
98 93 equemene
        print "Problem on Lattice on %i spin(s)." % (over.size+under.size)
99 93 equemene
    else:
100 93 equemene
        print "No problem on Lattice"
101 93 equemene
102 93 equemene
def Metropolis(sigma,J,B,T,iterations,Device,Divider):
103 93 equemene
104 93 equemene
    kernel_params = {'block_size':sigma.shape[0]/Divider}
105 93 equemene
106 93 equemene
        # Je detecte un peripherique GPU dans la liste des peripheriques
107 93 equemene
    Id=1
108 93 equemene
    HasXPU=False
109 93 equemene
    for platform in cl.get_platforms():
110 93 equemene
        for device in platform.get_devices():
111 93 equemene
            if Id==Device:
112 93 equemene
                XPU=device
113 93 equemene
                print "CPU/GPU selected: ",device.name.lstrip()
114 93 equemene
                HasXPU=True
115 93 equemene
            Id+=1
116 93 equemene
117 93 equemene
    if HasXPU==False:
118 93 equemene
        print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1)
119 93 equemene
        sys.exit()
120 93 equemene
121 93 equemene
    ctx = cl.Context([XPU])
122 93 equemene
    queue = cl.CommandQueue(ctx,
123 93 equemene
                            properties=cl.command_queue_properties.PROFILING_ENABLE)
124 93 equemene
125 93 equemene
    # Je recupere les flag possibles pour les buffers
126 93 equemene
    mf = cl.mem_flags
127 93 equemene
128 93 equemene
    sigmaCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=sigma)
129 93 equemene
    # Program based on Kernel2
130 93 equemene
    MetropolisCL = cl.Program(ctx,KERNEL_CODE.substitute(kernel_params)).build()
131 93 equemene
132 93 equemene
    divide=Divider*Divider
133 93 equemene
    step=STEP/divide
134 93 equemene
    i=0
135 93 equemene
    duration=0.
136 93 equemene
    while (step*i < iterations/divide):
137 93 equemene
138 93 equemene
        # Call OpenCL kernel
139 93 equemene
        # (Divider,Divider) is global work size
140 93 equemene
        # sigmaCL is lattice translated in CL format
141 93 equemene
        # step is number of iterations
142 93 equemene
143 93 equemene
        start_time=time.time()
144 93 equemene
        CLLaunch=MetropolisCL.MainLoop(queue,
145 93 equemene
                                       (Divider,Divider),None ,
146 93 equemene
                                       sigmaCL,
147 93 equemene
                                       numpy.float32(J),numpy.float32(B),
148 93 equemene
                                       numpy.float32(T),
149 93 equemene
                                       numpy.uint32(sigma.shape[0]),
150 93 equemene
                                       numpy.uint32(step),
151 93 equemene
                                       numpy.uint32(nprnd(2**32)),
152 93 equemene
                                       numpy.uint32(nprnd(2**32)))
153 93 equemene
154 93 equemene
        CLLaunch.wait()
155 93 equemene
            # elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
156 93 equemene
        elapsed = time.time()-start_time
157 93 equemene
        print "Iteration %i with T=%f and %i iterations in %f: " % (i,T,step,elapsed)
158 93 equemene
        if LAPIMAGE:
159 93 equemene
                cl.enqueue_copy(queue, sigma, sigmaCL).wait()
160 93 equemene
                checkLattice(sigma)
161 93 equemene
                ImageOutput(sigma,"Ising2D_GPU_Local_%i_%1.1f_%.3i_Lap" % (SIZE,T,i))
162 93 equemene
        i=i+1
163 93 equemene
        duration=duration+elapsed
164 93 equemene
165 93 equemene
    cl.enqueue_copy(queue, sigma,sigmaCL).wait()
166 93 equemene
    CheckLattice(sigma)
167 93 equemene
    sigmaCL.release()
168 93 equemene
169 93 equemene
    return(duration)
170 93 equemene
171 93 equemene
def Magnetization(sigma,M):
172 93 equemene
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))
173 93 equemene
174 93 equemene
def Energy(sigma,J,B):
175 93 equemene
    # Copy & Cast values
176 93 equemene
    E=numpy.copy(sigma).astype(numpy.float32)
177 93 equemene
178 93 equemene
    # Slice call to estimate Energy
179 93 equemene
    E[1:-1,1:-1]=E[1:-1,1:-1]*(2.0*J*(E[:-2,1:-1]+E[2:,1:-1]+
180 93 equemene
                                          E[1:-1,:-2]+E[1:-1,2:])+B)
181 93 equemene
182 93 equemene
    # Clean perimeter
183 93 equemene
    E[:,0]=0
184 93 equemene
    E[:,-1]=0
185 93 equemene
    E[0,:]=0
186 93 equemene
    E[-1,:]=0
187 93 equemene
188 93 equemene
    Energy=numpy.sum(E)
189 93 equemene
190 93 equemene
    return(Energy/(E.shape[0]*E.shape[1]*1.0))
191 93 equemene
192 93 equemene
def CriticalT(T,E):
193 93 equemene
194 93 equemene
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
195 93 equemene
    dEpoly=numpy.diff(Epoly(T))
196 93 equemene
    dEpoly=numpy.insert(dEpoly,0,0)
197 93 equemene
    return(T[numpy.argmin(dEpoly)])
198 93 equemene
199 93 equemene
def PolyFitE(T,E):
200 93 equemene
201 93 equemene
    Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
202 93 equemene
    return(Epoly(T))
203 93 equemene
204 93 equemene
def DisplayCurves(T,E,M,J,B):
205 93 equemene
206 93 equemene
    plt.xlabel("Temperature")
207 93 equemene
    plt.ylabel("Energy")
208 93 equemene
209 93 equemene
    Experience,=plt.plot(T,E,label="Energy")
210 93 equemene
211 93 equemene
    plt.legend()
212 93 equemene
    plt.show()
213 93 equemene
214 93 equemene
if __name__=='__main__':
215 93 equemene
216 93 equemene
    # Set defaults values
217 93 equemene
    # Coupling factor
218 93 equemene
    J=1.
219 93 equemene
    # External Magnetic Field is null
220 93 equemene
    B=0.
221 93 equemene
    # Size of Lattice
222 93 equemene
    Size=256
223 93 equemene
    # Default Temperatures (start, end, step)
224 93 equemene
    Tmin=0.1
225 93 equemene
    Tmax=5
226 93 equemene
    Tstep=0.1
227 93 equemene
    # Default Number of Iterations
228 94 equemene
    Iterations=Size*Size*Size
229 93 equemene
    # Default Device is first one
230 93 equemene
    Device=1
231 93 equemene
    # Default Divider
232 93 equemene
    Divider=Size/16
233 93 equemene
234 93 equemene
    # Curves is True to print the curves
235 93 equemene
    Curves=False
236 93 equemene
237 93 equemene
    OCL_vendor={}
238 93 equemene
    OCL_type={}
239 93 equemene
    OCL_description={}
240 93 equemene
241 93 equemene
    try:
242 93 equemene
        import pyopencl as cl
243 93 equemene
244 93 equemene
        print "\nHere are available OpenCL devices:"
245 93 equemene
        Id=1
246 93 equemene
        for platform in cl.get_platforms():
247 93 equemene
            for device in platform.get_devices():
248 93 equemene
                OCL_vendor[Id]=platform.vendor.lstrip().rstrip()
249 144 equemene
                #OCL_type[Id]=cl.device_type.to_string(device.type)
250 144 equemene
                OCL_type[Id]="xPU"
251 93 equemene
                OCL_description[Id]=device.name.lstrip().rstrip()
252 93 equemene
                print "* Device #%i from %s of type %s : %s" % (Id,OCL_vendor[Id],OCL_type[Id],OCL_description[Id])
253 93 equemene
                Id=Id+1
254 93 equemene
        OCL_MaxDevice=Id-1
255 93 equemene
        print
256 93 equemene
257 93 equemene
    except ImportError:
258 93 equemene
        print "Your platform does not seem to support OpenCL"
259 93 equemene
        sys.exit(0)
260 93 equemene
261 93 equemene
    try:
262 93 equemene
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:d:v:",["coupling=","magneticfield=","size=","iterations=","tempstart=","tempend=","tempstep=","units",'device='])
263 93 equemene
    except getopt.GetoptError:
264 93 equemene
        print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
265 93 equemene
        sys.exit(2)
266 93 equemene
267 93 equemene
    for opt, arg in opts:
268 93 equemene
        if opt == '-h':
269 93 equemene
            print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -v <diVider> -c (Print Curves)' % sys.argv[0]
270 93 equemene
            sys.exit()
271 93 equemene
        elif opt == '-c':
272 93 equemene
            Curves=True
273 93 equemene
        elif opt in ("-d", "--device"):
274 93 equemene
            Device = int(arg)
275 93 equemene
            if Device>OCL_MaxDevice:
276 93 equemene
                "Device #%s seems not to be available !"
277 93 equemene
                sys.exit()
278 93 equemene
        elif opt in ("-j", "--coupling"):
279 93 equemene
            J = float(arg)
280 93 equemene
        elif opt in ("-b", "--magneticfield"):
281 93 equemene
            B = float(arg)
282 93 equemene
        elif opt in ("-s", "--tempmin"):
283 93 equemene
            Tmin = float(arg)
284 93 equemene
        elif opt in ("-e", "--tempmax"):
285 93 equemene
            Tmax = arg
286 93 equemene
        elif opt in ("-p", "--tempstep"):
287 93 equemene
            Tstep = numpy.uint32(arg)
288 93 equemene
        elif opt in ("-i", "--iterations"):
289 93 equemene
            Iterations = int(arg)
290 93 equemene
        elif opt in ("-z", "--size"):
291 93 equemene
            Size = int(arg)
292 93 equemene
        elif opt in ("-v", "--divider"):
293 93 equemene
            Divider = int(arg)
294 93 equemene
295 93 equemene
    print "Here are parameters of simulation:"
296 93 equemene
    print "* Device selected #%s: %s of type %s from %s" % (Device,OCL_description[Device],OCL_type[Device],OCL_vendor[Device])
297 93 equemene
    print "* Coupling Factor J : %s" % J
298 93 equemene
    print "* Magnetic Field B :  %s" % B
299 93 equemene
    print "* Size of lattice : %sx%s" % (Size,Size)
300 93 equemene
    print "* Parallel computing : %sx%s" % (Divider,Divider)
301 93 equemene
    print "* Iterations : %s" % Iterations
302 93 equemene
    print "* Temperatures from %s to %s by %s" % (Tmin,Tmax,Tstep)
303 93 equemene
304 93 equemene
    LAPIMAGE=False
305 93 equemene
306 93 equemene
    if Iterations<STEP:
307 93 equemene
        STEP=Iterations
308 93 equemene
309 93 equemene
    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype        (numpy.int32)
310 93 equemene
311 93 equemene
    ImageOutput(sigmaIn,"Ising2D_GPU_Local_%i_Initial" % (Size))
312 93 equemene
313 93 equemene
314 93 equemene
    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep)
315 93 equemene
316 93 equemene
    E=[]
317 93 equemene
    M=[]
318 93 equemene
319 93 equemene
    for T in Trange:
320 93 equemene
        sigma=numpy.copy(sigmaIn)
321 93 equemene
        duration=Metropolis(sigma,J,B,T,Iterations,Device,Divider)
322 93 equemene
        E=numpy.append(E,Energy(sigma,J,B))
323 93 equemene
        M=numpy.append(M,Magnetization(sigma,B))
324 93 equemene
        ImageOutput(sigma,"Ising2D_GPU_Local_%i_%1.1f_Final" % (Size,T))
325 93 equemene
        print "GPU/CPU Time : %f" % (duration)
326 93 equemene
        print "Total Energy at Temperature %f : %f" % (T,E[-1])
327 93 equemene
        print "Total Magnetization at Temperature %f : %f" % (T,M[-1])
328 93 equemene
329 93 equemene
    # Save output
330 93 equemene
    numpy.savez("Ising2D_GPU_Global_%i_%.8i" % (Size,Iterations),(Trange,E,M))
331 93 equemene
332 93 equemene
    # Estimate Critical temperature
333 93 equemene
    print "The critical temperature on %ix%i lattice with J=%f, B=%f is %f " % (Size,Size,J,B,CriticalT(Trange,E))
334 93 equemene
335 93 equemene
    if Curves:
336 93 equemene
        DisplayCurves(Trange,E,M,J,B)
337 93 equemene