#!/usr/bin/env python
#
# Ising2D model on GPU (OpenCL & CUDA implementations)
#
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr>

import sys
import numpy
import math
from PIL import Image
from math import exp
from random import random
import time
import getopt
import matplotlib.pyplot as plt
from numpy.random import randint as nprnd

KERNEL_CODE_OPENCL="""

// Marsaglia RNG very simple implementation
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f
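
// MWCfp and KISSfp rescale the 32-bit generator output by roughly 2^-32,
// giving a pseudo-random float in [0,1) for the Metropolis acceptance test.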

__kernel void MainLoopOne(__global char *s,float T,float J,float B,
                          uint sizex,uint sizey,
                          uint iterations,uint seed_w,uint seed_z)
{
   uint z=seed_z;
   uint w=seed_w;

   for (uint i=0;i<iterations;i++) {

      // Pick a random lattice site
      uint x=(uint)(MWC%sizex) ;
      uint y=(uint)(MWC%sizey) ;

      int p=s[x+sizex*y];

      // Nearest neighbours with periodic boundaries
      // (x+sizex-1 and y+sizey-1 avoid unsigned underflow when x or y is 0)
      int d=s[x+sizex*((y+1)%sizey)];
      int u=s[x+sizex*((y+sizey-1)%sizey)];
      int l=s[((x+sizex-1)%sizex)+sizex*y];
      int r=s[((x+1)%sizex)+sizex*y];

      // Energy change if the spin at (x,y) is flipped
      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      // Metropolis rule: flip if DeltaE<0, else with probability exp(-DeltaE/T)
      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
      s[x%sizex+sizex*(y%sizey)] = (char)factor*p;
   }
   barrier(CLK_GLOBAL_MEM_FENCE);
}

__kernel void MainLoopGlobal(__global char *s,__global float *T,float J,float B,
                             uint sizex,uint sizey,
                             uint iterations,uint seed_w,uint seed_z)
{
   // One lattice per global work-item: decorrelate the RNG streams
   // by dividing the seeds by (work-item id + 1)
   uint z=seed_z/(get_global_id(0)+1);
   uint w=seed_w/(get_global_id(0)+1);
   uint ind=get_global_id(0);
   float t=T[ind];

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex) ;
      uint y=(uint)(MWC%sizey) ;

      int p=s[x+sizex*(y+sizey*ind)];

      int d=s[x+sizex*((y+1)%sizey+sizey*ind)];
      int u=s[x+sizex*((y+sizey-1)%sizey+sizey*ind)];
      int l=s[((x+sizex-1)%sizex)+sizex*(y+sizey*ind)];
      int r=s[((x+1)%sizex)+sizex*(y+sizey*ind)];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/t))) ? -1:1;
      s[x%sizex+sizex*(y%sizey+sizey*ind)] = (char)factor*p;
   }
   barrier(CLK_GLOBAL_MEM_FENCE);
}

__kernel void MainLoopLocal(__global char *s,__global float *T,float J,float B,
                            uint sizex,uint sizey,
                            uint iterations,uint seed_w,uint seed_z)
{
   // One lattice per work-item inside the work-group
   uint z=seed_z/(get_local_id(0)+1);
   uint w=seed_w/(get_local_id(0)+1);
   //float t=T[get_local_id(0)+get_global_id(0)*get_local_size(0)];
   //uint ind=get_local_id(0)+get_global_id(0)*get_local_size(0);
   float t=T[get_local_id(0)];
   uint ind=get_local_id(0);

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex) ;
      uint y=(uint)(MWC%sizey) ;

      int p=s[x+sizex*(y+sizey*ind)];

      int d=s[x+sizex*((y+1)%sizey+sizey*ind)];
      int u=s[x+sizex*((y+sizey-1)%sizey+sizey*ind)];
      int l=s[((x+sizex-1)%sizex)+sizex*(y+sizey*ind)];
      int r=s[((x+1)%sizex)+sizex*(y+sizey*ind)];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/t))) ? -1:1;
      s[x%sizex+sizex*(y%sizey+sizey*ind)] = (char)factor*p;
   }
   barrier(CLK_LOCAL_MEM_FENCE);
   barrier(CLK_GLOBAL_MEM_FENCE);
}
"""

KERNEL_CODE_CUDA="""

// Marsaglia RNG very simple implementation
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f

__global__ void MainLoopOne(char *s,float T,float J,float B,
                            uint sizex,uint sizey,
                            uint iterations,uint seed_w,uint seed_z)
{
   uint z=seed_z;
   uint w=seed_w;

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex) ;
      uint y=(uint)(MWC%sizey) ;

      int p=s[x+sizex*y];

      // Nearest neighbours with periodic boundaries (unsigned-safe wrap)
      int d=s[x+sizex*((y+1)%sizey)];
      int u=s[x+sizex*((y+sizey-1)%sizey)];
      int l=s[((x+sizex-1)%sizex)+sizex*y];
      int r=s[((x+1)%sizex)+sizex*y];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
      s[x%sizex+sizex*(y%sizey)] = (char)factor*p;
   }
   __syncthreads();
}

__global__ void MainLoopGlobal(char *s,float *T,float J,float B,
                               uint sizex,uint sizey,
                               uint iterations,uint seed_w,uint seed_z)
{
   // One lattice per block: decorrelate the RNG streams with the block index
   uint z=seed_z/(blockIdx.x+1);
   uint w=seed_w/(blockIdx.x+1);
   float t=T[blockIdx.x];
   uint ind=blockIdx.x;

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex) ;
      uint y=(uint)(MWC%sizey) ;

      int p=s[x+sizex*(y+sizey*ind)];

      int d=s[x+sizex*((y+1)%sizey+sizey*ind)];
      int u=s[x+sizex*((y+sizey-1)%sizey+sizey*ind)];
      int l=s[((x+sizex-1)%sizex)+sizex*(y+sizey*ind)];
      int r=s[((x+1)%sizex)+sizex*(y+sizey*ind)];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/t))) ? -1:1;
      s[x%sizex+sizex*(y%sizey+sizey*ind)] = (char)factor*p;
   }
   __syncthreads();
}

__global__ void MainLoopLocal(char *s,float *T,float J,float B,
                              uint sizex,uint sizey,
                              uint iterations,uint seed_w,uint seed_z)
{
   // One lattice per thread inside the block
   uint z=seed_z/(threadIdx.x+1);
   uint w=seed_w/(threadIdx.x+1);
   float t=T[threadIdx.x];
   uint ind=threadIdx.x;

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex) ;
      uint y=(uint)(MWC%sizey) ;

      int p=s[x+sizex*(y+sizey*ind)];

      int d=s[x+sizex*((y+1)%sizey+sizey*ind)];
      int u=s[x+sizex*((y+sizey-1)%sizey+sizey*ind)];
      int l=s[((x+sizex-1)%sizex)+sizex*(y+sizey*ind)];
      int r=s[((x+1)%sizex)+sizex*(y+sizey*ind)];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/t))) ? -1:1;
      s[x%sizex+sizex*(y%sizey+sizey*ind)] = (char)factor*p;
   }
   __syncthreads();
}
"""


def ImageOutput(sigma,prefix):
    Max=sigma.max()
    Min=sigma.min()

    # Normalize values as 8-bit integers
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
    image = Image.fromarray(SigmaInt)
    image.save("%s.jpg" % prefix)


def Metropolis(sigma,T,J,B,iterations):
    # Serial reference implementation of the Metropolis sweep
    start=time.time()

    SizeX,SizeY=sigma.shape

    for p in xrange(0,iterations):
        # Random site coordinates
        X,Y=numpy.random.randint(SizeX),numpy.random.randint(SizeY)

        # Energy change if spin (X,Y) is flipped: 2*s*(J*sum(neighbours)+B)
        DeltaE=2.*sigma[X,Y]*(J*(sigma[X,(Y+1)%SizeY]+
                                 sigma[X,(Y-1)%SizeY]+
                                 sigma[(X-1)%SizeX,Y]+
                                 sigma[(X+1)%SizeX,Y])+B)

        if DeltaE < 0. or random() < exp(-DeltaE/T):
            sigma[X,Y]=-sigma[X,Y]
    duration=time.time()-start
    return(duration)


def MetropolisAllOpenCL(sigmaDict,TList,J,B,iterations,jobs,ParaStyle,Alu,Device):

    # sigmaDict & TList are NOT an array & a float:
    # sigmaDict : dict holding one lattice array per temperature
    # TList : list of temperatures

    # Initialise variables with the proper casts

    # Detect a GPU device among the available devices

    HasGPU=False
    Id=1
    # Primary device selection based on Device Id
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            deviceType=cl.device_type.to_string(device.type)
            if Id==Device and not HasGPU:
                GPU=device
                print "CPU/GPU selected: ",device.name
                HasGPU=True
            Id=Id+1
    # Default device selection based on ALU type
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            deviceType=cl.device_type.to_string(device.type)
            if deviceType=="GPU" and Alu=="GPU" and not HasGPU:
                GPU=device
                print "GPU selected: ",device.name
                HasGPU=True
            if deviceType=="CPU" and Alu=="CPU" and not HasGPU:
                GPU=device
                print "CPU selected: ",device.name
                HasGPU=True

    # Create the context and the command queue for execution
    # ctx = cl.create_some_context()
    ctx = cl.Context([GPU])
    queue = cl.CommandQueue(ctx,
                            properties=cl.command_queue_properties.PROFILING_ENABLE)

    # Retrieve the available flags for the buffers
    mf = cl.mem_flags

    # Concatenate all sigma in a single array
    sigma=numpy.copy(sigmaDict[TList[0]])
    for T in TList[1:]:
        sigma=numpy.concatenate((sigma,sigmaDict[T]),axis=1)

    # The lattice buffer is read and written by the kernel, T is only read
    sigmaCL = cl.Buffer(ctx, mf.READ_WRITE|mf.COPY_HOST_PTR,hostbuf=sigma)
    TCL = cl.Buffer(ctx, mf.READ_ONLY|mf.COPY_HOST_PTR,hostbuf=TList)

    MetropolisCL = cl.Program(ctx,KERNEL_CODE_OPENCL).build( \
        options = "-cl-mad-enable -cl-fast-relaxed-math")

    SizeX,SizeY=sigmaDict[TList[0]].shape

    if ParaStyle=='Blocks':
        # Call the OpenCL kernel
        # (jobs,) is the global work size, local work size is left to the runtime
        # The two last arguments are the seeds of the Marsaglia RNG
        CLLaunch=MetropolisCL.MainLoopGlobal(queue,(jobs,),None,
                                             sigmaCL,
                                             TCL,
                                             numpy.float32(J),
                                             numpy.float32(B),
                                             numpy.uint32(SizeX),
                                             numpy.uint32(SizeY),
                                             numpy.uint32(iterations),
                                             numpy.uint32(nprnd(2**31-1)),
                                             numpy.uint32(nprnd(2**31-1)))
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)
    else:
        blocks=int(math.sqrt(jobs))
        # In OpenCL, the global work size must here match the local work size
        CLLaunch=MetropolisCL.MainLoopLocal(queue,(jobs,),(jobs,),
                                            sigmaCL,
                                            TCL,
                                            numpy.float32(J),
                                            numpy.float32(B),
                                            numpy.uint32(SizeX),
                                            numpy.uint32(SizeY),
                                            numpy.uint32(iterations),
                                            numpy.uint32(nprnd(2**31-1)),
                                            numpy.uint32(nprnd(2**31-1)))
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)

    CLLaunch.wait()
    cl.enqueue_copy(queue, sigma, sigmaCL).wait()
    elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
    sigmaCL.release()

    # Split the concatenated lattice back into one array per temperature
    results=numpy.split(sigma,len(TList),axis=1)
    for T in TList:
        sigmaDict[T]=numpy.copy(results[numpy.nonzero(TList == T)[0][0]])

    return(elapsed)


def MetropolisAllCuda(sigmaDict,TList,J,B,iterations,jobs,ParaStyle,Alu,Device):

    # sigmaDict & TList are NOT an array & a float:
    # sigmaDict : dict holding one lattice array per temperature
    # TList : list of temperatures

    # With PyCUDA autoinit, nothing to do for device selection!

    mod = SourceModule(KERNEL_CODE_CUDA)

    MetropolisBlocksCuda=mod.get_function("MainLoopGlobal")
    MetropolisThreadsCuda=mod.get_function("MainLoopLocal")

    # Concatenate all sigma in a single array
    sigma=numpy.copy(sigmaDict[TList[0]])
    for T in TList[1:]:
        sigma=numpy.concatenate((sigma,sigmaDict[T]),axis=1)

    sigmaCU=cuda.InOut(sigma)
    TCU=cuda.InOut(TList)

    SizeX,SizeY=sigmaDict[TList[0]].shape

    start = pycuda.driver.Event()
    stop = pycuda.driver.Event()

    start.record()
    start.synchronize()
    if ParaStyle=='Blocks':
        # Call the CUDA kernel
        # grid=(jobs,1) launches one block per temperature, 1 thread per block
        # The two last arguments are the seeds of the Marsaglia RNG
        MetropolisBlocksCuda(sigmaCU,
                             TCU,
                             numpy.float32(J),
                             numpy.float32(B),
                             numpy.uint32(SizeX),
                             numpy.uint32(SizeY),
                             numpy.uint32(iterations),
                             numpy.uint32(nprnd(2**31-1)),
                             numpy.uint32(nprnd(2**31-1)),
                             grid=(jobs,1),block=(1,1,1))
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)
    else:
        blocks=int(math.sqrt(jobs))
        # block=(jobs,1,1) launches a single block with one thread per temperature
        MetropolisThreadsCuda(sigmaCU,
                              TCU,
                              numpy.float32(J),
                              numpy.float32(B),
                              numpy.uint32(SizeX),
                              numpy.uint32(SizeY),
                              numpy.uint32(iterations),
                              numpy.uint32(nprnd(2**31-1)),
                              numpy.uint32(nprnd(2**31-1)),
                              grid=(1,1),block=(jobs,1,1))
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)

    stop.record()
    stop.synchronize()
    elapsed = start.time_till(stop)*1e-3

    # Split the concatenated lattice back into one array per temperature
    results=numpy.split(sigma,len(TList),axis=1)
    for T in TList:
        sigmaDict[T]=numpy.copy(results[numpy.nonzero(TList == T)[0][0]])

    return(elapsed)


def Magnetization(sigma,M):
    # Mean magnetization per spin (second argument is unused)
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))


def Energy(sigma,J):
    # Copy and cast to float
    E=numpy.copy(sigma).astype(numpy.float32)

    # Slice-based evaluation: each interior site contributes
    # -J*s*(sum of its 4 neighbours), so every interior bond is counted twice
    E[1:-1,1:-1]=-J*E[1:-1,1:-1]*(E[:-2,1:-1]+E[2:,1:-1]+
                                  E[1:-1,:-2]+E[1:-1,2:])

    # Clear the boundary
    E[:,0]=0
    E[:,-1]=0
    E[0,:]=0
    E[-1,:]=0

    Energy=numpy.sum(E)

    return(Energy/(E.shape[0]*E.shape[1]*1.0))


def DisplayCurves(T,E,M,J,B):
    # Only the energy curve is plotted; M, J and B are currently unused
    plt.xlabel("Temperature")
    plt.ylabel("Energy")

    Experience,=plt.plot(T,E,label="Energy")

    plt.legend()
    plt.show()


if __name__=='__main__':

    # Set default values
    # Alu can be CPU or GPU
    Alu='CPU'
    # Id of GPU : 0
    Device=0
    # GPU style can be CUDA (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Parallel distribution can be on Threads or Blocks
    ParaStyle='Blocks'
    # Coupling factor
    J=1.
    # Magnetic field
    B=0.
    # Size of lattice
    Size=256
    # Default temperatures (start, end, step)
    Tmin=0.1
    Tmax=5
    Tstep=0.1
    # Default number of iterations
    Iterations=Size*Size
    # Curves is True to display the curves
    Curves=False

    try:
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:a:d:g:t:",["coupling=","magneticfield=","size=","iterations=","tempmin=","tempmax=","tempstep=","alu=","device=","gpustyle=","parastyle="])
    except getopt.GetoptError:
        print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -t <Threads/Blocks>' % sys.argv[0]
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -t <Threads/Blocks>' % sys.argv[0]
            sys.exit()
        elif opt == '-c':
            Curves=True
        elif opt in ("-j", "--coupling"):
            J = float(arg)
        elif opt in ("-b", "--magneticfield"):
            B = float(arg)
        elif opt in ("-s", "--tempmin"):
            Tmin = float(arg)
        elif opt in ("-e", "--tempmax"):
            Tmax = float(arg)
        elif opt in ("-p", "--tempstep"):
            Tstep = float(arg)
        elif opt in ("-i", "--iterations"):
            Iterations = int(arg)
        elif opt in ("-z", "--size"):
            Size = int(arg)
        elif opt in ("-a", "--alu"):
            Alu = arg
        elif opt in ("-d", "--device"):
            Device = int(arg)
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-t", "--parastyle"):
            ParaStyle = arg

    if Alu=='CPU' and GpuStyle=='CUDA':
        print "Alu can't be CPU for CUDA, setting Alu to GPU"
        Alu='GPU'

    if ParaStyle not in ('Blocks','Threads','Hybrid'):
        print "%s does not exist, ParaStyle set to Blocks!" % ParaStyle
        ParaStyle='Blocks'

    print "Compute unit : %s" % Alu
    print "Device Identification : %s" % Device
    print "GpuStyle used : %s" % GpuStyle
    print "Parallel Style used : %s" % ParaStyle
    print "Coupling Factor : %s" % J
    print "Magnetic Field : %s" % B
    print "Size of lattice : %s" % Size
    print "Iterations : %s" % Iterations
    print "Temperature on start : %s" % Tmin
    print "Temperature on end : %s" % Tmax
    print "Temperature step : %s" % Tstep

    if GpuStyle=='CUDA':
        # PyCUDA imports
        import pycuda.driver as cuda
        import pycuda.gpuarray as gpuarray
        import pycuda.autoinit
        from pycuda.compiler import SourceModule

    if GpuStyle=='OpenCL':
        # PyOpenCL import and device listing
        import pyopencl as cl
        Id=1
        for platform in cl.get_platforms():
            for device in platform.get_devices():
                deviceType=cl.device_type.to_string(device.type)
                print "Device #%i of type %s : %s" % (Id,deviceType,device.name)
                Id=Id+1

    LAPIMAGE=False

    # Random initial lattice of +1/-1 spins
    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype(numpy.int8)

    ImageOutput(sigmaIn,"Ising2D_Serial_%i_Initial" % (Size))

    # The temperature is passed as a kernel parameter, mind the cast!
    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep).astype(numpy.float32)

    E=[]
    M=[]

    # One copy of the initial lattice per temperature
    sigma={}
    for T in Trange:
        sigma[T]=numpy.copy(sigmaIn)

    jobs=len(Trange)

    # On GPU, all temperatures are processed in a single launch
    if GpuStyle=='CUDA':
        duration=MetropolisAllCuda(sigma,Trange,J,B,Iterations,jobs,ParaStyle,Alu,Device)
    else:
        duration=MetropolisAllOpenCL(sigma,Trange,J,B,Iterations,jobs,ParaStyle,Alu,Device)

    for T in Trange:
        E=numpy.append(E,Energy(sigma[T],J))
        M=numpy.append(M,Magnetization(sigma[T],B))
        print "CPU Time for each : %f" % (duration/len(Trange))
        print "Total Energy at Temperature %f : %f" % (T,E[-1])
        print "Total Magnetization at Temperature %f : %f" % (T,M[-1])
        ImageOutput(sigma[T],"Ising2D_Serial_%i_%1.1f_Final" % (Size,T))

    if Curves:
        DisplayCurves(Trange,E,M,J,B)
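
# Example invocation (assumed typical usage; adapt device, sizes and iterations to your setup):
#   python Ising2D-GPU.py -z 128 -i 65536 -s 0.5 -e 4.0 -p 0.5 -a GPU -g OpenCL -t Blocks -c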