#!/usr/bin/env python
#
# Ising2D model on CPU or GPU with OpenCL
#
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr>

import sys
import numpy
import math
from PIL import Image
from math import exp
from random import random
import time
import getopt
import matplotlib.pyplot as plt
from numpy.random import randint as nprnd

KERNEL_CODE_OPENCL="""

// Marsaglia RNG very simple implementation
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

// Map the 32-bit draws to floats in [0,1)
#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f

__kernel void MainLoopOne(__global char *s,float T,float J,
                          uint sizex,uint sizey,
                          uint iterations,uint seed_w,uint seed_z)
{
   // NB: every work-item starts from the same seeds,
   // so several work-items here duplicate the same walk
   uint z=seed_z;
   uint w=seed_w;

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*y];

      // Periodic neighbours: add the size before subtracting 1
      // to avoid unsigned underflow when x or y is 0
      int d=s[x+sizex*((y+1)%sizey)];
      int u=s[x+sizex*((y+sizey-1)%sizey)];
      int l=s[((x+sizex-1)%sizex)+sizex*y];
      int r=s[((x+1)%sizex)+sizex*y];

      // Energy change if the spin at (x,y) is flipped
      float DeltaE=2.0f*J*p*(u+d+l+r);

      // Metropolis rule: flip if the energy decreases,
      // otherwise with probability exp(-DeltaE/T)
      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
      s[x+sizex*y] = (char)(factor*p);
   }
   barrier(CLK_GLOBAL_MEM_FENCE);
}

__kernel void MainLoopGlobal(__global char *s,__global float *T,float J,
                             uint sizex,uint sizey,
                             uint iterations,uint seed_w,uint seed_z)
{
   // One lattice per work-item: derive distinct seeds from the global id
   uint z=seed_z/(get_global_id(0)+1);
   uint w=seed_w/(get_global_id(0)+1);
   uint ind=get_global_id(0);
   float t=T[ind];

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*(y+sizey*ind)];

      // Periodic neighbours inside this work-item's own sub-lattice
      int d=s[x+sizex*((y+1)%sizey+sizey*ind)];
      int u=s[x+sizex*((y+sizey-1)%sizey+sizey*ind)];
      int l=s[((x+sizex-1)%sizex)+sizex*(y+sizey*ind)];
      int r=s[((x+1)%sizex)+sizex*(y+sizey*ind)];

      float DeltaE=2.0f*J*p*(u+d+l+r);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/t))) ? -1:1;
      s[x+sizex*(y+sizey*ind)] = (char)(factor*p);
   }
   barrier(CLK_GLOBAL_MEM_FENCE);
}

__kernel void MainLoopLocal(__global char *s,__global float *T,float J,
                            uint sizex,uint sizey,
                            uint iterations,uint seed_w,uint seed_z)
{
   // One lattice per work-item: derive distinct seeds from the local id
   uint z=seed_z/(get_local_id(0)+1);
   uint w=seed_w/(get_local_id(0)+1);
   //float t=T[get_local_id(0)+get_global_id(0)*get_local_size(0)];
   //uint ind=get_local_id(0)+get_global_id(0)*get_local_size(0);
   float t=T[get_local_id(0)];
   uint ind=get_local_id(0);

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*(y+sizey*ind)];

      int d=s[x+sizex*((y+1)%sizey+sizey*ind)];
      int u=s[x+sizex*((y+sizey-1)%sizey+sizey*ind)];
      int l=s[((x+sizex-1)%sizex)+sizex*(y+sizey*ind)];
      int r=s[((x+1)%sizex)+sizex*(y+sizey*ind)];

      float DeltaE=2.0f*J*p*(u+d+l+r);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/t))) ? -1:1;
      s[x+sizex*(y+sizey*ind)] = (char)(factor*p);
   }
   barrier(CLK_LOCAL_MEM_FENCE);
   barrier(CLK_GLOBAL_MEM_FENCE);
}
"""
def ImageOutput(sigma,prefix):
    Max=sigma.max()
    Min=sigma.min()

    # Normalize values to 8-bit integers
    # (guard against a uniform lattice where Max equals Min)
    if Max==Min:
        SigmaInt=numpy.zeros(sigma.shape,dtype='uint8')
    else:
        SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
    image = Image.fromarray(SigmaInt)
    image.save("%s.jpg" % prefix)

def Metropolis(sigma,J,B,T,iterations):
    start=time.time()

    SizeX,SizeY=sigma.shape

    for p in xrange(0,iterations):
        # Pick a random site
        X,Y=numpy.random.randint(SizeX),numpy.random.randint(SizeY)

        # Energy change if the spin at (X,Y) is flipped
        # (the field term contributes 2*B per spin)
        DeltaE=sigma[X,Y]*(2*J*(sigma[X,(Y+1)%SizeY]+
                                sigma[X,(Y-1)%SizeY]+
                                sigma[(X-1)%SizeX,Y]+
                                sigma[(X+1)%SizeX,Y])+2*B)

        # Metropolis rule: flip if the energy decreases,
        # otherwise with probability exp(-DeltaE/T)
        if DeltaE < 0. or random() < exp(-DeltaE/T):
            sigma[X,Y]=-sigma[X,Y]
    duration=time.time()-start
    return(duration)

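# Illustrative usage sketch (ours, not the author's): thermalise a small
# lattice with the serial routine above; nothing calls this automatically.
def DemoMetropolis(size=32,T=2.0):
    lattice=numpy.where(numpy.random.randn(size,size)>0,1,-1).astype(numpy.int8)
    duration=Metropolis(lattice,1.,0.,T,100*size*size)
    print "Serial Metropolis on %ix%i lattice: %f s" % (size,size,duration)
    return lattice
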
def MetropolisOpenCL(sigma,J,B,T,iterations,jobs,ParaStyle,Alu,Device):

    # Initialise the kernel arguments with the proper casts

    # Device selection based on choice (default is GPU)
    HasGPU=False
    Id=1
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            if not HasGPU:
                deviceType=cl.device_type.to_string(device.type)
                if deviceType=="GPU" and Alu=="GPU" and Id==Device:
                    GPU=device
                    print "GPU selected: ",device.name
                    HasGPU=True
                if deviceType=="CPU" and Alu=="CPU":
                    GPU=device
                    print "CPU selected: ",device.name
                    HasGPU=True
            Id=Id+1

    # Create the context and the command queue for execution
    ctx = cl.Context([GPU])
    queue = cl.CommandQueue(ctx,
                            properties=cl.command_queue_properties.PROFILING_ENABLE)

    # Get the available memory flags for the buffers
    mf = cl.mem_flags

    print sigma,sigma.shape

    # Careful with the cast: the lattice is int8, i.e. a char in OpenCL.
    # The kernel both reads and writes the lattice, hence READ_WRITE.
    sigmaCL = cl.Buffer(ctx, mf.READ_WRITE|mf.COPY_HOST_PTR,hostbuf=sigma)

    MetropolisCL = cl.Program(ctx,KERNEL_CODE_OPENCL).build( \
        options = "-cl-mad-enable -cl-fast-relaxed-math")

    SizeX,SizeY=sigma.shape

    if ParaStyle=='Blocks':
        # Call the OpenCL kernel:
        # (jobs,) is the global work size,
        # None lets the implementation choose the local work size
        CLLaunch=MetropolisCL.MainLoopOne(queue,(jobs,),None,
                                          sigmaCL,
                                          numpy.float32(T),
                                          numpy.float32(J),
                                          numpy.uint32(SizeX),
                                          numpy.uint32(SizeY),
                                          numpy.uint32(iterations),
                                          numpy.uint32(nprnd(2**31-1)),
                                          numpy.uint32(nprnd(2**31-1)))
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)
    else:
        # In OpenCL, the local work size is set equal to the global work size
        CLLaunch=MetropolisCL.MainLoopOne(queue,(jobs,),(jobs,),
                                          sigmaCL,
                                          numpy.float32(T),
                                          numpy.float32(J),
                                          numpy.uint32(SizeX),
                                          numpy.uint32(SizeY),
                                          numpy.uint32(iterations),
                                          numpy.uint32(nprnd(2**31-1)),
                                          numpy.uint32(nprnd(2**31-1)))
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)

    CLLaunch.wait()
    cl.enqueue_copy(queue, sigma, sigmaCL).wait()
    elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
    sigmaCL.release()

    return(elapsed)

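# Illustrative usage sketch (ours, not the author's): one lattice at a single
# temperature on the first OpenCL GPU; assumes pyopencl is already imported
# as cl and that device #1 is a GPU. Nothing calls this automatically.
def DemoMetropolisOpenCL(size=64,T=2.0):
    lattice=numpy.where(numpy.random.randn(size,size)>0,1,-1).astype(numpy.int8)
    elapsed=MetropolisOpenCL(lattice,1.,0.,T,size*size,1,'Blocks','GPU',1)
    print "OpenCL Metropolis on %ix%i lattice: %f s" % (size,size,elapsed)
    return lattice
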
def MetropolisAllOpenCL(sigmaDict,J,B,TList,iterations,jobs,ParaStyle,Alu,Device):

    # Unlike MetropolisOpenCL, sigmaDict and TList are not an array and a float:
    # sigmaDict : dict holding one lattice array per temperature
    # TList : list of temperatures

    # Device selection based on choice (default is GPU)
    HasGPU=False
    Id=1
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            if not HasGPU:
                deviceType=cl.device_type.to_string(device.type)
                if deviceType=="GPU" and Alu=="GPU" and Id==Device:
                    GPU=device
                    print "GPU selected: ",device.name
                    HasGPU=True
                if deviceType=="CPU" and Alu=="CPU":
                    GPU=device
                    print "CPU selected: ",device.name
                    HasGPU=True
            Id=Id+1

    # Create the context and the command queue for execution
    ctx = cl.Context([GPU])
    queue = cl.CommandQueue(ctx,
                            properties=cl.command_queue_properties.PROFILING_ENABLE)

    # Get the available memory flags for the buffers
    mf = cl.mem_flags

    # Concatenate all lattices into a single array, one slab per temperature
    sigma=numpy.copy(sigmaDict[TList[0]])
    for T in TList[1:]:
        sigma=numpy.concatenate((sigma,sigmaDict[T]),axis=1)

    print sigma,sigma.shape

    # The lattice is read and written by the kernels, the temperatures only read
    sigmaCL = cl.Buffer(ctx, mf.READ_WRITE|mf.COPY_HOST_PTR,hostbuf=sigma)
    TCL = cl.Buffer(ctx, mf.READ_ONLY|mf.COPY_HOST_PTR,hostbuf=TList)

    MetropolisCL = cl.Program(ctx,KERNEL_CODE_OPENCL).build( \
        options = "-cl-mad-enable -cl-fast-relaxed-math")

    SizeX,SizeY=sigmaDict[TList[0]].shape

    if ParaStyle=='Blocks':
        # Call the OpenCL kernel:
        # (jobs,) is the global work size, one work-item per temperature;
        # None lets the implementation choose the local work size;
        # the two nprnd() draws seed the per-work-item RNG
        CLLaunch=MetropolisCL.MainLoopGlobal(queue,(jobs,),None,
                                             sigmaCL,
                                             TCL,
                                             numpy.float32(J),
                                             numpy.uint32(SizeX),
                                             numpy.uint32(SizeY),
                                             numpy.uint32(iterations),
                                             numpy.uint32(nprnd(2**31-1)),
                                             numpy.uint32(nprnd(2**31-1)))
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)
    else:
        # In OpenCL, the local work size must divide the global work size;
        # here it is hardcoded to 2
        CLLaunch=MetropolisCL.MainLoopLocal(queue,(jobs,),(2,),
                                            sigmaCL,
                                            TCL,
                                            numpy.float32(J),
                                            numpy.uint32(SizeX),
                                            numpy.uint32(SizeY),
                                            numpy.uint32(iterations),
                                            numpy.uint32(nprnd(2**31-1)),
                                            numpy.uint32(nprnd(2**31-1)))
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)

    CLLaunch.wait()
    cl.enqueue_copy(queue, sigma, sigmaCL).wait()
    elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
    sigmaCL.release()

    # Split the big array back into one lattice per temperature
    results=numpy.split(sigma,len(TList),axis=1)
    for T in TList:
        sigmaDict[T]=numpy.copy(results[numpy.nonzero(TList == T)[0][0]])

    return(elapsed)

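# Minimal sketch (ours, not the author's) of the layout used above: the
# axis=1 concatenate/split round trip that packs one lattice per temperature
# into a single device buffer and unpacks it afterwards.
def DemoConcatSplit():
    a=numpy.zeros((4,4),dtype=numpy.int8)
    b=numpy.ones((4,4),dtype=numpy.int8)
    big=numpy.concatenate((a,b),axis=1)   # shape (4,8)
    a2,b2=numpy.split(big,2,axis=1)       # back to two (4,4) lattices
    assert (a2==a).all() and (b2==b).all()
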
def Magnetization(sigma,M):
    # Mean spin per site (the M argument is unused but kept for symmetry)
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))

def Energy(sigma,J):
    # Copy and cast to float
    E=numpy.copy(sigma).astype(numpy.float32)

    # Slice-based computation of the local energies
    E[1:-1,1:-1]=-J*E[1:-1,1:-1]*(E[:-2,1:-1]+E[2:,1:-1]+
                                  E[1:-1,:-2]+E[1:-1,2:])

    # Zero out the boundary
    E[:,0]=0
    E[:,-1]=0
    E[0,:]=0
    E[-1,:]=0

    Energy=numpy.sum(E)

    return(Energy/(E.shape[0]*E.shape[1]*1.0))

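# Quick sanity check (ours, not the author's): on a fully aligned lattice the
# slice formula yields -4*J at every interior site (each bond is seen from
# both of its endpoints), so for instance:
#   Energy(numpy.ones((64,64),dtype=numpy.int8),1.)   # -4*62*62/64.**2 ~ -3.75
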
def DisplayCurves(T,E,M,J,B):

    plt.xlabel("Temperature")
    plt.ylabel("Energy")

    Experience,=plt.plot(T,E,label="Energy")

    plt.legend()
    plt.show()


if __name__=='__main__':

    # Set default values
    # Alu can be CPU or GPU
    Alu='CPU'
    # Id of GPU
    Device=1
    # GPU style can be CUDA (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Parallel distribution can be on Threads or Blocks
    ParaStyle='Blocks'
    # Coupling factor
    J=1.
    # Magnetic Field
    B=0.
    # Size of Lattice
    Size=256
    # Default Temperatures (start, end, step)
    Tmin=0.1
    Tmax=5
    Tstep=0.1
    # Default Number of Iterations
    Iterations=Size*Size
    # Curves is True to print the curves
    Curves=False

    try:
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:a:d:g:t:",["coupling=","magneticfield=","size=","iterations=","tempmin=","tempmax=","tempstep=","alu=","device=","gpustyle=","parastyle="])
    except getopt.GetoptError:
        print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -t <Threads/Blocks ParaStyle>' % sys.argv[0]
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -t <Threads/Blocks ParaStyle>' % sys.argv[0]
            sys.exit()
        elif opt == '-c':
            Curves=True
        elif opt in ("-j", "--coupling"):
            J = float(arg)
        elif opt in ("-b", "--magneticfield"):
            B = float(arg)
        elif opt in ("-s", "--tempmin"):
            Tmin = float(arg)
        elif opt in ("-e", "--tempmax"):
            Tmax = float(arg)
        elif opt in ("-p", "--tempstep"):
            Tstep = float(arg)
        elif opt in ("-i", "--iterations"):
            Iterations = int(arg)
        elif opt in ("-z", "--size"):
            Size = int(arg)
        elif opt in ("-a", "--alu"):
            Alu = arg
        elif opt in ("-d", "--device"):
            Device = int(arg)
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-t", "--parastyle"):
            ParaStyle = arg

    if Alu=='CPU' and GpuStyle=='CUDA':
        print "Alu can't be CPU for CUDA, set Alu to GPU"
        Alu='GPU'

    if ParaStyle not in ('Blocks','Threads','Hybrid'):
        print "%s does not exist, ParaStyle set to Blocks!" % ParaStyle
        ParaStyle='Blocks'

    print "Compute unit : %s" % Alu
440 18 equemene
    print "Device Identification : %s" % Device
441 18 equemene
    print "GpuStyle used : %s" % GpuStyle
442 18 equemene
    print "Parallel Style used : %s" % ParaStyle
443 18 equemene
    print "Coupling Factor : %s" % J
444 18 equemene
    print "Magnetic Field :  %s" % B
445 18 equemene
    print "Size of lattice : %s" % Size
446 18 equemene
    print "Iterations : %s" % Iterations
447 18 equemene
    print "Temperature on start : %s" % Tmin
448 18 equemene
    print "Temperature on end : %s" % Tmax
449 18 equemene
    print "Temperature step : %s" % Tstep
450 18 equemene
451 18 equemene
    if GpuStyle=='CUDA':
        # For PyCUDA import
        import pycuda.driver as cuda
        import pycuda.gpuarray as gpuarray
        import pycuda.autoinit
        from pycuda.compiler import SourceModule

    if GpuStyle=='OpenCL':
        # For PyOpenCL import
        import pyopencl as cl
        Id=1
        for platform in cl.get_platforms():
            for device in platform.get_devices():
                deviceType=cl.device_type.to_string(device.type)
                print "Device #%i of type %s : %s" % (Id,deviceType,device.name)
                Id=Id+1

    LAPIMAGE=False

    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype(numpy.int8)

    ImageOutput(sigmaIn,"Ising2D_Serial_%i_Initial" % (Size))

    # The temperatures are passed as a kernel parameter, careful with the cast!
    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep).astype(numpy.float32)

    E=[]
    M=[]

    print Trange,Trange.shape

    # One lattice per temperature, all starting from the same configuration
    sigma={}
    for T in Trange:
        sigma[T]=numpy.copy(sigmaIn)

    # For GPU, all processes are launched at once
    MetropolisAllOpenCL(sigma,J,B,Trange,Iterations,len(Trange),
                        ParaStyle,Alu,Device)

    for T in Trange:
        ImageOutput(sigma[T],"Ising2D_Serial_%i_%1.1f_Final" % (Size,T))

    # if Curves:
    #     DisplayCurves(Trange,E,M,J,B)

    # # Save output
    # numpy.savez("Ising2D_Serial_%i_%.8i" % (Size,Iterations),(Trange,E,M))