#!/usr/bin/env python
#
# Ising2D model on GPU (OpenCL/CUDA implementations)
#
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr>

import sys
import numpy
import math
from PIL import Image
from math import exp
from random import random
import time
import getopt
import matplotlib.pyplot as plt
from numpy.random import randint as nprnd

KERNEL_CODE_OPENCL="""

// Marsaglia RNG very simple implementation
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f
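
// znew and wnew are two 16-bit multiply-with-carry generators merged into
// the 32-bit MWC stream; SHR3 is a 3-shift xorshift, CONG a linear
// congruential generator, and KISS combines all three (Marsaglia's KISS).
// The constant 2.328306435454494e-10f is ~1/2^32, so MWCfp and KISSfp map
// a 32-bit unsigned draw to a float in [0,1).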

__kernel void MainLoopOne(__global char *s,float T,float J,float B,
                          uint sizex,uint sizey,
                          uint iterations,uint seed_w,uint seed_z)
{
   uint z=seed_z;
   uint w=seed_w;

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*y];

      // Periodic boundaries: add sizex/sizey before subtracting 1 so the
      // unsigned arithmetic cannot underflow when x or y is 0
      int d=s[x+sizex*((y+1)%sizey)];
      int u=s[x+sizex*((y+sizey-1)%sizey)];
      int l=s[((x+sizex-1)%sizex)+sizex*y];
      int r=s[((x+1)%sizex)+sizex*y];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
      s[x+sizex*y] = (char)(factor*p);
   }
   barrier(CLK_GLOBAL_MEM_FENCE);
}
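
// All three kernels apply the same single-spin Metropolis update: DeltaE is
// the energy cost of flipping spin p, namely 2*p*(J*(u+d+l+r)+B), and the
// flip is accepted when DeltaE < 0, or with probability exp(-DeltaE/T)
// otherwise. MainLoopGlobal and MainLoopLocal each evolve one lattice per
// temperature: lattice number ind occupies rows [sizey*ind, sizey*(ind+1))
// of the concatenated array s built on the host side.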

__kernel void MainLoopGlobal(__global char *s,__global float *T,float J,float B,
                             uint sizex,uint sizey,
                             uint iterations,uint seed_w,uint seed_z)
{
   // One work-item per temperature: derive a per-item RNG seed and read
   // this item's temperature from the T array
   uint z=seed_z/(get_global_id(0)+1);
   uint w=seed_w/(get_global_id(0)+1);
   uint ind=get_global_id(0);
   float t=T[ind];

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*(y+sizey*ind)];

      // Periodic wrap, underflow-safe for unsigned coordinates
      int d=s[x+sizex*((y+1)%sizey+sizey*ind)];
      int u=s[x+sizex*((y+sizey-1)%sizey+sizey*ind)];
      int l=s[((x+sizex-1)%sizex)+sizex*(y+sizey*ind)];
      int r=s[((x+1)%sizex)+sizex*(y+sizey*ind)];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/t))) ? -1:1;
      s[x+sizex*(y+sizey*ind)] = (char)(factor*p);
   }
   barrier(CLK_GLOBAL_MEM_FENCE);
}

__kernel void MainLoopLocal(__global char *s,__global float *T,float J,float B,
                            uint sizex,uint sizey,
                            uint iterations,uint seed_w,uint seed_z)
{
   uint z=seed_z/(get_local_id(0)+1);
   uint w=seed_w/(get_local_id(0)+1);
   //float t=T[get_local_id(0)+get_global_id(0)*get_local_size(0)];
   //uint ind=get_local_id(0)+get_global_id(0)*get_local_size(0);
   float t=T[get_local_id(0)];
   uint ind=get_local_id(0);

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*(y+sizey*ind)];

      // Periodic wrap, underflow-safe for unsigned coordinates
      int d=s[x+sizex*((y+1)%sizey+sizey*ind)];
      int u=s[x+sizex*((y+sizey-1)%sizey+sizey*ind)];
      int l=s[((x+sizex-1)%sizex)+sizex*(y+sizey*ind)];
      int r=s[((x+1)%sizex)+sizex*(y+sizey*ind)];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/t))) ? -1:1;
      s[x+sizex*(y+sizey*ind)] = (char)(factor*p);
   }

   barrier(CLK_LOCAL_MEM_FENCE);
   barrier(CLK_GLOBAL_MEM_FENCE);
}
"""
KERNEL_CODE_CUDA="""

// Marsaglia RNG very simple implementation (same generators as above)
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f

__global__ void MainLoopOne(char *s,float T,float J,float B,
                            uint sizex,uint sizey,
                            uint iterations,uint seed_w,uint seed_z)
{
   uint z=seed_z;
   uint w=seed_w;

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*y];

      // Periodic wrap, underflow-safe for unsigned coordinates
      int d=s[x+sizex*((y+1)%sizey)];
      int u=s[x+sizex*((y+sizey-1)%sizey)];
      int l=s[((x+sizex-1)%sizex)+sizex*y];
      int r=s[((x+1)%sizex)+sizex*y];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
      s[x+sizex*y] = (char)(factor*p);
   }
   __syncthreads();
}

__global__ void MainLoopGlobal(char *s,float *T,float J,float B,
                               uint sizex,uint sizey,
                               uint iterations,uint seed_w,uint seed_z)
{
   uint z=seed_z/(blockIdx.x+1);
   uint w=seed_w/(blockIdx.x+1);
   float t=T[blockIdx.x];
   uint ind=blockIdx.x;

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*(y+sizey*ind)];

      int d=s[x+sizex*((y+1)%sizey+sizey*ind)];
      int u=s[x+sizex*((y+sizey-1)%sizey+sizey*ind)];
      int l=s[((x+sizex-1)%sizex)+sizex*(y+sizey*ind)];
      int r=s[((x+1)%sizex)+sizex*(y+sizey*ind)];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/t))) ? -1:1;
      s[x+sizex*(y+sizey*ind)] = (char)(factor*p);
   }
   __syncthreads();
}

__global__ void MainLoopLocal(char *s,float *T,float J,float B,
                              uint sizex,uint sizey,
                              uint iterations,uint seed_w,uint seed_z)
{
   uint z=seed_z/(threadIdx.x+1);
   uint w=seed_w/(threadIdx.x+1);
   float t=T[threadIdx.x];
   uint ind=threadIdx.x;

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*(y+sizey*ind)];

      int d=s[x+sizex*((y+1)%sizey+sizey*ind)];
      int u=s[x+sizex*((y+sizey-1)%sizey+sizey*ind)];
      int l=s[((x+sizex-1)%sizex)+sizex*(y+sizey*ind)];
      int r=s[((x+1)%sizex)+sizex*(y+sizey*ind)];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/t))) ? -1:1;
      s[x+sizex*(y+sizey*ind)] = (char)(factor*p);
   }
   __syncthreads();
}
"""

def ImageOutput(sigma,prefix):
    Max=sigma.max()
    Min=sigma.min()

    # Normalize values to 8-bit integers
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
    image = Image.fromarray(SigmaInt)
    image.save("%s.jpg" % prefix)

def Metropolis(sigma,T,J,B,iterations):
    start=time.time()

    SizeX,SizeY=sigma.shape

    for p in xrange(0,iterations):
        # Randomly chosen site
        X,Y=numpy.random.randint(SizeX),numpy.random.randint(SizeY)

        # Same energy cost as in the GPU kernels: DeltaE=2*s*(J*sum+B)
        DeltaE=2.*sigma[X,Y]*(J*(sigma[X,(Y+1)%SizeY]+
                                 sigma[X,(Y-1)%SizeY]+
                                 sigma[(X-1)%SizeX,Y]+
                                 sigma[(X+1)%SizeX,Y])+B)

        if DeltaE < 0. or random() < exp(-DeltaE/T):
            sigma[X,Y]=-sigma[X,Y]
    duration=time.time()-start
    return(duration)
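
# Metropolis() above is the serial reference implementation; the main
# program below only calls the two GPU variants, which apply the same
# update rule inside the kernels.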

def MetropolisAllOpenCL(sigmaDict,TList,J,B,iterations,jobs,ParaStyle,Alu,Device):

    # sigmaDict and TList are not a single array and a float:
    # sigmaDict : dict holding one lattice array per temperature
    # TList : list of temperatures

    # Scalar parameters are cast explicitly (numpy.float32 / numpy.uint32)
    # when the kernel is launched

    # Detect a suitable device in the list of OpenCL devices

    HasGPU=False
    Id=1
    # Primary device selection, based on device Id
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            deviceType=cl.device_type.to_string(device.type)
            if Id==Device and not HasGPU:
                GPU=device
                print "CPU/GPU selected: ",device.name
                HasGPU=True
            Id=Id+1
    # Default device selection, based on ALU type
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            deviceType=cl.device_type.to_string(device.type)
            if deviceType=="GPU" and Alu=="GPU" and not HasGPU:
                GPU=device
                print "GPU selected: ",device.name
                HasGPU=True
            if deviceType=="CPU" and Alu=="CPU" and not HasGPU:
                GPU=device
                print "CPU selected: ",device.name
                HasGPU=True

    # Create the context and the command queue for execution
    # ctx = cl.create_some_context()
    ctx = cl.Context([GPU])
    queue = cl.CommandQueue(ctx,
                            properties=cl.command_queue_properties.PROFILING_ENABLE)

    # Get the available memory flags for the buffers
    mf = cl.mem_flags

    # Concatenate all lattices along axis 0 so that each one is a
    # contiguous Size*Size slab in C order, matching the kernel
    # indexing s[x+sizex*(y+sizey*ind)]
    sigma=numpy.copy(sigmaDict[TList[0]])
    for T in TList[1:]:
        sigma=numpy.concatenate((sigma,sigmaDict[T]),axis=0)

    # The kernels both read and write the spins, so the buffer must be
    # READ_WRITE (not WRITE_ONLY); the temperatures are only read
    sigmaCL = cl.Buffer(ctx, mf.READ_WRITE|mf.COPY_HOST_PTR,hostbuf=sigma)
    TCL = cl.Buffer(ctx, mf.READ_ONLY|mf.COPY_HOST_PTR,hostbuf=TList)

    MetropolisCL = cl.Program(ctx,KERNEL_CODE_OPENCL).build( \
        options = "-cl-mad-enable -cl-fast-relaxed-math")

    SizeX,SizeY=sigmaDict[TList[0]].shape

    if ParaStyle=='Blocks':
        # Call the OpenCL kernel with one work-item per temperature:
        # (jobs,) is the global work size, None lets the runtime pick the
        # local work size; the two seeds feed the Marsaglia RNG
        CLLaunch=MetropolisCL.MainLoopGlobal(queue,(jobs,),None,
                                             sigmaCL,
                                             TCL,
                                             numpy.float32(J),
                                             numpy.float32(B),
                                             numpy.uint32(SizeX),
                                             numpy.uint32(SizeY),
                                             numpy.uint32(iterations),
                                             numpy.uint32(nprnd(2**31-1)),
                                             numpy.uint32(nprnd(2**31-1)))
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)
    else:
        # In OpenCL the global size must be set equal to the local size
        # here, so the whole job runs inside a single work-group
        CLLaunch=MetropolisCL.MainLoopLocal(queue,(jobs,),(jobs,),
                                            sigmaCL,
                                            TCL,
                                            numpy.float32(J),
                                            numpy.float32(B),
                                            numpy.uint32(SizeX),
                                            numpy.uint32(SizeY),
                                            numpy.uint32(iterations),
                                            numpy.uint32(nprnd(2**31-1)),
                                            numpy.uint32(nprnd(2**31-1)))
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)

    CLLaunch.wait()
    cl.enqueue_copy(queue, sigma, sigmaCL).wait()
    # OpenCL event profiling timestamps are in nanoseconds
    elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
    sigmaCL.release()

    # Split the concatenated array back into one lattice per temperature
    results=numpy.split(sigma,len(TList),axis=0)
    for T in TList:
        sigmaDict[T]=numpy.copy(results[numpy.nonzero(TList == T)[0][0]])

    return(elapsed)

def MetropolisAllCuda(sigmaDict,TList,J,B,iterations,jobs,ParaStyle,Alu,Device):

    # sigmaDict and TList are not a single array and a float:
    # sigmaDict : dict holding one lattice array per temperature
    # TList : list of temperatures

    # With PyCUDA autoinit, nothing to set up here!

    mod = SourceModule(KERNEL_CODE_CUDA)

    MetropolisBlocksCuda=mod.get_function("MainLoopGlobal")
    MetropolisThreadsCuda=mod.get_function("MainLoopLocal")

    # Concatenate all lattices along axis 0 so that each one is a
    # contiguous Size*Size slab in C order, matching the kernel indexing
    sigma=numpy.copy(sigmaDict[TList[0]])
    for T in TList[1:]:
        sigma=numpy.concatenate((sigma,sigmaDict[T]),axis=0)

    sigmaCU=cuda.InOut(sigma)
    TCU=cuda.InOut(TList)

    SizeX,SizeY=sigmaDict[TList[0]].shape

    start = pycuda.driver.Event()
    stop = pycuda.driver.Event()

    start.record()
    start.synchronize()
    if ParaStyle=='Blocks':
        # One temperature per block; the two seeds feed the Marsaglia RNG
        MetropolisBlocksCuda(sigmaCU,
                             TCU,
                             numpy.float32(J),
                             numpy.float32(B),
                             numpy.uint32(SizeX),
                             numpy.uint32(SizeY),
                             numpy.uint32(iterations),
                             numpy.uint32(nprnd(2**31-1)),
                             numpy.uint32(nprnd(2**31-1)),
                             grid=(jobs,1),block=(1,1,1))
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)
    else:
        # One temperature per thread, all inside a single block
        MetropolisThreadsCuda(sigmaCU,
                              TCU,
                              numpy.float32(J),
                              numpy.float32(B),
                              numpy.uint32(SizeX),
                              numpy.uint32(SizeY),
                              numpy.uint32(iterations),
                              numpy.uint32(nprnd(2**31-1)),
                              numpy.uint32(nprnd(2**31-1)),
                              grid=(1,1),block=(jobs,1,1))
        print "%s with %i %s done" % (Alu,jobs,ParaStyle)

    stop.record()
    stop.synchronize()
    elapsed = start.time_till(stop)*1e-3

    # Split the concatenated array back into one lattice per temperature
    results=numpy.split(sigma,len(TList),axis=0)
    for T in TList:
        sigmaDict[T]=numpy.copy(results[numpy.nonzero(TList == T)[0][0]])

    return(elapsed)
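
# In the CUDA path, ParaStyle maps directly onto the launch geometry:
# 'Blocks' runs one temperature per block (grid=(jobs,1), block=(1,1,1)),
# while 'Threads' packs all temperatures into one block as jobs threads
# (grid=(1,1), block=(jobs,1,1)); time_till() returns milliseconds, hence
# the 1e-3 factor above.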

def Magnetization(sigma,M):
    # Mean spin per site (the M argument is unused, kept for symmetry)
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))

def Energy(sigma,J):
    # Copy and cast to float
    E=numpy.copy(sigma).astype(numpy.float32)

    # Slice-based evaluation of the 4-neighbour sum on the interior
    E[1:-1,1:-1]=-J*E[1:-1,1:-1]*(E[:-2,1:-1]+E[2:,1:-1]+
                                  E[1:-1,:-2]+E[1:-1,2:])

    # Zero out the boundary sites
    E[:,0]=0
    E[:,-1]=0
    E[0,:]=0
    E[-1,:]=0

    Energy=numpy.sum(E)

    return(Energy/(E.shape[0]*E.shape[1]*1.0))
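
# Note that the interior sum in Energy() visits every bond from both of its
# endpoints, so each bond contributes twice; apart from the zeroed boundary,
# the result is roughly twice the usual per-site energy, which only rescales
# the curve and does not move the transition.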

def DisplayCurves(T,E,M,J,B):

    plt.xlabel("Temperature")
    plt.ylabel("Energy")

    plt.plot(T,E,label="Energy")
    plt.plot(T,M,label="Magnetization")

    plt.legend()
    plt.show()


if __name__=='__main__':

    # Set default values
    # Alu can be CPU or GPU
    Alu='CPU'
    # Device Id (0 means: select by ALU type instead)
    Device=0
    # GPU style can be CUDA (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Parallel distribution can be on Threads or Blocks
    ParaStyle='Blocks'
    # Coupling factor
    J=1.
    # Magnetic field
    B=0.
    # Size of lattice
    Size=256
    # Default temperatures (start, end, step)
    Tmin=0.1
    Tmax=5
    Tstep=0.1
    # Default number of iterations
    Iterations=Size*Size
    # Curves is True to print the curves
    Curves=False
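
    # For reference: the exact critical temperature of the square-lattice
    # Ising model with J=1, B=0 is Tc = 2/ln(1+sqrt(2)) ~ 2.269 (Onsager),
    # well inside the default scan from Tmin=0.1 to Tmax=5.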

    try:
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:a:d:g:t:",["coupling=","magneticfield=","size=","iterations=","tempstart=","tempend=","tempstep=","alu=","device=","gpustyle=","parastyle="])
    except getopt.GetoptError:
        print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -t <Threads/Blocks>' % sys.argv[0]
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -t <Threads/Blocks>' % sys.argv[0]
            sys.exit()
        elif opt == '-c':
            Curves=True
        elif opt in ("-j", "--coupling"):
            J = float(arg)
        elif opt in ("-b", "--magneticfield"):
            B = float(arg)
        elif opt in ("-s", "--tempstart"):
            Tmin = float(arg)
        elif opt in ("-e", "--tempend"):
            Tmax = float(arg)
        elif opt in ("-p", "--tempstep"):
            Tstep = float(arg)
        elif opt in ("-i", "--iterations"):
            Iterations = int(arg)
        elif opt in ("-z", "--size"):
            Size = int(arg)
        elif opt in ("-a", "--alu"):
            Alu = arg
        elif opt in ("-d", "--device"):
            Device = int(arg)
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-t", "--parastyle"):
            ParaStyle = arg
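
    # Example invocation (assuming a working PyOpenCL installation):
    #   python Ising2D-GPU.py -z 128 -i 1000000 -s 0.5 -e 4.0 -p 0.5 \
    #          -a GPU -g OpenCL -t Blocks -c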

    if Alu=='CPU' and GpuStyle=='CUDA':
        print "Alu can't be CPU for CUDA, setting Alu to GPU"
        Alu='GPU'

    if ParaStyle not in ('Blocks','Threads','Hybrid'):
        print "%s does not exist, falling back to ParaStyle 'Blocks'!" % ParaStyle
        ParaStyle='Blocks'

    print "Compute unit : %s" % Alu
    print "Device Identification : %s" % Device
    print "GpuStyle used : %s" % GpuStyle
    print "Parallel Style used : %s" % ParaStyle
    print "Coupling Factor : %s" % J
    print "Magnetic Field : %s" % B
    print "Size of lattice : %s" % Size
    print "Iterations : %s" % Iterations
    print "Temperature on start : %s" % Tmin
    print "Temperature on end : %s" % Tmax
    print "Temperature step : %s" % Tstep

    if GpuStyle=='CUDA':
        # For PyCUDA import
        import pycuda.driver as cuda
        import pycuda.gpuarray as gpuarray
        import pycuda.autoinit
        from pycuda.compiler import SourceModule

    if GpuStyle=='OpenCL':
        # For PyOpenCL import
        import pyopencl as cl
        Id=1
        for platform in cl.get_platforms():
            for device in platform.get_devices():
                deviceType=cl.device_type.to_string(device.type)
                print "Device #%i of type %s : %s" % (Id,deviceType,device.name)
                Id=Id+1
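
    # Device ids printed above start at 1; the default Device=0 therefore
    # matches no device and MetropolisAllOpenCL falls back to selecting by
    # ALU type (CPU/GPU).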

    LAPIMAGE=False

    # Random initial lattice of -1/+1 spins
    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype(numpy.int8)

    ImageOutput(sigmaIn,"Ising2D_GPU_%i_Initial" % (Size))

    # The temperatures are passed as a kernel parameter: mind the cast!
    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep).astype(numpy.float32)

    E=[]
    M=[]

    sigma={}
    for T in Trange:
        sigma[T]=numpy.copy(sigmaIn)

    jobs=len(Trange)

    # On the GPU, all processes are launched at once, one per temperature
    if GpuStyle=='CUDA':
        duration=MetropolisAllCuda(sigma,Trange,J,B,Iterations,jobs,ParaStyle,Alu,Device)
    else:
        duration=MetropolisAllOpenCL(sigma,Trange,J,B,Iterations,jobs,ParaStyle,Alu,Device)

    print "Compute time for each temperature : %f" % (duration/len(Trange))
    for T in Trange:
        E=numpy.append(E,Energy(sigma[T],J))
        M=numpy.append(M,Magnetization(sigma[T],B))
        print "Total Energy at Temperature %f : %f" % (T,E[-1])
        print "Total Magnetization at Temperature %f : %f" % (T,M[-1])
        ImageOutput(sigma[T],"Ising2D_GPU_%i_%1.1f_Final" % (Size,T))

    if Curves:
        DisplayCurves(Trange,E,M,J,B)