root / Ising / GPU / Ising2D-GPU-One.py @ 94

#!/usr/bin/env python
#
# Ising2D model in serial mode
#
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr>

import sys
import numpy
from PIL import Image
from math import exp
from random import random
import time
import getopt
import matplotlib.pyplot as plt
from numpy.random import randint as nprnd

KERNEL_CODE_OPENCL="""

// Marsaglia RNG very simple implementation
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f

__kernel void MainLoopOne(__global char *s,float T,float J,float B,
                          uint sizex,uint sizey,
                          uint iterations,uint seed_w,uint seed_z)

{
   uint z=seed_z;
   uint w=seed_w;

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex) ;
      uint y=(uint)(MWC%sizey) ;

      int p=s[x+sizex*y];

      // Periodic boundaries: add sizex/sizey before subtracting 1 so the
      // unsigned index cannot wrap around when x==0 or y==0
      int d=s[x+sizex*((y+1)%sizey)];
      int u=s[x+sizex*((y+sizey-1)%sizey)];
      int l=s[((x+sizex-1)%sizex)+sizex*y];
      int r=s[((x+1)%sizex)+sizex*y];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
      s[x%sizex+sizex*(y%sizey)] = (char)factor*p;
   }
   barrier(CLK_GLOBAL_MEM_FENCE);

}
"""

KERNEL_CODE_CUDA="""

// Marsaglia RNG very simple implementation
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f

__global__ void MainLoopOne(char *s,float T,float J,float B,
                            uint sizex,uint sizey,
                            uint iterations,uint seed_w,uint seed_z)

{
   uint z=seed_z;
   uint w=seed_w;

   for (uint i=0;i<iterations;i++) {

      uint x=(uint)(MWC%sizex) ;
      uint y=(uint)(MWC%sizey) ;

      int p=s[x+sizex*y];

      // Periodic boundaries: add sizex/sizey before subtracting 1 so the
      // unsigned index cannot wrap around when x==0 or y==0
      int d=s[x+sizex*((y+1)%sizey)];
      int u=s[x+sizex*((y+sizey-1)%sizey)];
      int l=s[((x+sizex-1)%sizex)+sizex*y];
      int r=s[((x+1)%sizex)+sizex*y];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
      s[x%sizex+sizex*(y%sizey)] = (char)factor*p;
   }
   __syncthreads();

}
"""

def ImageOutput(sigma,prefix):
    Max=sigma.max()
    Min=sigma.min()

    # Normalize values to 8-bit integers
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
    image = Image.fromarray(SigmaInt)
    image.save("%s.jpg" % prefix)
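
# For a +/-1 spin lattice that still contains both values, Min=-1 and
# Max=+1, so the normalization above maps spin -1 to 0 and spin +1 to 255
# in the saved greyscale image. (A fully uniform lattice would give
# Max==Min and make the division degenerate.)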

def Metropolis(sigma,T,J,B,iterations):
    start=time.time()

    SizeX,SizeY=sigma.shape

    for p in xrange(0,iterations):
        # Random site coordinates
        X,Y=numpy.random.randint(SizeX),numpy.random.randint(SizeY)

        # Energy change of a spin flip, with the same convention as the
        # kernels above: DeltaE = 2*s*(J*(sum of the 4 neighbours)+B)
        DeltaE=2.*sigma[X,Y]*(J*(sigma[X,(Y+1)%SizeY]+
                                 sigma[X,(Y-1)%SizeY]+
                                 sigma[(X-1)%SizeX,Y]+
                                 sigma[(X+1)%SizeX,Y])+B)

        if DeltaE < 0. or random() < exp(-DeltaE/T):
            sigma[X,Y]=-sigma[X,Y]
    duration=time.time()-start
    return(duration)

def MetropolisCuda(sigma,T,J,B,iterations,ParaStyle,Alu,Device):

    # With pycuda.autoinit, nothing else to do here!

    sigmaCU=cuda.InOut(sigma)

    mod = SourceModule(KERNEL_CODE_CUDA)

    MetropolisCU=mod.get_function("MainLoopOne")

    start = pycuda.driver.Event()
    stop = pycuda.driver.Event()

    SizeX,SizeY=sigma.shape

    start.record()
    start.synchronize()
    MetropolisCU(sigmaCU,
                 numpy.float32(T),
                 numpy.float32(J),
                 numpy.float32(B),
                 numpy.uint32(SizeX),
                 numpy.uint32(SizeY),
                 numpy.uint32(iterations),
                 numpy.uint32(nprnd(2**31-1)),
                 numpy.uint32(nprnd(2**31-1)),
                 grid=(1,1),
                 block=(1,1,1))

    print "%s with %i %s done" % (Alu,1,ParaStyle)

    stop.record()
    stop.synchronize()

    #elapsed = stop.time_since(start)*1e-3
    elapsed = start.time_till(stop)*1e-3

    return(elapsed)

def MetropolisOpenCL(sigma,T,J,B,iterations,ParaStyle,Alu,Device):

    # Initialise the variables with the proper casts

    # Detect a GPU device in the device list
    # for platform in cl.get_platforms():
    #     for device in platform.get_devices():
    #             if cl.device_type.to_string(device.type)=='GPU':
    #                     GPU=device
    #print "GPU detected: ",device.name

    HasGPU=False
    Id=1
    # Primary Device selection based on Device Id
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            deviceType=cl.device_type.to_string(device.type)
            if Id==Device and not HasGPU:
                GPU=device
                print "CPU/GPU selected: ",device.name
                HasGPU=True
            Id=Id+1
    # Default Device selection based on ALU Type
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            deviceType=cl.device_type.to_string(device.type)
            if deviceType=="GPU" and Alu=="GPU" and not HasGPU:
                GPU=device
                print "GPU selected: ",device.name
                HasGPU=True
            if deviceType=="CPU" and Alu=="CPU" and not HasGPU:
                GPU=device
                print "CPU selected: ",device.name
                HasGPU=True

    # Create the context and the command queue for the execution
    # ctx = cl.create_some_context()
    ctx = cl.Context([GPU])
    queue = cl.CommandQueue(ctx,
                            properties=cl.command_queue_properties.PROFILING_ENABLE)

    # Get the available flags for the buffers
    mf = cl.mem_flags

    # Mind the cast! The lattice is int8 on the host, i.e. a char in OpenCL!
    # The kernel both reads and writes the spins, so the buffer is READ_WRITE
    sigmaCL = cl.Buffer(ctx, mf.READ_WRITE|mf.COPY_HOST_PTR,hostbuf=sigma)

    MetropolisCL = cl.Program(ctx,KERNEL_CODE_OPENCL).build( \
        options = "-cl-mad-enable -cl-fast-relaxed-math")

    SizeX,SizeY=sigma.shape

    if ParaStyle=='Blocks':
        # Call the OpenCL kernel
        # (1,) is the global work size (a single work-item)
        # None lets the implementation pick the local work size
        CLLaunch=MetropolisCL.MainLoopOne(queue,(1,),None,
                                          sigmaCL,
                                          numpy.float32(T),
                                          numpy.float32(J),
                                          numpy.float32(B),
                                          numpy.uint32(SizeX),
                                          numpy.uint32(SizeY),
                                          numpy.uint32(iterations),
                                          numpy.uint32(nprnd(2**31-1)),
                                          numpy.uint32(nprnd(2**31-1)))
        print "%s with %i %s done" % (Alu,1,ParaStyle)
    else:
        # In OpenCL the global size must here be identical to the local size
        CLLaunch=MetropolisCL.MainLoopOne(queue,(1,),(1,),
                                          sigmaCL,
                                          numpy.float32(T),
                                          numpy.float32(J),
                                          numpy.float32(B),
                                          numpy.uint32(SizeX),
                                          numpy.uint32(SizeY),
                                          numpy.uint32(iterations),
                                          numpy.uint32(nprnd(2**31-1)),
                                          numpy.uint32(nprnd(2**31-1)))
        print "%s with %i %s done" % (Alu,1,ParaStyle)

    CLLaunch.wait()
    cl.enqueue_copy(queue, sigma, sigmaCL).wait()
    elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
    sigmaCL.release()

    return(elapsed)

def Magnetization(sigma,M):
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))

def Energy(sigma,J):
    # Copy and cast
    E=numpy.copy(sigma).astype(numpy.float32)

    # Neighbour sum computed with slices
    E[1:-1,1:-1]=-J*E[1:-1,1:-1]*(E[:-2,1:-1]+E[2:,1:-1]+
                                  E[1:-1,:-2]+E[1:-1,2:])

    # Clear the border properly
    E[:,0]=0
    E[:,-1]=0
    E[0,:]=0
    E[-1,:]=0

    Energy=numpy.sum(E)

    return(Energy/(E.shape[0]*E.shape[1]*1.0))
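
# The observables above: Magnetization returns the mean spin,
# sum(sigma)/N^2, while Energy returns -J*sum_i s_i*(sum of the 4 nearest
# neighbours of i) averaged over the lattice, evaluated on the interior
# only (the border rows and columns are zeroed), so each bond is counted
# twice and no periodic wrap is applied in this diagnostic.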

def DisplayCurves(T,E,M,J,B):

    plt.xlabel("Temperature")
    plt.ylabel("Energy & Magnetization per site")

    plt.plot(T,E,label="Energy")
    plt.plot(T,M,label="Magnetization")

    plt.legend()
    plt.show()

if __name__=='__main__':

    # Set default values
    # Alu can be CPU or GPU
    Alu='CPU'
    # Device Id : 0 will use the first device found !
    Device=0
    # GPU style can be CUDA (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Parallel distribution can be on Threads or Blocks
    ParaStyle='Blocks'
    # Coupling factor
    J=1.
    # Magnetic Field
    B=0.
    # Size of Lattice
    Size=256
    # Default Temperatures (start, end, step)
    Tmin=0.1
    Tmax=5
    Tstep=0.1
    # Default Number of Iterations
    Iterations=Size*Size
    # Curves is True to print the curves
    Curves=False

    try:
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:a:d:g:t:",["coupling=","magneticfield=","size=","iterations=","tempmin=","tempmax=","tempstep=","alu=","device=","gpustyle=","parastyle="])
    except getopt.GetoptError:
        print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -t <Threads/Blocks>' % sys.argv[0]
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -t <Threads/Blocks>' % sys.argv[0]
            sys.exit()
        elif opt == '-c':
            Curves=True
        elif opt in ("-j", "--coupling"):
            J = float(arg)
        elif opt in ("-b", "--magneticfield"):
            B = float(arg)
        elif opt in ("-s", "--tempmin"):
            Tmin = float(arg)
        elif opt in ("-e", "--tempmax"):
            Tmax = float(arg)
        elif opt in ("-p", "--tempstep"):
            Tstep = float(arg)
        elif opt in ("-i", "--iterations"):
            Iterations = int(arg)
        elif opt in ("-z", "--size"):
            Size = int(arg)
        elif opt in ("-a", "--alu"):
            Alu = arg
        elif opt in ("-d", "--device"):
            Device = int(arg)
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-t", "--parastyle"):
            ParaStyle = arg
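
    # Example invocation (hypothetical values), using the options parsed above:
    #   python Ising2D-GPU-One.py -z 128 -i 100000 -g OpenCL -a GPU -t Blocks -c
    # would run a 128x128 lattice with 100000 Metropolis trials per
    # temperature on an OpenCL GPU device and display the curves at the end.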

    if Alu=='CPU' and GpuStyle=='CUDA':
        print "Alu can't be CPU for CUDA, set Alu to GPU"
        Alu='GPU'

    if ParaStyle not in ('Blocks','Threads','Hybrid'):
        print "%s does not exist, ParaStyle set to Blocks !" % ParaStyle
        ParaStyle='Blocks'

    print "Compute unit : %s" % Alu
    print "Device Identification : %s" % Device
    print "GpuStyle used : %s" % GpuStyle
    print "Parallel Style used : %s" % ParaStyle
    print "Coupling Factor : %s" % J
    print "Magnetic Field : %s" % B
    print "Size of lattice : %s" % Size
    print "Iterations : %s" % Iterations
    print "Temperature on start : %s" % Tmin
    print "Temperature on end : %s" % Tmax
    print "Temperature step : %s" % Tstep

    if GpuStyle=='CUDA':
        # For PyCUDA import
        import pycuda.driver as cuda
        import pycuda.gpuarray as gpuarray
        import pycuda.autoinit
        from pycuda.compiler import SourceModule

    if GpuStyle=='OpenCL':
        # For PyOpenCL import
        import pyopencl as cl
        Id=1
        for platform in cl.get_platforms():
            for device in platform.get_devices():
                deviceType=cl.device_type.to_string(device.type)
                print "Device #%i of type %s : %s" % (Id,deviceType,device.name)
                Id=Id+1

    LAPIMAGE=False

    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype(numpy.int8)

    ImageOutput(sigmaIn,"Ising2D_Serial_%i_Initial" % (Size))

    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep)

    E=[]
    M=[]

    for T in Trange:
        sigma=numpy.copy(sigmaIn)
        if GpuStyle=='CUDA':
            duration=MetropolisCuda(sigma,T,J,B,Iterations,ParaStyle,Alu,Device)
        else:
            duration=MetropolisOpenCL(sigma,T,J,B,Iterations,ParaStyle,Alu,Device)

        E=numpy.append(E,Energy(sigma,J))
        M=numpy.append(M,Magnetization(sigma,B))
        ImageOutput(sigma,"Ising2D_Serial_%i_%1.1f_Final" % (Size,T))

        print "Compute Time : %f" % (duration)
        print "Total Energy at Temperature %f : %f" % (T,E[-1])
        print "Total Magnetization at Temperature %f : %f" % (T,M[-1])

    if Curves:
        DisplayCurves(Trange,E,M,J,B)

    # Save output
    numpy.savez("Ising2D_Serial_%i_%.8i" % (Size,Iterations),(Trange,E,M))
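
    # The .npz archive can be reloaded later, e.g. (a minimal sketch,
    # assuming the default Size=256 and Iterations=65536 file name):
    #   data=numpy.load("Ising2D_Serial_256_00065536.npz")
    #   Trange,E,M=data['arr_0']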