Statistiques
| Révision :

root / Ising / GPU / Ising2D-GPU-One.py @ 144

Historique | Voir | Annoter | Télécharger (12,8 ko)

1
#!/usr/bin/env python
2
#
3
# Ising2D model in serial mode
4
#
5
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr> 
6

    
7
import sys
8
import numpy
9
from PIL import Image
10
from math import exp
11
from random import random
12
import time
13
import getopt
14
import matplotlib.pyplot as plt
15
from numpy.random import randint as nprnd
16

    
17
KERNEL_CODE_OPENCL="""
18

19
// Marsaglia RNG very simple implementation
20
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
21
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
22
#define MWC   (znew+wnew)
23
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
24
#define CONG  (jcong=69069*jcong+1234567)
25
#define KISS  ((MWC^CONG)+SHR3)
26

27
#define MWCfp MWC * 2.328306435454494e-10f
28
#define KISSfp KISS * 2.328306435454494e-10f
29

30
__kernel void MainLoopOne(__global char *s,float T,float J,float B,
31
                          uint sizex,uint sizey,
32
                          uint iterations,uint seed_w,uint seed_z)
33

34
{
35
   uint z=seed_z;
36
   uint w=seed_w;
37

38
   for (uint i=0;i<iterations;i++) {
39

40
      uint x=(uint)(MWC%sizex) ;
41
      uint y=(uint)(MWC%sizey) ;
42

43
      int p=s[x+sizex*y];
44

45
      int d=s[x+sizex*((y+1)%sizey)];
46
      int u=s[x+sizex*((y-1)%sizey)];
47
      int l=s[((x-1)%sizex)+sizex*y];
48
      int r=s[((x+1)%sizex)+sizex*y];
49

50
      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);
51

52
      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
53
      s[x%sizex+sizex*(y%sizey)] = (char)factor*p;
54
   }
55
   barrier(CLK_GLOBAL_MEM_FENCE);
56
   
57
}
58
"""
59

    
60
KERNEL_CODE_CUDA="""
61

62
// Marsaglia RNG very simple implementation
63
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
64
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
65
#define MWC   (znew+wnew)
66
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
67
#define CONG  (jcong=69069*jcong+1234567)
68
#define KISS  ((MWC^CONG)+SHR3)
69

70
#define MWCfp MWC * 2.328306435454494e-10f
71
#define KISSfp KISS * 2.328306435454494e-10f
72

73
__global__ void MainLoopOne(char *s,float T,float J,float B,
74
                            uint sizex,uint sizey,
75
                            uint iterations,uint seed_w,uint seed_z)
76

77
{
78
   uint z=seed_z;
79
   uint w=seed_w;
80

81
   for (uint i=0;i<iterations;i++) {
82

83
      uint x=(uint)(MWC%sizex) ;
84
      uint y=(uint)(MWC%sizey) ;
85

86
      int p=s[x+sizex*y];
87

88
      int d=s[x+sizex*((y+1)%sizey)];
89
      int u=s[x+sizex*((y-1)%sizey)];
90
      int l=s[((x-1)%sizex)+sizex*y];
91
      int r=s[((x+1)%sizex)+sizex*y];
92

93
      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);
94

95
      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
96
      s[x%sizex+sizex*(y%sizey)] = (char)factor*p;
97
   }
98
   __syncthreads();
99
   
100
}
101
"""
102

    
103
def ImageOutput(sigma,prefix):
104
    Max=sigma.max()
105
    Min=sigma.min()
106
    
107
    # Normalize value as 8bits Integer
108
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
109
    image = Image.fromarray(SigmaInt)
110
    image.save("%s.jpg" % prefix)
111
    
112
def Metropolis(sigma,T,J,B,iterations): 
113
    start=time.time()
114

    
115
    SizeX,SizeY=sigma.shape
116
    
117
    for p in xrange(0,iterations):
118
        # Random access coordonate
119
        X,Y=numpy.random.randint(SizeX),numpy.random.randint(SizeY)
120
        
121
        DeltaE=J*sigma[X,Y]*(2*(sigma[X,(Y+1)%SizeY]+
122
                                sigma[X,(Y-1)%SizeY]+
123
                                sigma[(X-1)%SizeX,Y]+
124
                                sigma[(X+1)%SizeX,Y])+B)
125
        
126
        if DeltaE < 0. or random() < exp(-DeltaE/T):
127
            sigma[X,Y]=-sigma[X,Y]
128
    duration=time.time()-start
129
    return(duration)
130

    
131
def MetropolisCuda(sigma,T,J,B,iterations,ParaStyle,Alu,Device):
132
    
133
    # Avec PyCUDA autoinit, rien a faire !
134

    
135
    sigmaCU=cuda.InOut(sigma)
136

    
137
    mod = SourceModule(KERNEL_CODE_CUDA)
138
    
139
    MetropolisCU=mod.get_function("MainLoopOne")
140

    
141
    start = pycuda.driver.Event()
142
    stop = pycuda.driver.Event()
143

    
144
    SizeX,SizeY=sigma.shape
145

    
146
    start.record()
147
    start.synchronize()
148
    MetropolisCU(sigmaCU,
149
                 numpy.float32(T), 
150
                 numpy.float32(J),
151
                 numpy.float32(B),
152
                 numpy.uint32(SizeX),
153
                 numpy.uint32(SizeY),
154
                 numpy.uint32(iterations),
155
                 numpy.uint32(nprnd(2**31-1)),
156
                 numpy.uint32(nprnd(2**31-1)),
157
                 grid=(1,1),
158
                 block=(1,1,1))
159
    
160
    print "%s with %i %s done" % (Alu,1,ParaStyle)
161

    
162
    stop.record()
163
    stop.synchronize()
164
                
165
    #elapsed = stop.time_since(start)*1e-3
166
    elapsed = start.time_till(stop)*1e-3
167

    
168
    return(elapsed)
169

    
170

    
171
def MetropolisOpenCL(sigma,T,J,B,iterations,ParaStyle,Alu,Device):
172

    
173
    # Initialisation des variables en les CASTant correctement
174
    
175
    # Je detecte un peripherique GPU dans la liste des peripheriques
176
    # for platform in cl.get_platforms():
177
    #     for device in platform.get_devices():
178
    #             if cl.device_type.to_string(device.type)=='GPU':
179
    #                     GPU=device
180
    #print "GPU detected: ",device.name
181
    
182
    HasGPU=False
183
    Id=1
184
    # Primary Device selection based on Device Id
185
    for platform in cl.get_platforms():
186
        for device in platform.get_devices():
187
            #deviceType=cl.device_type.to_string(device.type)
188
            deviceType="xPU"
189
            if Id==Device and not HasGPU:
190
                GPU=device
191
                print "CPU/GPU selected: ",device.name
192
                HasGPU=True
193
            Id=Id+1
194
            
195
    # Je cree le contexte et la queue pour son execution
196
    # ctx = cl.create_some_context()
197
    ctx = cl.Context([GPU])
198
    queue = cl.CommandQueue(ctx,
199
                            properties=cl.command_queue_properties.PROFILING_ENABLE)
200
    
201
    # Je recupere les flag possibles pour les buffers
202
    mf = cl.mem_flags
203

    
204
    # Attention au CAST ! C'est un int8 soit un char en OpenCL !
205
    sigmaCL = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=sigma)
206
   
207
    MetropolisCL = cl.Program(ctx,KERNEL_CODE_OPENCL).build( \
208
        options = "-cl-mad-enable -cl-fast-relaxed-math")
209

    
210
    SizeX,SizeY=sigma.shape
211

    
212
    if ParaStyle=='Blocks':
213
        # Call OpenCL kernel
214
        # (1,) is Global work size (only 1 work size)
215
        # (1,) is local work size
216
        CLLaunch=MetropolisCL.MainLoopOne(queue,(1,),None,
217
                                          sigmaCL,
218
                                          numpy.float32(T), 
219
                                          numpy.float32(J),
220
                                          numpy.float32(B),
221
                                          numpy.uint32(SizeX),
222
                                          numpy.uint32(SizeY),
223
                                          numpy.uint32(iterations),
224
                                          numpy.uint32(nprnd(2**31-1)),
225
                                          numpy.uint32(nprnd(2**31-1)))
226
        print "%s with %i %s done" % (Alu,1,ParaStyle)
227
    else:
228
        # en OpenCL, necessaire de mettre un Global_id identique au local_id
229
        CLLaunch=MetropolisCL.MainLoopOne(queue,(1,),(1,),
230
                                          sigmaCL,
231
                                          numpy.float32(T),
232
                                          numpy.float32(J),
233
                                          numpy.float32(B),
234
                                          numpy.uint32(SizeX),
235
                                          numpy.uint32(SizeY),
236
                                          numpy.uint32(iterations),
237
                                          numpy.uint32(nprnd(2**31-1)),
238
                                          numpy.uint32(nprnd(2**31-1)))
239
        print "%s with %i %s done" % (Alu,1,ParaStyle)
240
        
241
    CLLaunch.wait()
242
    cl.enqueue_copy(queue, sigma, sigmaCL).wait()
243
    elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
244
    sigmaCL.release()
245
        
246
    return(elapsed)
247

    
248
def Magnetization(sigma,M):
249
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))
250

    
251
def Energy(sigma,J):
252
    # Copier et caster 
253
    E=numpy.copy(sigma).astype(numpy.float32)
254
    
255
    # Appel par slice
256
    E[1:-1,1:-1]=-J*E[1:-1,1:-1]*(E[:-2,1:-1]+E[2:,1:-1]+
257
                                  E[1:-1,:-2]+E[1:-1,2:])
258
    
259
    # Bien nettoyer la peripherie
260
    E[:,0]=0
261
    E[:,-1]=0
262
    E[0,:]=0
263
    E[-1,:]=0
264
    
265
    Energy=numpy.sum(E)
266

    
267
    return(Energy/(E.shape[0]*E.shape[1]*1.0))
268

    
269
def DisplayCurves(T,E,M,J,B):
270

    
271
    plt.xlabel("Temperature")
272
    plt.ylabel("Energy")
273

    
274
    Experience,=plt.plot(T,E,label="Energy") 
275

    
276
    plt.legend()
277
    plt.show()
278

    
279

    
280
if __name__=='__main__':
281

    
282
    # Set defaults values
283
    # Alu can be CPU or GPU
284
    Alu='CPU'
285
    # Id of GPU : 0 will use the first find !
286
    Device=0
287
    # GPU style can be Cuda (Nvidia implementation) or OpenCL
288
    GpuStyle='OpenCL'
289
    # Parallel distribution can be on Threads or Blocks
290
    ParaStyle='Blocks'
291
    # Coupling factor
292
    J=1.
293
    # Magnetic Field
294
    B=0.
295
    # Size of Lattice
296
    Size=256
297
    # Default Temperatures (start, end, step)
298
    Tmin=0.1
299
    Tmax=5
300
    Tstep=0.1
301
    # Default Number of Iterations
302
    Iterations=Size*Size
303
    # Curves is True to print the curves
304
    Curves=False
305

    
306
    try:
307
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:a:d:g:t:",["coupling=","magneticfield=","size=","iterations=","tempstart=","tempend=","tempstep=","alu=","gpustyle=","parastyle="])
308
    except getopt.GetoptError:
309
        print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -p <Threads/Blocks> -t <ParaStyle>' % sys.argv[0]
310
        sys.exit(2)
311
    
312
 
313
    for opt, arg in opts:
314
        if opt == '-h':
315
            print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -p <Threads/Blocks> -t <ParaStyle>' % sys.argv[0]
316
            sys.exit()
317
        elif opt == '-c':
318
            Curves=True
319
        elif opt in ("-j", "--coupling"):
320
            J = float(arg)
321
        elif opt in ("-b", "--magneticfield"):
322
            B = float(arg)
323
        elif opt in ("-s", "--tempmin"):
324
            Tmin = float(arg)
325
        elif opt in ("-e", "--tempmax"):
326
            Tmax = float(arg)
327
        elif opt in ("-p", "--tempstep"):
328
            Tstep = float(arg)
329
        elif opt in ("-i", "--iterations"):
330
            Iterations = int(arg)
331
        elif opt in ("-z", "--size"):
332
            Size = int(arg)
333
        elif opt in ("-a", "--alu"):
334
            Alu = arg
335
        elif opt in ("-d", "--device"):
336
            Device = int(arg)
337
        elif opt in ("-g", "--gpustyle"):
338
            GpuStyle = arg
339
        elif opt in ("-t", "--parastyle"):
340
            ParaStyle = arg
341

    
342
    if Alu=='CPU' and GpuStyle=='CUDA':
343
        print "Alu can't be CPU for CUDA, set Alu to GPU"
344
        Alu='GPU'
345

    
346
    if ParaStyle not in ('Blocks','Threads','Hybrid'):
347
        print "%s not exists, ParaStyle set as Threads !" % ParaStyle
348
        ParaStyle='Blocks'
349
   
350
    print "Compute unit : %s" % Alu
351
    print "Device Identification : %s" % Device
352
    print "GpuStyle used : %s" % GpuStyle
353
    print "Parallel Style used : %s" % ParaStyle
354
    print "Coupling Factor : %s" % J
355
    print "Magnetic Field :  %s" % B
356
    print "Size of lattice : %s" % Size
357
    print "Iterations : %s" % Iterations
358
    print "Temperature on start : %s" % Tmin
359
    print "Temperature on end : %s" % Tmax
360
    print "Temperature step : %s" % Tstep
361

    
362
    if GpuStyle=='CUDA':
363
        # For PyCUDA import
364
        import pycuda.driver as cuda
365
        import pycuda.gpuarray as gpuarray
366
        import pycuda.autoinit
367
        from pycuda.compiler import SourceModule
368

    
369
    if GpuStyle=='OpenCL':
370
        # For PyOpenCL import
371
        import pyopencl as cl
372
        Id=1
373
        for platform in cl.get_platforms():
374
            for device in platform.get_devices():
375
                #deviceType=cl.device_type.to_string(device.type)
376
                deviceType="xPU"
377
                print "Device #%i of type %s : %s" % (Id,deviceType,device.name)
378
                Id=Id+1
379

    
380
    LAPIMAGE=False
381

    
382
    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype(numpy.int8)
383

    
384
    ImageOutput(sigmaIn,"Ising2D_Serial_%i_Initial" % (Size))
385

    
386
    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep)
387

    
388
    E=[]
389
    M=[]
390

    
391
    for T in Trange:
392
        sigma=numpy.copy(sigmaIn)
393
        if GpuStyle=='CUDA':
394
            duration=MetropolisCuda(sigma,T,J,B,Iterations,ParaStyle,Alu,Device)
395
        else:
396
            duration=MetropolisOpenCL(sigma,T,J,B,Iterations,ParaStyle,Alu,Device)
397
            
398
        E=numpy.append(E,Energy(sigma,J))
399
        M=numpy.append(M,Magnetization(sigma,B))
400
        ImageOutput(sigma,"Ising2D_Serial_%i_%1.1f_Final" % (Size,T))
401

    
402
        print "CPU Time : %f" % (duration)
403
        print "Total Energy at Temperature %f : %f" % (T,E[-1])
404
        print "Total Magnetization at Temperature %f : %f" % (T,M[-1])
405
       
406
    if Curves:
407
        DisplayCurves(Trange,E,M,J,B)
408

    
409
    # Save output
410
    numpy.savez("Ising2D_Serial_%i_%.8i" % (Size,Iterations),(Trange,E,M))
411