root / Ising / GPU / Ising2D-GPU-One.py @ 94

#!/usr/bin/env python
#
# Ising2D model in serial mode
#
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr>

import sys
import numpy
from PIL import Image
from math import exp
from random import random
import time
import getopt
import matplotlib.pyplot as plt
from numpy.random import randint as nprnd
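
# Note: both kernels below run the whole Metropolis loop inside a single
# work-item/thread (grid=(1,1), block=(1,1,1) for CUDA, a global size of
# (1,) for OpenCL), hence the "One" in the file name: the device executes
# the simulation serially and the host only supplies seeds and the lattice.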
KERNEL_CODE_OPENCL="""

// Marsaglia RNG very simple implementation
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f
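
// The scale factor is ~2^-32: it maps a 32-bit unsigned integer to a
// float in [0,1). Only MWC/MWCfp are actually used by this kernel;
// SHR3, CONG, KISS and KISSfp are defined for completeness.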

__kernel void MainLoopOne(__global char *s,float T,float J,float B,
                          uint sizex,uint sizey,
                          uint iterations,uint seed_w,uint seed_z)

{
   uint z=seed_z;
   uint w=seed_w;

   for (uint i=0;i<iterations;i++) {

      // Pick a random site of the lattice
      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*y];

      // Fetch the 4 nearest neighbours with periodic boundary conditions;
      // adding sizex/sizey before the decrement keeps the unsigned
      // arithmetic from wrapping when x or y is 0
      int d=s[x+sizex*((y+1)%sizey)];
      int u=s[x+sizex*((y+sizey-1)%sizey)];
      int l=s[((x+sizex-1)%sizex)+sizex*y];
      int r=s[((x+1)%sizex)+sizex*y];

      // Energy cost of flipping spin p: DeltaE=2*p*(J*sum(neighbours)+B)
      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      // Metropolis criterion: always accept if DeltaE<0,
      // otherwise accept with probability exp(-DeltaE/T)
      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
      s[x+sizex*y] = (char)(factor*p);
   }
   // No-op here: the kernel is launched with a single work-item
   barrier(CLK_GLOBAL_MEM_FENCE);

}
"""

KERNEL_CODE_CUDA="""

// Marsaglia RNG very simple implementation
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f
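
// Same Marsaglia RNG macros as in the OpenCL kernel above; see the note there.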

__global__ void MainLoopOne(char *s,float T,float J,float B,
                            uint sizex,uint sizey,
                            uint iterations,uint seed_w,uint seed_z)

{
   uint z=seed_z;
   uint w=seed_w;

   for (uint i=0;i<iterations;i++) {

      // Pick a random site of the lattice
      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*y];

      // Fetch the 4 nearest neighbours with periodic boundary conditions
      // (same unsigned-wrap protection as in the OpenCL kernel)
      int d=s[x+sizex*((y+1)%sizey)];
      int u=s[x+sizex*((y+sizey-1)%sizey)];
      int l=s[((x+sizex-1)%sizex)+sizex*y];
      int r=s[((x+1)%sizex)+sizex*y];

      // Energy cost of flipping spin p
      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      // Metropolis criterion
      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
      s[x+sizex*y] = (char)(factor*p);
   }
   // No-op here: the kernel is launched with a single thread
   __syncthreads();

}
"""

def ImageOutput(sigma,prefix):
    Max=sigma.max()
    Min=sigma.min()

    # Normalize values as 8-bit integers (assumes Max>Min,
    # i.e. both spin values are present in the lattice)
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
    image = Image.fromarray(SigmaInt)
    image.save("%s.jpg" % prefix)

def Metropolis(sigma,T,J,B,iterations):
    start=time.time()

    SizeX,SizeY=sigma.shape

    for p in xrange(0,iterations):
        # Random site coordinates
        X,Y=numpy.random.randint(SizeX),numpy.random.randint(SizeY)

        # Same energy cost as in the kernels: DeltaE=2*s*(J*sum(neighbours)+B)
        DeltaE=2.*sigma[X,Y]*(J*(sigma[X,(Y+1)%SizeY]+
                                 sigma[X,(Y-1)%SizeY]+
                                 sigma[(X-1)%SizeX,Y]+
                                 sigma[(X+1)%SizeX,Y])+B)

        if DeltaE < 0. or random() < exp(-DeltaE/T):
            sigma[X,Y]=-sigma[X,Y]
    duration=time.time()-start
    return(duration)
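
# Metropolis() above is a pure-Python reference implementation; the main
# flow below only calls the CUDA/OpenCL variants. A minimal sketch of a
# direct call (hypothetical, with a small 32x32 lattice and T=2.0):
#   lattice=numpy.where(numpy.random.randn(32,32)>0,1,-1).astype(numpy.int8)
#   print Metropolis(lattice,2.0,1.,0.,32*32)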

def MetropolisCuda(sigma,T,J,B,iterations,ParaStyle,Alu,Device):

    # With PyCUDA autoinit, nothing else to do!

    # cuda.InOut copies sigma to the device and back after the call
    sigmaCU=cuda.InOut(sigma)

    mod = SourceModule(KERNEL_CODE_CUDA)

    MetropolisCU=mod.get_function("MainLoopOne")

    start = pycuda.driver.Event()
    stop = pycuda.driver.Event()

    SizeX,SizeY=sigma.shape

    start.record()
    start.synchronize()
    # One block of one thread runs the whole iteration loop
    MetropolisCU(sigmaCU,
                 numpy.float32(T),
                 numpy.float32(J),
                 numpy.float32(B),
                 numpy.uint32(SizeX),
                 numpy.uint32(SizeY),
                 numpy.uint32(iterations),
                 numpy.uint32(nprnd(2**31-1)),
                 numpy.uint32(nprnd(2**31-1)),
                 grid=(1,1),
                 block=(1,1,1))

    print "%s with %i %s done" % (Alu,1,ParaStyle)

    stop.record()
    stop.synchronize()

    # Event timing is in milliseconds, hence the 1e-3 factor
    #elapsed = stop.time_since(start)*1e-3
    elapsed = start.time_till(stop)*1e-3

    return(elapsed)


def MetropolisOpenCL(sigma,T,J,B,iterations,ParaStyle,Alu,Device):

    # Initialize variables with proper casts

    # Detect a GPU device in the device list
    # for platform in cl.get_platforms():
    #     for device in platform.get_devices():
    #             if cl.device_type.to_string(device.type)=='GPU':
    #                     GPU=device
    #print "GPU detected: ",device.name

    HasGPU=False
    Id=1
    # Primary device selection based on Device Id
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            deviceType=cl.device_type.to_string(device.type)
            if Id==Device and not HasGPU:
                GPU=device
                print "CPU/GPU selected: ",device.name
                HasGPU=True
            Id=Id+1
    # Default device selection based on ALU type
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            deviceType=cl.device_type.to_string(device.type)
            if deviceType=="GPU" and Alu=="GPU" and not HasGPU:
                GPU=device
                print "GPU selected: ",device.name
                HasGPU=True
            if deviceType=="CPU" and Alu=="CPU" and not HasGPU:
                GPU=device
                print "CPU selected: ",device.name
                HasGPU=True

    # Create the context and the command queue for execution;
    # PROFILING_ENABLE is needed for the event timing used below
    # ctx = cl.create_some_context()
    ctx = cl.Context([GPU])
    queue = cl.CommandQueue(ctx,
                            properties=cl.command_queue_properties.PROFILING_ENABLE)

    # Get the available flags for the buffers
    mf = cl.mem_flags

    # Mind the cast! It's an int8, i.e. a char in OpenCL!
    # READ_WRITE because the kernel both reads and flips the spins
    sigmaCL = cl.Buffer(ctx, mf.READ_WRITE|mf.COPY_HOST_PTR,hostbuf=sigma)

    MetropolisCL = cl.Program(ctx,KERNEL_CODE_OPENCL).build( \
        options = "-cl-mad-enable -cl-fast-relaxed-math")

    SizeX,SizeY=sigma.shape

    if ParaStyle=='Blocks':
        # Call the OpenCL kernel
        # (1,) is the global work size (a single work-item)
        # None lets the implementation choose the local work size
        CLLaunch=MetropolisCL.MainLoopOne(queue,(1,),None,
                                          sigmaCL,
                                          numpy.float32(T),
                                          numpy.float32(J),
                                          numpy.float32(B),
                                          numpy.uint32(SizeX),
                                          numpy.uint32(SizeY),
                                          numpy.uint32(iterations),
                                          numpy.uint32(nprnd(2**31-1)),
                                          numpy.uint32(nprnd(2**31-1)))
        print "%s with %i %s done" % (Alu,1,ParaStyle)
    else:
        # In OpenCL, the global size must here match the local size: (1,),(1,)
        CLLaunch=MetropolisCL.MainLoopOne(queue,(1,),(1,),
                                          sigmaCL,
                                          numpy.float32(T),
                                          numpy.float32(J),
                                          numpy.float32(B),
                                          numpy.uint32(SizeX),
                                          numpy.uint32(SizeY),
                                          numpy.uint32(iterations),
                                          numpy.uint32(nprnd(2**31-1)),
                                          numpy.uint32(nprnd(2**31-1)))
        print "%s with %i %s done" % (Alu,1,ParaStyle)

    CLLaunch.wait()
    cl.enqueue_copy(queue, sigma, sigmaCL).wait()
    # Profiling counters are in nanoseconds, hence the 1e-9 factor
    elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
    sigmaCL.release()

    return(elapsed)

def Magnetization(sigma,M):
    # Mean spin <s>; the second argument is unused
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))

def Energy(sigma,J):
    # Copy and cast
    E=numpy.copy(sigma).astype(numpy.float32)

    # Slice-based evaluation of -J*s*sum(neighbours) on the interior
    E[1:-1,1:-1]=-J*E[1:-1,1:-1]*(E[:-2,1:-1]+E[2:,1:-1]+
                                  E[1:-1,:-2]+E[1:-1,2:])

    # Zero out the border (it still holds raw spins)
    E[:,0]=0
    E[:,-1]=0
    E[0,:]=0
    E[-1,:]=0

    Energy=numpy.sum(E)

    return(Energy/(E.shape[0]*E.shape[1]*1.0))

def DisplayCurves(T,E,M,J,B):

    plt.xlabel("Temperature")
    plt.ylabel("Energy")

    Experience,=plt.plot(T,E,label="Energy")

    plt.legend()
    plt.show()


if __name__=='__main__':

    # Set default values
    # Alu can be CPU or GPU
    Alu='CPU'
    # Id of GPU : 0 will use the first one found!
    Device=0
    # GPU style can be CUDA (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Parallel distribution can be on Threads or Blocks
    ParaStyle='Blocks'
    # Coupling factor
    J=1.
    # Magnetic field
    B=0.
    # Size of lattice
    Size=256
    # Default temperatures (start, end, step)
    Tmin=0.1
    Tmax=5
    Tstep=0.1
    # Default number of iterations
    Iterations=Size*Size
    # Curves is True to print the curves
    Curves=False

    try:
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:a:d:g:t:",["coupling=","magneticfield=","size=","iterations=","tempmin=","tempmax=","tempstep=","alu=","device=","gpustyle=","parastyle="])
    except getopt.GetoptError:
        print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -t <Threads/Blocks>' % sys.argv[0]
        sys.exit(2)


    for opt, arg in opts:
        if opt == '-h':
            print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -t <Threads/Blocks>' % sys.argv[0]
            sys.exit()
        elif opt == '-c':
            Curves=True
        elif opt in ("-j", "--coupling"):
            J = float(arg)
        elif opt in ("-b", "--magneticfield"):
            B = float(arg)
        elif opt in ("-s", "--tempmin"):
            Tmin = float(arg)
        elif opt in ("-e", "--tempmax"):
            Tmax = float(arg)
        elif opt in ("-p", "--tempstep"):
            Tstep = float(arg)
        elif opt in ("-i", "--iterations"):
            Iterations = int(arg)
        elif opt in ("-z", "--size"):
            Size = int(arg)
        elif opt in ("-a", "--alu"):
            Alu = arg
        elif opt in ("-d", "--device"):
            Device = int(arg)
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-t", "--parastyle"):
            ParaStyle = arg
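
    # Example invocation (assuming a working PyOpenCL install):
    #   python Ising2D-GPU-One.py -g OpenCL -a GPU -z 128 -i 65536 -s 0.1 -e 5 -p 0.1 -c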

    if Alu=='CPU' and GpuStyle=='CUDA':
        print "Alu can't be CPU for CUDA, set Alu to GPU"
        Alu='GPU'

    if ParaStyle not in ('Blocks','Threads','Hybrid'):
        print "%s does not exist, ParaStyle set to Blocks !" % ParaStyle
        ParaStyle='Blocks'

    print "Compute unit : %s" % Alu
    print "Device Identification : %s" % Device
    print "GpuStyle used : %s" % GpuStyle
    print "Parallel Style used : %s" % ParaStyle
    print "Coupling Factor : %s" % J
    print "Magnetic Field : %s" % B
    print "Size of lattice : %s" % Size
    print "Iterations : %s" % Iterations
    print "Temperature on start : %s" % Tmin
    print "Temperature on end : %s" % Tmax
    print "Temperature step : %s" % Tstep

    if GpuStyle=='CUDA':
        # For PyCUDA import
        import pycuda.driver as cuda
        import pycuda.gpuarray as gpuarray
        import pycuda.autoinit
        from pycuda.compiler import SourceModule

    if GpuStyle=='OpenCL':
        # For PyOpenCL import
        import pyopencl as cl
        Id=1
        for platform in cl.get_platforms():
            for device in platform.get_devices():
                deviceType=cl.device_type.to_string(device.type)
                print "Device #%i of type %s : %s" % (Id,deviceType,device.name)
                Id=Id+1

    LAPIMAGE=False

    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype(numpy.int8)

    ImageOutput(sigmaIn,"Ising2D_Serial_%i_Initial" % (Size))

    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep)

    E=[]
    M=[]

    for T in Trange:
        sigma=numpy.copy(sigmaIn)
        if GpuStyle=='CUDA':
            duration=MetropolisCuda(sigma,T,J,B,Iterations,ParaStyle,Alu,Device)
        else:
            duration=MetropolisOpenCL(sigma,T,J,B,Iterations,ParaStyle,Alu,Device)

        E=numpy.append(E,Energy(sigma,J))
        M=numpy.append(M,Magnetization(sigma,B))
        ImageOutput(sigma,"Ising2D_Serial_%i_%1.1f_Final" % (Size,T))

        print "Compute Time : %f" % (duration)
        print "Total Energy at Temperature %f : %f" % (T,E[-1])
        print "Total Magnetization at Temperature %f : %f" % (T,M[-1])

    if Curves:
        DisplayCurves(Trange,E,M,J,B)

    # Save output
    numpy.savez("Ising2D_Serial_%i_%.8i" % (Size,Iterations),(Trange,E,M))
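
    # The saved archive holds a single (3,N) array; a minimal sketch for
    # reloading it later (file name assumes the defaults Size=256,
    # Iterations=65536):
    #   data=numpy.load("Ising2D_Serial_256_00065536.npz")
    #   Trange,E,M=data['arr_0']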