#!/usr/bin/env python
#
# Ising2D model on GPU, single work-item (serial-style) implementation
#
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr>

import sys
import numpy
from PIL import Image
from math import exp
from random import random
import time
import getopt
import matplotlib.pyplot as plt
from numpy.random import randint as nprnd

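# Both kernel sources below implement the same single-worker Metropolis
# sweep: the OpenCL C version for PyOpenCL, the CUDA C version for PyCUDA.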
KERNEL_CODE_OPENCL="""

// Marsaglia RNG very simple implementation
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f

__kernel void MainLoopOne(__global char *s,float T,float J,float B,
                          uint sizex,uint sizey,
                          uint iterations,uint seed_w,uint seed_z)
{
   uint z=seed_z;
   uint w=seed_w;

   for (uint i=0;i<iterations;i++) {

      // Pick a random site of the lattice
      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*y];

      // Four neighbours with periodic boundaries; adding the size before
      // subtracting avoids unsigned wrap-around on non power-of-two sizes
      int d=s[x+sizex*((y+1)%sizey)];
      int u=s[x+sizex*((y+sizey-1)%sizey)];
      int l=s[((x+sizex-1)%sizex)+sizex*y];
      int r=s[((x+1)%sizex)+sizex*y];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      // Metropolis rule: flip if energy decreases,
      // otherwise with probability exp(-DeltaE/T)
      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
      s[x+sizex*y] = (char)(factor*p);
   }
   barrier(CLK_GLOBAL_MEM_FENCE);
}
"""

KERNEL_CODE_CUDA="""

// Marsaglia RNG very simple implementation
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f

__global__ void MainLoopOne(char *s,float T,float J,float B,
                            uint sizex,uint sizey,
                            uint iterations,uint seed_w,uint seed_z)
{
   uint z=seed_z;
   uint w=seed_w;

   for (uint i=0;i<iterations;i++) {

      // Pick a random site of the lattice
      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*y];

      // Four neighbours with periodic boundaries; adding the size before
      // subtracting avoids unsigned wrap-around on non power-of-two sizes
      int d=s[x+sizex*((y+1)%sizey)];
      int u=s[x+sizex*((y+sizey-1)%sizey)];
      int l=s[((x+sizex-1)%sizex)+sizex*y];
      int r=s[((x+1)%sizex)+sizex*y];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      // Metropolis rule: flip if energy decreases,
      // otherwise with probability exp(-DeltaE/T)
      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
      s[x+sizex*y] = (char)(factor*p);
   }
   __syncthreads();
}
"""

def ImageOutput(sigma,prefix):
    Max=sigma.max()
    Min=sigma.min()

    # Normalize values to 8-bit integers
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
    image = Image.fromarray(SigmaInt)
    image.save("%s.jpg" % prefix)

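# Pure-Python Metropolis sweep; not invoked from the __main__ block below,
# kept as a CPU reference for the two GPU paths.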
def Metropolis(sigma,T,J,B,iterations):
    start=time.time()

    SizeX,SizeY=sigma.shape

    for p in xrange(0,iterations):
        # Random site coordinates
        X,Y=numpy.random.randint(SizeX),numpy.random.randint(SizeY)

        # Energy change for flipping spin (X,Y), as in the GPU kernels
        DeltaE=2.*sigma[X,Y]*(J*(sigma[X,(Y+1)%SizeY]+
                                 sigma[X,(Y-1)%SizeY]+
                                 sigma[(X-1)%SizeX,Y]+
                                 sigma[(X+1)%SizeX,Y])+B)

        if DeltaE < 0. or random() < exp(-DeltaE/T):
            sigma[X,Y]=-sigma[X,Y]
    duration=time.time()-start
    return(duration)

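# CUDA path: the whole lattice is updated by a single CUDA thread
# (grid=(1,1), block=(1,1,1)); pycuda.autoinit handles device selection.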
def MetropolisCuda(sigma,T,J,B,iterations,ParaStyle,Alu,Device):

    # With PyCUDA autoinit, nothing to do for device selection!

    sigmaCU=cuda.InOut(sigma)

    mod = SourceModule(KERNEL_CODE_CUDA)

    MetropolisCU=mod.get_function("MainLoopOne")

    start = pycuda.driver.Event()
    stop = pycuda.driver.Event()

    SizeX,SizeY=sigma.shape

    start.record()
    start.synchronize()
    MetropolisCU(sigmaCU,
                 numpy.float32(T),
                 numpy.float32(J),
                 numpy.float32(B),
                 numpy.uint32(SizeX),
                 numpy.uint32(SizeY),
                 numpy.uint32(iterations),
                 numpy.uint32(nprnd(2**31-1)),
                 numpy.uint32(nprnd(2**31-1)),
                 grid=(1,1),
                 block=(1,1,1))

    print "%s with %i %s done" % (Alu,1,ParaStyle)

    stop.record()
    stop.synchronize()

    #elapsed = stop.time_since(start)*1e-3
    elapsed = start.time_till(stop)*1e-3

    return(elapsed)

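# OpenCL path: the device is selected by its Id across all platforms and
# the kernel is launched as a single work-item.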
def MetropolisOpenCL(sigma,T,J,B,iterations,ParaStyle,Alu,Device):

    # Initialize the variables with the proper casts

    # Detect a GPU device in the device list
    # for platform in cl.get_platforms():
    #     for device in platform.get_devices():
    #             if cl.device_type.to_string(device.type)=='GPU':
    #                     GPU=device
    #print "GPU detected: ",device.name

    HasGPU=False
    Id=1
    # Primary device selection based on Device Id
    # (Device==0 falls back to the first device found)
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            #deviceType=cl.device_type.to_string(device.type)
            deviceType="xPU"
            if (Id==Device or Device==0) and not HasGPU:
                GPU=device
                print "CPU/GPU selected: ",device.name
                HasGPU=True
            Id=Id+1

    # Create the context and the command queue for its execution
    # ctx = cl.create_some_context()
    ctx = cl.Context([GPU])
    queue = cl.CommandQueue(ctx,
                            properties=cl.command_queue_properties.PROFILING_ENABLE)

    # Get the available flags for the buffers
    mf = cl.mem_flags

    # Mind the cast! It's an int8, i.e. a char in OpenCL!
    # READ_WRITE because the kernel both reads and updates the spins
    sigmaCL = cl.Buffer(ctx, mf.READ_WRITE|mf.COPY_HOST_PTR,hostbuf=sigma)

    MetropolisCL = cl.Program(ctx,KERNEL_CODE_OPENCL).build( \
        options = "-cl-mad-enable -cl-fast-relaxed-math")

    SizeX,SizeY=sigma.shape

    if ParaStyle=='Blocks':
        # Call the OpenCL kernel
        # (1,) is the global work size (a single work-item)
        # None lets the implementation pick the local work size
        CLLaunch=MetropolisCL.MainLoopOne(queue,(1,),None,
                                          sigmaCL,
                                          numpy.float32(T),
                                          numpy.float32(J),
                                          numpy.float32(B),
                                          numpy.uint32(SizeX),
                                          numpy.uint32(SizeY),
                                          numpy.uint32(iterations),
                                          numpy.uint32(nprnd(2**31-1)),
                                          numpy.uint32(nprnd(2**31-1)))
        print "%s with %i %s done" % (Alu,1,ParaStyle)
    else:
        # In OpenCL the global size must here be identical to the local size
        CLLaunch=MetropolisCL.MainLoopOne(queue,(1,),(1,),
                                          sigmaCL,
                                          numpy.float32(T),
                                          numpy.float32(J),
                                          numpy.float32(B),
                                          numpy.uint32(SizeX),
                                          numpy.uint32(SizeY),
                                          numpy.uint32(iterations),
                                          numpy.uint32(nprnd(2**31-1)),
                                          numpy.uint32(nprnd(2**31-1)))
        print "%s with %i %s done" % (Alu,1,ParaStyle)

    CLLaunch.wait()
    cl.enqueue_copy(queue, sigma, sigmaCL).wait()
    elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
    sigmaCL.release()

    return(elapsed)

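# Mean magnetization per site; the M argument is currently unused.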
def Magnetization(sigma,M):
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))

def Energy(sigma,J):
    # Copy and cast
    E=numpy.copy(sigma).astype(numpy.float32)

    # Slice-based evaluation of the local energies
    E[1:-1,1:-1]=-J*E[1:-1,1:-1]*(E[:-2,1:-1]+E[2:,1:-1]+
                                  E[1:-1,:-2]+E[1:-1,2:])

    # Clear the border properly
    E[:,0]=0
    E[:,-1]=0
    E[0,:]=0
    E[-1,:]=0

    Energy=numpy.sum(E)

    return(Energy/(E.shape[0]*E.shape[1]*1.0))

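# Plot energy versus temperature; M, J and B are accepted but only the
# energy curve is drawn.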
def DisplayCurves(T,E,M,J,B):

    plt.xlabel("Temperature")
    plt.ylabel("Energy")

    Experience,=plt.plot(T,E,label="Energy")

    plt.legend()
    plt.show()

if __name__=='__main__':

    # Set default values
    # Alu can be CPU or GPU
    Alu='CPU'
    # Id of GPU : 0 will use the first one found!
    Device=0
    # GPU style can be CUDA (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Parallel distribution can be on Threads or Blocks
    ParaStyle='Blocks'
    # Coupling factor
    J=1.
    # Magnetic field
    B=0.
    # Size of the lattice
    Size=256
    # Default temperatures (start, end, step)
    Tmin=0.1
    Tmax=5.
    Tstep=0.1
    # Default number of iterations
    Iterations=Size*Size
    # Curves is True to plot the curves
    Curves=False

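    # Parse the command line; -h prints the usage string and exits.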
    try:
        opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:a:d:g:t:",["coupling=","magneticfield=","size=","iterations=","tempstart=","tempend=","tempstep=","alu=","device=","gpustyle=","parastyle="])
    except getopt.GetoptError:
        print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <Temperature Step> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -t <Threads/Blocks>' % sys.argv[0]
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print '%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <Temperature Step> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -t <Threads/Blocks>' % sys.argv[0]
            sys.exit()
        elif opt == '-c':
            Curves=True
        elif opt in ("-j", "--coupling"):
            J = float(arg)
        elif opt in ("-b", "--magneticfield"):
            B = float(arg)
        elif opt in ("-s", "--tempstart"):
            Tmin = float(arg)
        elif opt in ("-e", "--tempend"):
            Tmax = float(arg)
        elif opt in ("-p", "--tempstep"):
            Tstep = float(arg)
        elif opt in ("-i", "--iterations"):
            Iterations = int(arg)
        elif opt in ("-z", "--size"):
            Size = int(arg)
        elif opt in ("-a", "--alu"):
            Alu = arg
        elif opt in ("-d", "--device"):
            Device = int(arg)
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-t", "--parastyle"):
            ParaStyle = arg

    if Alu=='CPU' and GpuStyle=='CUDA':
        print "Alu can't be CPU for CUDA, setting Alu to GPU"
        Alu='GPU'

    if ParaStyle not in ('Blocks','Threads','Hybrid'):
        print "%s does not exist, ParaStyle set to Blocks!" % ParaStyle
        ParaStyle='Blocks'

    print "Compute unit : %s" % Alu
    print "Device Identification : %s" % Device
    print "GpuStyle used : %s" % GpuStyle
    print "Parallel Style used : %s" % ParaStyle
    print "Coupling Factor : %s" % J
    print "Magnetic Field : %s" % B
    print "Size of lattice : %s" % Size
    print "Iterations : %s" % Iterations
    print "Temperature on start : %s" % Tmin
    print "Temperature on end : %s" % Tmax
    print "Temperature step : %s" % Tstep

    if GpuStyle=='CUDA':
        # For PyCUDA import
        import pycuda.driver as cuda
        import pycuda.gpuarray as gpuarray
        import pycuda.autoinit
        from pycuda.compiler import SourceModule

    if GpuStyle=='OpenCL':
        # For PyOpenCL import
        import pyopencl as cl
        Id=1
        for platform in cl.get_platforms():
            for device in platform.get_devices():
                #deviceType=cl.device_type.to_string(device.type)
                deviceType="xPU"
                print "Device #%i of type %s : %s" % (Id,deviceType,device.name)
                Id=Id+1

    LAPIMAGE=False # unused in this version

    # Random initial lattice of +1/-1 spins
    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype(numpy.int8)

    ImageOutput(sigmaIn,"Ising2D_Serial_%i_Initial" % (Size))

    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep)

    E=[]
    M=[]

    for T in Trange:
        sigma=numpy.copy(sigmaIn)
        if GpuStyle=='CUDA':
            duration=MetropolisCuda(sigma,T,J,B,Iterations,ParaStyle,Alu,Device)
        else:
            duration=MetropolisOpenCL(sigma,T,J,B,Iterations,ParaStyle,Alu,Device)

        E=numpy.append(E,Energy(sigma,J))
        M=numpy.append(M,Magnetization(sigma,B))
        ImageOutput(sigma,"Ising2D_Serial_%i_%1.1f_Final" % (Size,T))

        print "Compute Time : %f" % (duration)
        print "Total Energy at Temperature %f : %f" % (T,E[-1])
        print "Total Magnetization at Temperature %f : %f" % (T,M[-1])

    if Curves:
        DisplayCurves(Trange,E,M,J,B)

    # Save output
    numpy.savez("Ising2D_Serial_%i_%.8i" % (Size,Iterations),(Trange,E,M))
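
# Example invocation (assumes a working PyOpenCL install; available device
# Ids are listed at startup):
#   python Ising2D-GPU-One.py -g OpenCL -d 1 -z 256 -i 65536 -s 0.1 -e 5 -p 0.1 -c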