#!/usr/bin/env python3
#
# Ising2D model in serial mode (a single work-item on the GPU)
#
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr>

import sys
import numpy
from PIL import Image
from math import exp
from random import random
import time
import getopt
import matplotlib.pyplot as plt
from numpy.random import randint as nprnd

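# The kernels below run the whole Metropolis loop inside one single
# work-item/thread: each spin flip depends on the previous lattice
# state, so this "One" version simply uses one GPU compute element as
# a serial processor. Random numbers come from Marsaglia's KISS family
# of generators (MWC, SHR3, CONG); the constant 2.328306435454494e-10f
# is roughly 1/2^32 and maps a 32-bit unsigned integer onto [0,1).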
KERNEL_CODE_OPENCL="""

// Marsaglia RNG very simple implementation
#define znew ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC (znew+wnew)
#define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG (jcong=69069*jcong+1234567)
#define KISS ((MWC^CONG)+SHR3)

#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f

__kernel void MainLoopOne(__global char *s,float T,float J,float B,
                          uint sizex,uint sizey,
                          uint iterations,uint seed_w,uint seed_z)
{
   uint z=seed_z;
   uint w=seed_w;

   for (uint i=0;i<iterations;i++) {

      // Draw a random site
      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*y];

      // Periodic boundaries: add the lattice size before subtracting 1
      // so the unsigned arithmetic cannot wrap around
      int d=s[x+sizex*((y+1)%sizey)];
      int u=s[x+sizex*((y+sizey-1)%sizey)];
      int l=s[((x+sizex-1)%sizex)+sizex*y];
      int r=s[((x+1)%sizex)+sizex*y];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      // Metropolis: accept the flip if it lowers the energy, or with
      // probability exp(-DeltaE/T) otherwise
      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
      s[x+sizex*y]=(char)(factor*p);
   }
   barrier(CLK_GLOBAL_MEM_FENCE);
}
"""

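# The CUDA kernel implements the same algorithm as the OpenCL one;
# only the qualifiers differ (__global__ entry point, no __global on
# the pointer, __syncthreads() instead of barrier()). Both sources are
# compiled at run time, by pycuda.compiler.SourceModule and
# cl.Program().build() respectively.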
KERNEL_CODE_CUDA="""

// Marsaglia RNG very simple implementation
#define znew ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC (znew+wnew)
#define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG (jcong=69069*jcong+1234567)
#define KISS ((MWC^CONG)+SHR3)

#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f

__global__ void MainLoopOne(char *s,float T,float J,float B,
                            uint sizex,uint sizey,
                            uint iterations,uint seed_w,uint seed_z)
{
   uint z=seed_z;
   uint w=seed_w;

   for (uint i=0;i<iterations;i++) {

      // Draw a random site
      uint x=(uint)(MWC%sizex);
      uint y=(uint)(MWC%sizey);

      int p=s[x+sizex*y];

      // Periodic boundaries, protected against unsigned wrap-around
      int d=s[x+sizex*((y+1)%sizey)];
      int u=s[x+sizex*((y+sizey-1)%sizey)];
      int l=s[((x+sizex-1)%sizex)+sizex*y];
      int r=s[((x+1)%sizex)+sizex*y];

      float DeltaE=2.0f*p*(J*(u+d+l+r)+B);

      // Metropolis acceptance test
      int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
      s[x+sizex*y]=(char)(factor*p);
   }
   __syncthreads();
}
"""

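# Spins are stored as int8 (-1/+1). ImageOutput linearly rescales the
# lattice to 0-255 and dumps it as a grayscale JPEG; the scaling
# assumes the lattice holds at least two distinct values.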
def ImageOutput(sigma,prefix):
    Max=sigma.max()
    Min=sigma.min()

    # Normalize values to 8-bit integers
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
    image=Image.fromarray(SigmaInt)
    image.save("%s.jpg" % prefix)

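# Reference CPU implementation. For the Ising Hamiltonian
# E = -J*sum_<ij>(s_i*s_j) - B*sum_i(s_i), flipping one spin s changes
# the energy by DeltaE = 2*s*(J*sum(neighbours)+B); this is the
# expression used below and in both GPU kernels.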
def Metropolis(sigma,T,J,B,iterations):
    start=time.time()

    SizeX,SizeY=sigma.shape

    for p in range(0,iterations):
        # Random site coordinates
        X,Y=numpy.random.randint(SizeX),numpy.random.randint(SizeY)

        DeltaE=2.*sigma[X,Y]*(J*(sigma[X,(Y+1)%SizeY]+
                                 sigma[X,(Y-1)%SizeY]+
                                 sigma[(X-1)%SizeX,Y]+
                                 sigma[(X+1)%SizeX,Y])+B)

        if DeltaE < 0. or random() < exp(-DeltaE/T):
            sigma[X,Y]=-sigma[X,Y]
    duration=time.time()-start
    return(duration)

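# CUDA path. cuda.InOut() copies sigma to the device before the launch
# and back afterwards; timing uses CUDA events, which report elapsed
# time in milliseconds, hence the 1e-3 factor to return seconds.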
def MetropolisCuda(sigma,T,J,B,iterations,ParaStyle,Alu,Device):

    # With PyCUDA autoinit, nothing else to initialize!

    sigmaCU=cuda.InOut(sigma)

    mod=SourceModule(KERNEL_CODE_CUDA)

    MetropolisCU=mod.get_function("MainLoopOne")

    start=cuda.Event()
    stop=cuda.Event()

    SizeX,SizeY=sigma.shape

    start.record()
    start.synchronize()
    MetropolisCU(sigmaCU,
                 numpy.float32(T),
                 numpy.float32(J),
                 numpy.float32(B),
                 numpy.uint32(SizeX),
                 numpy.uint32(SizeY),
                 numpy.uint32(iterations),
                 numpy.uint32(nprnd(2**31-1)),
                 numpy.uint32(nprnd(2**31-1)),
                 grid=(1,1),
                 block=(1,1,1))

    print("%s with %i %s done" % (Alu,1,ParaStyle))

    stop.record()
    stop.synchronize()

    #elapsed = stop.time_since(start)*1e-3
    elapsed=start.time_till(stop)*1e-3

    return(elapsed)

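# OpenCL path. The device is selected either by its Id (as listed at
# start-up) or, failing that, by matching the requested ALU type
# (CPU/GPU). The command queue is created with profiling enabled so
# the kernel execution time can be read back from the launch event.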
def MetropolisOpenCL(sigma,T,J,B,iterations,ParaStyle,Alu,Device):

    # Initialize the variables with the proper casts

    # Old manual way: detect a GPU device in the device list
    # for platform in cl.get_platforms():
    #     for device in platform.get_devices():
    #         if cl.device_type.to_string(device.type)=='GPU':
    #             GPU=device
    #             print("GPU detected: %s" % device.name)

    HasGPU=False
    Id=1
    # Primary device selection based on device Id
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            deviceType=cl.device_type.to_string(device.type)
            if Id==Device and not HasGPU:
                GPU=device
                print("CPU/GPU selected: %s" % device.name)
                HasGPU=True
            Id=Id+1
    # Default device selection based on ALU type
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            deviceType=cl.device_type.to_string(device.type)
            if deviceType=="GPU" and Alu=="GPU" and not HasGPU:
                GPU=device
                print("GPU selected: %s" % device.name)
                HasGPU=True
            if deviceType=="CPU" and Alu=="CPU" and not HasGPU:
                GPU=device
                print("CPU selected: %s" % device.name)
                HasGPU=True

    # Create the context and the queue for execution
    # ctx = cl.create_some_context()
    ctx=cl.Context([GPU])
    queue=cl.CommandQueue(ctx,
                          properties=cl.command_queue_properties.PROFILING_ENABLE)

    # Get the available flags for the buffers
    mf=cl.mem_flags

    # Mind the cast! sigma is an int8, i.e. a char in OpenCL.
    # The kernel both reads and writes the lattice, so READ_WRITE is required.
    sigmaCL=cl.Buffer(ctx,mf.READ_WRITE|mf.COPY_HOST_PTR,hostbuf=sigma)

    MetropolisCL=cl.Program(ctx,KERNEL_CODE_OPENCL).build(
        options="-cl-mad-enable -cl-fast-relaxed-math")

    SizeX,SizeY=sigma.shape

    if ParaStyle=='Blocks':
        # Call the OpenCL kernel:
        # (1,) is the global work size (a single work-item)
        # None lets the implementation choose the local work size
        CLLaunch=MetropolisCL.MainLoopOne(queue,(1,),None,
                                          sigmaCL,
                                          numpy.float32(T),
                                          numpy.float32(J),
                                          numpy.float32(B),
                                          numpy.uint32(SizeX),
                                          numpy.uint32(SizeY),
                                          numpy.uint32(iterations),
                                          numpy.uint32(nprnd(2**31-1)),
                                          numpy.uint32(nprnd(2**31-1)))
        print("%s with %i %s done" % (Alu,1,ParaStyle))
    else:
        # In OpenCL, the global size must here match the local size
        CLLaunch=MetropolisCL.MainLoopOne(queue,(1,),(1,),
                                          sigmaCL,
                                          numpy.float32(T),
                                          numpy.float32(J),
                                          numpy.float32(B),
                                          numpy.uint32(SizeX),
                                          numpy.uint32(SizeY),
                                          numpy.uint32(iterations),
                                          numpy.uint32(nprnd(2**31-1)),
                                          numpy.uint32(nprnd(2**31-1)))
        print("%s with %i %s done" % (Alu,1,ParaStyle))

    CLLaunch.wait()
    cl.enqueue_copy(queue,sigma,sigmaCL).wait()
    # Profiling counters are in nanoseconds
    elapsed=1e-9*(CLLaunch.profile.end-CLLaunch.profile.start)
    sigmaCL.release()

    return(elapsed)

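# Observables. Magnetization is the mean spin. Energy is evaluated by
# slicing on the interior of the lattice only (the border is zeroed),
# and each interior bond is counted twice since no 1/2 factor is
# applied; the result is a rescaled per-site value, suitable for
# plotting against temperature.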
def Magnetization(sigma):
    return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0))

def Energy(sigma,J):
    # Copy and cast
    E=numpy.copy(sigma).astype(numpy.float32)

    # Slice-based evaluation
    E[1:-1,1:-1]=-J*E[1:-1,1:-1]*(E[:-2,1:-1]+E[2:,1:-1]+
                                  E[1:-1,:-2]+E[1:-1,2:])

    # Clean up the border
    E[:,0]=0
    E[:,-1]=0
    E[0,:]=0
    E[-1,:]=0

    Energy=numpy.sum(E)

    return(Energy/(E.shape[0]*E.shape[1]*1.0))

def DisplayCurves(T,E,M,J,B):

    plt.xlabel("Temperature")
    plt.ylabel("Energy & Magnetization")

    plt.plot(T,E,label="Energy")
    plt.plot(T,M,label="Magnetization")

    plt.legend()
    plt.show()

if __name__=='__main__':

    # Set default values
    # Alu can be CPU or GPU
    Alu='CPU'
    # Id of the device: 0 will use the first one found!
    Device=0
    # GPU style can be CUDA (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Parallel distribution can be on Threads or Blocks
    ParaStyle='Blocks'
    # Coupling factor
    J=1.
    # Magnetic field
    B=0.
    # Size of the lattice
    Size=256
    # Default temperatures (start, end, step)
    Tmin=0.1
    Tmax=5
    Tstep=0.1
    # Default number of iterations
    Iterations=Size*Size
    # Curves is True to plot the curves
    Curves=False

    HelpMessage='%s -j <Coupling Factor> -b <Magnetic Field> -z <Size of Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves) -a <CPU/GPU> -d <DeviceId> -g <CUDA/OpenCL> -t <Threads/Blocks>' % sys.argv[0]

    try:
        opts,args=getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:a:d:g:t:",["coupling=","magneticfield=","size=","iterations=","tempmin=","tempmax=","tempstep=","alu=","device=","gpustyle=","parastyle="])
    except getopt.GetoptError:
        print(HelpMessage)
        sys.exit(2)

    for opt,arg in opts:
        if opt == '-h':
            print(HelpMessage)
            sys.exit()
        elif opt == '-c':
            Curves=True
        elif opt in ("-j","--coupling"):
            J=float(arg)
        elif opt in ("-b","--magneticfield"):
            B=float(arg)
        elif opt in ("-s","--tempmin"):
            Tmin=float(arg)
        elif opt in ("-e","--tempmax"):
            Tmax=float(arg)
        elif opt in ("-p","--tempstep"):
            Tstep=float(arg)
        elif opt in ("-i","--iterations"):
            Iterations=int(arg)
        elif opt in ("-z","--size"):
            Size=int(arg)
        elif opt in ("-a","--alu"):
            Alu=arg
        elif opt in ("-d","--device"):
            Device=int(arg)
        elif opt in ("-g","--gpustyle"):
            GpuStyle=arg
        elif opt in ("-t","--parastyle"):
            ParaStyle=arg

    if Alu=='CPU' and GpuStyle=='CUDA':
        print("Alu can't be CPU for CUDA, set Alu to GPU")
        Alu='GPU'

    if ParaStyle not in ('Blocks','Threads','Hybrid'):
        print("%s does not exist, ParaStyle set to Blocks!" % ParaStyle)
        ParaStyle='Blocks'

    print("Compute unit : %s" % Alu)
    print("Device Identification : %s" % Device)
    print("GpuStyle used : %s" % GpuStyle)
    print("Parallel Style used : %s" % ParaStyle)
    print("Coupling Factor : %s" % J)
    print("Magnetic Field : %s" % B)
    print("Size of lattice : %s" % Size)
    print("Iterations : %s" % Iterations)
    print("Temperature on start : %s" % Tmin)
    print("Temperature on end : %s" % Tmax)
    print("Temperature step : %s" % Tstep)

    if GpuStyle=='CUDA':
        # PyCUDA imports
        import pycuda.driver as cuda
        import pycuda.gpuarray as gpuarray
        import pycuda.autoinit
        from pycuda.compiler import SourceModule

    if GpuStyle=='OpenCL':
        # PyOpenCL import
        import pyopencl as cl
        Id=1
        for platform in cl.get_platforms():
            for device in platform.get_devices():
                deviceType=cl.device_type.to_string(device.type)
                print("Device #%i of type %s : %s" % (Id,deviceType,device.name))
                Id=Id+1

    LAPIMAGE=False

    # Random initial lattice of -1/+1 spins
    sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype(numpy.int8)

    ImageOutput(sigmaIn,"Ising2D_Serial_%i_Initial" % (Size))

    Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep)

    E=[]
    M=[]

    for T in Trange:
        sigma=numpy.copy(sigmaIn)
        if GpuStyle=='CUDA':
            duration=MetropolisCuda(sigma,T,J,B,Iterations,ParaStyle,Alu,Device)
        else:
            duration=MetropolisOpenCL(sigma,T,J,B,Iterations,ParaStyle,Alu,Device)

        E=numpy.append(E,Energy(sigma,J))
        M=numpy.append(M,Magnetization(sigma))
        ImageOutput(sigma,"Ising2D_Serial_%i_%1.1f_Final" % (Size,T))

        print("Compute Time : %f" % (duration))
        print("Total Energy at Temperature %f : %f" % (T,E[-1]))
        print("Total Magnetization at Temperature %f : %f" % (T,M[-1]))

    if Curves:
        DisplayCurves(Trange,E,M,J,B)

    # Save output
    numpy.savez("Ising2D_Serial_%i_%.8i" % (Size,Iterations),(Trange,E,M))
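# Illustrative invocation: scan T from 0.1 to 5.0 in steps of 0.1 on
# the first OpenCL GPU, with one Metropolis trial per site of a
# 256x256 lattice, and plot the curves at the end:
#
#   python Ising2D-GPU-One.py -g OpenCL -a GPU -z 256 -i 65536 \
#          -s 0.1 -e 5.0 -p 0.1 -c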