root / Ising / GPU / Ising2D-GPU-Global.py @ 94
Historique | Voir | Annoter | Télécharger (8,86 ko)
1 |
#!/usr/bin/env python
|
---|---|
2 |
#
|
3 |
# Ising2D model using PyOpenCL
|
4 |
#
|
5 |
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr>
|
6 |
#
|
7 |
# Thanks to Andreas Klockner for PyOpenCL:
|
8 |
# http://mathema.tician.de/software/pyopencl
|
9 |
#
|
10 |
# Interesting links:
|
11 |
# http://viennacl.sourceforge.net/viennacl-documentation.html
|
12 |
# http://enja.org/2011/02/22/adventures-in-pyopencl-part-1-getting-started-with-python/
|
13 |
|
14 |
import pyopencl as cl |
15 |
import numpy |
16 |
from numpy.random import randint as nprnd |
17 |
from PIL import Image |
18 |
import time |
19 |
import sys |
20 |
import getopt |
21 |
import matplotlib.pyplot as plt |
22 |
|
23 |
# 2097152 on HD5850 (with 1GB of RAM)
|
24 |
# 262144 on GT218
|
25 |
#STEP=262144
|
26 |
#STEP=1048576
|
27 |
#STEP=2097152
|
28 |
#STEP=4194304
|
29 |
#STEP=8388608
|
30 |
STEP=16777216
|
31 |
|
32 |
KERNEL_CODE="""
|
33 |
|
34 |
// Marsaglia RNG very simple implementation
|
35 |
|
36 |
#define znew (z=36969*(z&65535)+(z>>16))
|
37 |
#define wnew (w=18000*(w&65535)+(w>>16))
|
38 |
#define MWC ((znew<<16)+wnew )
|
39 |
#define MWCfp MWC * 2.328306435454494e-10f
|
40 |
|
41 |
__kernel void MainLoop(__global int *s,float J,float B,float T,uint size,
|
42 |
uint iterations,uint seed_w,uint seed_z)
|
43 |
{
|
44 |
uint z=seed_z;
|
45 |
uint w=seed_w;
|
46 |
|
47 |
for (uint i=0;i<iterations;i++) {
|
48 |
|
49 |
uint x=(uint)(MWC%size) ;
|
50 |
uint y=(uint)(MWC%size) ;
|
51 |
|
52 |
int p=s[x+size*y];
|
53 |
|
54 |
int d=s[x+size*((y+1)%size)];
|
55 |
int u=s[x+size*((y-1)%size)];
|
56 |
int l=s[((x-1)%size)+size*y];
|
57 |
int r=s[((x+1)%size)+size*y];
|
58 |
|
59 |
float DeltaE=p*(2.0f*J*(u+d+l+r)+B);
|
60 |
|
61 |
int factor=((DeltaE < 0.0f) || (MWCfp < exp(-DeltaE/T))) ? -1:1;
|
62 |
s[x%size+size*(y%size)] = factor*p;
|
63 |
// barrier(CLK_GLOBAL_MEM_FENCE);
|
64 |
}
|
65 |
barrier(CLK_GLOBAL_MEM_FENCE);
|
66 |
|
67 |
}
|
68 |
"""
|
69 |
|
70 |
def ImageOutput(sigma,prefix): |
71 |
|
72 |
Max=sigma.max() |
73 |
Min=sigma.min() |
74 |
|
75 |
# Normalize value as 8bits Integer
|
76 |
SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8') |
77 |
image = Image.fromarray(SigmaInt) |
78 |
image.save("%s.jpg" % prefix)
|
79 |
|
80 |
def Metropolis(sigma,J,B,T,iterations,Device): |
81 |
|
82 |
# Initialisation des variables en les CASTant correctement
|
83 |
|
84 |
# Je detecte un peripherique GPU dans la liste des peripheriques
|
85 |
Id=1
|
86 |
HasXPU=False
|
87 |
for platform in cl.get_platforms(): |
88 |
for device in platform.get_devices(): |
89 |
if Id==Device:
|
90 |
XPU=device |
91 |
print "CPU/GPU selected: ",device.name.lstrip() |
92 |
HasXPU=True
|
93 |
Id+=1
|
94 |
|
95 |
if HasXPU==False: |
96 |
print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1) |
97 |
sys.exit() |
98 |
|
99 |
# Je cree le contexte et la queue pour son execution
|
100 |
ctx = cl.Context([XPU]) |
101 |
queue = cl.CommandQueue(ctx, |
102 |
properties=cl.command_queue_properties.PROFILING_ENABLE) |
103 |
|
104 |
# Je recupere les flag possibles pour les buffers
|
105 |
mf = cl.mem_flags |
106 |
|
107 |
sigmaCL = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=sigma) |
108 |
|
109 |
MetropolisCL = cl.Program(ctx,KERNEL_CODE).build( |
110 |
options = "-cl-mad-enable -cl-fast-relaxed-math")
|
111 |
|
112 |
i=0
|
113 |
step=STEP |
114 |
duration=0.
|
115 |
|
116 |
while (step*i < iterations):
|
117 |
# Call OpenCL kernel
|
118 |
# (1,) is global work size (only 1 work size)
|
119 |
# (1,) is local work size
|
120 |
# sigmaCL is lattice translated in CL format
|
121 |
# step is number of iterations
|
122 |
|
123 |
start_time=time.time() |
124 |
CLLaunch=MetropolisCL.MainLoop(queue,(1,),None, |
125 |
sigmaCL, |
126 |
numpy.float32(J),numpy.float32(B), |
127 |
numpy.float32(T), |
128 |
numpy.uint32(sigma.shape[0]),
|
129 |
numpy.uint32(step), |
130 |
numpy.uint32(nprnd(2**32)), |
131 |
numpy.uint32(nprnd(2**32))) |
132 |
CLLaunch.wait() |
133 |
# Does not seem to work under AMD/ATI
|
134 |
# elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
|
135 |
elapsed = time.time()-start_time |
136 |
print "Iteration %i with T=%f and %i iterations in %f: " % (i,T,step,elapsed) |
137 |
if LAPIMAGE:
|
138 |
cl.enqueue_copy(queue, sigma, sigmaCL).wait() |
139 |
ImageOutput(sigma,"Ising2D_GPU_Global_%i_%1.1f_%.3i_Lap" %
|
140 |
(SIZE,T,i)) |
141 |
i=i+1
|
142 |
duration=duration+elapsed |
143 |
|
144 |
cl.enqueue_copy(queue, sigma, sigmaCL).wait() |
145 |
sigmaCL.release() |
146 |
|
147 |
return(duration)
|
148 |
|
149 |
def Magnetization(sigma,M): |
150 |
return(numpy.sum(sigma)/(sigma.shape[0]*sigma.shape[1]*1.0)) |
151 |
|
152 |
def Energy(sigma,J,B): |
153 |
# Copier et caster
|
154 |
E=numpy.copy(sigma).astype(numpy.float32) |
155 |
|
156 |
# Appel par slice
|
157 |
E[1:-1,1:-1]=E[1:-1,1:-1]*(2.0*J*(E[:-2,1:-1]+E[2:,1:-1]+ |
158 |
E[1:-1,:-2]+E[1:-1,2:])+B) |
159 |
|
160 |
# Bien nettoyer la peripherie
|
161 |
E[:,0]=0 |
162 |
E[:,-1]=0 |
163 |
E[0,:]=0 |
164 |
E[-1,:]=0 |
165 |
|
166 |
Energy=numpy.sum(E) |
167 |
|
168 |
return(Energy/(E.shape[0]*E.shape[1]*1.0)) |
169 |
|
170 |
def CriticalT(T,E): |
171 |
|
172 |
Epoly=numpy.poly1d(numpy.polyfit(T,E,T.size/3))
|
173 |
dEpoly=numpy.diff(Epoly(T)) |
174 |
dEpoly=numpy.insert(dEpoly,0,0) |
175 |
return(T[numpy.argmin(dEpoly)])
|
176 |
|
177 |
def DisplayCurves(T,E,M,J,B): |
178 |
|
179 |
plt.xlabel("Temperature")
|
180 |
plt.ylabel("Energy")
|
181 |
|
182 |
Experience,=plt.plot(T,E,label="Energy")
|
183 |
|
184 |
plt.legend() |
185 |
plt.show() |
186 |
|
187 |
if __name__=='__main__': |
188 |
|
189 |
# Set defaults values
|
190 |
# Coupling factor
|
191 |
J=1.
|
192 |
# External Magnetic Field is null
|
193 |
B=0.
|
194 |
# Size of Lattice
|
195 |
Size=256
|
196 |
# Default Temperatures (start, end, step)
|
197 |
Tmin=0.1
|
198 |
Tmax=5
|
199 |
Tstep=0.1
|
200 |
# Default Number of Iterations
|
201 |
Iterations=Size*Size |
202 |
# Default Device is first
|
203 |
Device=1
|
204 |
|
205 |
# Curves is True to print the curves
|
206 |
Curves=False
|
207 |
|
208 |
OCL_vendor={} |
209 |
OCL_type={} |
210 |
OCL_description={} |
211 |
|
212 |
try:
|
213 |
import pyopencl as cl |
214 |
|
215 |
print "\nHere are available OpenCL devices:" |
216 |
Id=1
|
217 |
for platform in cl.get_platforms(): |
218 |
for device in platform.get_devices(): |
219 |
OCL_vendor[Id]=platform.vendor.lstrip().rstrip() |
220 |
OCL_type[Id]=cl.device_type.to_string(device.type) |
221 |
OCL_description[Id]=device.name.lstrip().rstrip() |
222 |
print "* Device #%i from %s of type %s : %s" % (Id,OCL_vendor[Id],OCL_type[Id],OCL_description[Id]) |
223 |
Id=Id+1
|
224 |
OCL_MaxDevice=Id-1
|
225 |
print
|
226 |
|
227 |
except ImportError: |
228 |
print "Your platform does not seem to support OpenCL" |
229 |
sys.exit(0)
|
230 |
|
231 |
try:
|
232 |
opts, args = getopt.getopt(sys.argv[1:],"hcj:b:z:i:s:e:p:d:",["coupling=","magneticfield=","size=","iterations=","tempstart=","tempend=","tempstep=","units",'device=']) |
233 |
except getopt.GetoptError:
|
234 |
print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves)' % sys.argv[0] |
235 |
sys.exit(2)
|
236 |
|
237 |
for opt, arg in opts: |
238 |
if opt == '-h': |
239 |
print '%s -d <Device Id> -j <Coupling Factor> -b <Magnetic Field> -z <Size of Square Lattice> -i <Iterations> -s <Minimum Temperature> -e <Maximum Temperature> -p <steP Temperature> -c (Print Curves)' % sys.argv[0] |
240 |
sys.exit() |
241 |
elif opt == '-c': |
242 |
Curves=True
|
243 |
elif opt in ("-d", "--device"): |
244 |
Device = int(arg)
|
245 |
if Device>OCL_MaxDevice:
|
246 |
"Device #%s seems not to be available !"
|
247 |
sys.exit() |
248 |
elif opt in ("-j", "--coupling"): |
249 |
J = float(arg)
|
250 |
elif opt in ("-b", "--magneticfield"): |
251 |
B = float(arg)
|
252 |
elif opt in ("-s", "--tempmin"): |
253 |
Tmin = float(arg)
|
254 |
elif opt in ("-e", "--tempmax"): |
255 |
Tmax = arg |
256 |
elif opt in ("-p", "--tempstep"): |
257 |
Tstep = numpy.uint32(arg) |
258 |
elif opt in ("-i", "--iterations"): |
259 |
Iterations = int(arg)
|
260 |
elif opt in ("-z", "--size"): |
261 |
Size = int(arg)
|
262 |
|
263 |
print "Here are parameters of simulation:" |
264 |
print "* Device selected #%s: %s of type %s from %s" % (Device,OCL_description[Device],OCL_type[Device],OCL_vendor[Device]) |
265 |
print "* Coupling Factor J : %s" % J |
266 |
print "* Magnetic Field B : %s" % B |
267 |
print "* Size of lattice : %sx%s" % (Size,Size) |
268 |
print "* Iterations : %s" % Iterations |
269 |
print "* Temperatures from %s to %s by %s" % (Tmin,Tmax,Tstep) |
270 |
|
271 |
LAPIMAGE=False
|
272 |
|
273 |
if Iterations<STEP:
|
274 |
STEP=Iterations |
275 |
|
276 |
sigmaIn=numpy.where(numpy.random.randn(Size,Size)>0,1,-1).astype (numpy.int32) |
277 |
|
278 |
ImageOutput(sigmaIn,"Ising2D_GPU_Global_%i_Initial" % (Size))
|
279 |
|
280 |
Trange=numpy.arange(Tmin,Tmax+Tstep,Tstep) |
281 |
|
282 |
E=[] |
283 |
M=[] |
284 |
|
285 |
for T in Trange: |
286 |
sigma=numpy.copy(sigmaIn) |
287 |
duration=Metropolis(sigma,J,B,T,Iterations,Device) |
288 |
E=numpy.append(E,Energy(sigma,J,B)) |
289 |
M=numpy.append(M,Magnetization(sigma,B)) |
290 |
ImageOutput(sigma,"Ising2D_GPU_Global_%i_%1.1f_Final" % (Size,T))
|
291 |
print "GPU Time : %f" % (duration) |
292 |
print "Total Energy at Temperature %f : %f" % (T,E[-1]) |
293 |
print "Total Magnetization at Temperature %f : %f" % (T,M[-1]) |
294 |
|
295 |
# Save output
|
296 |
numpy.savez("Ising2D_GPU_Global_%i_%.8i" % (Size,Iterations),(Trange,E,M))
|
297 |
|
298 |
# Estimate Critical temperature
|
299 |
print "The critical temperature on %ix%i lattice with J=%f, B=%f is %f " % (Size,Size,J,B,CriticalT(Trange,E)) |
300 |
|
301 |
if Curves:
|
302 |
DisplayCurves(Trange,E,M,J,B) |
303 |
|