Statistiques
| Révision :

root / Pi / GPU / Pi-GPU.py @ 77

Historique | Voir | Annoter | Télécharger (26,47 ko)

1 7 equemene
#!/usr/bin/env python
2 7 equemene
3 7 equemene
#
4 55 equemene
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
5 7 equemene
#
6 7 equemene
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr>
7 7 equemene
#
8 7 equemene
# Thanks to Andreas Klockner for PyCUDA:
9 7 equemene
# http://mathema.tician.de/software/pycuda
10 7 equemene
#
11 7 equemene
12 7 equemene
# 2013-01-01 : problems with launch timeout
13 7 equemene
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
14 7 equemene
# Option "Interactive" "0" in /etc/X11/xorg.conf
15 7 equemene
16 7 equemene
# Common tools
17 7 equemene
import numpy
18 7 equemene
from numpy.random import randint as nprnd
19 7 equemene
import sys
20 7 equemene
import getopt
21 7 equemene
import time
22 7 equemene
import math
23 7 equemene
from socket import gethostname
24 7 equemene
25 72 equemene
Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
26 72 equemene
Computing={'INT32':0,'INT64':1,'FP32':2,'FP64':3}
27 72 equemene
28 17 equemene
# find prime factors of a number
29 17 equemene
# Get for WWW :
30 17 equemene
# http://pythonism.wordpress.com/2008/05/17/looking-at-factorisation-in-python/
31 17 equemene
def PrimeFactors(x):
32 17 equemene
  factorlist=numpy.array([]).astype('uint32')
33 17 equemene
  loop=2
34 17 equemene
  while loop<=x:
35 17 equemene
    if x%loop==0:
36 17 equemene
      x/=loop
37 17 equemene
      factorlist=numpy.append(factorlist,[loop])
38 17 equemene
    else:
39 17 equemene
      loop+=1
40 17 equemene
  return factorlist
41 17 equemene
42 17 equemene
# Try to find the best thread number in Hybrid approach (Blocks&Threads)
43 17 equemene
# output is thread number
44 17 equemene
def BestThreadsNumber(jobs):
45 17 equemene
  factors=PrimeFactors(jobs)
46 17 equemene
  matrix=numpy.append([factors],[factors[::-1]],axis=0)
47 17 equemene
  threads=1
48 17 equemene
  for factor in matrix.transpose().ravel():
49 17 equemene
    threads=threads*factor
50 71 equemene
    if threads*threads>jobs or threads>512:
51 17 equemene
      break
52 17 equemene
  return(long(threads))
53 17 equemene
54 7 equemene
# Predicted Amdahl Law (Reduced with s=1-p)
55 7 equemene
def AmdahlR(N, T1, p):
56 7 equemene
  return (T1*(1-p+p/N))
57 7 equemene
58 7 equemene
# Predicted Amdahl Law
59 7 equemene
def Amdahl(N, T1, s, p):
60 7 equemene
  return (T1*(s+p/N))
61 7 equemene
62 7 equemene
# Predicted Mylq Law with first order
63 7 equemene
def Mylq(N, T1,s,c,p):
64 45 equemene
  return (T1*(s+p/N)+c*N)
65 7 equemene
66 7 equemene
# Predicted Mylq Law with second order
67 7 equemene
def Mylq2(N, T1,s,c1,c2,p):
68 45 equemene
  return (T1*(s+p/N)+c1*N+c2*N*N)
69 7 equemene
70 7 equemene
KERNEL_CODE_CUDA="""
71 73 equemene
#define TCONG 0
72 73 equemene
#define TSHR3 1
73 73 equemene
#define TMWC 2
74 73 equemene
#define TKISS 3
75 7 equemene

76 73 equemene
#define TINT32 0
77 73 equemene
#define TINT64 1
78 73 equemene
#define TFP32 2
79 73 equemene
#define TFP64 3
80 73 equemene

81 7 equemene
// Marsaglia RNG very simple implementation
82 7 equemene

83 7 equemene
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
84 7 equemene
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
85 7 equemene
#define MWC   (znew+wnew)
86 7 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
87 7 equemene
#define CONG  (jcong=69069*jcong+1234567)
88 7 equemene
#define KISS  ((MWC^CONG)+SHR3)
89 7 equemene

90 7 equemene
#define MWCfp MWC * 2.328306435454494e-10f
91 7 equemene
#define KISSfp KISS * 2.328306435454494e-10f
92 72 equemene
#define SHR3fp SHR3 * 2.328306435454494e-10f
93 72 equemene
#define CONGfp CONG * 2.328306435454494e-10f
94 7 equemene

95 74 equemene
__device__ ulong MainLoop(ulong iterations,uint seed_w,uint seed_z,size_t work)
96 7 equemene
{
97 7 equemene

98 74 equemene
#if TRNG == TCONG
99 74 equemene
   uint jcong=seed_z+work;
100 74 equemene
#elif TRNG == TSHR3
101 74 equemene
   uint jsr=seed_w+work;
102 74 equemene
#elif TRNG == TMWC
103 74 equemene
   uint z=seed_z+work;
104 74 equemene
   uint w=seed_w+work;
105 74 equemene
#elif TRNG == TKISS
106 74 equemene
   uint jcong=seed_z+work;
107 74 equemene
   uint jsr=seed_w+work;
108 74 equemene
   uint z=seed_z-work;
109 74 equemene
   uint w=seed_w-work;
110 74 equemene
#endif
111 7 equemene

112 17 equemene
   ulong total=0;
113 7 equemene

114 17 equemene
   for (ulong i=0;i<iterations;i++) {
115 7 equemene

116 74 equemene
#if TYPE == TINT32
117 74 equemene
    #define THEONE 1073741824
118 74 equemene
    #if TRNG == TCONG
119 74 equemene
        uint x=CONG>>17 ;
120 74 equemene
        uint y=CONG>>17 ;
121 74 equemene
    #elif TRNG == TSHR3
122 74 equemene
        uint x=SHR3>>17 ;
123 74 equemene
        uint y=SHR3>>17 ;
124 74 equemene
    #elif TRNG == TMWC
125 74 equemene
        uint x=MWC>>17 ;
126 74 equemene
        uint y=MWC>>17 ;
127 74 equemene
    #elif TRNG == TKISS
128 74 equemene
        uint x=KISS>>17 ;
129 74 equemene
        uint y=KISS>>17 ;
130 74 equemene
    #endif
131 74 equemene
#elif TYPE == TINT64
132 74 equemene
    #define THEONE 4611686018427387904
133 74 equemene
    #if TRNG == TCONG
134 74 equemene
        ulong x=(ulong)(CONG>>1) ;
135 74 equemene
        ulong y=(ulong)(CONG>>1) ;
136 74 equemene
    #elif TRNG == TSHR3
137 74 equemene
        ulong x=(ulong)(SHR3>>1) ;
138 74 equemene
        ulong y=(ulong)(SHR3>>1) ;
139 74 equemene
    #elif TRNG == TMWC
140 74 equemene
        ulong x=(ulong)(MWC>>1) ;
141 74 equemene
        ulong y=(ulong)(MWC>>1) ;
142 74 equemene
    #elif TRNG == TKISS
143 74 equemene
        ulong x=(ulong)(KISS>>1) ;
144 74 equemene
        ulong y=(ulong)(KISS>>1) ;
145 74 equemene
    #endif
146 74 equemene
#elif TYPE == TFP32
147 74 equemene
    #define THEONE 1.0f
148 74 equemene
    #if TRNG == TCONG
149 74 equemene
        float x=CONGfp ;
150 74 equemene
        float y=CONGfp ;
151 74 equemene
    #elif TRNG == TSHR3
152 74 equemene
        float x=SHR3fp ;
153 74 equemene
        float y=SHR3fp ;
154 74 equemene
    #elif TRNG == TMWC
155 74 equemene
        float x=MWCfp ;
156 74 equemene
        float y=MWCfp ;
157 74 equemene
    #elif TRNG == TKISS
158 74 equemene
      float x=KISSfp ;
159 74 equemene
      float y=KISSfp ;
160 74 equemene
    #endif
161 74 equemene
#elif TYPE == TFP64
162 74 equemene
    #define THEONE 1.0f
163 74 equemene
    #if TRNG == TCONG
164 74 equemene
        double x=(double)CONGfp ;
165 74 equemene
        double y=(double)CONGfp ;
166 74 equemene
    #elif TRNG == TSHR3
167 74 equemene
        double x=(double)SHR3fp ;
168 74 equemene
        double y=(double)SHR3fp ;
169 74 equemene
    #elif TRNG == TMWC
170 74 equemene
        double x=(double)MWCfp ;
171 74 equemene
        double y=(double)MWCfp ;
172 74 equemene
    #elif TRNG == TKISS
173 74 equemene
        double x=(double)KISSfp ;
174 74 equemene
        double y=(double)KISSfp ;
175 74 equemene
    #endif
176 74 equemene
#endif
177 7 equemene

178 74 equemene
      ulong inside=((x*x+y*y) <= THEONE) ? 1:0;
179 7 equemene
      total+=inside;
180 7 equemene
   }
181 7 equemene

182 74 equemene
   return(total);
183 7 equemene
}
184 7 equemene

185 74 equemene
__global__ void MainLoopBlocks(ulong *s,ulong iterations,uint seed_w,uint seed_z)
186 7 equemene
{
187 74 equemene
   ulong total=MainLoop(iterations,seed_z,seed_w,blockIdx.x);
188 50 equemene
   s[blockIdx.x]=total;
189 50 equemene
   __syncthreads();
190 50 equemene

191 50 equemene
}
192 50 equemene

193 74 equemene
__global__ void MainLoopThreads(ulong *s,ulong iterations,uint seed_w,uint seed_z)
194 50 equemene
{
195 74 equemene
   ulong total=MainLoop(iterations,seed_z,seed_w,threadIdx.x);
196 50 equemene
   s[threadIdx.x]=total;
197 50 equemene
   __syncthreads();
198 50 equemene

199 50 equemene
}
200 50 equemene

201 74 equemene
__global__ void MainLoopHybrid(ulong *s,ulong iterations,uint seed_w,uint seed_z)
202 50 equemene
{
203 74 equemene
   ulong total=MainLoop(iterations,seed_z,seed_w,blockDim.x*blockIdx.x+threadIdx.x);
204 50 equemene
   s[blockDim.x*blockIdx.x+threadIdx.x]=total;
205 50 equemene
   __syncthreads();
206 50 equemene
}
207 74 equemene

208 7 equemene
"""
209 7 equemene
210 7 equemene
KERNEL_CODE_OPENCL="""
211 72 equemene
#define TCONG 0
212 72 equemene
#define TSHR3 1
213 72 equemene
#define TMWC 2
214 72 equemene
#define TKISS 3
215 72 equemene

216 72 equemene
#define TINT32 0
217 72 equemene
#define TINT64 1
218 72 equemene
#define TFP32 2
219 72 equemene
#define TFP64 3
220 72 equemene

221 7 equemene
// Marsaglia RNG very simple implementation
222 7 equemene
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
223 7 equemene
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
224 72 equemene

225 7 equemene
#define MWC   (znew+wnew)
226 7 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
227 7 equemene
#define CONG  (jcong=69069*jcong+1234567)
228 7 equemene
#define KISS  ((MWC^CONG)+SHR3)
229 7 equemene

230 7 equemene
#define MWCfp MWC * 2.328306435454494e-10f
231 7 equemene
#define KISSfp KISS * 2.328306435454494e-10f
232 72 equemene
#define CONGfp CONG * 2.328306435454494e-10f
233 72 equemene
#define SHR3fp SHR3 * 2.328306435454494e-10f
234 7 equemene

235 73 equemene
ulong MainLoop(ulong iterations,uint seed_z,uint seed_w,size_t work)
236 7 equemene
{
237 72 equemene

238 72 equemene
#if TRNG == TCONG
239 73 equemene
   uint jcong=seed_z+work;
240 72 equemene
#elif TRNG == TSHR3
241 73 equemene
   uint jsr=seed_w+work;
242 72 equemene
#elif TRNG == TMWC
243 73 equemene
   uint z=seed_z+work;
244 73 equemene
   uint w=seed_w+work;
245 72 equemene
#elif TRNG == TKISS
246 73 equemene
   uint jcong=seed_z+work;
247 73 equemene
   uint jsr=seed_w+work;
248 73 equemene
   uint z=seed_z-work;
249 73 equemene
   uint w=seed_w-work;
250 72 equemene
#endif
251 7 equemene

252 17 equemene
   ulong total=0;
253 7 equemene

254 17 equemene
   for (ulong i=0;i<iterations;i++) {
255 7 equemene

256 72 equemene
#if TYPE == TINT32
257 73 equemene
    #define THEONE 1073741824
258 73 equemene
    #if TRNG == TCONG
259 73 equemene
        uint x=CONG>>17 ;
260 73 equemene
        uint y=CONG>>17 ;
261 73 equemene
    #elif TRNG == TSHR3
262 73 equemene
        uint x=SHR3>>17 ;
263 73 equemene
        uint y=SHR3>>17 ;
264 73 equemene
    #elif TRNG == TMWC
265 73 equemene
        uint x=MWC>>17 ;
266 73 equemene
        uint y=MWC>>17 ;
267 73 equemene
    #elif TRNG == TKISS
268 73 equemene
        uint x=KISS>>17 ;
269 73 equemene
        uint y=KISS>>17 ;
270 73 equemene
    #endif
271 72 equemene
#elif TYPE == TINT64
272 73 equemene
    #define THEONE 4611686018427387904
273 73 equemene
    #if TRNG == TCONG
274 73 equemene
        ulong x=(ulong)(CONG>>1) ;
275 73 equemene
        ulong y=(ulong)(CONG>>1) ;
276 73 equemene
    #elif TRNG == TSHR3
277 73 equemene
        ulong x=(ulong)(SHR3>>1) ;
278 73 equemene
        ulong y=(ulong)(SHR3>>1) ;
279 73 equemene
    #elif TRNG == TMWC
280 73 equemene
        ulong x=(ulong)(MWC>>1) ;
281 73 equemene
        ulong y=(ulong)(MWC>>1) ;
282 73 equemene
    #elif TRNG == TKISS
283 73 equemene
        ulong x=(ulong)(KISS>>1) ;
284 73 equemene
        ulong y=(ulong)(KISS>>1) ;
285 73 equemene
    #endif
286 72 equemene
#elif TYPE == TFP32
287 73 equemene
    #define THEONE 1.0f
288 73 equemene
    #if TRNG == TCONG
289 73 equemene
        float x=CONGfp ;
290 73 equemene
        float y=CONGfp ;
291 73 equemene
    #elif TRNG == TSHR3
292 73 equemene
        float x=SHR3fp ;
293 73 equemene
        float y=SHR3fp ;
294 73 equemene
    #elif TRNG == TMWC
295 73 equemene
        float x=MWCfp ;
296 73 equemene
        float y=MWCfp ;
297 73 equemene
    #elif TRNG == TKISS
298 72 equemene
      float x=KISSfp ;
299 72 equemene
      float y=KISSfp ;
300 73 equemene
    #endif
301 72 equemene
#elif TYPE == TFP64
302 73 equemene
#pragma OPENCL EXTENSION cl_khr_fp64: enable
303 73 equemene
    #define THEONE 1.0f
304 73 equemene
    #if TRNG == TCONG
305 73 equemene
        double x=(double)CONGfp ;
306 73 equemene
        double y=(double)CONGfp ;
307 73 equemene
    #elif TRNG == TSHR3
308 73 equemene
        double x=(double)SHR3fp ;
309 73 equemene
        double y=(double)SHR3fp ;
310 73 equemene
    #elif TRNG == TMWC
311 73 equemene
        double x=(double)MWCfp ;
312 73 equemene
        double y=(double)MWCfp ;
313 73 equemene
    #elif TRNG == TKISS
314 73 equemene
        double x=(double)KISSfp ;
315 73 equemene
        double y=(double)KISSfp ;
316 73 equemene
    #endif
317 72 equemene
#endif
318 7 equemene

319 72 equemene
      ulong inside=((x*x+y*y) <= THEONE) ? 1:0;
320 7 equemene
      total+=inside;
321 7 equemene
   }
322 73 equemene

323 73 equemene
   return(total);
324 73 equemene
}
325 72 equemene

326 73 equemene
__kernel void MainLoopGlobal(__global ulong *s,ulong iterations,uint seed_w,uint seed_z)
327 73 equemene
{
328 73 equemene
   ulong total=MainLoop(iterations,seed_z,seed_w,get_global_id(0));
329 71 equemene
   barrier(CLK_GLOBAL_MEM_FENCE);
330 73 equemene
   s[get_global_id(0)]=total;
331 7 equemene
}
332 7 equemene

333 73 equemene

334 17 equemene
__kernel void MainLoopLocal(__global ulong *s,ulong iterations,uint seed_w,uint seed_z)
335 7 equemene
{
336 73 equemene
   ulong total=MainLoop(iterations,seed_z,seed_w,get_local_id(0));
337 71 equemene
   barrier(CLK_LOCAL_MEM_FENCE);
338 7 equemene
   s[get_local_id(0)]=total;
339 7 equemene
}
340 7 equemene

341 17 equemene
__kernel void MainLoopHybrid(__global ulong *s,ulong iterations,uint seed_w,uint seed_z)
342 7 equemene
{
343 73 equemene
   ulong total=MainLoop(iterations,seed_z,seed_w,get_global_id(0));
344 73 equemene
   barrier(CLK_GLOBAL_MEM_FENCE || CLK_LOCAL_MEM_FENCE);
345 71 equemene
   s[get_global_id(0)]=total;
346 7 equemene
}
347 50 equemene

348 7 equemene
"""
349 7 equemene
350 74 equemene
def MetropolisCuda(circle,iterations,steps,jobs,ParaStyle,RNG,ValueType):
351 7 equemene
352 7 equemene
  # Avec PyCUDA autoinit, rien a faire !
353 7 equemene
354 7 equemene
  circleCU = cuda.InOut(circle)
355 7 equemene
356 74 equemene
  try:
357 74 equemene
    mod = SourceModule(KERNEL_CODE_CUDA,options=['--compiler-options','-Wall -DTRNG=%i -DTYPE=%s' % (Marsaglia[RNG],Computing[ValueType])])
358 74 equemene
  except:
359 74 equemene
    print "Compilation seems to brake"
360 74 equemene
361 7 equemene
  MetropolisBlocksCU=mod.get_function("MainLoopBlocks")
362 7 equemene
  MetropolisJobsCU=mod.get_function("MainLoopThreads")
363 7 equemene
  MetropolisHybridCU=mod.get_function("MainLoopHybrid")
364 7 equemene
365 7 equemene
  start = pycuda.driver.Event()
366 7 equemene
  stop = pycuda.driver.Event()
367 7 equemene
368 7 equemene
  MyPi=numpy.zeros(steps)
369 7 equemene
  MyDuration=numpy.zeros(steps)
370 50 equemene
371 7 equemene
  if iterations%jobs==0:
372 17 equemene
    iterationsCL=numpy.uint64(iterations/jobs)
373 7 equemene
    iterationsNew=iterationsCL*jobs
374 7 equemene
  else:
375 17 equemene
    iterationsCL=numpy.uint64(iterations/jobs+1)
376 7 equemene
    iterationsNew=iterations
377 7 equemene
378 7 equemene
  for i in range(steps):
379 7 equemene
    start.record()
380 7 equemene
    start.synchronize()
381 7 equemene
    if ParaStyle=='Blocks':
382 74 equemene
      MetropolisBlocksCU(circleCU,
383 74 equemene
                         numpy.uint64(iterationsCL),
384 74 equemene
                         numpy.uint32(nprnd(2**30/jobs)),
385 74 equemene
                         numpy.uint32(nprnd(2**30/jobs)),
386 74 equemene
                         grid=(jobs,1),
387 74 equemene
                         block=(1,1,1))
388 17 equemene
      print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
389 17 equemene
            (Alu,jobs,1,ParaStyle)
390 7 equemene
    elif ParaStyle=='Hybrid':
391 17 equemene
      threads=BestThreadsNumber(jobs)
392 74 equemene
      MetropolisHybridCU(circleCU,
393 50 equemene
                         numpy.uint64(iterationsCL),
394 50 equemene
                         numpy.uint32(nprnd(2**30/jobs)),
395 50 equemene
                         numpy.uint32(nprnd(2**30/jobs)),
396 74 equemene
                         grid=(jobs,1),
397 74 equemene
                         block=(threads,1,1))
398 17 equemene
      print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
399 74 equemene
            (Alu,jobs/threads,threads,ParaStyle)
400 74 equemene
    else:
401 74 equemene
      MetropolisJobsCU(circleCU,
402 74 equemene
                       numpy.uint64(iterationsCL),
403 74 equemene
                       numpy.uint32(nprnd(2**30/jobs)),
404 74 equemene
                       numpy.uint32(nprnd(2**30/jobs)),
405 74 equemene
                       grid=(1,1),
406 74 equemene
                       block=(jobs,1,1))
407 74 equemene
      print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
408 17 equemene
            (Alu,jobs,1,ParaStyle)
409 7 equemene
    stop.record()
410 7 equemene
    stop.synchronize()
411 7 equemene
412 7 equemene
    elapsed = start.time_till(stop)*1e-3
413 7 equemene
414 7 equemene
    MyDuration[i]=elapsed
415 50 equemene
    AllPi=4./numpy.float32(iterationsCL)*circle.astype(numpy.float32)
416 50 equemene
    MyPi[i]=numpy.median(AllPi)
417 50 equemene
    print MyPi[i],numpy.std(AllPi),MyDuration[i]
418 7 equemene
419 50 equemene
420 75 equemene
  print jobs,numpy.mean(MyDuration),numpy.median(MyDuration),numpy.std(MyDuration),numpy.mean(Iterations/MyDuration),numpy.median(Iterations/MyDuration),numpy.std(Iterations/MyDuration)
421 7 equemene
422 75 equemene
  return(numpy.mean(MyDuration),numpy.median(MyDuration),numpy.std(MyDuration),numpy.mean(Iterations/MyDuration),numpy.median(Iterations/MyDuration),numpy.std(Iterations/MyDuration))
423 7 equemene
424 7 equemene
425 50 equemene
def MetropolisOpenCL(circle,iterations,steps,jobs,ParaStyle,Alu,Device,
426 73 equemene
                     RNG,ValueType):
427 7 equemene
428 7 equemene
  # Initialisation des variables en les CASTant correctement
429 7 equemene
430 46 equemene
  if Device==0:
431 46 equemene
    print "Enter XPU selector based on ALU type: first selected"
432 46 equemene
    HasXPU=False
433 46 equemene
    # Default Device selection based on ALU Type
434 46 equemene
    for platform in cl.get_platforms():
435 46 equemene
      for device in platform.get_devices():
436 46 equemene
        deviceType=cl.device_type.to_string(device.type)
437 46 equemene
        if deviceType=="GPU" and Alu=="GPU" and not HasXPU:
438 46 equemene
          XPU=device
439 46 equemene
          print "GPU selected: ",device.name
440 46 equemene
          HasXPU=True
441 46 equemene
        if deviceType=="CPU" and Alu=="CPU" and not HasXPU:
442 46 equemene
          XPU=device
443 46 equemene
          print "CPU selected: ",device.name
444 46 equemene
          HasXPU=True
445 46 equemene
  else:
446 46 equemene
    print "Enter XPU selector based on device number & ALU type"
447 46 equemene
    Id=1
448 46 equemene
    HasXPU=False
449 46 equemene
    # Primary Device selection based on Device Id
450 46 equemene
    for platform in cl.get_platforms():
451 46 equemene
      for device in platform.get_devices():
452 46 equemene
        deviceType=cl.device_type.to_string(device.type)
453 46 equemene
        if Id==Device and Alu==deviceType and HasXPU==False:
454 46 equemene
          XPU=device
455 73 equemene
          print "CPU/GPU selected: ",device.name.lstrip()
456 46 equemene
          HasXPU=True
457 46 equemene
        Id=Id+1
458 46 equemene
    if HasXPU==False:
459 46 equemene
      print "No XPU #%i of type %s found in all of %i devices, sorry..." % \
460 46 equemene
          (Device,Alu,Id-1)
461 46 equemene
      return(0,0,0)
462 7 equemene
463 7 equemene
  # Je cree le contexte et la queue pour son execution
464 46 equemene
  ctx = cl.Context([XPU])
465 7 equemene
  queue = cl.CommandQueue(ctx,
466 7 equemene
                          properties=cl.command_queue_properties.PROFILING_ENABLE)
467 7 equemene
468 7 equemene
  # Je recupere les flag possibles pour les buffers
469 7 equemene
  mf = cl.mem_flags
470 7 equemene
471 7 equemene
  circleCL = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=circle)
472 7 equemene
473 72 equemene
474 7 equemene
  MetropolisCL = cl.Program(ctx,KERNEL_CODE_OPENCL).build( \
475 72 equemene
    options = "-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%s" % (Marsaglia[RNG],Computing[ValueType]))
476 7 equemene
477 7 equemene
  i=0
478 7 equemene
479 7 equemene
  MyPi=numpy.zeros(steps)
480 7 equemene
  MyDuration=numpy.zeros(steps)
481 7 equemene
482 7 equemene
  if iterations%jobs==0:
483 41 equemene
    iterationsCL=numpy.uint64(iterations/jobs)
484 47 equemene
    iterationsNew=numpy.uint64(iterationsCL*jobs)
485 7 equemene
  else:
486 41 equemene
    iterationsCL=numpy.uint64(iterations/jobs+1)
487 47 equemene
    iterationsNew=numpy.uint64(iterations)
488 7 equemene
489 7 equemene
  for i in range(steps):
490 7 equemene
491 7 equemene
    if ParaStyle=='Blocks':
492 7 equemene
      # Call OpenCL kernel
493 7 equemene
      # (1,) is Global work size (only 1 work size)
494 7 equemene
      # (1,) is local work size
495 7 equemene
      # circleCL is lattice translated in CL format
496 7 equemene
      # SeedZCL is lattice translated in CL format
497 7 equemene
      # SeedWCL is lattice translated in CL format
498 7 equemene
      # step is number of iterations
499 73 equemene
      CLLaunch=MetropolisCL.MainLoopGlobal(queue,(jobs,),None,
500 73 equemene
                                           circleCL,
501 73 equemene
                                           numpy.uint64(iterationsCL),
502 73 equemene
                                           numpy.uint32(nprnd(2**30/jobs)),
503 73 equemene
                                           numpy.uint32(nprnd(2**30/jobs)))
504 17 equemene
      print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
505 17 equemene
            (Alu,jobs,1,ParaStyle)
506 7 equemene
    elif ParaStyle=='Hybrid':
507 17 equemene
      threads=BestThreadsNumber(jobs)
508 7 equemene
      # en OpenCL, necessaire de mettre un Global_id identique au local_id
509 73 equemene
      CLLaunch=MetropolisCL.MainLoopHybrid(queue,(jobs,),(threads,),
510 73 equemene
                                          circleCL,
511 73 equemene
                                          numpy.uint64(iterationsCL),
512 73 equemene
                                          numpy.uint32(nprnd(2**30/jobs)),
513 73 equemene
                                          numpy.uint32(nprnd(2**30/jobs)))
514 50 equemene
515 17 equemene
      print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
516 17 equemene
            (Alu,jobs/threads,threads,ParaStyle)
517 7 equemene
    else:
518 7 equemene
      # en OpenCL, necessaire de mettre un Global_id identique au local_id
519 73 equemene
      CLLaunch=MetropolisCL.MainLoopLocal(queue,(jobs,),(jobs,),
520 73 equemene
                                          circleCL,
521 73 equemene
                                          numpy.uint64(iterationsCL),
522 73 equemene
                                          numpy.uint32(nprnd(2**30/jobs)),
523 73 equemene
                                          numpy.uint32(nprnd(2**30/jobs)))
524 7 equemene
      print "%s with %i %s done" % (Alu,jobs,ParaStyle)
525 7 equemene
526 7 equemene
    CLLaunch.wait()
527 7 equemene
    cl.enqueue_copy(queue, circle, circleCL).wait()
528 7 equemene
529 7 equemene
    elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
530 7 equemene
531 71 equemene
    print circle,numpy.mean(circle),numpy.median(circle),numpy.std(circle)
532 7 equemene
    MyDuration[i]=elapsed
533 49 equemene
    AllPi=4./numpy.float32(iterationsCL)*circle.astype(numpy.float32)
534 49 equemene
    MyPi[i]=numpy.median(AllPi)
535 49 equemene
    print MyPi[i],numpy.std(AllPi),MyDuration[i]
536 7 equemene
537 7 equemene
  circleCL.release()
538 7 equemene
539 75 equemene
  print jobs,numpy.mean(MyDuration),numpy.median(MyDuration),numpy.std(MyDuration),numpy.mean(Iterations/MyDuration),numpy.median(Iterations/MyDuration),numpy.std(Iterations/MyDuration)
540 7 equemene
541 75 equemene
  return(numpy.mean(MyDuration),numpy.median(MyDuration),numpy.std(MyDuration),numpy.mean(Iterations/MyDuration),numpy.median(Iterations/MyDuration),numpy.std(Iterations/MyDuration))
542 7 equemene
543 7 equemene
544 7 equemene
def FitAndPrint(N,D,Curves):
545 7 equemene
546 55 equemene
  from scipy.optimize import curve_fit
547 55 equemene
  import matplotlib.pyplot as plt
548 55 equemene
549 7 equemene
  try:
550 7 equemene
    coeffs_Amdahl, matcov_Amdahl = curve_fit(Amdahl, N, D)
551 7 equemene
552 7 equemene
    D_Amdahl=Amdahl(N,coeffs_Amdahl[0],coeffs_Amdahl[1],coeffs_Amdahl[2])
553 7 equemene
    coeffs_Amdahl[1]=coeffs_Amdahl[1]*coeffs_Amdahl[0]/D[0]
554 7 equemene
    coeffs_Amdahl[2]=coeffs_Amdahl[2]*coeffs_Amdahl[0]/D[0]
555 7 equemene
    coeffs_Amdahl[0]=D[0]
556 7 equemene
    print "Amdahl Normalized: T=%.2f(%.6f+%.6f/N)" % \
557 7 equemene
        (coeffs_Amdahl[0],coeffs_Amdahl[1],coeffs_Amdahl[2])
558 7 equemene
  except:
559 7 equemene
    print "Impossible to fit for Amdahl law : only %i elements" % len(D)
560 7 equemene
561 7 equemene
  try:
562 7 equemene
    coeffs_AmdahlR, matcov_AmdahlR = curve_fit(AmdahlR, N, D)
563 7 equemene
564 7 equemene
    D_AmdahlR=AmdahlR(N,coeffs_AmdahlR[0],coeffs_AmdahlR[1])
565 7 equemene
    coeffs_AmdahlR[1]=coeffs_AmdahlR[1]*coeffs_AmdahlR[0]/D[0]
566 7 equemene
    coeffs_AmdahlR[0]=D[0]
567 7 equemene
    print "Amdahl Reduced Normalized: T=%.2f(%.6f+%.6f/N)" % \
568 7 equemene
        (coeffs_AmdahlR[0],1-coeffs_AmdahlR[1],coeffs_AmdahlR[1])
569 7 equemene
570 7 equemene
  except:
571 7 equemene
    print "Impossible to fit for Reduced Amdahl law : only %i elements" % len(D)
572 7 equemene
573 7 equemene
  try:
574 7 equemene
    coeffs_Mylq, matcov_Mylq = curve_fit(Mylq, N, D)
575 7 equemene
576 7 equemene
    coeffs_Mylq[1]=coeffs_Mylq[1]*coeffs_Mylq[0]/D[0]
577 45 equemene
    # coeffs_Mylq[2]=coeffs_Mylq[2]*coeffs_Mylq[0]/D[0]
578 7 equemene
    coeffs_Mylq[3]=coeffs_Mylq[3]*coeffs_Mylq[0]/D[0]
579 7 equemene
    coeffs_Mylq[0]=D[0]
580 45 equemene
    print "Mylq Normalized : T=%.2f(%.6f+%.6f/N)+%.6f*N" % (coeffs_Mylq[0],
581 7 equemene
                                                            coeffs_Mylq[1],
582 45 equemene
                                                            coeffs_Mylq[3],
583 45 equemene
                                                            coeffs_Mylq[2])
584 7 equemene
    D_Mylq=Mylq(N,coeffs_Mylq[0],coeffs_Mylq[1],coeffs_Mylq[2],
585 7 equemene
                coeffs_Mylq[3])
586 7 equemene
  except:
587 7 equemene
    print "Impossible to fit for Mylq law : only %i elements" % len(D)
588 7 equemene
589 7 equemene
  try:
590 7 equemene
    coeffs_Mylq2, matcov_Mylq2 = curve_fit(Mylq2, N, D)
591 7 equemene
592 7 equemene
    coeffs_Mylq2[1]=coeffs_Mylq2[1]*coeffs_Mylq2[0]/D[0]
593 45 equemene
    # coeffs_Mylq2[2]=coeffs_Mylq2[2]*coeffs_Mylq2[0]/D[0]
594 45 equemene
    # coeffs_Mylq2[3]=coeffs_Mylq2[3]*coeffs_Mylq2[0]/D[0]
595 7 equemene
    coeffs_Mylq2[4]=coeffs_Mylq2[4]*coeffs_Mylq2[0]/D[0]
596 7 equemene
    coeffs_Mylq2[0]=D[0]
597 45 equemene
    print "Mylq 2nd order Normalized: T=%.2f(%.6f+%.6f/N)+%.6f*N+%.6f*N^2" % \
598 45 equemene
        (coeffs_Mylq2[0],coeffs_Mylq2[1],
599 45 equemene
         coeffs_Mylq2[4],coeffs_Mylq2[2],coeffs_Mylq2[3])
600 7 equemene
601 7 equemene
  except:
602 7 equemene
    print "Impossible to fit for 2nd order Mylq law : only %i elements" % len(D)
603 7 equemene
604 7 equemene
  if Curves:
605 7 equemene
    plt.xlabel("Number of Threads/work Items")
606 7 equemene
    plt.ylabel("Total Elapsed Time")
607 7 equemene
608 7 equemene
    Experience,=plt.plot(N,D,'ro')
609 7 equemene
    try:
610 7 equemene
      pAmdahl,=plt.plot(N,D_Amdahl,label="Loi de Amdahl")
611 7 equemene
      pMylq,=plt.plot(N,D_Mylq,label="Loi de Mylq")
612 7 equemene
    except:
613 7 equemene
      print "Fit curves seem not to be available"
614 7 equemene
615 7 equemene
    plt.legend()
616 7 equemene
    plt.show()
617 7 equemene
618 7 equemene
if __name__=='__main__':
619 7 equemene
620 7 equemene
  # Set defaults values
621 55 equemene
622 55 equemene
  # Alu can be CPU, GPU or ACCELERATOR
623 7 equemene
  Alu='CPU'
624 46 equemene
  # Id of GPU : 1 is for first find !
625 17 equemene
  Device=0
626 7 equemene
  # GPU style can be Cuda (Nvidia implementation) or OpenCL
627 7 equemene
  GpuStyle='OpenCL'
628 7 equemene
  # Parallel distribution can be on Threads or Blocks
629 7 equemene
  ParaStyle='Blocks'
630 7 equemene
  # Iterations is integer
631 7 equemene
  Iterations=100000000
632 7 equemene
  # JobStart in first number of Jobs to explore
633 7 equemene
  JobStart=1
634 7 equemene
  # JobEnd is last number of Jobs to explore
635 7 equemene
  JobEnd=16
636 47 equemene
  # JobStep is the step of Jobs to explore
637 47 equemene
  JobStep=1
638 7 equemene
  # Redo is the times to redo the test to improve metrology
639 7 equemene
  Redo=1
640 7 equemene
  # OutMetrology is method for duration estimation : False is GPU inside
641 7 equemene
  OutMetrology=False
642 45 equemene
  Metrology='InMetro'
643 7 equemene
  # Curves is True to print the curves
644 7 equemene
  Curves=False
645 55 equemene
  # Fit is True to print the curves
646 55 equemene
  Fit=False
647 72 equemene
  # Marsaglia RNG
648 72 equemene
  RNG='KISS'
649 72 equemene
  # Value type : INT32, INT64, FP32, FP64
650 72 equemene
  ValueType='INT32'
651 72 equemene
652 7 equemene
  try:
653 72 equemene
    opts, args = getopt.getopt(sys.argv[1:],"hocfa:g:p:i:s:e:t:r:d:m:v:",["alu=","gpustyle=","parastyle=","iterations=","jobstart=","jobend=","jobstep=","redo=","device=","marsaglia=","valuetype="])
654 7 equemene
  except getopt.GetoptError:
655 72 equemene
    print '%s -o (Out of Core Metrology) -c (Print Curves) -f (Fit to Amdahl Law) -a <CPU/GPU/ACCELERATOR> -d <DeviceId> -g <CUDA/OpenCL> -p <Threads/Hybrid/Blocks> -i <Iterations> -s <JobStart> -e <JobEnd> -t <JobStep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64> ' % sys.argv[0]
656 7 equemene
    sys.exit(2)
657 7 equemene
658 7 equemene
  for opt, arg in opts:
659 7 equemene
    if opt == '-h':
660 72 equemene
      print '%s -o (Out of Core Metrology) -c (Print Curves) -f (Fit to Amdahl Law) -a <CPU/GPU/ACCELERATOR> -d <DeviceId> -g <CUDA/OpenCL> -p <Threads/Hybrid/Blocks> -i <Iterations> -s <JobStart> -e <JobEnd> -t <JobStep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>' % sys.argv[0]
661 46 equemene
662 46 equemene
      print "\nInformations about devices detected under OpenCL:"
663 46 equemene
      # For PyOpenCL import
664 55 equemene
      try:
665 55 equemene
        import pyopencl as cl
666 55 equemene
        Id=1
667 55 equemene
        for platform in cl.get_platforms():
668 55 equemene
          for device in platform.get_devices():
669 55 equemene
            deviceType=cl.device_type.to_string(device.type)
670 73 equemene
            print "Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip())
671 55 equemene
            Id=Id+1
672 46 equemene
673 55 equemene
        print
674 55 equemene
        sys.exit()
675 55 equemene
      except ImportError:
676 55 equemene
        print "Your platform does not seem to support OpenCL"
677 55 equemene
678 7 equemene
    elif opt == '-o':
679 7 equemene
      OutMetrology=True
680 45 equemene
      Metrology='OutMetro'
681 7 equemene
    elif opt == '-c':
682 7 equemene
      Curves=True
683 55 equemene
    elif opt == '-f':
684 55 equemene
      Fit=True
685 7 equemene
    elif opt in ("-a", "--alu"):
686 7 equemene
      Alu = arg
687 7 equemene
    elif opt in ("-d", "--device"):
688 7 equemene
      Device = int(arg)
689 7 equemene
    elif opt in ("-g", "--gpustyle"):
690 7 equemene
      GpuStyle = arg
691 7 equemene
    elif opt in ("-p", "--parastyle"):
692 7 equemene
      ParaStyle = arg
693 72 equemene
    elif opt in ("-m", "--marsaglia"):
694 72 equemene
      RNG = arg
695 72 equemene
    elif opt in ("-v", "--valuetype"):
696 72 equemene
      ValueType = arg
697 7 equemene
    elif opt in ("-i", "--iterations"):
698 40 equemene
      Iterations = numpy.uint64(arg)
699 7 equemene
    elif opt in ("-s", "--jobstart"):
700 7 equemene
      JobStart = int(arg)
701 7 equemene
    elif opt in ("-e", "--jobend"):
702 7 equemene
      JobEnd = int(arg)
703 47 equemene
    elif opt in ("-t", "--jobstep"):
704 47 equemene
      JobStep = int(arg)
705 7 equemene
    elif opt in ("-r", "--redo"):
706 7 equemene
      Redo = int(arg)
707 7 equemene
708 7 equemene
  if Alu=='CPU' and GpuStyle=='CUDA':
709 7 equemene
    print "Alu can't be CPU for CUDA, set Alu to GPU"
710 7 equemene
    Alu='GPU'
711 7 equemene
712 7 equemene
  if ParaStyle not in ('Blocks','Threads','Hybrid'):
713 7 equemene
    print "%s not exists, ParaStyle set as Threads !" % ParaStyle
714 7 equemene
    ParaStyle='Threads'
715 7 equemene
716 7 equemene
  print "Compute unit : %s" % Alu
717 7 equemene
  print "Device Identification : %s" % Device
718 7 equemene
  print "GpuStyle used : %s" % GpuStyle
719 7 equemene
  print "Parallel Style used : %s" % ParaStyle
720 7 equemene
  print "Iterations : %s" % Iterations
721 7 equemene
  print "Number of threads on start : %s" % JobStart
722 7 equemene
  print "Number of threads on end : %s" % JobEnd
723 7 equemene
  print "Number of redo : %s" % Redo
724 7 equemene
  print "Metrology done out of CPU/GPU : %r" % OutMetrology
725 72 equemene
  print "Type of Marsaglia RNG used : %s" % RNG
726 72 equemene
  print "Type of variable : %s" % ValueType
727 7 equemene
728 7 equemene
  if GpuStyle=='CUDA':
729 55 equemene
    try:
730 55 equemene
      # For PyCUDA import
731 55 equemene
      import pycuda.driver as cuda
732 55 equemene
      import pycuda.gpuarray as gpuarray
733 55 equemene
      import pycuda.autoinit
734 55 equemene
      from pycuda.compiler import SourceModule
735 55 equemene
    except ImportError:
736 55 equemene
      print "Platform does not seem to support CUDA"
737 7 equemene
738 7 equemene
  if GpuStyle=='OpenCL':
739 55 equemene
    try:
740 55 equemene
      # For PyOpenCL import
741 55 equemene
      import pyopencl as cl
742 55 equemene
      Id=1
743 55 equemene
      for platform in cl.get_platforms():
744 55 equemene
        for device in platform.get_devices():
745 55 equemene
          deviceType=cl.device_type.to_string(device.type)
746 73 equemene
          print "Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip())
747 73 equemene
748 55 equemene
          if Id == Device:
749 55 equemene
            # Set the Alu as detected Device Type
750 55 equemene
            Alu=deviceType
751 55 equemene
          Id=Id+1
752 55 equemene
    except ImportError:
753 55 equemene
      print "Platform does not seem to support CUDA"
754 55 equemene
755 7 equemene
  average=numpy.array([]).astype(numpy.float32)
756 7 equemene
  median=numpy.array([]).astype(numpy.float32)
757 7 equemene
  stddev=numpy.array([]).astype(numpy.float32)
758 75 equemene
  averageRate=numpy.array([]).astype(numpy.float32)
759 75 equemene
  medianRate=numpy.array([]).astype(numpy.float32)
760 75 equemene
  stddevRate=numpy.array([]).astype(numpy.float32)
761 7 equemene
762 7 equemene
  ExploredJobs=numpy.array([]).astype(numpy.uint32)
763 7 equemene
764 7 equemene
  Jobs=JobStart
765 7 equemene
766 7 equemene
  while Jobs <= JobEnd:
767 7 equemene
    avg,med,std=0,0,0
768 7 equemene
    ExploredJobs=numpy.append(ExploredJobs,Jobs)
769 17 equemene
    circle=numpy.zeros(Jobs).astype(numpy.uint64)
770 7 equemene
771 75 equemene
    if OutMetrology:
772 7 equemene
      duration=numpy.array([]).astype(numpy.float32)
773 75 equemene
      rate=numpy.array([]).astype(numpy.float32)
774 7 equemene
      for i in range(Redo):
775 7 equemene
        start=time.time()
776 7 equemene
        if GpuStyle=='CUDA':
777 7 equemene
          try:
778 75 equemene
            a,m,s,aR,mR,sR=MetropolisCuda(circle,Iterations,1,Jobs,ParaStyle,RNG,ValueType)
779 7 equemene
          except:
780 7 equemene
            print "Problem with %i // computations on Cuda" % Jobs
781 7 equemene
        elif GpuStyle=='OpenCL':
782 7 equemene
          try:
783 75 equemene
            a,m,s,aR,mR,sR=MetropolisOpenCL(circle,Iterations,1,Jobs,ParaStyle,Alu,Device,RNG,ValueType)
784 7 equemene
          except:
785 7 equemene
            print "Problem with %i // computations on OpenCL" % Jobs
786 7 equemene
        duration=numpy.append(duration,time.time()-start)
787 75 equemene
        rate=numpy.append(rate,Iterations/(time.time()-start))
788 75 equemene
      if (a,m,s) != (0,0,0):
789 46 equemene
        avg=numpy.mean(duration)
790 46 equemene
        med=numpy.median(duration)
791 46 equemene
        std=numpy.std(duration)
792 75 equemene
        avgR=numpy.mean(Iterations/duration)
793 75 equemene
        medR=numpy.median(Iterations/duration)
794 75 equemene
        stdR=numpy.std(Iterations/duration)
795 46 equemene
      else:
796 46 equemene
        print "Values seem to be wrong..."
797 7 equemene
    else:
798 7 equemene
      if GpuStyle=='CUDA':
799 7 equemene
        try:
800 75 equemene
          avg,med,std,avgR,medR,stdR=MetropolisCuda(circle,Iterations,Redo,Jobs,ParaStyle,RNG,ValueType)
801 7 equemene
        except:
802 7 equemene
          print "Problem with %i // computations on Cuda" % Jobs
803 7 equemene
      elif GpuStyle=='OpenCL':
804 55 equemene
        try:
805 75 equemene
          avg,med,std,avgR,medR,stdR=MetropolisOpenCL(circle,Iterations,Redo,Jobs,ParaStyle,Alu,Device,RNG,ValueType)
806 55 equemene
        except:
807 55 equemene
          print "Problem with %i // computations on OpenCL" % Jobs
808 7 equemene
809 7 equemene
    if (avg,med,std) != (0,0,0):
810 15 equemene
      print "jobs,avg,med,std",Jobs,avg,med,std
811 7 equemene
      average=numpy.append(average,avg)
812 7 equemene
      median=numpy.append(median,med)
813 7 equemene
      stddev=numpy.append(stddev,std)
814 75 equemene
      averageRate=numpy.append(averageRate,avgR)
815 75 equemene
      medianRate=numpy.append(medianRate,medR)
816 75 equemene
      stddevRate=numpy.append(stddevRate,stdR)
817 7 equemene
    else:
818 7 equemene
      print "Values seem to be wrong..."
819 7 equemene
    #THREADS*=2
820 46 equemene
    if len(average)!=0:
821 75 equemene
      averageRate=averageRate.astype(int)
822 75 equemene
      medianRate=medianRate.astype(int)
823 75 equemene
      stddevRate=stddevRate.astype(int)
824 75 equemene
      numpy.savez("Pi_%s_%s_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),(ExploredJobs,average,median,stddev,averageRate,medianRate,stddevRate))
825 75 equemene
      ToSave=[ ExploredJobs,average,median,stddev,averageRate,medianRate,stddevRate ]
826 75 equemene
      numpy.savetxt("Pi_%s_%s_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),numpy.transpose(ToSave),fmt='%i %e %e %e %i %i %i')
827 47 equemene
    Jobs+=JobStep
828 7 equemene
829 55 equemene
  if Fit:
830 55 equemene
    FitAndPrint(ExploredJobs,median,Curves)