Revision 50
Pi/GPU/Pi-GPU.py (revision 50)

--- Pi/GPU/Pi-GPU.py (previous revision)
+++ Pi/GPU/Pi-GPU.py (revision 50)
@@ -148,9 +148,79 @@
     __syncthreads();
 
 }
+
+__global__ void MainLoopBlocks64(ulong *s,ulong iterations,uint seed_w,uint seed_z)
+{
+    uint z=seed_z/(blockIdx.x+1);
+    uint w=seed_w/(blockIdx.x+1);
+
+    ulong total=0;
+
+    for (ulong i=0;i<iterations;i++) {
+
+        double x=(double)MWCfp;
+        double y=(double)MWCfp;
+
+        // Matching test
+        ulong inside=((x*x+y*y) < 1.0f) ? 1:0;
+        total+=inside;
+
+    }
+
+    s[blockIdx.x]=total;
+    __syncthreads();
+
+}
+
+__global__ void MainLoopThreads64(ulong *s,ulong iterations,uint seed_w,uint seed_z)
+{
+    uint z=seed_z/(threadIdx.x+1);
+    uint w=seed_w/(threadIdx.x+1);
+
+    ulong total=0;
+
+    for (ulong i=0;i<iterations;i++) {
+
+        double x=(double)MWCfp;
+        double y=(double)MWCfp;
+
+        // Matching test
+        ulong inside=((x*x+y*y) < 1.0f) ? 1:0;
+        total+=inside;
+
+    }
+
+    s[threadIdx.x]=total;
+    __syncthreads();
+
+}
+
+__global__ void MainLoopHybrid64(ulong *s,ulong iterations,uint seed_w,uint seed_z)
+{
+    uint z=seed_z/(blockDim.x*blockIdx.x+threadIdx.x+1);
+    uint w=seed_w/(blockDim.x*blockIdx.x+threadIdx.x+1);
+
+    ulong total=0;
+
+    for (ulong i=0;i<iterations;i++) {
+
+        double x=(double)MWCfp;
+        double y=(double)MWCfp;
+
+        // Matching test
+        ulong inside=((x*x+y*y) < 1.0f) ? 1:0;
+        total+=inside;
+
+    }
+
+    s[blockDim.x*blockIdx.x+threadIdx.x]=total;
+    __syncthreads();
+
+}
 """
 
 KERNEL_CODE_OPENCL="""
+#pragma OPENCL EXTENSION cl_khr_fp64: enable
 
 // Marsaglia RNG very simple implementation
 #define znew ((z=36969*(z&65535)+(z>>16))<<16)
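The 64-bit kernels added above implement the classic Monte Carlo estimator: draw points (x,y) uniformly in the unit square and count those landing inside the quarter disc x*x+y*y < 1; the hit ratio converges to pi/4. A minimal NumPy sketch of the same estimator (illustrative only, not part of this revision):

    import numpy

    def pi_monte_carlo(iterations):
        # Uniform points in the unit square; count hits in the
        # quarter disc, exactly as the kernels' "Matching test" does.
        x = numpy.random.rand(iterations)
        y = numpy.random.rand(iterations)
        inside = numpy.sum(x * x + y * y < 1.0)
        return 4.0 * inside / float(iterations)

    print(pi_monte_carlo(10 ** 7))  # typically within ~1e-3 of pi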
@@ -225,9 +295,72 @@
     s[get_group_id(0)*get_num_groups(0)+get_local_id(0)]=total;
 
 }
+
+__kernel void MainLoopGlobal64(__global ulong *s,ulong iterations,uint seed_w,uint seed_z)
+{
+    uint z=seed_z/(get_global_id(0)+1);
+    uint w=seed_w/(get_global_id(0)+1);
+
+    ulong total=0;
+
+    for (ulong i=0;i<iterations;i++) {
+
+        double x=(double)MWCfp;
+        double y=(double)MWCfp;
+
+        // Matching test
+        ulong inside=((x*x+y*y) < 1.0f) ? 1:0;
+        total+=inside;
+    }
+    s[get_global_id(0)]=total;
+    barrier(CLK_GLOBAL_MEM_FENCE);
+
+}
+
+__kernel void MainLoopLocal64(__global ulong *s,ulong iterations,uint seed_w,uint seed_z)
+{
+    uint z=seed_z/(get_local_id(0)+1);
+    uint w=seed_w/(get_local_id(0)+1);
+
+    ulong total=0;
+
+    for (ulong i=0;i<iterations;i++) {
+
+        double x=(double)MWCfp;
+        double y=(double)MWCfp;
+
+        // Matching test
+        ulong inside=((x*x+y*y) < 1.0f) ? 1:0;
+        total+=inside;
+    }
+    s[get_local_id(0)]=total;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+}
+
+__kernel void MainLoopHybrid64(__global ulong *s,ulong iterations,uint seed_w,uint seed_z)
+{
+    uint z=seed_z/(get_group_id(0)*get_num_groups(0)+get_local_id(0)+1);
+    uint w=seed_w/(get_group_id(0)*get_num_groups(0)+get_local_id(0)+1);
+
+    ulong total=0;
+
+    for (ulong i=0;i<iterations;i++) {
+
+        double x=(double)MWCfp;
+        double y=(double)MWCfp;
+
+        // Matching test
+        ulong inside=((x*x+y*y) < 1.0f) ? 1:0;
+        total+=inside;
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    s[get_group_id(0)*get_num_groups(0)+get_local_id(0)]=total;
+
+}
 """
 
-def MetropolisCuda(circle,iterations,steps,jobs,ParaStyle):
+def MetropolisCuda(circle,iterations,steps,jobs,ParaStyle,DoublePrecision):
 
     # With PyCUDA autoinit, nothing to do!
 
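Each of these kernels seeds its private Marsaglia state by integer-dividing the host seeds by the work-item rank plus one, then draws uniform samples through the MWCfp macro (defined earlier in the kernel source, outside the hunks shown here). A hedged Python transcription of the multiply-with-carry step, assuming the usual 18000 multiplier for the w generator (only the z macro is visible at line 226):

    def mwc_stream(z, w, n):
        # Marsaglia MWC: two 16-bit multiply-with-carry generators
        # combined into one 32-bit value, scaled to [0,1).
        for _ in range(n):
            z = (36969 * (z & 65535) + (z >> 16)) & 0xFFFFFFFF
            w = (18000 * (w & 65535) + (w >> 16)) & 0xFFFFFFFF
            yield (((z << 16) + w) & 0xFFFFFFFF) / 4294967296.0

    # Arbitrary seeds, divided by rank+1 as the kernels do (rank 0 here).
    print(list(mwc_stream(110271 // 1, 101008 // 1, 3)))

Note that dividing one shared seed by the rank gives each worker a distinct but not statistically independent stream.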
@@ -238,13 +371,16 @@
     MetropolisBlocksCU=mod.get_function("MainLoopBlocks")
     MetropolisJobsCU=mod.get_function("MainLoopThreads")
     MetropolisHybridCU=mod.get_function("MainLoopHybrid")
+    MetropolisBlocks64CU=mod.get_function("MainLoopBlocks64")
+    MetropolisJobs64CU=mod.get_function("MainLoopThreads64")
+    MetropolisHybrid64CU=mod.get_function("MainLoopHybrid64")
 
     start = pycuda.driver.Event()
     stop = pycuda.driver.Event()
 
     MyPi=numpy.zeros(steps)
     MyDuration=numpy.zeros(steps)
-
+
     if iterations%jobs==0:
         iterationsCL=numpy.uint64(iterations/jobs)
         iterationsNew=iterationsCL*jobs
@@ -256,51 +392,76 @@
         start.record()
         start.synchronize()
         if ParaStyle=='Blocks':
-            MetropolisBlocksCU(circleCU,
-                               numpy.uint64(iterationsCL),
-                               numpy.uint32(nprnd(2**30/jobs)),
-                               numpy.uint32(nprnd(2**30/jobs)),
-                               grid=(jobs,1),
-                               block=(1,1,1))
+            if DoublePrecision:
+                MetropolisBlocks64CU(circleCU,
+                                     numpy.uint64(iterationsCL),
+                                     numpy.uint32(nprnd(2**30/jobs)),
+                                     numpy.uint32(nprnd(2**30/jobs)),
+                                     grid=(jobs,1),
+                                     block=(1,1,1))
+            else:
+                MetropolisBlocksCU(circleCU,
+                                   numpy.uint64(iterationsCL),
+                                   numpy.uint32(nprnd(2**30/jobs)),
+                                   numpy.uint32(nprnd(2**30/jobs)),
+                                   grid=(jobs,1),
+                                   block=(1,1,1))
+
             print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
                   (Alu,jobs,1,ParaStyle)
         elif ParaStyle=='Hybrid':
             threads=BestThreadsNumber(jobs)
-            MetropolisHybridCU(circleCU,
-                               numpy.uint64(iterationsCL),
-                               numpy.uint32(nprnd(2**30/jobs)),
-                               numpy.uint32(nprnd(2**30/jobs)),
-                               grid=(jobs,1),
-                               block=(threads,1,1))
+            if DoublePrecision:
+                MetropolisHybrid64CU(circleCU,
+                                     numpy.uint64(iterationsCL),
+                                     numpy.uint32(nprnd(2**30/jobs)),
+                                     numpy.uint32(nprnd(2**30/jobs)),
+                                     grid=(jobs,1),
+                                     block=(threads,1,1))
+            else:
+                MetropolisHybridCU(circleCU,
+                                   numpy.uint64(iterationsCL),
+                                   numpy.uint32(nprnd(2**30/jobs)),
+                                   numpy.uint32(nprnd(2**30/jobs)),
+                                   grid=(jobs,1),
+                                   block=(threads,1,1))
             print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
                   (Alu,jobs/threads,threads,ParaStyle)
         else:
-            MetropolisJobsCU(circleCU,
-                             numpy.uint64(iterationsCL),
-                             numpy.uint32(nprnd(2**30/jobs)),
-                             numpy.uint32(nprnd(2**30/jobs)),
-                             grid=(1,1),
-                             block=(jobs,1,1))
+            if DoublePrecision:
+                MetropolisJobs64CU(circleCU,
+                                   numpy.uint64(iterationsCL),
+                                   numpy.uint32(nprnd(2**30/jobs)),
+                                   numpy.uint32(nprnd(2**30/jobs)),
+                                   grid=(1,1),
+                                   block=(jobs,1,1))
+            else:
+                MetropolisJobsCU(circleCU,
+                                 numpy.uint64(iterationsCL),
+                                 numpy.uint32(nprnd(2**30/jobs)),
+                                 numpy.uint32(nprnd(2**30/jobs)),
+                                 grid=(1,1),
+                                 block=(jobs,1,1))
             print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
                   (Alu,jobs,1,ParaStyle)
         stop.record()
         stop.synchronize()
 
-        #elapsed = stop.time_since(start)*1e-3
         elapsed = start.time_till(stop)*1e-3
 
-        #print circle,float(numpy.sum(circle))
-        MyPi[i]=4.*float(numpy.sum(circle))/float(iterationsCL)
         MyDuration[i]=elapsed
-        #print MyPi[i],MyDuration[i]
-        #time.sleep(1)
+        AllPi=4./numpy.float32(iterationsCL)*circle.astype(numpy.float32)
+        MyPi[i]=numpy.median(AllPi)
+        print MyPi[i],numpy.std(AllPi),MyDuration[i]
 
+
     print jobs,numpy.mean(MyDuration),numpy.median(MyDuration),numpy.std(MyDuration)
 
     return(numpy.mean(MyDuration),numpy.median(MyDuration),numpy.std(MyDuration))
 
 
-def MetropolisOpenCL(circle,iterations,steps,jobs,ParaStyle,Alu,Device):
+def MetropolisOpenCL(circle,iterations,steps,jobs,ParaStyle,Alu,Device,
+                     DoublePrecision):
 
     # Initialize the variables by CASTing them correctly
 
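Where the old code summed every per-thread hit count into a single estimate, the version above forms one pi estimate per work item and keeps the median, so the spread across workers (numpy.std) comes for free. A small sketch of that arithmetic with made-up counts (illustrative only):

    import numpy

    # Hypothetical per-thread hit counts copied back from the device
    # ("circle"), after iterationsCL draws per thread.
    iterationsCL = 1000000
    circle = numpy.array([785123, 785617, 785402], dtype=numpy.uint64)

    AllPi = 4. / numpy.float32(iterationsCL) * circle.astype(numpy.float32)
    print(numpy.median(AllPi), numpy.std(AllPi))  # ~3.1416 and the spread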
@@ -338,7 +499,6 @@
         return(0,0,0)
 
     # Create the context and the queue for its execution
-    #ctx = cl.create_some_context()
     ctx = cl.Context([XPU])
     queue = cl.CommandQueue(ctx,
         properties=cl.command_queue_properties.PROFILING_ENABLE)
@@ -351,8 +511,6 @@
     MetropolisCL = cl.Program(ctx,KERNEL_CODE_OPENCL).build( \
         options = "-cl-mad-enable -cl-fast-relaxed-math")
 
-    #MetropolisCL = cl.Program(ctx,KERNEL_CODE_OPENCL).build()
-
     i=0
 
     MyPi=numpy.zeros(steps)
@@ -375,30 +533,52 @@
             # SeedZCL is lattice translated in CL format
             # SeedWCL is lattice translated in CL format
             # step is number of iterations
-            CLLaunch=MetropolisCL.MainLoopGlobal(queue,(jobs,),None,
-                                                 circleCL,
-                                                 numpy.uint64(iterationsCL),
-                                                 numpy.uint32(nprnd(2**30/jobs)),
-                                                 numpy.uint32(nprnd(2**30/jobs)))
+            if DoublePrecision:
+                CLLaunch=MetropolisCL.MainLoopGlobal64(queue,(jobs,),None,
+                                                       circleCL,
+                                                       numpy.uint64(iterationsCL),
+                                                       numpy.uint32(nprnd(2**30/jobs)),
+                                                       numpy.uint32(nprnd(2**30/jobs)))
+            else:
+                CLLaunch=MetropolisCL.MainLoopGlobal(queue,(jobs,),None,
+                                                     circleCL,
+                                                     numpy.uint64(iterationsCL),
+                                                     numpy.uint32(nprnd(2**30/jobs)),
+                                                     numpy.uint32(nprnd(2**30/jobs)))
             print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
                   (Alu,jobs,1,ParaStyle)
         elif ParaStyle=='Hybrid':
             threads=BestThreadsNumber(jobs)
             # in OpenCL, the Global_id must be set identical to the local_id
-            CLLaunch=MetropolisCL.MainLoopHybrid(queue,(jobs,),(threads,),
-                                                 circleCL,
-                                                 numpy.uint64(iterationsCL),
-                                                 numpy.uint32(nprnd(2**30/jobs)),
-                                                 numpy.uint32(nprnd(2**30/jobs)))
+            if DoublePrecision:
+                CLLaunch=MetropolisCL.MainLoopHybrid64(queue,(jobs,),(threads,),
+                                                       circleCL,
+                                                       numpy.uint64(iterationsCL),
+                                                       numpy.uint32(nprnd(2**30/jobs)),
+                                                       numpy.uint32(nprnd(2**30/jobs)))
+            else:
+                CLLaunch=MetropolisCL.MainLoopHybrid(queue,(jobs,),(threads,),
+                                                     circleCL,
+                                                     numpy.uint64(iterationsCL),
+                                                     numpy.uint32(nprnd(2**30/jobs)),
+                                                     numpy.uint32(nprnd(2**30/jobs)))
+
             print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
                   (Alu,jobs/threads,threads,ParaStyle)
         else:
             # in OpenCL, the Global_id must be set identical to the local_id
-            CLLaunch=MetropolisCL.MainLoopLocal(queue,(jobs,),(jobs,),
-                                                circleCL,
-                                                numpy.uint64(iterationsCL),
-                                                numpy.uint32(nprnd(2**30/jobs)),
-                                                numpy.uint32(nprnd(2**30/jobs)))
+            if DoublePrecision:
+                CLLaunch=MetropolisCL.MainLoopLocal64(queue,(jobs,),(jobs,),
+                                                      circleCL,
+                                                      numpy.uint64(iterationsCL),
+                                                      numpy.uint32(nprnd(2**30/jobs)),
+                                                      numpy.uint32(nprnd(2**30/jobs)))
+            else:
+                CLLaunch=MetropolisCL.MainLoopLocal(queue,(jobs,),(jobs,),
+                                                    circleCL,
+                                                    numpy.uint64(iterationsCL),
+                                                    numpy.uint32(nprnd(2**30/jobs)),
+                                                    numpy.uint32(nprnd(2**30/jobs)))
             print "%s with %i %s done" % (Alu,jobs,ParaStyle)
 
         CLLaunch.wait()
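All three OpenCL launches go through the profiling-enabled queue built earlier, and CLLaunch.wait() blocks until the kernel finishes; the elapsed time is presumably read from the event's profiling counters in the code after this hunk. A minimal self-contained sketch of that launch-and-profile pattern, with a trivial stand-in kernel:

    import numpy
    import pyopencl as cl

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx,
        properties=cl.command_queue_properties.PROFILING_ENABLE)
    prg = cl.Program(ctx,
        "__kernel void noop(__global uint *x){x[get_global_id(0)]+=1;}").build()
    buf = cl.Buffer(ctx, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
                    hostbuf=numpy.zeros(16, dtype=numpy.uint32))
    evt = prg.noop(queue, (16,), None, buf)  # launch returns a cl.Event
    evt.wait()
    # Profiling counters are in nanoseconds.
    print((evt.profile.end - evt.profile.start) * 1e-9, "s")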
@@ -515,16 +695,18 @@
 Metrology='InMetro'
 # Curves is True to print the curves
 Curves=False
+# DoublePrecision for FP computations
+DoublePrecision=False
 
 try:
-    opts, args = getopt.getopt(sys.argv[1:],"hoca:g:p:i:s:e:t:r:d:",["alu=","gpustyle=","parastyle=","iterations=","jobstart=","jobend=","jobstep=","redo=","device="])
+    opts, args = getopt.getopt(sys.argv[1:],"hocla:g:p:i:s:e:t:r:d:",["alu=","gpustyle=","parastyle=","iterations=","jobstart=","jobend=","jobstep=","redo=","device="])
 except getopt.GetoptError:
-    print '%s -o (Out of Core Metrology) -c (Print Curves) -a <CPU/GPU/ACCELERATOR> -d <DeviceId> -g <CUDA/OpenCL> -p <Threads/Hybrid/Blocks> -i <Iterations> -s <JobStart> -e <JobEnd> -t <JobStep> -r <RedoToImproveStats>' % sys.argv[0]
+    print '%s -o (Out of Core Metrology) -c (Print Curves) -l (Double Precision) -a <CPU/GPU/ACCELERATOR> -d <DeviceId> -g <CUDA/OpenCL> -p <Threads/Hybrid/Blocks> -i <Iterations> -s <JobStart> -e <JobEnd> -t <JobStep> -r <RedoToImproveStats>' % sys.argv[0]
     sys.exit(2)
 
 for opt, arg in opts:
     if opt == '-h':
-        print '%s -o (Out of Core Metrology) -c (Print Curves) -a <CPU/GPU/ACCELERATOR> -d <DeviceId> -g <CUDA/OpenCL> -p <Threads/Hybrid/Blocks> -i <Iterations> -s <JobStart> -e <JobEnd> -t <JobStep> -r <RedoToImproveStats>' % sys.argv[0]
+        print '%s -o (Out of Core Metrology) -c (Print Curves) -l (Double Precision) -a <CPU/GPU/ACCELERATOR> -d <DeviceId> -g <CUDA/OpenCL> -p <Threads/Hybrid/Blocks> -i <Iterations> -s <JobStart> -e <JobEnd> -t <JobStep> -r <RedoToImproveStats>' % sys.argv[0]
 
         print "\nInformations about devices detected under OpenCL:"
         # For PyOpenCL import
@@ -540,6 +722,8 @@
     elif opt == '-o':
         OutMetrology=True
         Metrology='OutMetro'
+    elif opt == '-l':
+        DoublePrecision=True
     elif opt == '-c':
         Curves=True
     elif opt in ("-a", "--alu"):
@@ -578,6 +762,7 @@
 print "Number of threads on end : %s" % JobEnd
 print "Number of redo : %s" % Redo
 print "Metrology done out of CPU/GPU : %r" % OutMetrology
+print "Double Precision in Kernels : %r" % DoublePrecision
 
 if GpuStyle=='CUDA':
     # For PyCUDA import
@@ -594,6 +779,9 @@
     for device in platform.get_devices():
         deviceType=cl.device_type.to_string(device.type)
         print "Device #%i of type %s : %s" % (Id,deviceType,device.name)
+        if Id == Device:
+            # Set the Alu as detected Device Type
+            Alu=deviceType
         Id=Id+1
 
 average=numpy.array([]).astype(numpy.float32)
@@ -615,12 +803,14 @@
         start=time.time()
         if GpuStyle=='CUDA':
             try:
-                a,m,s=MetropolisCuda(circle,Iterations,1,Jobs,ParaStyle)
+                a,m,s=MetropolisCuda(circle,Iterations,1,Jobs,ParaStyle,
+                                     DoublePrecision)
             except:
                 print "Problem with %i // computations on Cuda" % Jobs
         elif GpuStyle=='OpenCL':
             try:
-                a,m,s=MetropolisOpenCL(circle,Iterations,1,Jobs,ParaStyle,Alu,Device)
+                a,m,s=MetropolisOpenCL(circle,Iterations,1,Jobs,ParaStyle,
+                                       Alu,Device,DoublePrecision)
             except:
                 print "Problem with %i // computations on OpenCL" % Jobs
         duration=numpy.append(duration,time.time()-start)
@@ -633,7 +823,8 @@
     else:
         if GpuStyle=='CUDA':
             try:
-                avg,med,std=MetropolisCuda(circle,Iterations,Redo,Jobs,ParaStyle)
+                avg,med,std=MetropolisCuda(circle,Iterations,Redo,Jobs,ParaStyle,
+                                           DoublePrecision)
             except:
                 print "Problem with %i // computations on Cuda" % Jobs
         elif GpuStyle=='OpenCL':
@@ -641,7 +832,7 @@
 #            avg,med,std=MetropolisOpenCL(circle,Iterations,Redo,Jobs,ParaStyle,Alu,Device)
 #        except:
 #            print "Problem with %i // computations on OpenCL" % Jobs
-            avg,med,std=MetropolisOpenCL(circle,Iterations,Redo,Jobs,ParaStyle,Alu,Device)
+            avg,med,std=MetropolisOpenCL(circle,Iterations,Redo,Jobs,ParaStyle,Alu,Device,DoublePrecision)
 
     if (avg,med,std) != (0,0,0):
         print "jobs,avg,med,std",Jobs,avg,med,std
@@ -651,9 +842,14 @@
     else:
         print "Values seem to be wrong..."
     #THREADS*=2
+    if DoublePrecision:
+        Precision='DP'
+    else:
+        Precision='SP'
     if len(average)!=0:
-        numpy.savez("Pi_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),(ExploredJobs,average,median,stddev))
-        numpy.savetxt("Pi_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),(ExploredJobs,average,median,stddev))
+        numpy.savez("Pi%s_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (Precision,Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),(ExploredJobs,average,median,stddev))
+        ToSave=[ ExploredJobs,average,median,stddev ]
+        numpy.savetxt("Pi%s_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (Precision,Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),numpy.transpose(ToSave))
     Jobs+=JobStep
 
 FitAndPrint(ExploredJobs,median,Curves)
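The rewritten savetxt call transposes the saved quadruple so the text file gets one row per explored job count and one column per statistic (savetxt writes one row per element of its first axis). Illustrative values:

    import numpy

    ExploredJobs = numpy.array([1, 2, 4])
    average = numpy.array([0.12, 0.07, 0.05])
    median  = numpy.array([0.11, 0.07, 0.05])
    stddev  = numpy.array([0.01, 0.01, 0.02])
    ToSave = [ExploredJobs, average, median, stddev]
    # Each row: jobs, average, median, stddev for that job count.
    numpy.savetxt("Pi_demo.txt", numpy.transpose(ToSave))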