Revision 307

Splutter/GPU/SplutterGPU.py (revision 307)
@@ -387,7 +387,7 @@
     start_time=time.time()
     Splutter[:]=0
 
-    print Splutter,len(Splutter)
+    print(Splutter,len(Splutter))
 
     SplutterCU = cuda.InOut(Splutter)
 
@@ -402,8 +402,8 @@
                          grid=(jobs,1),
                          block=(1,1,1))
 
-      print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
-            (Alu,jobs,1,ParaStyle)
+      print("%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
+            (Alu,jobs,1,ParaStyle))
     elif ParaStyle=='Hybrid':
       threads=BestThreadsNumber(jobs)
       MetropolisHybridCU(SplutterCU,
@@ -413,8 +413,8 @@
                          numpy.uint32(nprnd(2**30/jobs)),
                          grid=(jobs,1),
                          block=(threads,1,1))
-      print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
-            (Alu,jobs/threads,threads,ParaStyle)
+      print("%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
+            (Alu,jobs/threads,threads,ParaStyle))
     else:
       MetropolisThreadsCU(SplutterCU,
                        numpy.uint32(len(Splutter)),
@@ -423,17 +423,17 @@
                        numpy.uint32(nprnd(2**30/jobs)),
                        grid=(1,1),
                        block=(jobs,1,1))
-      print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
-            (Alu,1,jobs,ParaStyle)
+      print("%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
+            (Alu,1,jobs,ParaStyle))
     stop.record()
     stop.synchronize()
 
 #    elapsed = start.time_till(stop)*1e-3
     elapsed = time.time()-start_time
 
-    print Splutter,sum(Splutter)
+    print(Splutter,sum(Splutter))
     MySplutter[i]=numpy.median(Splutter)
-    print numpy.mean(Splutter),MySplutter[i],numpy.std(Splutter)
+    print(numpy.mean(Splutter),MySplutter[i],numpy.std(Splutter))
 
     MyDuration[i]=elapsed
 
@@ -442,7 +442,7 @@
     #print MyPi[i],numpy.std(AllPi),MyDuration[i]
 
 
-  print jobs,numpy.mean(MyDuration),numpy.median(MyDuration),numpy.std(MyDuration)
+  print(jobs,numpy.mean(MyDuration),numpy.median(MyDuration),numpy.std(MyDuration))
 
   return(numpy.mean(MyDuration),numpy.median(MyDuration),numpy.std(MyDuration))
 
@@ -456,7 +456,7 @@
   MinMemoryXPU=0
 
   if Device==0:
-    print "Enter XPU selector based on ALU type: first selected"
+    print("Enter XPU selector based on ALU type: first selected")
     HasXPU=False
     # Default Device selection based on ALU Type
     for platform in cl.get_platforms():
@@ -469,12 +469,12 @@
           MinMemoryXPU=deviceMemory
         if not HasXPU:
           XPU=device
-          print "XPU selected with Allocable Memory %i: %s" % (deviceMemory,device.name)
+          print("XPU selected with Allocable Memory %i: %s" % (deviceMemory,device.name))
           HasXPU=True
           MemoryXPU=deviceMemory
 
   else:
-    print "Enter XPU selector based on device number & ALU type"
+    print("Enter XPU selector based on device number & ALU type")
     Id=1
     HasXPU=False
     # Primary Device selection based on Device Id
@@ -488,16 +488,16 @@
           MinMemoryXPU=deviceMemory
         if Id==Device  and HasXPU==False:
           XPU=device
-          print "CPU/GPU selected with Allocable Memory %i: %s" % (deviceMemory,device.name)
+          print("CPU/GPU selected with Allocable Memory %i: %s" % (deviceMemory,device.name))
           HasXPU=True
           MemoryXPU=deviceMemory
         Id=Id+1
     if HasXPU==False:
-      print "No XPU #%i of type %s found in all of %i devices, sorry..." % \
-          (Device,Alu,Id-1)
+      print("No XPU #%i of type %s found in all of %i devices, sorry..." % \
+          (Device,Alu,Id-1))
       return(0,0,0)
 
-  print "Allocable Memory is %i, between %i and %i " % (MemoryXPU,MinMemoryXPU,MaxMemoryXPU)
+  print("Allocable Memory is %i, between %i and %i " % (MemoryXPU,MinMemoryXPU,MaxMemoryXPU))
 
   # Je cree le contexte et la queue pour son execution
   ctx = cl.Context([XPU])
@@ -520,7 +520,7 @@
   MySplutter=numpy.zeros(steps)
 
   MaxWorks=2**(int)(numpy.log2(MinMemoryXPU/4))
-  print MaxWorks,2**(int)(numpy.log2(MemoryXPU))
+  print(MaxWorks,2**(int)(numpy.log2(MemoryXPU)))
 
   #Splutter=numpy.zeros((MaxWorks/jobs)*jobs).astype(numpy.uint32)
   #Splutter=numpy.zeros(jobs*16).astype(numpy.uint32)
@@ -535,7 +535,7 @@
 
     Splutter[:]=0
 
-    print Splutter,len(Splutter)
+    print(Splutter,len(Splutter))
 
     h2d_time=time.time()
     SplutterCL = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=Splutter)
@@ -563,12 +563,12 @@
                                            numpy.uint32(nprnd(2**30/jobs)),
                                            numpy.uint32(nprnd(2**30/jobs)))
 
-      print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
-            (Alu,jobs,1,ParaStyle)
+      print("%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
+            (Alu,jobs,1,ParaStyle))
     elif ParaStyle=='Hybrid':
       #threads=BestThreadsNumber(jobs)
       threads=BestThreadsNumber(256)
-      print "print",threads
+      print("print",threads)
       # en OpenCL, necessaire de mettre un Global_id identique au local_id
       CLLaunch=MetropolisCL.SplutterHybrid(queue,(jobs,),(threads,),
                                            SplutterCL,
@@ -577,8 +577,8 @@
                                            numpy.uint32(nprnd(2**30/jobs)),
                                            numpy.uint32(nprnd(2**30/jobs)))
 
-      print "%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
-            (Alu,jobs/threads,threads,ParaStyle)
+      print("%s with (WorkItems/Threads)=(%i,%i) %s method done" % \
+            (Alu,jobs/threads,threads,ParaStyle))
     else:
       # en OpenCL, necessaire de mettre un global_id identique au local_id
       CLLaunch=MetropolisCL.SplutterLocal(queue,(jobs,),(jobs,),
@@ -589,7 +589,7 @@
                                           numpy.uint32(nprnd(2**30/jobs)))
 
 
-      print "%s with %i %s done" % (Alu,jobs,ParaStyle)
+      print("%s with %i %s done" % (Alu,jobs,ParaStyle))
 
     CLLaunch.wait()
     d2h_time=time.time()
@@ -601,13 +601,13 @@
     print('Elapsed compute time %f' % elapsed)
 
     MyDuration[i]=elapsed
-    #print Splutter,sum(Splutter)
+    print(Splutter,sum(Splutter))
     #MySplutter[i]=numpy.median(Splutter)
-    #print numpy.mean(Splutter)*len(Splutter),MySplutter[i]*len(Splutter),numpy.std(Splutter)
+    #print(numpy.mean(Splutter)*len(Splutter),MySplutter[i]*len(Splutter),numpy.std(Splutter))
 
   SplutterCL.release()
 
-  print jobs,numpy.mean(MyDuration),numpy.median(MyDuration),numpy.std(MyDuration)
+  print(jobs,numpy.mean(MyDuration),numpy.median(MyDuration),numpy.std(MyDuration))
 
   return(numpy.mean(MyDuration),numpy.median(MyDuration),numpy.std(MyDuration))
 
@@ -624,10 +624,10 @@
     coeffs_Amdahl[1]=coeffs_Amdahl[1]*coeffs_Amdahl[0]/D[0]
     coeffs_Amdahl[2]=coeffs_Amdahl[2]*coeffs_Amdahl[0]/D[0]
     coeffs_Amdahl[0]=D[0]
-    print "Amdahl Normalized: T=%.2f(%.6f+%.6f/N)" % \
-        (coeffs_Amdahl[0],coeffs_Amdahl[1],coeffs_Amdahl[2])
+    print("Amdahl Normalized: T=%.2f(%.6f+%.6f/N)" % \
+        (coeffs_Amdahl[0],coeffs_Amdahl[1],coeffs_Amdahl[2]))
   except:
-    print "Impossible to fit for Amdahl law : only %i elements" % len(D)
+    print("Impossible to fit for Amdahl law : only %i elements" % len(D))
 
   try:
     coeffs_AmdahlR, matcov_AmdahlR = curve_fit(AmdahlR, N, D)
@@ -635,11 +635,11 @@
     D_AmdahlR=AmdahlR(N,coeffs_AmdahlR[0],coeffs_AmdahlR[1])
     coeffs_AmdahlR[1]=coeffs_AmdahlR[1]*coeffs_AmdahlR[0]/D[0]
     coeffs_AmdahlR[0]=D[0]
-    print "Amdahl Reduced Normalized: T=%.2f(%.6f+%.6f/N)" % \
-        (coeffs_AmdahlR[0],1-coeffs_AmdahlR[1],coeffs_AmdahlR[1])
+    print("Amdahl Reduced Normalized: T=%.2f(%.6f+%.6f/N)" % \
+        (coeffs_AmdahlR[0],1-coeffs_AmdahlR[1],coeffs_AmdahlR[1]))
 
   except:
-    print "Impossible to fit for Reduced Amdahl law : only %i elements" % len(D)
+    print("Impossible to fit for Reduced Amdahl law : only %i elements" % len(D))
 
   try:
     coeffs_Mylq, matcov_Mylq = curve_fit(Mylq, N, D)
@@ -648,14 +648,14 @@
     # coeffs_Mylq[2]=coeffs_Mylq[2]*coeffs_Mylq[0]/D[0]
     coeffs_Mylq[3]=coeffs_Mylq[3]*coeffs_Mylq[0]/D[0]
     coeffs_Mylq[0]=D[0]
-    print "Mylq Normalized : T=%.2f(%.6f+%.6f/N)+%.6f*N" % (coeffs_Mylq[0],
+    print("Mylq Normalized : T=%.2f(%.6f+%.6f/N)+%.6f*N" % (coeffs_Mylq[0],
                                                             coeffs_Mylq[1],
                                                             coeffs_Mylq[3],
-                                                            coeffs_Mylq[2])
+                                                            coeffs_Mylq[2]))
     D_Mylq=Mylq(N,coeffs_Mylq[0],coeffs_Mylq[1],coeffs_Mylq[2],
                 coeffs_Mylq[3])
   except:
-    print "Impossible to fit for Mylq law : only %i elements" % len(D)
+    print("Impossible to fit for Mylq law : only %i elements" % len(D))
 
   try:
     coeffs_Mylq2, matcov_Mylq2 = curve_fit(Mylq2, N, D)
@@ -665,12 +665,12 @@
     # coeffs_Mylq2[3]=coeffs_Mylq2[3]*coeffs_Mylq2[0]/D[0]
     coeffs_Mylq2[4]=coeffs_Mylq2[4]*coeffs_Mylq2[0]/D[0]
     coeffs_Mylq2[0]=D[0]
-    print "Mylq 2nd order Normalized: T=%.2f(%.6f+%.6f/N)+%.6f*N+%.6f*N^2" % \
-        (coeffs_Mylq2[0],coeffs_Mylq2[1],
-         coeffs_Mylq2[4],coeffs_Mylq2[2],coeffs_Mylq2[3])
+    print("Mylq 2nd order Normalized: T=%.2f(%.6f+%.6f/N)+%.6f*N+%.6f*N^2" % \
+          (coeffs_Mylq2[0],coeffs_Mylq2[1],
+           coeffs_Mylq2[4],coeffs_Mylq2[2],coeffs_Mylq2[3]))
 
   except:
-    print "Impossible to fit for 2nd order Mylq law : only %i elements" % len(D)
+    print("Impossible to fit for 2nd order Mylq law : only %i elements" % len(D))
 
   if Curves:
     plt.xlabel("Number of Threads/work Items")
@@ -681,7 +681,7 @@
       pAmdahl,=plt.plot(N,D_Amdahl,label="Loi de Amdahl")
       pMylq,=plt.plot(N,D_Mylq,label="Loi de Mylq")
     except:
-      print "Fit curves seem not to be available"
+      print("Fit curves seem not to be available")
 
     plt.legend()
     plt.show()
@@ -721,14 +721,14 @@
   try:
     opts, args = getopt.getopt(sys.argv[1:],"hocfa:g:p:i:s:e:t:r:d:m:",["alu=","gpustyle=","parastyle=","iterations=","jobstart=","jobend=","jobstep=","redo=","device="])
   except getopt.GetoptError:
-    print '%s -o (Out of Core Metrology) -c (Print Curves) -f (Fit to Amdahl Law) -a <CPU/GPU/ACCELERATOR> -d <DeviceId> -g <CUDA/OpenCL> -p <Threads/Hybrid/Blocks> -i <Iterations> -s <JobStart> -e <JobEnd> -t <JobStep> -r <RedoToImproveStats> -m <MemoryRaw>' % sys.argv[0]
+    print('%s -o (Out of Core Metrology) -c (Print Curves) -f (Fit to Amdahl Law) -a <CPU/GPU/ACCELERATOR> -d <DeviceId> -g <CUDA/OpenCL> -p <Threads/Hybrid/Blocks> -i <Iterations> -s <JobStart> -e <JobEnd> -t <JobStep> -r <RedoToImproveStats> -m <MemoryRaw>' % sys.argv[0])
     sys.exit(2)
 
   for opt, arg in opts:
     if opt == '-h':
-      print '%s -o (Out of Core Metrology) -c (Print Curves) -f (Fit to Amdahl Law) -a <CPU/GPU/ACCELERATOR> -d <DeviceId> -g <CUDA/OpenCL> -p <Threads/Hybrid/Blocks> -i <Iterations> -s <JobStart> -e <JobEnd> -t <JobStep> -r <RedoToImproveStats> -m <MemoryRaw>' % sys.argv[0]
+      print('%s -o (Out of Core Metrology) -c (Print Curves) -f (Fit to Amdahl Law) -a <CPU/GPU/ACCELERATOR> -d <DeviceId> -g <CUDA/OpenCL> -p <Threads/Hybrid/Blocks> -i <Iterations> -s <JobStart> -e <JobEnd> -t <JobStep> -r <RedoToImproveStats> -m <MemoryRaw>' % sys.argv[0])
 
-      print "\nInformations about devices detected under OpenCL:"
+      print("\nInformations about devices detected under OpenCL:")
       # For PyOpenCL import
       try:
         import pyopencl as cl
@@ -737,13 +737,13 @@
           for device in platform.get_devices():
             #deviceType=cl.device_type.to_string(device.type)
             deviceMemory=device.max_mem_alloc_size
-            print "Device #%i from %s with memory %i : %s" % (Id,platform.vendor,deviceMemory,device.name.lstrip())
+            print("Device #%i from %s with memory %i : %s" % (Id,platform.vendor,deviceMemory,device.name.lstrip()))
             Id=Id+1
 
-        print
+        print()
         sys.exit()
       except ImportError:
-        print "Your platform does not seem to support OpenCL"
+        print("Your platform does not seem to support OpenCL")
 
     elif opt == '-o':
       OutMetrology=True
@@ -774,23 +774,23 @@
       Memory = int(arg)
 
   if Alu=='CPU' and GpuStyle=='CUDA':
-    print "Alu can't be CPU for CUDA, set Alu to GPU"
+    print("Alu can't be CPU for CUDA, set Alu to GPU")
     Alu='GPU'
 
   if ParaStyle not in ('Blocks','Threads','Hybrid'):
-    print "%s not exists, ParaStyle set as Threads !" % ParaStyle
+    print("%s not exists, ParaStyle set as Threads !" % ParaStyle)
     ParaStyle='Blocks'
 
-  print "Compute unit : %s" % Alu
-  print "Device Identification : %s" % Device
-  print "GpuStyle used : %s" % GpuStyle
-  print "Parallel Style used : %s" % ParaStyle
-  print "Iterations : %s" % Iterations
-  print "Number of threads on start : %s" % JobStart
-  print "Number of threads on end : %s" % JobEnd
-  print "Number of redo : %s" % Redo
-  print "Memory  : %s" % Memory
-  print "Metrology done out of CPU/GPU : %r" % OutMetrology
+  print("Compute unit : %s" % Alu)
+  print("Device Identification : %s" % Device)
+  print("GpuStyle used : %s" % GpuStyle)
+  print("Parallel Style used : %s" % ParaStyle)
+  print("Iterations : %s" % Iterations)
+  print("Number of threads on start : %s" % JobStart)
+  print("Number of threads on end : %s" % JobEnd)
+  print("Number of redo : %s" % Redo)
+  print("Memory  : %s" % Memory)
+  print("Metrology done out of CPU/GPU : %r" % OutMetrology)
 
   if GpuStyle=='CUDA':
     try:
@@ -800,7 +800,7 @@
       import pycuda.autoinit
      from pycuda.compiler import SourceModule
     except ImportError:
-      print "Platform does not seem to support CUDA"
+      print("Platform does not seem to support CUDA")
 
   if GpuStyle=='OpenCL':
     try:
@@ -810,13 +810,13 @@
       for platform in cl.get_platforms():
         for device in platform.get_devices():
           #deviceType=cl.device_type.to_string(device.type)
-          print "Device #%i : %s" % (Id,device.name)
+          print("Device #%i : %s" % (Id,device.name))
           if Id == Device:
             # Set the Alu as detected Device Type
             Alu='xPU'
           Id=Id+1
     except ImportError:
-      print "Platform does not seem to support CUDA"
+      print("Platform does not seem to support CUDA")
 
   average=numpy.array([]).astype(numpy.float32)
   median=numpy.array([]).astype(numpy.float32)
@@ -840,41 +840,41 @@
             a,m,s=MetropolisCuda(circle,Iterations,1,Jobs,ParaStyle,
                                  Memory)
           except:
-            print "Problem with %i // computations on Cuda" % Jobs
+            print("Problem with %i // computations on Cuda" % Jobs)
         elif GpuStyle=='OpenCL':
           try:
             a,m,s=MetropolisOpenCL(circle,Iterations,1,Jobs,ParaStyle,
                                    Alu,Device,Memory)
           except:
-            print "Problem with %i // computations on OpenCL" % Jobs
+            print("Problem with %i // computations on OpenCL" % Jobs)
         duration=numpy.append(duration,time.time()-start)
       if (a,m,s) != (0,0,0):
         avg=numpy.mean(duration)
         med=numpy.median(duration)
         std=numpy.std(duration)
       else:
-        print "Values seem to be wrong..."
+        print("Values seem to be wrong...")
     else:
       if GpuStyle=='CUDA':
         try:
           avg,med,std=MetropolisCuda(circle,Iterations,Redo,
                                      Jobs,ParaStyle,Memory)
         except:
-          print "Problem with %i // computations on Cuda" % Jobs
+          print("Problem with %i // computations on Cuda" % Jobs)
       elif GpuStyle=='OpenCL':
         try:
           avg,med,std=MetropolisOpenCL(circle,Iterations,Redo,Jobs,
                                        ParaStyle,Alu,Device,Memory)
         except:
-          print "Problem with %i // computations on OpenCL" % Jobs
+          print("Problem with %i // computations on OpenCL" % Jobs)
 
     if (avg,med,std) != (0,0,0):
-      print "jobs,avg,med,std",Jobs,avg,med,std
+      print("jobs,avg,med,std",Jobs,avg,med,std)
       average=numpy.append(average,avg)
       median=numpy.append(median,med)
      stddev=numpy.append(stddev,std)
     else:
-      print "Values seem to be wrong..."
+      print("Values seem to be wrong...")
     #THREADS*=2
     if len(average)!=0:
       numpy.savez("Splutter_%s_%s_%s_%i_%i_%.8i_Device%i_%s_%s" % (Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),(ExploredJobs,average,median,stddev))