Révision 217 TrouNoir/TrouNoir.py
TrouNoir.py (revision 217) | ||
---|---|---|
1 |
#!/usr/bin/env python |
|
1 |
#!/usr/bin/env python3
|
|
2 | 2 |
# |
3 | 3 |
# TrouNoir model using PyOpenCL |
4 | 4 |
# |
... | ... | |
45 | 45 |
#define EINSTEIN 0 |
46 | 46 |
#define NEWTON 1 |
47 | 47 |
|
48 |
#define TRACKPOINTS 2048 |
|
49 |
|
|
48 | 50 |
float atanp(float x,float y) |
49 | 51 |
{ |
50 | 52 |
float angle; |
... | ... | |
213 | 215 |
|
214 | 216 |
// Perform trajectory for each pixel, exit on hit |
215 | 217 |
|
216 |
float m,rs,ri,re,tho; |
|
217 |
int q,raie; |
|
218 |
private float m,rs,ri,re,tho;
|
|
219 |
private int q,raie;
|
|
218 | 220 |
|
219 | 221 |
m=Mass; |
220 | 222 |
rs=2.*m; |
... | ... | |
224 | 226 |
q=-2; |
225 | 227 |
raie=Line; |
226 | 228 |
|
227 |
float d,bmx,db,b,h; |
|
228 |
float rp[2048];
|
|
229 |
float phi,thi,phd,php,nr,r; |
|
230 |
int nh; |
|
231 |
float zp,fp; |
|
229 |
private float d,bmx,db,b,h;
|
|
230 |
private float rp[TRACKPOINTS];
|
|
231 |
private float phi,thi,phd,php,nr,r;
|
|
232 |
private int nh;
|
|
233 |
private float zp,fp;
|
|
232 | 234 |
|
233 | 235 |
// Autosize for image |
234 | 236 |
bmx=1.25*re; |
235 | 237 |
b=0.; |
236 | 238 |
|
237 |
h=4.e0f*PI/(float)2048;
|
|
239 |
h=4.e0f*PI/(float)TRACKPOINTS;
|
|
238 | 240 |
|
239 | 241 |
// set origin as center of image |
240 | 242 |
float x=(float)xi-(float)(sizex/2)+(float)5e-1f; |
... | ... | |
533 | 535 |
} |
534 | 536 |
""" |
535 | 537 |
|
538 |
def KernelCodeCuda(): |
|
539 |
BlobCUDA= """ |
|
540 |
|
|
541 |
#define PI (float)3.14159265359 |
|
542 |
#define nbr 256 |
|
543 |
|
|
544 |
#define EINSTEIN 0 |
|
545 |
#define NEWTON 1 |
|
546 |
|
|
547 |
#define TRACKPOINTS 2048 |
|
548 |
|
|
549 |
__device__ float nothing(float x) |
|
550 |
{ |
|
551 |
return(x); |
|
552 |
} |
|
553 |
|
|
554 |
__device__ float atanp(float x,float y) |
|
555 |
{ |
|
556 |
float angle; |
|
557 |
|
|
558 |
angle=atan2(y,x); |
|
559 |
|
|
560 |
if (angle<0.e0f) |
|
561 |
{ |
|
562 |
angle+=(float)2.e0f*PI; |
|
563 |
} |
|
564 |
|
|
565 |
return(angle); |
|
566 |
} |
|
567 |
|
|
568 |
__device__ float f(float v) |
|
569 |
{ |
|
570 |
return(v); |
|
571 |
} |
|
572 |
|
|
573 |
#if PHYSICS == NEWTON |
|
574 |
__device__ float g(float u,float m,float b) |
|
575 |
{ |
|
576 |
return (-u); |
|
577 |
} |
|
578 |
#else |
|
579 |
__device__ float g(float u,float m,float b) |
|
580 |
{ |
|
581 |
return (3.e0f*m/b*pow(u,2)-u); |
|
582 |
} |
|
583 |
#endif |
|
584 |
|
|
585 |
__device__ void calcul(float *us,float *vs,float up,float vp, |
|
586 |
float h,float m,float b) |
|
587 |
{ |
|
588 |
float c0,c1,c2,c3,d0,d1,d2,d3; |
|
589 |
|
|
590 |
c0=h*f(vp); |
|
591 |
c1=h*f(vp+c0/2.); |
|
592 |
c2=h*f(vp+c1/2.); |
|
593 |
c3=h*f(vp+c2); |
|
594 |
d0=h*g(up,m,b); |
|
595 |
d1=h*g(up+d0/2.,m,b); |
|
596 |
d2=h*g(up+d1/2.,m,b); |
|
597 |
d3=h*g(up+d2,m,b); |
|
598 |
|
|
599 |
*us=up+(c0+2.*c1+2.*c2+c3)/6.; |
|
600 |
*vs=vp+(d0+2.*d1+2.*d2+d3)/6.; |
|
601 |
} |
|
602 |
|
|
603 |
__device__ void rungekutta(float *ps,float *us,float *vs, |
|
604 |
float pp,float up,float vp, |
|
605 |
float h,float m,float b) |
|
606 |
{ |
|
607 |
calcul(us,vs,up,vp,h,m,b); |
|
608 |
*ps=pp+h; |
|
609 |
} |
|
610 |
|
|
611 |
__device__ float decalage_spectral(float r,float b,float phi, |
|
612 |
float tho,float m) |
|
613 |
{ |
|
614 |
return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi))); |
|
615 |
} |
|
616 |
|
|
617 |
__device__ float spectre(float rf,int q,float b,float db, |
|
618 |
float h,float r,float m,float bss) |
|
619 |
{ |
|
620 |
float flx; |
|
621 |
|
|
622 |
flx=exp(q*log(r/m))*pow(rf,4)*b*db*h; |
|
623 |
return(flx); |
|
624 |
} |
|
625 |
|
|
626 |
__device__ float spectre_cn(float rf32,float b32,float db32, |
|
627 |
float h32,float r32,float m32,float bss32) |
|
628 |
{ |
|
629 |
|
|
630 |
#define MYFLOAT float |
|
631 |
|
|
632 |
MYFLOAT rf=(MYFLOAT)(rf32); |
|
633 |
MYFLOAT b=(MYFLOAT)(b32); |
|
634 |
MYFLOAT db=(MYFLOAT)(db32); |
|
635 |
MYFLOAT h=(MYFLOAT)(h32); |
|
636 |
MYFLOAT r=(MYFLOAT)(r32); |
|
637 |
MYFLOAT m=(MYFLOAT)(m32); |
|
638 |
MYFLOAT bss=(MYFLOAT)(bss32); |
|
639 |
|
|
640 |
MYFLOAT flx; |
|
641 |
MYFLOAT nu_rec,nu_em,qu,temp_em,flux_int; |
|
642 |
int fi,posfreq; |
|
643 |
|
|
644 |
#define planck 6.62e-34 |
|
645 |
#define k 1.38e-23 |
|
646 |
#define c2 9.e16 |
|
647 |
#define temp 3.e7 |
|
648 |
#define m_point 1. |
|
649 |
|
|
650 |
#define lplanck (log(6.62)-34.*log(10.)) |
|
651 |
#define lk (log(1.38)-23.*log(10.)) |
|
652 |
#define lc2 (log(9.)+16.*log(10.)) |
|
653 |
|
|
654 |
MYFLOAT v=1.-3./r; |
|
655 |
|
|
656 |
qu=1./sqrt((1.-3./r)*r)*(sqrt(r)-sqrt(6.)+sqrt(3.)/2.*log((sqrt(r)+sqrt(3.))/(sqrt(r)-sqrt(3.))* 0.17157287525380988 )); |
|
657 |
|
|
658 |
temp_em=temp*sqrt(m)*exp(0.25*log(m_point)-0.75*log(r)-0.125*log(v)+0.25*log(fabs(qu))); |
|
659 |
|
|
660 |
flux_int=0.; |
|
661 |
flx=0.; |
|
662 |
|
|
663 |
for (fi=0;fi<nbr;fi++) |
|
664 |
{ |
|
665 |
nu_em=bss*(MYFLOAT)fi/(MYFLOAT)nbr; |
|
666 |
nu_rec=nu_em*rf; |
|
667 |
posfreq=(int)(nu_rec*(MYFLOAT)nbr/bss); |
|
668 |
if ((posfreq>0)&&(posfreq<nbr)) |
|
669 |
{ |
|
670 |
// Initial version |
|
671 |
// flux_int=2.*planck/c2*pow(nu_em,3)/(exp(planck*nu_em/(k*temp_em))-1.); |
|
672 |
// Version with log used |
|
673 |
//flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.); |
|
674 |
// flux_int*=pow(rf,3)*b*db*h; |
|
675 |
//flux_int*=exp(3.*log(rf))*b*db*h; |
|
676 |
flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.)*exp(3.*log(rf))*b*db*h; |
|
677 |
|
|
678 |
flx+=flux_int; |
|
679 |
} |
|
680 |
} |
|
681 |
|
|
682 |
return((float)(flx)); |
|
683 |
} |
|
684 |
|
|
685 |
__device__ void impact(float phi,float r,float b,float tho,float m, |
|
686 |
float *zp,float *fp, |
|
687 |
int q,float db, |
|
688 |
float h,int raie) |
|
689 |
{ |
|
690 |
float flx,rf,bss; |
|
691 |
|
|
692 |
rf=decalage_spectral(r,b,phi,tho,m); |
|
693 |
|
|
694 |
if (raie==0) |
|
695 |
{ |
|
696 |
bss=1.e19; |
|
697 |
flx=spectre_cn(rf,b,db,h,r,m,bss); |
|
698 |
} |
|
699 |
else |
|
700 |
{ |
|
701 |
bss=2.; |
|
702 |
flx=spectre(rf,q,b,db,h,r,m,bss); |
|
703 |
} |
|
704 |
|
|
705 |
*zp=1./rf; |
|
706 |
*fp=flx; |
|
707 |
|
|
708 |
} |
|
709 |
|
|
710 |
__global__ void EachPixel(float *zImage,float *fImage, |
|
711 |
float Mass,float InternalRadius, |
|
712 |
float ExternalRadius,float Angle, |
|
713 |
int Line) |
|
714 |
{ |
|
715 |
uint xi=(uint)blockIdx.x; |
|
716 |
uint yi=(uint)blockIdx.y; |
|
717 |
uint sizex=(uint)gridDim.x*blockDim.x; |
|
718 |
uint sizey=(uint)gridDim.y*blockDim.y; |
|
719 |
|
|
720 |
// Perform trajectory for each pixel, exit on hit |
|
721 |
|
|
722 |
float m,rs,ri,re,tho; |
|
723 |
int q,raie; |
|
724 |
|
|
725 |
m=Mass; |
|
726 |
rs=2.*m; |
|
727 |
ri=InternalRadius; |
|
728 |
re=ExternalRadius; |
|
729 |
tho=Angle; |
|
730 |
q=-2; |
|
731 |
raie=Line; |
|
732 |
|
|
733 |
float d,bmx,db,b,h; |
|
734 |
float rp[TRACKPOINTS]; |
|
735 |
float phi,thi,phd,php,nr,r; |
|
736 |
int nh; |
|
737 |
float zp,fp; |
|
738 |
|
|
739 |
// Autosize for image |
|
740 |
bmx=1.25*re; |
|
741 |
b=0.; |
|
742 |
|
|
743 |
h=4.e0f*PI/(float)TRACKPOINTS; |
|
744 |
|
|
745 |
// set origin as center of image |
|
746 |
float x=(float)xi-(float)(sizex/2)+(float)5e-1f; |
|
747 |
float y=(float)yi-(float)(sizey/2)+(float)5e-1f; |
|
748 |
// angle extracted from cylindric symmetry |
|
749 |
phi=atanp(x,y); |
|
750 |
phd=atanp(cos(phi)*sin(tho),cos(tho)); |
|
751 |
|
|
752 |
float up,vp,pp,us,vs,ps; |
|
753 |
|
|
754 |
// impact parameter |
|
755 |
b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx; |
|
756 |
// step of impact parameter; |
|
757 |
// db=bmx/(float)(sizex/2); |
|
758 |
db=bmx/(float)(sizex); |
|
759 |
|
|
760 |
up=0.; |
|
761 |
vp=1.; |
|
762 |
|
|
763 |
pp=0.; |
|
764 |
nh=0; |
|
765 |
|
|
766 |
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); |
|
767 |
|
|
768 |
rp[nh]=fabs(b/us); |
|
769 |
|
|
770 |
int ExitOnImpact=0; |
|
771 |
|
|
772 |
do |
|
773 |
{ |
|
774 |
nh++; |
|
775 |
pp=ps; |
|
776 |
up=us; |
|
777 |
vp=vs; |
|
778 |
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); |
|
779 |
rp[nh]=fabs(b/us); |
|
780 |
ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rp[nh]>ri)&&(rp[nh]<re)?1:0; |
|
781 |
|
|
782 |
} while ((rp[nh]>=rs)&&(rp[nh]<=rp[0])&&(ExitOnImpact==0)); |
|
783 |
|
|
784 |
if (ExitOnImpact==1) { |
|
785 |
impact(phi,rp[nh-1],b,tho,m,&zp,&fp,q,db,h,raie); |
|
786 |
} |
|
787 |
else |
|
788 |
{ |
|
789 |
zp=0.; |
|
790 |
fp=0.; |
|
791 |
} |
|
792 |
|
|
793 |
zImage[yi+sizex*xi]=(float)zp; |
|
794 |
fImage[yi+sizex*xi]=(float)fp; |
|
795 |
} |
|
796 |
|
|
797 |
__global__ void Pixel(float *zImage,float *fImage, |
|
798 |
float *Trajectories,int *IdLast, |
|
799 |
uint ImpactParameter,uint TrackPoints, |
|
800 |
float Mass,float InternalRadius, |
|
801 |
float ExternalRadius,float Angle, |
|
802 |
int Line) |
|
803 |
{ |
|
804 |
uint xi=(uint)blockIdx.x; |
|
805 |
uint yi=(uint)blockIdx.y; |
|
806 |
uint sizex=(uint)gridDim.x*blockDim.x; |
|
807 |
uint sizey=(uint)gridDim.y*blockDim.y; |
|
808 |
|
|
809 |
// Perform trajectory for each pixel |
|
810 |
|
|
811 |
float m,rs,ri,re,tho; |
|
812 |
int q,raie; |
|
813 |
|
|
814 |
m=Mass; |
|
815 |
rs=2.*m; |
|
816 |
ri=InternalRadius; |
|
817 |
re=ExternalRadius; |
|
818 |
tho=Angle; |
|
819 |
q=-2; |
|
820 |
raie=Line; |
|
821 |
|
|
822 |
float d,bmx,db,b,h; |
|
823 |
float phi,thi,phd,php,nr,r; |
|
824 |
int nh; |
|
825 |
float zp=0,fp=0; |
|
826 |
// Autosize for image, 25% greater than external radius |
|
827 |
bmx=1.25*re; |
|
828 |
|
|
829 |
// Angular step of integration |
|
830 |
h=4.e0f*PI/(float)TrackPoints; |
|
831 |
|
|
832 |
// Step of Impact Parameter |
|
833 |
db=bmx/(2.e0*(float)ImpactParameter); |
|
834 |
|
|
835 |
// set origin as center of image |
|
836 |
float x=(float)xi-(float)(sizex/2)+(float)5e-1f; |
|
837 |
float y=(float)yi-(float)(sizey/2)+(float)5e-1f; |
|
838 |
// angle extracted from cylindric symmetry |
|
839 |
phi=atanp(x,y); |
|
840 |
phd=atanp(cos(phi)*sin(tho),cos(tho)); |
|
841 |
|
|
842 |
// Real Impact Parameter |
|
843 |
b=sqrt(x*x+y*y)*bmx/(float)ImpactParameter; |
|
844 |
|
|
845 |
// Integer Impact Parameter |
|
846 |
uint bi=(uint)sqrt(x*x+y*y); |
|
847 |
|
|
848 |
int HalfLap=0,ExitOnImpact=0,ni; |
|
849 |
|
|
850 |
if (bi<ImpactParameter) |
|
851 |
{ |
|
852 |
do |
|
853 |
{ |
|
854 |
php=phd+(float)HalfLap*PI; |
|
855 |
nr=php/h; |
|
856 |
ni=(int)nr; |
|
857 |
|
|
858 |
if (ni<IdLast[bi]) |
|
859 |
{ |
|
860 |
r=(Trajectories[bi*TrackPoints+ni+1]-Trajectories[bi*TrackPoints+ni])*(nr-ni*1.)+Trajectories[bi*TrackPoints+ni]; |
|
861 |
} |
|
862 |
else |
|
863 |
{ |
|
864 |
r=Trajectories[bi*TrackPoints+ni]; |
|
865 |
} |
|
866 |
|
|
867 |
if ((r<=re)&&(r>=ri)) |
|
868 |
{ |
|
869 |
ExitOnImpact=1; |
|
870 |
impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie); |
|
871 |
} |
|
872 |
|
|
873 |
HalfLap++; |
|
874 |
} while ((HalfLap<=2)&&(ExitOnImpact==0)); |
|
875 |
|
|
876 |
} |
|
877 |
|
|
878 |
zImage[yi+sizex*xi]=zp; |
|
879 |
fImage[yi+sizex*xi]=fp; |
|
880 |
} |
|
881 |
|
|
882 |
__global__ void Circle(float *Trajectories,int *IdLast, |
|
883 |
float *zImage,float *fImage, |
|
884 |
int TrackPoints, |
|
885 |
float Mass,float InternalRadius, |
|
886 |
float ExternalRadius,float Angle, |
|
887 |
int Line) |
|
888 |
{ |
|
889 |
// Integer Impact Parameter ID |
|
890 |
int bi=blockIdx.x; |
|
891 |
// Integer points on circle |
|
892 |
int i=blockIdx.y; |
|
893 |
// Integer Impact Parameter Size (half of image) |
|
894 |
int bmaxi=gridDim.x*blockDim.x; |
|
895 |
// Integer Points on circle |
|
896 |
int imx=gridDim.y*blockDim.y; |
|
897 |
|
|
898 |
// Perform trajectory for each pixel |
|
899 |
|
|
900 |
float m,rs,ri,re,tho; |
|
901 |
int q,raie; |
|
902 |
|
|
903 |
m=Mass; |
|
904 |
rs=2.*m; |
|
905 |
ri=InternalRadius; |
|
906 |
re=ExternalRadius; |
|
907 |
tho=Angle; |
|
908 |
raie=Line; |
|
909 |
|
|
910 |
float bmx,db,b,h; |
|
911 |
float phi,thi,phd; |
|
912 |
int nh; |
|
913 |
float zp=0,fp=0; |
|
914 |
|
|
915 |
// Autosize for image |
|
916 |
bmx=1.25*re; |
|
917 |
|
|
918 |
// Angular step of integration |
|
919 |
h=4.e0f*PI/(float)TrackPoints; |
|
920 |
|
|
921 |
// impact parameter |
|
922 |
b=(float)bi/(float)bmaxi*bmx; |
|
923 |
db=bmx/(2.e0*(float)bmaxi); |
|
924 |
|
|
925 |
phi=2.*PI/(float)imx*(float)i; |
|
926 |
phd=atanp(cos(phi)*sin(tho),cos(tho)); |
|
927 |
int yi=(int)((float)bi*sin(phi))+bmaxi; |
|
928 |
int xi=(int)((float)bi*cos(phi))+bmaxi; |
|
929 |
|
|
930 |
int HalfLap=0,ExitOnImpact=0,ni; |
|
931 |
float php,nr,r; |
|
932 |
|
|
933 |
do |
|
934 |
{ |
|
935 |
php=phd+(float)HalfLap*PI; |
|
936 |
nr=php/h; |
|
937 |
ni=(int)nr; |
|
938 |
|
|
939 |
if (ni<IdLast[bi]) |
|
940 |
{ |
|
941 |
r=(Trajectories[bi*TrackPoints+ni+1]-Trajectories[bi*TrackPoints+ni])*(nr-ni*1.)+Trajectories[bi*TrackPoints+ni]; |
|
942 |
} |
|
943 |
else |
|
944 |
{ |
|
945 |
r=Trajectories[bi*TrackPoints+ni]; |
|
946 |
} |
|
947 |
|
|
948 |
if ((r<=re)&&(r>=ri)) |
|
949 |
{ |
|
950 |
ExitOnImpact=1; |
|
951 |
impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie); |
|
952 |
} |
|
953 |
|
|
954 |
HalfLap++; |
|
955 |
} while ((HalfLap<=2)&&(ExitOnImpact==0)); |
|
956 |
|
|
957 |
zImage[yi+2*bmaxi*xi]=zp; |
|
958 |
fImage[yi+2*bmaxi*xi]=fp; |
|
959 |
|
|
960 |
} |
|
961 |
|
|
962 |
__global__ void Trajectory(float *Trajectories, |
|
963 |
int *IdLast,int TrackPoints, |
|
964 |
float Mass,float InternalRadius, |
|
965 |
float ExternalRadius,float Angle, |
|
966 |
int Line) |
|
967 |
{ |
|
968 |
// Integer Impact Parameter ID |
|
969 |
int bi=blockIdx.x; |
|
970 |
// Integer Impact Parameter Size (half of image) |
|
971 |
int bmaxi=gridDim.x*blockDim.x; |
|
972 |
|
|
973 |
// Perform trajectory for each pixel |
|
974 |
|
|
975 |
float m,rs,ri,re,tho; |
|
976 |
int raie,q; |
|
977 |
|
|
978 |
m=Mass; |
|
979 |
rs=2.*m; |
|
980 |
ri=InternalRadius; |
|
981 |
re=ExternalRadius; |
|
982 |
tho=Angle; |
|
983 |
q=-2; |
|
984 |
raie=Line; |
|
985 |
|
|
986 |
float d,bmx,db,b,h; |
|
987 |
float phi,thi,phd,php,nr,r; |
|
988 |
int nh; |
|
989 |
float zp,fp; |
|
990 |
|
|
991 |
// Autosize for image |
|
992 |
bmx=1.25*re; |
|
993 |
|
|
994 |
// Angular step of integration |
|
995 |
h=4.e0f*PI/(float)TrackPoints; |
|
996 |
|
|
997 |
// impact parameter |
|
998 |
b=(float)bi/(float)bmaxi*bmx; |
|
999 |
|
|
1000 |
float up,vp,pp,us,vs,ps; |
|
1001 |
|
|
1002 |
up=0.; |
|
1003 |
vp=1.; |
|
1004 |
|
|
1005 |
pp=0.; |
|
1006 |
nh=0; |
|
1007 |
|
|
1008 |
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); |
|
1009 |
|
|
1010 |
// b versus us |
|
1011 |
float bvus=fabs(b/us); |
|
1012 |
float bvus0=bvus; |
|
1013 |
Trajectories[bi*TrackPoints+nh]=bvus; |
|
1014 |
|
|
1015 |
do |
|
1016 |
{ |
|
1017 |
nh++; |
|
1018 |
pp=ps; |
|
1019 |
up=us; |
|
1020 |
vp=vs; |
|
1021 |
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); |
|
1022 |
bvus=fabs(b/us); |
|
1023 |
Trajectories[bi*TrackPoints+nh]=bvus; |
|
1024 |
|
|
1025 |
} while ((bvus>=rs)&&(bvus<=bvus0)); |
|
1026 |
|
|
1027 |
IdLast[bi]=nh; |
|
1028 |
|
|
1029 |
} |
|
1030 |
""" |
|
1031 |
return(BlobCUDA) |
|
1032 |
|
|
536 | 1033 |
# def ImageOutput(sigma,prefix): |
537 | 1034 |
# from PIL import Image |
538 | 1035 |
# Max=sigma.max() |
... | ... | |
577 | 1074 |
# Spectrum is Monochromatic Line one |
578 | 1075 |
Line=1 |
579 | 1076 |
|
580 |
Trajectories=numpy.zeros((InputCL['Size']/2,InputCL['TrackPoints']), |
|
581 |
dtype=numpy.float32) |
|
582 |
IdLast=numpy.zeros(InputCL['Size']/2,dtype=numpy.int32) |
|
1077 |
Trajectories=numpy.zeros((int(InputCL['Size']/2),InputCL['TrackPoints']),dtype=numpy.float32) |
|
1078 |
IdLast=numpy.zeros(int(InputCL['Size']/2),dtype=numpy.int32) |
|
583 | 1079 |
|
584 | 1080 |
# Je detecte un peripherique GPU dans la liste des peripheriques |
585 | 1081 |
Id=0 |
... | ... | |
588 | 1084 |
for device in platform.get_devices(): |
589 | 1085 |
if Id==Device: |
590 | 1086 |
XPU=device |
591 |
print "CPU/GPU selected: ",device.name.lstrip()
|
|
1087 |
print("CPU/GPU selected: ",device.name.lstrip())
|
|
592 | 1088 |
HasXPU=True |
593 | 1089 |
Id+=1 |
594 | 1090 |
|
595 | 1091 |
if HasXPU==False: |
596 |
print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1)
|
|
1092 |
print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
|
|
597 | 1093 |
sys.exit() |
598 | 1094 |
|
599 | 1095 |
ctx = cl.Context([XPU]) |
... | ... | |
658 | 1154 |
numpy.int32(Line)) |
659 | 1155 |
|
660 | 1156 |
CLLaunch=BlackHoleCL.Pixel(queue,(zImage.shape[0], |
661 |
zImage.shape[1]),None,
|
|
1157 |
zImage.shape[1]),None, |
|
662 | 1158 |
zImageCL,fImageCL,TrajectoriesCL,IdLastCL, |
663 | 1159 |
numpy.uint32(Trajectories.shape[0]), |
664 | 1160 |
numpy.uint32(Trajectories.shape[1]), |
... | ... | |
689 | 1185 |
|
690 | 1186 |
return(elapsed) |
691 | 1187 |
|
1188 |
def BlackHoleCUDA(zImage,fImage,InputCL): |
|
1189 |
|
|
1190 |
kernel_params = {} |
|
1191 |
|
|
1192 |
print(InputCL) |
|
1193 |
|
|
1194 |
Device=InputCL['Device'] |
|
1195 |
GpuStyle=InputCL['GpuStyle'] |
|
1196 |
VariableType=InputCL['VariableType'] |
|
1197 |
Size=InputCL['Size'] |
|
1198 |
Mass=InputCL['Mass'] |
|
1199 |
InternalRadius=InputCL['InternalRadius'] |
|
1200 |
ExternalRadius=InputCL['ExternalRadius'] |
|
1201 |
Angle=InputCL['Angle'] |
|
1202 |
Method=InputCL['Method'] |
|
1203 |
TrackPoints=InputCL['TrackPoints'] |
|
1204 |
Physics=InputCL['Physics'] |
|
1205 |
|
|
1206 |
PhysicsList=DictionariesAPI() |
|
1207 |
|
|
1208 |
if InputCL['BlackBody']: |
|
1209 |
# Spectrum is Black Body one |
|
1210 |
Line=0 |
|
1211 |
else: |
|
1212 |
# Spectrum is Monochromatic Line one |
|
1213 |
Line=1 |
|
1214 |
|
|
1215 |
Trajectories=numpy.zeros((int(InputCL['Size']/2),InputCL['TrackPoints']),dtype=numpy.float32) |
|
1216 |
IdLast=numpy.zeros(int(InputCL['Size']/2),dtype=numpy.int32) |
|
1217 |
|
|
1218 |
try: |
|
1219 |
# For PyCUDA import |
|
1220 |
import pycuda.driver as cuda |
|
1221 |
from pycuda.compiler import SourceModule |
|
1222 |
|
|
1223 |
cuda.init() |
|
1224 |
for Id in range(cuda.Device.count()): |
|
1225 |
if Id==Device: |
|
1226 |
XPU=cuda.Device(Id) |
|
1227 |
print("GPU selected %s" % XPU.name()) |
|
1228 |
|
|
1229 |
|
|
1230 |
except ImportError: |
|
1231 |
print("Platform does not seem to support CUDA") |
|
1232 |
|
|
1233 |
Context=XPU.make_context() |
|
1234 |
|
|
1235 |
try: |
|
1236 |
mod = SourceModule(KernelCodeCuda(),options=['--compiler-options','-DPHYSICS=%i' % (PhysicsList[Physics])]) |
|
1237 |
print("Compilation seems to be OK") |
|
1238 |
except: |
|
1239 |
print("Compilation seems to break") |
|
1240 |
|
|
1241 |
EachPixelCU=mod.get_function("EachPixel") |
|
1242 |
TrajectoryCU=mod.get_function("Trajectory") |
|
1243 |
PixelCU=mod.get_function("Pixel") |
|
1244 |
CircleCU=mod.get_function("Circle") |
|
1245 |
|
|
1246 |
TrajectoriesCU = cuda.mem_alloc(Trajectories.size*Trajectories.dtype.itemsize) |
|
1247 |
cuda.memcpy_htod(TrajectoriesCU, Trajectories) |
|
1248 |
zImageCU = cuda.mem_alloc(zImage.size*zImage.dtype.itemsize) |
|
1249 |
cuda.memcpy_htod(zImageCU, zImage) |
|
1250 |
fImageCU = cuda.mem_alloc(fImage.size*fImage.dtype.itemsize) |
|
1251 |
cuda.memcpy_htod(zImageCU, fImage) |
|
1252 |
IdLastCU = cuda.mem_alloc(IdLast.size*IdLast.dtype.itemsize) |
|
1253 |
cuda.memcpy_htod(IdLastCU, IdLast) |
|
1254 |
|
|
1255 |
start_time=time.time() |
|
1256 |
|
|
1257 |
if Method=='EachPixel': |
|
1258 |
EachPixelCU(zImageCU,fImageCU, |
|
1259 |
numpy.float32(Mass), |
|
1260 |
numpy.float32(InternalRadius), |
|
1261 |
numpy.float32(ExternalRadius), |
|
1262 |
numpy.float32(Angle), |
|
1263 |
numpy.int32(Line), |
|
1264 |
grid=(zImage.shape[0],zImage.shape[1]),block=(1,1,1)) |
|
1265 |
elif Method=='TrajectoCircle': |
|
1266 |
TrajectoryCU(TrajectoriesCU,IdLastCU, |
|
1267 |
numpy.uint32(Trajectories.shape[1]), |
|
1268 |
numpy.float32(Mass), |
|
1269 |
numpy.float32(InternalRadius), |
|
1270 |
numpy.float32(ExternalRadius), |
|
1271 |
numpy.float32(Angle), |
|
1272 |
numpy.int32(Line), |
|
1273 |
grid=(Trajectories.shape[0],1),block=(1,1,1)) |
|
1274 |
|
|
1275 |
CircleCU(TrajectoriesCU,IdLastCU,zImageCU,fImageCU, |
|
1276 |
numpy.uint32(Trajectories.shape[1]), |
|
1277 |
numpy.float32(Mass), |
|
1278 |
numpy.float32(InternalRadius), |
|
1279 |
numpy.float32(ExternalRadius), |
|
1280 |
numpy.float32(Angle), |
|
1281 |
numpy.int32(Line), |
|
1282 |
grid=(Trajectories.shape[0],zImage.shape[0]*4),block=(1,1,1)) |
|
1283 |
else: |
|
1284 |
TrajectoryCU(TrajectoriesCU,IdLastCU, |
|
1285 |
numpy.uint32(Trajectories.shape[1]), |
|
1286 |
numpy.float32(Mass), |
|
1287 |
numpy.float32(InternalRadius), |
|
1288 |
numpy.float32(ExternalRadius), |
|
1289 |
numpy.float32(Angle), |
|
1290 |
numpy.int32(Line), |
|
1291 |
grid=(Trajectories.shape[0],1),block=(1,1,1)) |
|
1292 |
|
|
1293 |
PixelCU(zImageCU,fImageCU,TrajectoriesCU,IdLastCU, |
|
1294 |
numpy.uint32(Trajectories.shape[0]), |
|
1295 |
numpy.uint32(Trajectories.shape[1]), |
|
1296 |
numpy.float32(Mass), |
|
1297 |
numpy.float32(InternalRadius), |
|
1298 |
numpy.float32(ExternalRadius), |
|
1299 |
numpy.float32(Angle), |
|
1300 |
numpy.int32(Line), |
|
1301 |
grid=(zImage.shape[0],zImage.shape[1],1),block=(1,1,1)) |
|
1302 |
|
|
1303 |
|
|
1304 |
elapsed = time.time()-start_time |
|
1305 |
print("\nElapsed Time : %f" % elapsed) |
|
1306 |
|
|
1307 |
cuda.memcpy_dtoh(zImage,zImageCU) |
|
1308 |
cuda.memcpy_dtoh(fImage,fImageCU) |
|
1309 |
|
|
1310 |
zMaxPosition=numpy.where(zImage[:,:]==zImage.max()) |
|
1311 |
fMaxPosition=numpy.where(fImage[:,:]==fImage.max()) |
|
1312 |
print("Z max @(%i,%i) : %f" % (zMaxPosition[0][0],zMaxPosition[1][0],zImage.max())) |
|
1313 |
print("Flux max @(%i,%i) : %f\n" % (fMaxPosition[0][0],fMaxPosition[1][0],fImage.max())) |
|
1314 |
|
|
1315 |
Context.detach() |
|
1316 |
|
|
1317 |
return(elapsed) |
|
1318 |
|
|
692 | 1319 |
if __name__=='__main__': |
693 | 1320 |
|
694 | 1321 |
GpuStyle = 'OpenCL' |
... | ... | |
858 | 1485 |
InputCL['Method']=Method |
859 | 1486 |
InputCL['TrackPoints']=TrackPoints |
860 | 1487 |
InputCL['Physics']=Physics |
861 |
|
|
862 |
duration=BlackHoleCL(zImage,fImage,InputCL) |
|
863 | 1488 |
|
1489 |
if GpuStyle=='OpenCL': |
|
1490 |
duration=BlackHoleCL(zImage,fImage,InputCL) |
|
1491 |
else: |
|
1492 |
duration=BlackHoleCUDA(zImage,fImage,InputCL) |
|
1493 |
|
|
864 | 1494 |
Hostname=gethostname() |
865 | 1495 |
Date=time.strftime("%Y%m%d_%H%M%S") |
866 | 1496 |
ImageInfo="%s_Device%i_%s_%s" % (Method,Device,Hostname,Date) |
Formats disponibles : Unified diff