#!/usr/bin/env python
#
# TrouNoir model using PyOpenCL 
#
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr> 
#
# Thanks to Andreas Klockner for PyOpenCL:
# http://mathema.tician.de/software/pyopencl
# 
# Interesting links:
# http://viennacl.sourceforge.net/viennacl-documentation.html
# http://enja.org/2011/02/22/adventures-in-pyopencl-part-1-getting-started-with-python/

import pyopencl as cl
import numpy
from PIL import Image
import time,string
from numpy.random import randint as nprnd
import sys
import getopt
import matplotlib.pyplot as plt

KERNEL_CODE=string.Template("""

#define PI (float)3.14159265359
#define nbr 200

float atanp(float x,float y)
{
  float angle;

  angle=atan2(y,x);

  if (angle<0)
    {
      angle+=(float)2.e0f*PI;
    }

  return angle;
}

float f(float v)
{
  return v;
}

float g(float u,float m,float b)
{
  return (3.*m/b*pow(u,2)-u);
}


void calcul(float *us,float *vs,float up,float vp,
	    float h,float m,float b)
{
  float c0,c1,c2,c3,d0,d1,d2,d3;

  c0=h*f(vp);
  c1=h*f(vp+c0/2.);
  c2=h*f(vp+c1/2.);
  c3=h*f(vp+c2);
  d0=h*g(up,m,b);
  d1=h*g(up+d0/2.,m,b);
  d2=h*g(up+d1/2.,m,b);
  d3=h*g(up+d2,m,b);

  *us=up+(c0+2.*c1+2.*c2+c3)/6.;
  *vs=vp+(d0+2.*d1+2.*d2+d3)/6.;
}

void rungekutta(float *ps,float *us,float *vs,
		float pp,float up,float vp,
		float h,float m,float b)
{
  calcul(us,vs,up,vp,h,m,b);
  *ps=pp+h;
}

float decalage_spectral(float r,float b,float phi,
			 float tho,float m)
{
  return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi)));
}

float spectre(float rf,int q,float b,float db,
	     float h,float r,float m,float bss)
{
  float flx;
  int fi;

  fi=(int)(rf*nbr/bss);
  flx=pow(r/m,q)*pow(rf,4)*b*db*h;
  return(flx);
}

float spectre_cn(float rf,float b,float db,
		float h,float r,float m,float bss)
{
  float flx;
  float nu_rec,nu_em,qu,v,temp,temp_em,flux_int,m_point,planck,c,k;
  int fi,posfreq;

  planck=6.62e-34;
  k=1.38e-23;
  temp=3.e7;
  m_point=1.e14;
  v=1.-3./r;

  qu=1/sqrt(1-3./r)/sqrt(r)*(sqrt(r)-sqrt(6.)+sqrt(3.)/2.*log((sqrt(r)+sqrt(3.))/(sqrt(r)-sqrt(3.))*(sqrt(6.)-sqrt(3.))/(sqrt(6.)+sqrt(3.))));

  temp_em=temp*sqrt(m)*exp(0.25*log(m_point))*exp(-0.75*log(r))*
    exp(-0.125*log(v))*exp(0.25*log(qu));

  flux_int=0;
  flx=0;

  for (fi=1;fi<nbr;fi++)
    {
      nu_em=bss*fi/nbr;
      nu_rec=nu_em*rf; 
      posfreq=1./bss*nu_rec*nbr;
      if ((posfreq>0)&&(posfreq<nbr))
	{
	  flux_int=2*planck/9.e16f*pow(nu_em,3)/(exp(planck*nu_em/k/temp_em)-1.)*pow(rf,3)*b*db*h;
	  flx+=flux_int;
	}
    }
  return(flx);
}

void impact(float phi,float r,float b,float tho,float m,
	    float *zp,float *fp,
	    int q,float db,
	    float h,float bss,int raie)
{
  float flx,rf;

  rf=decalage_spectral(r,b,phi,tho,m);

  if (raie==0)
    {
      flx=spectre(rf,q,b,db,h,r,m,bss);
    }
  else
    {
      flx=spectre_cn(rf,b,db,h,r,m,bss);
    }
  
  *zp=1./rf;
  *fp=flx;

}

__kernel void EachPixel(__global float *MyImage)
{
   uint xi=(uint)get_global_id(0);
   uint yi=(uint)get_global_id(1);
   uint sizex=(uint)get_global_size(0);
   uint sizey=(uint)get_global_size(1);

   // Perform trajectory for each pixel

  float m,rs,ri,re,tho,ro;
  int q;

  float bss,stp;
  int nmx,dim;
  float d,bmx,db,b,h;
  float up,vp,pp;
  float us,vs,ps;
  float rp[2000];
  float zmx,fmx,zen,fen;
  float flux_tot,impc_tot;
  float phi,thi,thx,phd,php,nr,r;
  int ni,ii,i,imx,j,n,tst,dist,raie,pc,fcl,zcl;
  int nh;
  int ExitOnImpact;
  float zp,fp;

  m=1.;
  rs=2.*m;
  ri=3.*rs;
  re=12.;
  ro=100.;
  //tho=PI/180.*80;
  tho=PI/180.*0.;
  q=-2;
  dist=1;
  raie=0;

  nmx=(float)sizex/2.;
  stp=0.5;
  // Autosize for image
  bmx=1.25*re;
  b=0.;
  thx=asin(bmx/ro);
  pc=0;

  h=PI/5.e2f;

  // set origin as center of image
  float x=(float)xi-(float)(sizex/2)+(float)5e-1f;
  float y=(float)(sizey/2)-(float)yi-(float)5e-1f;
  // angle extracted from cylindric symmetry
  phi=atanp(x,y);
  phd=atanp(cos(phi)*sin(tho),cos(tho));

  if (dist==1) 
  {
     db=bmx/nmx;
     // impact parameter
     b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx;
     up=0.;
     vp=1.;
  }
  else 
  {
     b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx;
     thi=asin(b/ro);
     db=bmx/nmx;
     up=sin(thi);
     vp=cos(thi);
  }
      
  pp=0.;
  nh=0;

  rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
    
  rp[nh]=fabs(b/us);

  ExitOnImpact=0;

  do
  {
     nh++;
     pp=ps;
     up=us;
     vp=vs;
     rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);	  
     rp[nh]=fabs(b/us);
     ExitOnImpact = (((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))||((fmod(ps,PI)<fmod(phd,PI))&&(fmod(pp,PI)>fmod(phd,PI))))&&(rp[nh]>ri)&&(rp[nh]<re)?1:0;          

  } while ((rp[nh]>=rs)&&(rp[nh]<=rp[0])&&(ExitOnImpact==0));

  if (ExitOnImpact==1) {
     impact(phi,rp[nh-1],b,tho,m,&zp,&fp,q,db,h,bss,raie);
  }
  else
  {
     zp=0.;
  }

//
  MyImage[yi+sizex*xi]=(float)zp;

  barrier(CLK_LOCAL_MEM_FENCE);
 
}
""")

def ImageOutput(sigma,prefix):
    Max=sigma.max()
    Min=sigma.min()
    
    # Normalize value as 8bits Integer
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
    image = Image.fromarray(SigmaInt)
    image.save("%s.jpg" % prefix)
    
def BlackHole(MyImage,Device):

    kernel_params = {}

    # Je detecte un peripherique GPU dans la liste des peripheriques
    Id=0
    HasXPU=False
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            if Id==Device:
                XPU=device
                print "CPU/GPU selected: ",device.name.lstrip()
                HasXPU=True
            Id+=1

    if HasXPU==False:
        print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1)
        sys.exit()		
	
    ctx = cl.Context([XPU])
    queue = cl.CommandQueue(ctx,
			    properties=cl.command_queue_properties.PROFILING_ENABLE)

    BlackHoleCL = cl.Program(ctx,KERNEL_CODE.substitute(kernel_params)).build()
    
    # Je recupere les flag possibles pour les buffers
    mf = cl.mem_flags

    print(MyImage)
    
    # Program based on Kernel2
    MyImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=MyImage)
    
    start_time=time.time() 

    CLLaunch=BlackHoleCL.EachPixel(queue,(MyImage.shape[0],MyImage.shape[1]),None,MyImageCL)
                                     
    CLLaunch.wait()
    elapsed = time.time()-start_time
    print("Elapsed %f: " % elapsed)
    
    cl.enqueue_copy(queue,MyImage,MyImageCL).wait()

    print(MyImage)
    MyImageCL.release()
    
    ImageOutput(MyImage,"TrouNoir")
    return(elapsed)

if __name__=='__main__':
	
    GpuStyle = 'OpenCL'
    Mass = 1.
    # Internal Radius 3 times de Schwarzschild Radius
    InternalRadius=6.*Mass
    #
    ExternalRadius=12.
    #
    ObserverDistance=100.
    # Angle with normal to disc 10 degrees
    Angle = numpy.pi/180.*(90.-10.)
    # Radiation of disc : BlackBody or Monochromatic
    BlackBody = True
    # Camera : Angular Camera or plate with the size of camera
    AngularCamera = True
    # Size of image
    Size=256
    # Variable Type
    VariableType='FP32'
    # ?
    q=-2
          
    HowToUse='%s -h [Help] -b [BlackBodyEmission] -c [AngularCamera] -s <SizeInPixels> -m <Mass> -i <DiscInternalRadius> -x <DiscExternalRadius> -o <ObservatorDistance> -a <AngleAboveDisc> -d <DeviceId> -g <CUDA/OpenCL> -t <FP32/FP64>'

    try:
        opts, args = getopt.getopt(sys.argv[1:],"hbcs:m:i:x:o:a:d:g:t:",["blackbody","camera","size=","mass=","internal=","external=","observer=","angle=","device=","gpustyle=","variabletype="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices
    Devices=[]
    Alu={}
        
    for opt, arg in opts:
        if opt == '-h':
            print(HowToUse % sys.argv[0])
            
            print("\nInformations about devices detected under OpenCL API:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1
                        
            except:
                print("Your platform does not seem to support OpenCL")
                
            print("\nInformations about devices detected under CUDA API:")
                # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                print
            except:
                print("Your platform does not seem to support CUDA")
        
            sys.exit()
        
        elif opt in ("-d", "--device"):
#            Devices.append(int(arg))
            Device=int(arg)
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-t", "--variabletype"):
            VariableType = arg
        elif opt in ("-s", "--size"):
            Size = int(arg)
        elif opt in ("-m", "--mass"):
            Mass = float(arg)
        elif opt in ("-i", "--internal"):
            InternalRadius = float(arg)
        elif opt in ("-e", "--external"):
            ExternalRadius = float(arg)
        elif opt in ("-o", "--observer"):
            ObserverDistance = float(arg)
        elif opt in ("-a", "--angle"):
            Angle = numpy.pi/180.*(90.-float(arg))
        elif opt in ("-b", "--blackbody"):
            BlackBody = True
        elif opt in ("-c", "--camera"):
            AngularCamera = True

    print("Device Identification selected : %s" % Device)
    print("GpuStyle used : %s" % GpuStyle)
    print("VariableType : %s" % VariableType)
    print("Size : %i" % Size)
    print("Mass : %f" % Mass)
    print("Internal Radius : %f" % InternalRadius)
    print("External Radius : %f" % ExternalRadius)
    print("Observer Distance : %f" % ObserverDistance)
    print("Angle with normal of (in radians) : %f" % Angle)
    print("Black Body Disc Emission (monochromatic instead) : %s" % BlackBody)
    print("Angular Camera (dimension of object instead) : %s" % AngularCamera)

    if GpuStyle=='CUDA':
        print("\nSelection of CUDA device") 
        try:
            # For PyCUDA import
            import pycuda.driver as cuda
            
            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'
            
        except ImportError:
            print("Platform does not seem to support CUDA")

    if GpuStyle=='OpenCL':
        print("\nSelection of OpenCL device") 
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="xPU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                    if Id in Devices:
                    # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")

#    print(Devices,Alu)

#    MyImage=numpy.where(numpy.random.zeros(Size,Size)>0,1,-1).astype(numpy.float32)
    MyImage=numpy.zeros((Size,Size),dtype=numpy.float32)

    print(MyImage)
	
    duration=BlackHole(MyImage,Device)


