#!/usr/bin/python
# -*- coding: utf-8 -*-

import os
import re
import unicodedata
import sys
import csv
import string

from os import listdir, rename
from os.path import isfile, join

## Directory the script is launched from (not referenced again in the visible code).
mypath = os.getcwd()
# Command-line arguments:
#   argv[1] -> directory in which the two result tables are created
#   argv[2] -> directory whose regular files are listed as input files
#   argv[3] -> path of the existing papers table, opened by prepareListPapers()
mypath_results = sys.argv[1]  # NOTE(review): an older, commented-out version built this as <mypath>/results/
mypath_input = sys.argv[2]
mypath_input_papers = sys.argv[3]

# Publications table: created (truncated) at import time and given its header
# row here; the data rows are appended by prepareListDates().
respubfile = join(mypath_results, "table_publications_2.csv") 
file_publication = open(respubfile, 'w')
strLinePub = 'id_publication ; name ; year ; comment ; id_author ; id_paper \n'
file_publication.write(strLinePub)

# Papers table: created (truncated) at import time and given its header row
# here; the data rows are appended by writePapersTable().
respaperfile = join(mypath_results, "table_papers_2.csv") 
file_paper = open(respaperfile, 'w')
strLinePaper = 'id_paper ; name ; comment ; first_date ; last_date ; authors \n'
file_paper.write(strLinePaper)

# Names of the regular files (sub-directories excluded) found in the input directory.
onlyfiles = [f for f in listdir(mypath_input) if isfile(join(mypath_input, f))]


# Starting from "actor" files (e.g. "Act3_Capuana.txt") prepared by Marco Borelli,
# the script completes the table of journals: Papers,
# and fills the table of re-publications: Publications.

def prepareListPapers():
  """Load the existing papers table into a dictionary keyed by paper name.

  The table is the '#'-delimited CSV whose path is stored in the module-level
  ``mypath_input_papers``.  The first row is treated as a header and is only
  echoed on stdout.  Every other non-blank row must provide six columns, in
  this order: id, name, comment, first_date, last_date, authors.

  Blank rows (for instance an empty line at the end of the file) are skipped
  instead of raising IndexError.

  Returns:
    dict mapping the paper name (surrounding spaces removed) to a dict with
    the keys "id", "name", "comment", "first_date", "last_date" and
    "authors" (all values are the raw strings read from the file).  When two
    rows carry the same name, the last one wins.

  Raises:
    IndexError: if a non-blank data row has fewer than six columns.
  """
  papers_map = {}
  with open(mypath_input_papers) as csvfile:
    for row_number, row in enumerate(csv.reader(csvfile, delimiter='#')):
      if row_number == 0:
        # Header row: report its first column and move on.
        print("First line (and first column) :  %s" % (row[0] if row else '',))
        continue
      # csv.reader yields an empty list for an empty line; a line holding
      # only blanks yields cells that are all whitespace.  Neither is data.
      if not ''.join(row).strip():
        continue
      name_paper = row[1].strip(" ")
      papers_map[name_paper] = {
        "id": row[0],
        "name": name_paper,
        "comment": row[2],
        "first_date": row[3],
        "last_date": row[4],
        "authors": row[5],
      }
  return papers_map

def prepareListDates(ldates, id_author, id_paper, nbPaper, nbPub):
  """Parse the date list of one journal line and write its publication rows.

  ``ldates`` is the text found to the right of the ':' in an author file,
  for example ``1888 (3) #uno in due puntate#; 1894``.  Entries are separated
  by ';'.  Inside one entry:

    * the first five characters, whitespace removed, are taken as the year;
    * ``(N)`` with N made of digits gives the number of publications;
    * ``(free text)`` counts as one publication and the text is the comment;
    * ``#free text#`` is a comment and overrides a parenthesised comment;
    * without parentheses the entry counts as one publication.

  One row per publication is written to the module-level ``file_publication``
  handle, with ids of the form ``Publi<nbPaper>-<k>`` where k starts at
  ``nbPub`` and grows by one per row.

  Entries that contain only whitespace (trailing ';', end-of-line, empty
  line) are ignored: they used to produce a row with an empty year, which
  later made the year conversion fail.

  Args:
    ldates: raw date list (str).
    id_author: author id written in every row (str).
    id_paper: paper id written in every row (str).
    nbPaper: number used as first part of the publication ids.
    nbPub: first sequence number to use for the publication ids.  The
      caller's variable is NOT updated; the number of rows written is the
      sum of the values of the returned dict.

  Returns:
    dict mapping each year (str) to the number of publications (int) written
    for that year; a year listed several times accumulates its counts.
  """
  print("list of dates for [ %s ]" % (id_paper,))
  PY = {}
  for dateStr in ldates.split(";"):
    date = dateStr.strip(" ")
    if not date.strip():
      # Nothing but whitespace: no year, hence no publication to record.
      continue

    # Fixed layout: year in the first five characters, details after them.
    realdate = ''.join(date[0:5].split())
    print("\tDate [ %s ]" % (realdate,))

    nbpubstr = date[5:]
    num = ''
    comment = ''
    if nbpubstr:
      print("\tNb Pub = %s" % (nbpubstr,))
      pos_paradeb = nbpubstr.find('(')
      pos_paraend = nbpubstr.find(')')
      if pos_paradeb != -1 and pos_paraend != -1:
        num = nbpubstr[pos_paradeb + 1:pos_paraend]
        if not num.isdigit():
          # Parentheses holding text: one publication, text kept as comment.
          comment = num
          print("\tNb Pub = 1 and Comment :  %s" % (comment,))
      else:
        print("\tNb Pub = 1 (a) !!!")

      # A '#...#' comment (first '#' to last '#') has the final word.
      pos_dashdeb = nbpubstr.find('#')
      if pos_dashdeb != -1:
        comment = nbpubstr[pos_dashdeb + 1:nbpubstr.rfind('#')]
        print("\tCommentaire =  %s" % (comment,))
    else:
      print("\tNb Pub = 1 (b) !!!")

    nb_publications = int(num) if num.isdigit() else 1
    PY[realdate] = PY.get(realdate, 0) + nb_publications

    for _ in range(nb_publications):
      id_pub = ''.join(['Publi', str(nbPaper), '-', str(nbPub)])
      nbPub = nbPub + 1
      strLinePub = ''.join([id_pub, ';UNKNOWN;', realdate, ';', comment, ';',
                            id_author, ';', id_paper, '\n'])
      file_publication.write(strLinePub)
  return PY
 
def findAuthor(path_file):
  """Return the author id encoded at the start of an author file name.

  The id is the part of the base name located before the first '_', e.g.
  ".../txt//Act9_DiGiacomo.txt" gives "Act9".

  Compared with the previous hand-made slicing:
    * a bare file name (no directory part) is accepted;
    * a name without any '_' yields the name without its extension instead
      of the name minus its last character.

  Args:
    path_file: path (or plain name) of the author file.

  Returns:
    The id prefix (str), or "UNKNOWN_ID" when the path has no file name
    component (for example when it ends with a separator).
  """
  name_file = os.path.basename(path_file)
  if not name_file:
    print("NO FILE NAME in [ %s ] ??? isn't it strange ?" % (path_file,))
    return "UNKNOWN_ID"

  prefix_id, underscore, _unused = name_file.partition('_')
  if not underscore:
    # No '_' at all: the best available id is the name without extension.
    return os.path.splitext(name_file)[0]
  return prefix_id

def addAuthorToPaper(id_author, paper):
  """Register ``id_author`` in the author list of ``paper`` (no duplicates).

  ``paper["authors"]`` may be empty, a comma-separated string (as read from
  the papers table) or a list of string fragments (as stored by earlier
  versions of this function).  It is parsed into individual ids, compared
  id by id, and, when the author is new, stored back as a string in which
  every id is followed by ", " (e.g. "Act3, Act10, ").

  Defects of the previous implementation fixed here:
    * ``authors_list is list`` was always False, so the list branch never ran;
    * membership was tested by substring, so "Act1" was considered present
      as soon as "Act10" was;
    * an author added to an empty paper was added a second time on the next
      call;
    * a stored string not ending with ", " got the new id glued to the
      previous one.

  Args:
    id_author: id of the author to register (str).
    paper: paper dict; must hold the keys "id" and "authors".  It is
      modified in place only when the author is actually added.
  """
  print("addAuthorToPaper === >  %s  in  %s" % (id_author, paper["id"]))

  raw_authors = paper["authors"] or ''
  if isinstance(raw_authors, (list, tuple)):
    raw_authors = ''.join(raw_authors)
  known_ids = [token.strip() for token in raw_authors.split(',') if token.strip()]

  if id_author in known_ids:
    print("ALREADY in the list of authors !!!!!!")
    return

  if known_ids:
    print("ADDING  %s  to the list of authors" % (id_author,))
  else:
    print("NO AUTHOR FOR THIS PAPER YET - ADDING ++++")
  known_ids.append(id_author)
  # Same textual layout as before: each id followed by ", ".
  paper["authors"] = ''.join(['%s, ' % (known,) for known in known_ids])

""""""""""""""""""""""""""""""
""" Prepare the name for the paper """
""""""""""""""""""""""""""""""
def preparePaperName(name, papers):
  """Build the normalised paper name from the left part of a journal line.

  Everything from the first '(' onwards is dropped, surrounding spaces
  (spaces only, not tabs or newlines) are removed, the text is upper-cased
  and the result is wrapped in square brackets.

  Args:
    name: raw text found before the ':' of a line.
    papers: not used; kept so that existing calls stay valid.

  Returns:
    The normalised name, e.g. "[RIVISTA MINIMA]".
  """
  # partition() hands back the whole string when there is no '('.
  before_parenthesis = name.partition('(')[0]
  normalised = before_parenthesis.strip(" ").upper()
  return ''.join(['[', normalised, "]"])

""""""""""""""""""""""""""""""
""" Initialize a paper with this 
    name """
""""""""""""""""""""""""""""""
def setPaper(paper_name, nbpaper, papers):
  """Create an empty paper record, store it in ``papers`` and return it.

  The record gets the id "Paper<nbpaper>", the given name and empty
  "comment", "first_date", "last_date" and "authors" fields.  An entry
  already stored under ``paper_name`` is replaced.

  The progress message uses a single-argument print call, which produces the
  same output under Python 2 and is also valid under Python 3.

  Args:
    paper_name: normalised paper name, used as key in ``papers``.
    nbpaper: number appended to "Paper" to build the id.
    papers: dict of papers, updated in place.

  Returns:
    The dict stored in ``papers[paper_name]``.
  """
  id_paper = ''.join(['Paper', str(nbpaper)])
  print("----->> NEW PAPER  %s with id :  %s" % (paper_name, id_paper))
  paper = {"id": id_paper, "name": paper_name, "comment": "", "first_date": "", "last_date": "", "authors": ""}
  papers[paper_name] = paper
  return paper

"""Fonction inutile en réalité !!!""" 
def getPaper(id_paper, papers):
  """Return the paper whose "id" equals ``id_paper``, or -1 when none does.

  The records of ``papers`` are scanned in the dictionary's iteration order
  and the first match is returned.  The note placed above this function
  calls it unnecessary, and no call to it is visible in this file.
  """
  matches = (record for record in papers.values() if record["id"] == id_paper)
  return next(matches, -1)

""""""""""""""""""""""""""""""
""" Find which paper corresponds
    to that name """
""""""""""""""""""""""""""""""
def getPaperByName(paper_name, papers):
  """Return the paper whose "name" field equals ``paper_name``, or -1.

  The comparison is made against the "name" value of every record, not
  against the dictionary keys; the first match in iteration order wins.
  """
  for record in papers.values():
    if record["name"] == paper_name:
      return record
  return -1

""""""""""""""""""""""""""""""
""" Set the min and max among all
    the publication dates for 
    this paper """
""""""""""""""""""""""""""""""
def setDatesForPaper(dates, paper):
  """Widen the first/last publication years of ``paper`` with new years.

  The bounds already stored in ``paper["first_date"]`` and
  ``paper["last_date"]`` (when non-empty) are combined with every key of
  ``dates``; the smallest year is stored back in "first_date" and the
  largest in "last_date", both as int.

  Compared with the previous version:
    * no artificial start values (2000 / 1000) are used, so they can no
      longer end up in the table when a bound is unknown, and years from
      2000 onwards are handled like any other;
    * keys of ``dates`` that are not whole numbers (e.g. an empty string
      produced by an empty entry) are reported and skipped instead of
      stopping the script;
    * a bound for which nothing is known is left untouched.

  Args:
    dates: dict whose keys are years (str or int); values are ignored.
    paper: paper dict holding "first_date" and "last_date"; updated in place.

  Raises:
    ValueError: if a non-empty bound already stored in ``paper`` is not a
      whole number (unchanged behaviour).
  """
  #### min date ####
  mindate = None
  if paper["first_date"]:
    mindate = int(paper["first_date"])
    print("\t\t HAS mindate =>  %s" % (mindate,))

  #### max date ####
  maxdate = None
  if paper["last_date"]:
    maxdate = int(paper["last_date"])
    print("\t\t HAS maxdate =>  %s" % (maxdate,))

  ## Look for a smaller minimum or a larger maximum among the new years
  for date in dates:
    try:
      year = int(date)
    except ValueError:
      print("\t\t IGNORING date [ %s ] : not a year" % (date,))
      continue
    if mindate is None or year < mindate:
      mindate = year
      print("\t\t CHANGING mindate =>  %s" % (mindate,))
    if maxdate is None or year > maxdate:
      maxdate = year
      print("\t\t CHANGING maxdate =>  %s" % (maxdate,))

  if mindate is not None:
    paper["first_date"] = mindate
  if maxdate is not None:
    paper["last_date"] = maxdate

""""""""""""""""""""""""""""""
""" Output the resulted table 
    about Papers """
""""""""""""""""""""""""""""""
def writePapersTable(papers):
  """Append one row per paper to the papers table.

  Rows go to the module-level ``file_paper`` handle and follow the header
  written when the file was opened:
  id_paper ; name ; comment ; first_date ; last_date ; authors

  The comment column is now filled from ``paper["comment"]``; it used to be
  written empty even for papers whose comment had been loaded from the input
  table.  For an empty or missing comment the row is byte-for-byte what it
  was before.

  Args:
    papers: dict of paper dicts.  Each needs "id", "name", "first_date",
      "last_date" and "authors" (a string or a list of strings); "comment"
      is optional.
  """
  for paper in papers.values():
    comment = paper.get("comment", "")
    # An empty comment must keep the historical ' ; ; ' layout.
    comment_field = ''.join([comment, ' ']) if comment else ''
    strLinePaper = ''.join([
      paper["id"], ' ; ', paper["name"], ' ; ', comment_field, '; ',
      str(paper["first_date"]), ' ; ', str(paper["last_date"]), ' ; ',
      ''.join(paper["authors"]), '\n'])
    file_paper.write(strLinePaper)

def getMaxId(papers):
  """Return the highest number used in the paper ids, 0 when there is none.

  Every id is expected to look like "Paper<number>": the first five
  characters are dropped and the remainder is read as an integer.  Ids whose
  remainder is not made of digits (for example an empty or differently
  named id coming from the input table) are reported and skipped instead of
  stopping the script.

  Args:
    papers: dict of paper dicts, each holding an "id" string.

  Returns:
    int, the largest numeric suffix found (never below 0).
  """
  max_id = 0
  for paper in papers.values():
    id_str = paper["id"][5:]
    print("id =  %s" % (id_str,))
    if not id_str.strip().isdigit():
      print("IGNORING id [ %s ] : no numeric part" % (paper["id"],))
      continue
    max_id = max(max_id, int(id_str))
  return max_id

def treatFiles():
  """Read every author file and produce the two result tables.

  Steps:
    1. load the existing papers with prepareListPapers();
    2. for every file of ``onlyfiles`` (``.DS_Store`` excepted) derive the
       author id from the file name and read the file line by line; a line
       looks like
       ``Rivista Minima (5 novelle): 1872; 1873; 1877 (...); 1879; 1883``
       where the text before the first ':' names the journal and the text
       after it lists the publication years;
    3. create the paper when it is not known yet, register the author on
       it, write its publication rows and update its first/last year;
    4. write the papers table.

  Fixes over the previous version:
    * the publication counter now advances after each line, so the ids
      "Publi<p>-<n>" no longer restart at 1 for every line.  The advance is
      the sum of the counts returned by prepareListDates(); it equals the
      number of rows written as long as that dict reports every publication
      (NOTE(review): a year overwritten rather than accumulated in the dict
      would make the counter lag behind);
    * lines without ':' (blank lines in particular) are skipped instead of
      creating a paper named "[]";
    * each input file is closed even if an error occurs, and both result
      files are closed (hence flushed) when the function ends.  As a
      consequence the function is meant to be called once per run.
  """
  papers = prepareListPapers()
  nbPub = 1
  nbPaper = getMaxId(papers)

  try:
    for fi in onlyfiles:
      if fi == '.DS_Store':
        print('FILE Macintosh is :  %s' % (fi,))
        continue

      mypath_file = ''.join([mypath_input, '/', fi])
      print("############### READING FILE :  %s" % (mypath_file,))

      ######### Author id, taken from the file name #########
      id_author = findAuthor(mypath_file)

      ######### Analyse each line (left and right of the ':') #########
      with open(mypath_file, "r") as text_file:
        for l in text_file:
          pos_dot = l.find(':')
          if pos_dot == -1:
            if l.strip():
              print("SKIPPING line without ':' [ %s ]" % (l.strip(),))
            continue
          left_line = l[0:pos_dot]
          right_line = l[pos_dot + 1:]

          ### Journal name comes from the left string ###
          paper_name = preparePaperName(left_line, papers)
          ### Create the paper when it is still unknown ###
          paper = getPaperByName(paper_name, papers)
          if paper == -1:
            nbPaper = nbPaper + 1
            paper = setPaper(paper_name, nbPaper, papers)

          addAuthorToPaper(id_author, paper)

          dates = prepareListDates(right_line, id_author, paper["id"], nbPaper, nbPub)
          # Keep the sequence number moving: one step per publication.
          nbPub = nbPub + sum(int(count) for count in dates.values())
          setDatesForPaper(dates, paper)

          print("------ %s --------" % (paper_name,))

      print("##############################")
    writePapersTable(papers)
    print("Nb papers =  %s" % (len(papers),))
  finally:
    file_publication.close()
    file_paper.close()
# Entry point: the whole transformation runs as soon as this file is executed
# (there is no __main__ guard, so importing the module runs it as well).
treatFiles()

