Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / importer / hyperprince / hyperprinceLoader.groovy @ 187

History | View | Annotate | Download (4.4 kB)

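/**
 * Loader script for the "hyperprince" corpus: imports the source XML file,
 * runs the NLP annotation, compiles the corpus, registers it as a base of the
 * "default" project of the workspace and builds its HTML edition pages.
 */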
6
// Copyright © 2010-2013 ENS de Lyon.
7
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
8
// Lyon 2, University of Franche-Comté, University of Nice
9
// Sophia Antipolis, University of Paris 3.
10
// 
11
// The TXM platform is free software: you can redistribute it
12
// and/or modify it under the terms of the GNU General Public
13
// License as published by the Free Software Foundation,
14
// either version 2 of the License, or (at your option) any
15
// later version.
16
// 
17
// The TXM platform is distributed in the hope that it will be
18
// useful, but WITHOUT ANY WARRANTY; without even the implied
19
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
20
// PURPOSE. See the GNU General Public License for more
21
// details.
22
// 
23
// You should have received a copy of the GNU General
24
// Public License along with the TXM platform. If not, see
25
// http://www.gnu.org/licenses.
26
// 
27
// 
28
// 
29
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun., 06 mai 2013) $
30
// $LastChangedRevision: 2386 $
31
// $LastChangedBy: mdecorde $ 
32
//
33
package org.txm.importer.hyperprince;
34

    
35
import org.txm.importer.hyperprince.importer;
36
import org.txm.importer.hyperprince.compiler;
37
import org.txm.objects.*;
38
import org.txm.Toolbox;
39
import org.txm.scripts.teitxm.*;
40
import org.txm.utils.i18n.*;
41

    
/*
 * Script parameters.
 *
 * The script binding is expected to provide rootDirBinding, langBinding,
 * encodingBinding and modelBinding. If reading any of them fails, the script
 * switches to "DEV MODE": it uses hard-coded defaults located under the
 * user's home directory and, when the Toolbox is not initialized yet,
 * performs a minimal Toolbox setup so the rest of the script can run.
 */
String userDir = System.getProperty("user.home");
String rootDir;
String lang;
String encoding;
String model;
try {
        rootDir = rootDirBinding;
        lang = langBinding;
        encoding = encodingBinding;
        model = modelBinding;
}
catch (Exception e) {
        // Reading a binding failed: treat this as a developer/debug run.
        println "DEV MODE";

        // The defaults are applied whether or not the Toolbox is initialized;
        // otherwise rootDir/lang/model would stay null and the script would
        // fail on the first File built from rootDir.
        rootDir = userDir+"/xml/hyperprince/";
        lang = "fr";
        encoding = "UTF-8";
        model = "rgaqcj";

        // Only configure the Toolbox when nothing else has done it already.
        if (!Toolbox.isInitialized()) {
                Toolbox.workspace = new Workspace(new File(userDir,"TXM/workspaces/default.xml"));
                Toolbox.setParam(Toolbox.INSTALL_DIR,new File(userDir,"TXM"));
                Toolbox.setParam(Toolbox.TREETAGGER_INSTALL_PATH,new File(userDir,"TXM/treetagger"));
                Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH,new File(userDir,"TXM/treetagger/models"));
                Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(userDir,"TXM"));
        }
}
67

    
// Working directory of the import and name of the corpus source file
// (expected directly inside rootDir).
File homedir = new File(rootDir);
String corpusSrc = "Corpus-Hyperprince_2009-06-10.xml";//"Corpus-Hyperprince_2010-04-06.xml";

// IMPORT: stop the script early when the source file is missing.
File corpusFile = new File(rootDir, corpusSrc);
if (!corpusFile.exists()) {
        println("source file : "+corpusFile+" does not exists")
        return false;
}

println "-- IMPORTER - Reading source files"
new importer().run(rootDir, corpusSrc);

// ANNOTATE: the model file name is the model name followed by ".par".
println "-- ANNOTATE - Running NLP tools"
new Annotate().run(homedir, model+".par");

// COMPILATION
println "COMPILING"
def c = new compiler()
//c.setCwbPath("D:\\Travail_Sev\\Logiciels\\TXM\\cwb\\bin"); // for developers
c.setLang(lang);
c.run(rootDir);
92

    
// Publish the registry entry produced by the compilation step: when
// <rootDir>/registry/hyperprince exists, copy it to registry/hyperprince
// under the USER_TXM_HOME location.
File registryfile = new File(rootDir+"/registry","hyperprince");
if (registryfile.exists()) {
        def txmHome = Toolbox.getParam(Toolbox.USER_TXM_HOME);
        File registryTarget = new File(txmHome,"registry/hyperprince");
        org.txm.utils.FileCopy.copy(registryfile, registryTarget)
}

// (Re)create the "hyperprince" base in the "default" project: any previous
// base with that name is removed first, then the new one is pointed at
// <rootDir>/txm and tagged with the corpus language.
Workspace w = Toolbox.workspace;
Project p = w.getProject("default")
p.removeBase("hyperprince")

Base b = p.addBase("hyperprince");
b.addDirectory(new File(rootDir,"txm"));
b.setAttribute("lang", lang)
b.propagateAttribute("lang")
105

    
// EDITION: build the "default" HTML edition of every text of the base,
// then save the workspace.
println "-- EDITION"

// Start from an empty <rootDir>/HTML directory.
new File(rootDir+"/HTML/").deleteDir();
new File(rootDir+"/HTML/").mkdir();
new File(rootDir,"/HTML/default").mkdir();

for (String textname : b.getTextsID()) {
        Text text = b.getText(textname);
        File srcfile = text.getSource();

        // Output file = source file name with its extension replaced by ".html".
        // The extension is located with lastIndexOf so that names whose
        // extension is not exactly three characters long (or that have no
        // extension at all) are handled without truncation or exception.
        String srcname = srcfile.getName();
        int dot = srcname.lastIndexOf(".");
        String basename = (dot >= 0) ? srcname.substring(0, dot) : srcname;
        File resultfile = new File(rootDir+"/HTML", basename+".html");

        List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
        List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);
        println("build hyperprince xml-tei file : "+srcfile+" to : "+resultfile );

        // NOTE(review): the last argument (500) is presumably the page size
        // used by the pager — confirm against the pager class.
        def ed = new pager(srcfile,resultfile, NoSpaceBefore, NoSpaceAfter,500);

        Edition editionweb = text.addEdition("default","html",resultfile);
        //println("pages "+ed.getPageFiles())
        //println("idx "+ed.getIdx())

        // Register each produced page file together with the index value
        // found at the same position in ed.getIdx().
        def pageFiles = ed.getPageFiles();
        def pageIdx = ed.getIdx();
        for (int i = 0; i < pageFiles.size(); i++) {
                File f = pageFiles.get(i);
                String idx = pageIdx.get(i);
                editionweb.addPage(f,idx);
        }

//        Edition editionbp = text.addEdition("onepage","html",resultfile);
//        editionbp.addPage(resultfile,ed.getIdx().get(0));
}

w.save()