Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / fleurs / fleursLoader.groovy @ 1000

History | View | Annotate | Download (4.5 kB)

1
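/**
 * Import script of the "fleurs" corpus: reads the source files, builds the
 * search engine indexes, registers the corpus in the TXM workspace and
 * builds the paginated HTML edition of each text.
 */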
6
// Copyright © 2010-2013 ENS de Lyon.
7
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
8
// Lyon 2, University of Franche-Comté, University of Nice
9
// Sophia Antipolis, University of Paris 3.
10
// 
11
// The TXM platform is free software: you can redistribute it
12
// and/or modify it under the terms of the GNU General Public
13
// License as published by the Free Software Foundation,
14
// either version 2 of the License, or (at your option) any
15
// later version.
16
// 
17
// The TXM platform is distributed in the hope that it will be
18
// useful, but WITHOUT ANY WARRANTY; without even the implied
19
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
20
// PURPOSE. See the GNU General Public License for more
21
// details.
22
// 
23
// You should have received a copy of the GNU General
24
// Public License along with the TXM platform. If not, see
25
// http://www.gnu.org/licenses.
26
// 
27
// 
28
// 
29
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun. 06 mai 2013) $
30
// $LastChangedRevision: 2386 $
31
// $LastChangedBy: mdecorde $ 
32
//
33
package org.txm.scripts.importer.fleurs
34
;
35

    
36
import java.io.File;
37
import org.txm.scripts.importer.fleurs.compiler;
38
import org.txm.scripts.importer.fleurs.pager;
39
import org.txm.scripts.importer.fleurs.importer;
40
import org.txm.objects.*;
41
import org.txm.utils.*
42
import org.txm.utils.io.*;
43
import org.txm.*;
44
import org.txm.core.engines.*;
45
import org.txm.importer.scripts.xmltxm.*;
46
import org.txm.utils.i18n.*;
47

    
48
/*
 * Script body, executed through groovy.lang.Script#run().
 */
52
// Import parameters. They are read from the script binding variables
// rootDirBinding, langBinding, encodingBinding and modelBinding; when one of
// them cannot be read the script falls back to development defaults.
String userDir = System.getProperty("user.home");
String rootDir;
String lang;
String encoding;
String model;
try {
	rootDir = rootDirBinding;
	lang = langBinding;
	encoding = encodingBinding;
	model = modelBinding;
} catch (Exception ignored) {
	// An exception here means the bindings are missing: we are debugging.
	println "DEV MODE";

	// Fill in every parameter that is still unset, whether or not the Toolbox
	// is already initialized: the rest of the script needs a non-null rootDir
	// and lang in both cases.
	if (rootDir == null) {
		rootDir = userDir+"/xml/fleurs/";
	}
	if (lang == null) {
		lang = "fr";
	}
	if (encoding == null) {
		encoding = "UTF-8"; // not used
	}
	if (model == null) {
		model = "rgaqcj"; // not used
	}

	// Only configure the Toolbox when nobody did it before us.
	if (!org.txm.Toolbox.isInitialized()) {
		Toolbox.setParam(Toolbox.INSTALL_DIR, new File(userDir, "TXMinstall"));
		Toolbox.setParam(Toolbox.METADATA_ENCODING, "UTF-8");
		Toolbox.setParam(Toolbox.METADATA_COLSEPARATOR, ",");
		Toolbox.setParam(Toolbox.METADATA_TXTSEPARATOR, "\"");
		Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(System.getProperty("user.home"), "TXM"));
	}
}
73

    
74
// The corpus name is fixed for this import module.
String basename = "fleurs";
System.out.println("basename : "+basename);

// Step 1: run the importer on the source directory.
println "-- IMPORTER - Reading source files"
File sourceDir = new File(rootDir);
new importer().run(sourceDir, basename);

// From here on rootDir designates the corpus directory inside the TXM home.
File corpusDir = new File(Toolbox.getTxmHomePath(), "corpora/" + basename);
rootDir = corpusDir.getAbsolutePath() + "/";

// Step 2: nothing to annotate for this corpus.
println "-- ANNOTATE - Running NLP tools"
println "No annotation to do"
//new Annotate().run(new File(rootDir),model+".par");
87

    
88
// Step 3: run the compiler on the corpus directory.
println "-- COMPILING - Building Search Engine indexes"
def indexCompiler = new compiler()
//indexCompiler.setCwbPath("")// for developers
indexCompiler.setLang(lang);
indexCompiler.run(rootDir);

// Move the registry file to the cwb registry dir; the target file name is
// the lower-cased corpus name.
File registryfile = new File(rootDir, "registry/"+basename);
if (registryfile.exists()) {
	FileCopy.copy(registryfile, new File(Toolbox.getTxmHomePath(), "registry/"+basename.toLowerCase()))
}
99

    
100
// Step 4: declare the corpus in the "default" project of the workspace.
// The base named after the corpus is removed first, then added again.
Workspace w = org.txm.Toolbox.workspace;
Project defaultProject = w.getProject("default")
defaultProject.removeBase(basename)
Base b = defaultProject.addBase(basename);

// Attach the "txm" directory of the corpus and set its language attribute.
File txmDir = new File(rootDir, "txm");
b.addDirectory(txmDir);
b.setAttribute("lang", lang)
b.propagateAttribute("lang")
108

    
109
// Step 5: build the "default" HTML edition of every text of the base.
println "-- EDITION - Building edition"
// Start from an empty HTML directory.
new File(rootDir,"HTML").deleteDir();
new File(rootDir,"HTML").mkdir();
new File(rootDir,"HTML/default").mkdir();
// Number of file names printed so far, used to format the progress output.
def second = 0

println "Paginating text: "
for (Text text : b.getTexts()) {
	File srcfile = text.getSource();

	// The result file is named after the source file, with its extension
	// (whatever its length) replaced by ".html".
	String srcname = srcfile.getName();
	int dot = srcname.lastIndexOf('.');
	String textname = (dot >= 0) ? srcname.substring(0, dot) : srcname;
	File resultfile = new File(rootDir, "HTML/"+textname+".html");

	List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
	List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);

	// Progress output: comma separated names, with a line break every 5 names.
	if (second > 0) {
		print(", ")
	}
	if (second > 0 && (second % 5) == 0) {
		println ""
	}
	print(srcfile.getName());
	second++

	// 500 is passed to the pager as in the original call
	// (presumably the page size; TODO confirm against the pager class).
	def ed = new pager(srcfile, resultfile, NoSpaceBefore, NoSpaceAfter, 500, basename);

	// Register every page file produced by the pager, with its index.
	Edition editionweb = text.addEdition("default", "html", resultfile);
	for (int i = 0; i < ed.getPageFiles().size(); i++) {
		File f = ed.getPageFiles().get(i);
		String idx = ed.getIdx().get(i);
		editionweb.addPage(f, idx);
	}

//	Edition editionbp = text.addEdition("onepage","html",resultfile);
//	editionbp.addPage(resultfile,ed.getIdx().get(0));
}

// Save the workspace, then end the progress line.
w.save();
println ""