Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / factiva / factivamailLoader.groovy @ 1488

History | View | Annotate | Download (4.9 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate: 2012-07-20 17:33:42 +0200 (ven., 20 juil. 2012) $
25
// $LastChangedRevision: 2221 $
26
// $LastChangedBy: mdecorde $
27
//
28
package org.txm.scripts.importer.factiva;
29

    
30
import org.txm.scripts.importer.alceste.importer;
31
import org.txm.scripts.importer.alceste.compiler;
32
import org.txm.scripts.importer.xml.pager_old;
33
import org.txm.objects.*;
34
import org.txm.utils.*;
35
import org.txm.*;
36
import org.txm.core.engines.*;
37
import org.txm.importer.scripts.xmltxm.*;
38
import org.txm.utils.i18n.*;
39
import org.w3c.dom.Element
40
import org.txm.utils.xml.DomUtils;
41

    
42
// Home directory of the current user (not read anywhere else in the visible script).
String userDir = System.getProperty("user.home");

// Progress monitor (optional) and project being imported.
// `projectBinding` and `monitor` are not declared in this script; presumably
// they are supplied by the caller through the script binding.
def MONITOR;
Project project;

try {
	project = projectBinding;
	MONITOR = monitor;
} catch (Exception ignored) {
	// Deliberately ignored: a failure while reading the two values leaves
	// them unset, and the null check just below aborts when no project is set.
}

if (project == null) {
	println "no project set. Aborting";
	return;
}
50

    
51
// Import parameters read from the project.
String corpusname = project.getName();
String basename = corpusname; // the corpus name is also used as the base name
String rootDir = project.getSrcdir();
String lang = project.getLang();
String model = lang; // handed to the annotation engine as its "lang" option
String encoding = project.getEncoding();
boolean annotate = project.getAnnotate();
String xsl = project.getFrontXSL();
def xslParams = project.getXsltParameters();

// Settings of the "default" edition.
def defaultEdition = project.getEditionDefinition("default");
int wordsPerPage = defaultEdition.getWordsPerPage();
String page_element = defaultEdition.getPageElement();
boolean build_edition = defaultEdition.getBuildEdition();
63

    
64
// Directory holding the source files.
File srcDir = new File(rootDir);

// Project directory; the import cannot continue if it does not exist
// after the creation attempt.
File binDir = project.getProjectDirectory();
binDir.mkdirs();
if (!binDir.exists()) {
	println "Could not create binDir "+binDir
	return;
}

// Always start from an empty txm/<corpus name> directory.
File txmDir = new File(binDir,"txm/$corpusname");
txmDir.deleteDir();
txmDir.mkdirs();
75

    
76
// TRANSFORM FACTIVA TO ALCESTE HERE
// The source directory is converted to a single Alceste file named
// <source directory name>.txt, written under <binDir>/src.
File newSrcDir = new File(binDir, "src");
newSrcDir.mkdir();

String alcesteName = srcDir.getName()+".txt";
File alcesteFile = new File(newSrcDir, alcesteName);

// NOTE(review): the two "UTF-8" arguments are presumably the input and
// output encodings; the project encoding is not used here - confirm.
FactivaMail2Alceste converter = new FactivaMail2Alceste(srcDir, alcesteFile, "UTF-8", "UTF-8");
boolean converted = converter.doparse();
if (!converted) {
	println "Failed to convert files in $srcDir to one Alceste file"
	return false;
}
85
// Report progress and stop here if the user cancelled the import.
if (MONITOR != null) {
	MONITOR.worked(1, "IMPORTER");
	if (MONITOR.isCanceled()) {
		return MONITOR.done();
	}
}
println "-- IMPORTER - Reading source files"

// Run the Alceste importer on the converted sources; it receives the
// converted source directory, the project directory and the txm directory.
def importSucceeded = new importer().run(newSrcDir, binDir, txmDir, encoding, basename, lang);
if (!importSucceeded) {
	println "import process stopped";
	return;
}
93

    
94
if (MONITOR != null) {
	MONITOR.worked(20, "ANNOTATE");
}

// Stays false when annotation is disabled or when the engine reports failure.
boolean annotationSuccess = false;
if (annotate) {
	println "-- ANNOTATE - Running NLP tools"
	def treeTagger = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger");
	def engineOptions = ["lang":model];
	if (treeTagger.processDirectory(txmDir, binDir, engineOptions)) {
		annotationSuccess = true;
	}
}
104

    
105
if (MONITOR != null) {
	MONITOR.worked(25, "COMPILING");
}
println "-- COMPILING - Building Search Engine indexes"

// Configure the Alceste compiler, then run it on the project.
def indexCompiler = new compiler();
indexCompiler.setAnnotationSuccess(annotationSuccess);
// NOTE(review): `debug` is not declared in this script; presumably it is
// provided through the script binding - confirm.
if (debug) {
	indexCompiler.setDebug();
}
indexCompiler.setLang(lang);

def compiled = indexCompiler.run(project);
if (!compiled) {
	println "import process stopped";
	return;
}
115

    
116
if (MONITOR != null) MONITOR.worked(20, "EDITION")
println "-- EDITION - Building edition"

// Always start from an empty HTML/<corpus name> directory; the pages of the
// "default" edition are written to its "default" sub-directory.
File htmlDir = new File(binDir,"HTML/$corpusname");
htmlDir.deleteDir();
htmlDir.mkdirs();
File outdir = new File(binDir,"/HTML/$corpusname/default/");
outdir.mkdirs();

// Process the TXM files in sorted order.
List<File> filelist = txmDir.listFiles();
Collections.sort(filelist);

// These lists depend only on the language, so they are fetched once
// instead of once per text.
List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);

println "Paginating texts: "
for (File txmFile : filelist) {
	print "."

	// Text name = file name without its last extension.
	String txtname = txmFile.getName();
	int dot = txtname.lastIndexOf(".");
	if (dot > 0) txtname = txtname.substring(0, dot);

	Text t = new Text(project);
	t.setName(txtname);
	t.setSourceFile(txmFile)
	t.setTXMFile(txmFile)

	// Fix: the pager must be given the file of the current iteration
	// (`txmFile`); the previous code passed `srcfile`, a name that is not
	// defined anywhere in this script.
	// NOTE(review): the page size (500) and the page element ("pb") are
	// hard-coded although `wordsPerPage` and `page_element` are read from the
	// project earlier in this script - confirm whether they should be used.
	def ed = new pager_old(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, 500, basename, "pb");

	Edition edition = new Edition(t);
	edition.setName("default");
	edition.setIndex(outdir.getAbsolutePath());

	// Register one page per file produced by the pager. Pages are numbered
	// from 1; a page without an entry in the pager index falls back to "w_0".
	// NOTE(review): getIdx() presumably holds the id of the first word of
	// each page - confirm.
	def pageFiles = ed.getPageFiles();
	def pageWordIds = ed.getIdx();
	for (int p = 0; p < pageFiles.size(); p++) {
		String wordid = (p < pageWordIds.size()) ? pageWordIds.get(p) : "w_0";
		edition.addPage(""+(p + 1), wordid);
	}
}
151

    
152
if (MONITOR != null) {
	MONITOR.worked(20, "FINALIZING");
}

// `readyToLoad` is assigned without being declared, so Groovy stores it in
// the script binding; it receives the result of saving the project.
readyToLoad = project.save();