Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / alceste / alcesteLoader.groovy @ 1094

History | View | Annotate | Download (5.2 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
//
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
//
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
//
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate: 2015-06-03 15:04:53 +0200 (mer. 03 juin 2015) $
25
// $LastChangedRevision: 2984 $
26
// $LastChangedBy: mdecorde $
27
//
28
package org.txm.scripts.importer.alceste;
29

    
30
import org.txm.scripts.importer.alceste.importer;
31
import org.txm.scripts.importer.alceste.compiler;
32
import org.txm.scripts.importer.xml.pager_old;
33
import org.txm.objects.*;
34
import org.txm.utils.*;
35
import org.txm.*;
36
import org.txm.core.engines.*;
37
import org.txm.core.engines.EngineType
38
import org.txm.importer.scripts.xmltxm.*;
39
import org.txm.utils.i18n.*;
40
import org.w3c.dom.Element
41
import org.txm.utils.xml.DomUtils;
42

    
43
// Script inputs. The caller is expected to provide them through the script
// binding as "projectBinding" (the project to import) and "monitor" (an
// optional progress monitor). Either one may be missing.
def MONITOR
Project project
boolean debug = org.txm.utils.logger.Log.isPrintingErrors()

// Look the variables up explicitly rather than reading them blindly and
// swallowing the resulting exception in an empty catch block.
// getBinding() is called as a method so that a binding variable that happens
// to be named "binding" cannot shadow it.
if (getBinding().hasVariable("projectBinding")) project = getBinding().getVariable("projectBinding")
if (getBinding().hasVariable("monitor")) MONITOR = getBinding().getVariable("monitor")

// Nothing can be imported without a project.
if (project == null) { println "no project set. Aborting"; return }
52

    
53
// ---- Import parameters, all read from the project ----
String corpusname = project.getName()
String basename = corpusname                    // same value as the corpus name
String rootDir = project.getSrcdir()
String lang = project.getLang()
String model = lang                             // same value as the language
String encoding = project.getEncoding()
boolean annotate = project.getAnnotate()
String xsl = project.getFrontXSL()
def xslParams = project.getXsltParameters()

// Settings of the edition definition named "default".
int wordsPerPage = project.getEditionDefinition("default").getWordsPerPage()
boolean build_edition = project.getEditionDefinition("default").getBuildEdition()

// ---- Working directories ----
File srcDir = new File(rootDir)
File binDir = project.getProjectDirectory()

// The project directory must exist before anything is written into it.
binDir.mkdirs()
if (!binDir.exists()) {
        println "Could not create binDir "+binDir
        return
}

// Start from an empty "txm/<corpus name>" directory inside binDir.
File txmDir = new File(binDir, "txm/" + corpusname)
txmDir.deleteDir()
txmDir.mkdirs()
76

    
77
// ---- IMPORTER step: run the importer on the source directory ----
MONITOR?.worked(1, "IMPORTER")
if (MONITOR?.isCanceled()) { return MONITOR.done() }
println "-- IMPORTER - Reading source files"

// A falsy result from the importer aborts the whole script.
boolean imported = new importer().run(srcDir, binDir, txmDir, encoding, basename, lang)
if (!imported) {
        println "import process stopped"
        return
}
85

    
86
// ---- ANNOTATE step: only performed when the annotate flag is set ----
if (MONITOR?.isCanceled()) { return MONITOR.done() }
MONITOR?.worked(20, "ANNOTATE")
println "-- ANNOTATE - Running NLP tools"

// Stays false when annotation is disabled or when processDirectory
// returns a falsy result.
boolean annotationSuccess = false

if (annotate) {
        def treeTagger = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
        // The engine works on txmDir and receives the model through the "lang" option.
        annotationSuccess = treeTagger.processDirectory(txmDir, binDir, ["lang": model]) ? true : false
}
97

    
98
// ---- COMPILING step: configure the compiler, then run it on the project ----
if (MONITOR?.isCanceled()) { return MONITOR.done() }
MONITOR?.worked(25, "COMPILING")
println "-- COMPILING - Building Search Engine indexes"

def corpusCompiler = new compiler()
corpusCompiler.setAnnotationSuccess(annotationSuccess)
if (debug) {
        corpusCompiler.setDebug()
}
corpusCompiler.setLang(lang)

// A falsy result from the compiler aborts the whole script.
boolean compiled = corpusCompiler.run(project)
if (!compiled) {
        println "import process stopped"
        return
}
109

    
110
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }

// ---- EDITION step ----
// The "HTML/<corpus name>" directory is always reset. The "default" edition
// is only built when build_edition is set.
File htmlDir = new File(binDir, "HTML/$corpusname");
htmlDir.deleteDir();
htmlDir.mkdirs();
if (build_edition) {

        if (MONITOR != null) MONITOR.worked(20, "EDITION")

        println "-- EDITION - Building edition"
        File outdir = new File(htmlDir, "default");
        outdir.mkdirs();

        // File.listFiles() returns null when the directory cannot be listed.
        // Stop with a message, as the other steps do, rather than failing
        // with a NullPointerException while sorting.
        File[] txmFiles = txmDir.listFiles();
        if (txmFiles == null) {
                println "Could not list files of "+txmDir
                return;
        }
        // Process the texts in the natural order of their File paths.
        Arrays.sort(txmFiles);

        // Both lookups take only lang as argument, so they are done once for
        // all texts instead of once per file.
        List<String> noSpaceBefore = LangFormater.getNoSpaceBefore(lang);
        List<String> noSpaceAfter = LangFormater.getNoSpaceAfter(lang);

        println "Paginating texts: "

        for (File srcfile : txmFiles) {
                print "."

                // Text name = file name without its last extension. A name
                // whose only dot is the first character is kept whole.
                String txtname = srcfile.getName();
                int dot = txtname.lastIndexOf(".");
                if (dot > 0) txtname = txtname.substring(0, dot);

                // The same file is set as both source file and TXM file of the text.
                Text t = new Text(project);
                t.setName(txtname);
                t.setSourceFile(srcfile)
                t.setTXMFile(srcfile)

                def ed = new pager_old(srcfile, outdir, txtname, noSpaceBefore, noSpaceAfter, wordsPerPage, basename, "pb");

                Edition edition = new Edition(t);
                edition.setName("default");
                edition.setIndex(outdir.getAbsolutePath());

                // One page per page file produced by the pager. Pages are
                // named "1", "2", ... and paired with the entry at the same
                // position in the pager's idx list.
                int pageCount = ed.getPageFiles().size();
                for (int page = 0; page < pageCount; page++) {
                        String wordid = ed.getIdx().get(page);
                        edition.addPage("" + (page + 1), wordid);
                }
        }
}
153

    
154
// ---- FINALIZING step ----
if (MONITOR?.isCanceled()) { return MONITOR.done() }
MONITOR?.worked(20, "FINALIZING")

// Save the project and store the result in readyToLoad. That name is not
// declared anywhere in this script, so the assignment creates or updates
// a variable of the script binding.
readyToLoad = project.save()