25 |
25 |
// $LastChangedRevision: 3426 $
|
26 |
26 |
// $LastChangedBy: mdecorde $
|
27 |
27 |
//
|
28 |
|
package org.txm.scripts.importer.txt;
|
|
28 |
package org.txm.scripts.importer.txt
|
29 |
29 |
|
30 |
|
import org.txm.scripts.importer.txt.importer;
|
31 |
|
import org.txm.scripts.importer.txt.compiler;
|
32 |
|
import org.txm.scripts.importer.xml.pager;
|
33 |
|
import org.txm.objects.*;
|
34 |
|
import org.txm.importer.scripts.xmltxm.*;
|
35 |
|
import org.txm.*;
|
36 |
|
import org.txm.objects.*;
|
37 |
|
import org.txm.core.engines.*;
|
|
30 |
import org.txm.scripts.importer.txt.importer
|
|
31 |
import org.txm.scripts.importer.txt.compiler
|
|
32 |
import org.txm.scripts.importer.xml.pager
|
|
33 |
import org.txm.objects.*
|
|
34 |
import org.txm.importer.scripts.xmltxm.*
|
|
35 |
import org.txm.*
|
|
36 |
import org.txm.objects.*
|
|
37 |
import org.txm.core.engines.*
|
38 |
38 |
import org.txm.utils.ConsoleProgressBar
|
39 |
|
import org.txm.utils.i18n.*;
|
40 |
|
import org.txm.metadatas.*;
|
41 |
|
import org.txm.utils.io.FileCopy;
|
|
39 |
import org.txm.utils.i18n.*
|
|
40 |
import org.txm.metadatas.*
|
|
41 |
import org.txm.utils.io.FileCopy
|
42 |
42 |
import org.w3c.dom.Element
|
43 |
|
import org.txm.utils.xml.DomUtils;
|
|
43 |
import org.txm.utils.xml.DomUtils
|
44 |
44 |
import org.txm.importer.*
|
45 |
45 |
|
46 |
|
// Script setup: resolve the user's home directory and pick up the project and
// progress monitor that the caller is expected to provide.
// NOTE(review): in this listing every changed statement appears twice (once with
// and once without a trailing semicolon), separated by bare line numbers and "|"
// rows; this looks like diff-viewer residue — confirm against the real script.
// Home directory of the current user, taken from the "user.home" system property.
String userDir = System.getProperty("user.home");
|
|
46 |
String userDir = System.getProperty("user.home")
|
47 |
47 |
|
48 |
|
// MONITOR (untyped) and project stay unset unless the try block below assigns them.
def MONITOR;
|
49 |
|
Project project;
|
|
48 |
def MONITOR
|
|
49 |
Project project
|
50 |
50 |
|
51 |
51 |
// projectBinding and monitor are not defined in the visible lines; presumably
// they come from the script binding — TODO confirm. Any exception raised while
// reading them is swallowed by the empty catch body.
try {project=projectBinding;MONITOR=monitor} catch (Exception)
|
52 |
52 |
{ }
|
53 |
53 |
// Without a project nothing can be imported: print a message and leave the script.
if (project == null) { println "no project set. Aborting"; return; }
|
54 |
54 |
|
55 |
|
// Import parameters, all read from the project object.
// The corpus name is the project name; basename is an alias of it.
String corpusname = project.getName();
|
|
55 |
String corpusname = project.getName()
|
56 |
56 |
String basename = corpusname
|
57 |
|
// Path of the source directory, as a String (wrapped in a File later).
String rootDir = project.getSrcdir();
|
|
57 |
String rootDir = project.getSrcdir()
|
58 |
58 |
String lang = project.getLang()
|
59 |
59 |
// The annotation model is set to the project language.
String model = lang
|
60 |
60 |
String encoding = project.getEncoding()
|
61 |
61 |
boolean annotate = project.getAnnotate()
|
62 |
|
// Front XSL and its parameters; neither is used in the visible lines.
String xsl = project.getFrontXSL();
|
63 |
|
def xslParams = project.getXsltParameters();
|
|
62 |
String xsl = project.getFrontXSL()
|
|
63 |
def xslParams = project.getXsltParameters()
|
64 |
64 |
// Settings of the edition definition named "default".
int wordsPerPage = project.getEditionDefinition("default").getWordsPerPage()
|
65 |
65 |
boolean build_edition = project.getEditionDefinition("default").getBuildEdition()
|
66 |
66 |
|
67 |
67 |
// Directory preparation: source directory, project (binary) directory and a
// fresh "txm/<corpusname>" working directory inside the binary directory.
File srcDir = new File(rootDir);
|
68 |
68 |
File binDir = project.getProjectDirectory();
|
69 |
|
// Create the binary directory; the result of mkdirs() is not checked directly,
// the exists() test below is what decides whether to continue.
binDir.mkdirs();
|
|
69 |
binDir.mkdirs()
|
70 |
70 |
if (!binDir.exists()) {
|
71 |
71 |
println "Error: could not create corpus binary directory: "+binDir
|
72 |
|
return;
|
|
72 |
return
|
73 |
73 |
}
|
74 |
74 |
|
75 |
|
// Start from an empty txm directory: delete it recursively, then recreate it.
File txmDir = new File(binDir, "txm/$corpusname");
|
76 |
|
txmDir.deleteDir();
|
77 |
|
txmDir.mkdirs();
|
|
75 |
File txmDir = new File(binDir, "txm/$corpusname")
|
|
76 |
txmDir.deleteDir()
|
|
77 |
txmDir.mkdirs()
|
78 |
78 |
|
79 |
79 |
//get metadata values from CSV
// The metadata file is looked up in the source directory, copied into the
// binary directory, and the copy is what gets parsed afterwards.
|
80 |
|
// Stays null when no metadata file is found (checked with "metadatas != null" later).
Metadatas metadatas; // text metadata
|
81 |
|
File allMetadataFile = Metadatas.findMetadataFile(srcDir);
|
|
80 |
Metadatas metadatas // text metadata
|
|
81 |
File allMetadataFile = Metadatas.findMetadataFile(srcDir)
|
82 |
82 |
|
83 |
83 |
// Only proceed when a metadata file was found and actually exists on disk.
if (allMetadataFile != null && allMetadataFile.exists()) {
|
84 |
84 |
println "Trying to read metadata from: "+allMetadataFile
|
85 |
85 |
// Copy keeps the original file name and lives directly in binDir.
File copy = new File(binDir, allMetadataFile.getName())
|
86 |
86 |
// A failed copy aborts the whole script.
if (!FileCopy.copy(allMetadataFile, copy)) {
|
87 |
|
println "Error: could not create a copy of metadata file "+allMetadataFile.getAbsoluteFile();
|
88 |
|
return;
|
|
87 |
println "Error: could not create a copy of metadata file "+allMetadataFile.getAbsoluteFile()
|
|
88 |
return
|
89 |
89 |
}
|
90 |
90 |
metadatas = new Metadatas(copy, Toolbox.getMetadataEncoding(),
|
91 |
91 |
Toolbox.getMetadataColumnSeparator(),
|
... | ... | |
97 |
97 |
// IMPORTER step: run the TXT importer over the source directory.
// If a monitor is present and the job was canceled, return the result of MONITOR.done().
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
98 |
98 |
// Report progress; suffixes is defined outside the visible lines.
if (MONITOR != null) MONITOR.worked(20, "IMPORTER - Reading source files with extension "+suffixes)
|
99 |
99 |
// A falsy result from importer.run() stops the script with a message.
if (!new importer().run(srcDir, binDir, txmDir,encoding, suffixes, basename, lang, project)) {
|
100 |
|
println "Import process stopped";
|
101 |
|
return;
|
|
100 |
println "Import process stopped"
|
|
101 |
return
|
102 |
102 |
}
|
103 |
103 |
|
104 |
104 |
// Second cancellation check, after the importer has finished.
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
... | ... | |
110 |
110 |
// Metadata injection: each file of filesToInject (defined outside the visible
// lines) is rewritten through a temporary file that then replaces the original.
// Console progress bar sized to the number of files to process.
ConsoleProgressBar cpb = new ConsoleProgressBar(filesToInject.size())
|
111 |
111 |
for (File infile : filesToInject) {
|
112 |
112 |
cpb.tick()
|
113 |
|
// Temporary "temp*.xml" file created next to the input file.
File outfile = File.createTempFile("temp", ".xml", infile.getParentFile());
|
|
113 |
File outfile = File.createTempFile("temp", ".xml", infile.getParentFile())
|
114 |
114 |
|
115 |
115 |
// On injection failure the temporary file is removed.
if (!metadatas.injectMetadatasInXml(infile, outfile, "text", null)) {
|
116 |
116 |
outfile.delete();
|
// NOTE(review): the listing skips line 117 here, so the branch structure between
// the failure case above and the replacement below is not visible.
... | ... |
118 |
118 |
// Replace the input by the temporary file: delete first, then rename. When either
// step fails only a warning is printed; the exists() check below decides whether
// to abort.
if (!(infile.delete() && outfile.renameTo(infile))) println "Warning can't rename file "+outfile+" to "+infile
|
119 |
119 |
if (!infile.exists()) {
|
120 |
120 |
println "Error: could not replace $infile by $outfile"
|
121 |
|
// Unlike the bare "return" used elsewhere in the script, this exit returns false.
return false;
|
|
121 |
return false
|
122 |
122 |
}
|
123 |
123 |
}
|
124 |
124 |
}
|
... | ... | |
134 |
134 |
// Annotation step: look up the annotation engine named in the import parameters
// and let it process the txm directory.
// Engine name comes from the "engine" key of the "annotate" node, defaulting to "TreeTagger".
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
|
135 |
135 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
|
136 |
136 |
// The engine receives the model name under the "lang" key.
// NOTE(review): engine is used without a null check — confirm getEngine() cannot return null.
if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
|
137 |
|
// annotationSuccess is declared outside the visible lines; it is passed to the compiler later.
annotationSuccess = true;
|
|
137 |
annotationSuccess = true
|
138 |
138 |
// Optional cleanup of the "treetagger" and "ptreetagger" directories under binDir.
if (project.getCleanAfterBuild()) {
|
139 |
139 |
new File(binDir, "treetagger").deleteDir()
|
140 |
140 |
new File(binDir, "ptreetagger").deleteDir()
|
... | ... | |
146 |
146 |
// COMPILING step: configure a compiler instance and run it on the project.
println "-- COMPILING - Building Search Engine indexes"
|
147 |
147 |
// If a monitor is present and the job was canceled, return the result of MONITOR.done().
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
148 |
148 |
if (MONITOR != null) MONITOR.worked(20, "COMPILING - Building Search Engine indexes")
|
149 |
|
// The meaning of the null and "" arguments is defined by the compiler class, not shown here.
def c = new compiler(null, "", corpusname, "default");
|
|
149 |
def c = new compiler(null, "", corpusname, "default")
|
150 |
150 |
//c.setCwbPath(userDir+"/TXM/cwb/bin/")// for developers
|
151 |
151 |
// Metadata attributes are only passed on when a metadata file was loaded.
if (metadatas != null)
|
152 |
152 |
c.setMetadataAttributes(metadatas.getSattributes())
|
// NOTE(review): the listing skips line 153 here.
... | ... |
154 |
154 |
// Tell the compiler whether the annotation step succeeded.
c.setAnnotationSuccess(annotationSuccess)
|
155 |
155 |
// debug is defined outside the visible lines.
if (debug) c.setDebug();
|
156 |
156 |
// A falsy result from the compiler stops the script with a message.
if (!c.run(project)) {
|
157 |
|
println "Import process stopped";
|
158 |
|
return;
|
|
157 |
println "Import process stopped"
|
|
158 |
return
|
159 |
159 |
}
|
160 |
160 |
//println "basename :"+basename;
|
161 |
161 |
// Cancellation check, then reset of the "HTML/<corpusname>" directory.
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
162 |
|
// Delete the directory recursively and recreate it empty; this happens whether
// or not an edition is built afterwards.
new File(binDir,"HTML/$corpusname").deleteDir();
|
163 |
|
new File(binDir,"HTML/$corpusname").mkdirs();
|
|
162 |
new File(binDir,"HTML/$corpusname").deleteDir()
|
|
163 |
new File(binDir,"HTML/$corpusname").mkdirs()
|
164 |
164 |
// EDITION step (only when the "default" edition definition asks for it):
// prepare the output directory and the sorted list of files to paginate.
if (build_edition) {
|
165 |
165 |
|
166 |
166 |
println "-- EDITION - Building edition"
|
167 |
167 |
if (MONITOR != null) MONITOR.worked(20, "EDITION - Building edition")
|
168 |
168 |
|
169 |
|
// Pages are written below "HTML/<corpusname>/default/" in the binary directory.
File outdir = new File(binDir, "HTML/$corpusname/default/");
|
170 |
|
outdir.mkdirs();
|
171 |
|
// NOTE(review): java.io.File.listFiles() returns null when the path is not a
// directory or cannot be read; no null check is done before sorting — confirm
// txmDir always exists at this point.
List<File> filelist = txmDir.listFiles();
|
172 |
|
// Sort the files using File's natural ordering.
Collections.sort(filelist);
|
|
169 |
File outdir = new File(binDir, "HTML/$corpusname/default/")
|
|
170 |
outdir.mkdirs()
|
|
171 |
List<File> filelist = txmDir.listFiles()
|
|
172 |
Collections.sort(filelist)
|
173 |
173 |
// Not used anywhere in the visible lines.
def second = 0
|
174 |
174 |
|
175 |
|
// Console progress bar sized to the number of files to paginate.
ConsoleProgressBar cpb = new ConsoleProgressBar(filelist.size());
|
|
175 |
ConsoleProgressBar cpb = new ConsoleProgressBar(filelist.size())
|
176 |
176 |
// For every file of filelist: create a Text, run the pager on the file and
// register one page per produced page file on a new "default" Edition.
for (File srcfile : filelist) {
|
177 |
177 |
cpb.tick()
|
178 |
|
// Text name = file name without its last extension; a name whose only dot is
// the first character is left unchanged (i > 0).
String txtname = srcfile.getName();
|
179 |
|
int i = txtname.lastIndexOf(".");
|
180 |
|
if (i > 0) txtname = txtname.substring(0, i);
|
|
178 |
String txtname = srcfile.getName()
|
|
179 |
int i = txtname.lastIndexOf(".")
|
|
180 |
if (i > 0) txtname = txtname.substring(0, i)
|
181 |
181 |
|
182 |
|
// Language-dependent lists from LangFormater, handed to the pager below.
// They depend only on lang, yet are fetched again for every file.
List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
|
183 |
|
List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);
|
|
182 |
List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang)
|
|
183 |
List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang)
|
184 |
184 |
|
185 |
|
// Text object attached to the project; the same file is set as both its
// source file and its TXM file.
Text t = new Text(project);
|
186 |
|
t.setName(txtname);
|
|
185 |
Text t = new Text(project)
|
|
186 |
t.setName(txtname)
|
187 |
187 |
t.setSourceFile(srcfile)
|
188 |
188 |
t.setTXMFile(srcfile)
|
189 |
189 |
|
190 |
|
// Build the pager for this file; presumably the constructor writes the page
// files into outdir — TODO confirm in the pager class.
def ed = new pager(srcfile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, basename, project);
|
191 |
|
// Edition named "default" for this text, indexed by the absolute path of outdir.
Edition edition = new Edition(t);
|
192 |
|
edition.setName("default");
|
193 |
|
edition.setIndex(outdir.getAbsolutePath());
|
|
190 |
def ed = new pager(srcfile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, basename, project)
|
|
191 |
Edition edition = new Edition(t)
|
|
192 |
edition.setName("default")
|
|
193 |
edition.setIndex(outdir.getAbsolutePath())
|
194 |
194 |
// One page per page file. The loop reuses i (declared above for the extension
// index) and has no increment clause: i is advanced by the ++i inside addPage,
// so pages are named "1", "2", ...
for (i = 0 ; i < ed.getPageFiles().size();) {
|
195 |
|
// f is assigned but not used in the visible lines.
File f = ed.getPageFiles().get(i);
|
196 |
|
// Word id for the page: the entry of getIdx() at the same position, or "w_0"
// when getIdx() has fewer entries than there are page files.
String wordid = "w_0";
|
197 |
|
if (i < ed.getIdx().size()) wordid = ed.getIdx().get(i);
|
198 |
|
edition.addPage(""+(++i), wordid);
|
|
195 |
File f = ed.getPageFiles().get(i)
|
|
196 |
String wordid = "w_0"
|
|
197 |
if (i < ed.getIdx().size()) {
|
|
198 |
wordid = ed.getIdx().get(i)
|
|
199 |
}
|
|
200 |
edition.addPage(""+(++i), wordid)
|
199 |
201 |
}
|
200 |
202 |
}
|
201 |
203 |
// Close the progress bar once all files have been paginated.
cpb.done()
|
... | ... | |
204 |
206 |
// FINALIZING step: last cancellation check, progress report, then save the project.
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
205 |
207 |
if (MONITOR != null) MONITOR.worked(20, "FINALIZING")
|
206 |
208 |
|
207 |
|
// readyToLoad is not declared in the visible lines; it receives the result of project.save().
readyToLoad = project.save();
|
|
209 |
readyToLoad = project.save()
|