// Source: tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/importer/XTZImporterMacro.groovy @ 1000
1 | 321 | mdecorde | // Copyright © 2010-2013 ENS de Lyon.
|
---|---|---|---|
2 | 321 | mdecorde | // Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
|
3 | 321 | mdecorde | // Lyon 2, University of Franche-Comté, University of Nice
|
4 | 321 | mdecorde | // Sophia Antipolis, University of Paris 3.
|
5 | 321 | mdecorde | //
|
6 | 321 | mdecorde | // The TXM platform is free software: you can redistribute it
|
7 | 321 | mdecorde | // and/or modify it under the terms of the GNU General Public
|
8 | 321 | mdecorde | // License as published by the Free Software Foundation,
|
9 | 321 | mdecorde | // either version 2 of the License, or (at your option) any
|
10 | 321 | mdecorde | // later version.
|
11 | 321 | mdecorde | //
|
12 | 321 | mdecorde | // The TXM platform is distributed in the hope that it will be
|
13 | 321 | mdecorde | // useful, but WITHOUT ANY WARRANTY; without even the implied
|
14 | 321 | mdecorde | // warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
15 | 321 | mdecorde | // PURPOSE. See the GNU General Public License for more
|
16 | 321 | mdecorde | // details.
|
17 | 321 | mdecorde | //
|
18 | 321 | mdecorde | // You should have received a copy of the GNU General
|
19 | 321 | mdecorde | // Public License along with the TXM platform. If not, see
|
20 | 321 | mdecorde | // http://www.gnu.org/licenses.
|
21 | 321 | mdecorde | //
|
22 | 321 | mdecorde | //
|
23 | 321 | mdecorde | //
|
24 | 321 | mdecorde | // $LastChangedDate: 2013-05-15 14:30:48 +0200 (mer., 15 mai 2013) $
|
25 | 321 | mdecorde | // $LastChangedRevision: 2392 $
|
26 | 321 | mdecorde | // $LastChangedBy: mdecorde $
|
27 | 321 | mdecorde | //
|
28 | 321 | mdecorde | package org.txm.macroproto.importer;
|
29 | 321 | mdecorde | |
30 | 479 | mdecorde | import javax.xml.stream.XMLStreamReader; |
31 | 321 | mdecorde | |
32 | 1000 | mdecorde | import org.txm.scripts.importer.RemoveTag; |
33 | 1000 | mdecorde | import org.txm.importer.ApplyXsl2; |
34 | 1000 | mdecorde | import org.txm.importer.ValidateXml; |
35 | 986 | mdecorde | import org.txm.scripts.importer.xml.importer; |
36 | 986 | mdecorde | import org.txm.scripts.importer.xml.compiler; |
37 | 986 | mdecorde | import org.txm.scripts.importer.xml.pager; |
38 | 479 | mdecorde | import org.txm.objects.*; |
39 | 479 | mdecorde | import org.txm.tokenizer.TokenizerClasses; |
40 | 927 | mdecorde | import org.txm.utils.* |
41 | 928 | mdecorde | import org.txm.utils.io.*; |
42 | 479 | mdecorde | import org.txm.*; |
43 | 1000 | mdecorde | import org.txm.importer.scripts.xmltxm.*; |
44 | 479 | mdecorde | import org.txm.utils.i18n.*; |
45 | 479 | mdecorde | import org.txm.metadatas.*; |
46 | 479 | mdecorde | import javax.xml.stream.*; |
47 | 479 | mdecorde | import org.w3c.dom.Element |
48 | 479 | mdecorde | import org.txm.utils.xml.DomUtils; |
49 | 321 | mdecorde | |
50 | 479 | mdecorde | import org.txm.macro.edition.EditionUpdaterMacro |
51 | 479 | mdecorde | import org.txm.macro.edition.AddFacsAttributeMacro |
52 | 479 | mdecorde | import org.txm.macro.edition.FacsEditionBuilderMacro |
53 | 479 | mdecorde | |
54 | 321 | mdecorde | import org.kohsuke.args4j.* |
55 | 479 | mdecorde | import groovy.transform.Field |
56 | 499 | mdecorde | import org.txm.rcp.swt.widget.parameters.* |
57 | 321 | mdecorde | |
58 | 479 | mdecorde | import groovy.lang.Binding; |
59 | 479 | mdecorde | import groovy.util.GroovyScriptEngine; |
60 | 499 | mdecorde | import org.txm.rcp.commands.workspace.* |
61 | 499 | mdecorde | import org.txm.rcp.commands.* |
62 | 479 | mdecorde | |
// Home directory of the current user.
String userDir = System.getProperty("user.home");

// BEGINNING OF PARAMETERS

// Directory that holds the import.xml parameter file and the XML sources:
// the script aborts when import.xml is missing and validates every other
// file of this directory as XML.
@Field @Option(name="srcDirectory", usage="the directory containing the import.xml file and the XML source files to import", widget="Folder", required=true, def="srcDirectory")
File srcDirectory

// When true, EditionUpdaterMacro is run at the end of the import with:
// corpus=srcDirectory, xslEdition=txm-edition-xtz.xsl, xslPages=txm-edition-page-split.xsl,
// editionname=default, useTokenizedDirectory=false
@Field @Option(name="SpannedEdition", usage="edition xsl", widget="Boolean", required=false, def="true")
Boolean SpannedEdition

// When true, AddFacsAttributeMacro is run before the importer step and
// FacsEditionBuilderMacro after the default edition is built.
// AddFacsAttribute arguments: imageDirectory=<sources>/img, outputDirectory=<binDir>/facs_src,
//   element=pagination element ("pb" unless overridden), attribute=facs, prefix=mediaPath
// FacsEditionBuilder arguments: editionName=facsEditionName, tag=pagination element, attribute=facs
@Field @Option(name="synoptic", usage="syn", widget="Boolean", required=false, def="true")
Boolean synoptic

// Prefix written in front of the image references; also the sub-directory
// of the facs edition the images are copied to when it is not an http URL.
@Field @Option(name="mediaPath", usage="absolute or relative path to images directory", widget="String", required=false, def="img")
String mediaPath

// Name of the image (facsimile) edition to create.
@Field @Option(name="facsEditionName", usage="facs edition name to create", widget="String", required=false, def="facs")
String facsEditionName

// Open the parameters input dialog box; stop when the dialog reports failure/cancel
if (!ParametersDialog.open(this)) return;

// The media path is used as a prefix, so it must end with a separator
if (!mediaPath.endsWith("/")) mediaPath += "/"

println "Parameters: "
println " srcDirectory: $srcDirectory"
println " SpannedEdition: $SpannedEdition"
println " synoptic: $synoptic"
println " mediaPath: $mediaPath"
println " facsEditionName: $facsEditionName"
97 | 321 | mdecorde | |
// Progress monitor placeholder. It is never assigned in this script, so the
// "MONITOR != null" guards used by the following steps are always skipped.
// NOTE(review): the lowercase `monitor` used further down is not declared in
// this file - presumably it comes from the macro runner's binding; confirm.
def MONITOR;
boolean debug = false;

// The import parameters are read from the import.xml file of the source directory
File importFile = new File(srcDirectory, "import.xml")
if (!importFile.exists()) {
	println "No import.xml file found in $srcDirectory"
	println "Aborting."
	return;
}
BaseParameters params = new BaseParameters(importFile);
params.load()
params.rootDir = srcDirectory.getAbsolutePath();
TokenizerClasses.loadFromNode(params.getTokenizerElement(params.getCorpusElement()));

String corpusname = params.getCorpusName();
Element corpusElem = params.corpora.get(corpusname);
if (corpusElem == null) {
	// every attribute read below would otherwise fail with a NullPointerException
	println "No corpus element found for '$corpusname' in $importFile. Aborting"
	return;
}
String basename = params.name;
if (basename == null || basename.trim().length() == 0) {
	// an empty name would make binDir the whole "corpora" directory,
	// which is deleted recursively below
	println "No corpus name found in $importFile. Aborting"
	return;
}
String rootDir = params.rootDir;
String lang = corpusElem.getAttribute("lang");
String model = lang // the annotation step uses model + ".par"
String encoding = corpusElem.getAttribute("encoding");
boolean annotate = "true" == corpusElem.getAttribute("annotate");
String xsl = params.getXsltElement(corpusElem).getAttribute("xsl")
def xslParams = params.getXsltParams(corpusElem);

// Source directory and (re-created) binary corpus directory
File srcDir = new File(rootDir);
File binDir = new File(Toolbox.getTxmHomePath(), "corpora/"+basename);
binDir.deleteDir();
binDir.mkdirs();
if (!binDir.exists()) {
	println "Could not create binDir "+binDir
	return;
}

// Directory receiving the XML-TXM files of the corpus
File txmDir = new File(binDir, "txm/$corpusname");
txmDir.deleteDir();
txmDir.mkdirs();
135 | 321 | mdecorde | |
// Optional import.properties file of the source directory; each key that is
// present overrides the matching default declared below.
File propertyFile = new File(rootDir, "import.properties")//default
Properties props = new Properties();

String textSortAttribute = null;   // key "sortmetadata"
String paginationElement = "pb";   // key "editionpage"
boolean normalizeMetadata = false; // key "normalizemetadata"
String ignoredElements = null;     // key "ignoredelements"
boolean stopIfMalformed = false;   // key "stopifmalformed"

println "Trying to read import properties file: "+propertyFile
if (propertyFile.exists() && propertyFile.canRead()) {
	// withReader closes the reader even when load() throws
	propertyFile.withReader("UTF-8") { reader -> props.load(reader) }

	if (props.getProperty("sortmetadata") != null)
		textSortAttribute = props.getProperty("sortmetadata");
	if (props.getProperty("editionpage") != null)
		paginationElement = props.getProperty("editionpage");
	if (props.getProperty("normalizemetadata") != null)
		normalizeMetadata = Boolean.parseBoolean(props.getProperty("normalizemetadata"));
	if (props.getProperty("ignoredelements") != null)
		ignoredElements = props.getProperty("ignoredelements");
	if (props.getProperty("stopifmalformed") != null)
		stopIfMalformed = Boolean.parseBoolean(props.getProperty("stopifmalformed"));

	println "import properties: "
	println " sort metadata: "+textSortAttribute
	println " edition page tag: "+paginationElement
	println " normalize attributes: "+normalizeMetadata
	println " ignored elements: "+ignoredElements
	println " stop if a XML source is malformed: "+stopIfMalformed
}
169 | 321 | mdecorde | |
// Metadata file is looked up in the original source directory, before any XSL step
File allMetadataFile = Metadatas.findMetadataFile(srcDir);

// Apply XSL
if (MONITOR?.isCanceled()) { return MONITOR.done(); }
MONITOR?.worked(1, "APPLYING XSL")
boolean xslRequested = xsl != null && xslParams != null && xsl.trim().length() > 0
if (xslRequested) {
	File xslOutputDir = new File(binDir, "src")
	// The sources are switched to the XSL output only when processImportSources
	// returns true; a false result is not treated as fatal (the former
	// "return" on error is disabled).
	if (ApplyXsl2.processImportSources(new File(xsl), srcDir, xslOutputDir, xslParams)) {
		srcDir = new File(binDir, "src")
	}
	println ""
}
181 | 321 | mdecorde | |
// Script engine used to run the other edition macros; its class loader also
// receives every jar found in the user's TXM/scripts/lib directory.
String[] roots = [ System.getProperty("user.home")+"/TXM/scripts/macro/" ];
GroovyScriptEngine gse = new GroovyScriptEngine(roots, monitor.getClass().getClassLoader());
File jardir = new File(System.getProperty("user.home")+"/TXM/scripts/lib"); //$NON-NLS-1$
if (jardir.exists() && jardir.isDirectory()) {
	def jarsOnly = { File dir, String name -> name.endsWith(".jar") } as FilenameFilter //$NON-NLS-1$
	for (File jar : jardir.listFiles(jarsOnly)) {
		gse.getGroovyClassLoader().addURL(jar.toURI().toURL());
	}
}

// Call AddFacsAttribute - must be called before starting the importer step
// Arguments: imageDirectory=<srcDir>/img, outputDirectory=<binDir>/facs_src,
// element=pagination element, attribute=facs, prefix=mediaPath
if (synoptic) {
	println "-- Adding the $paginationElement@facs attributes"
	// new source directory that receives the modified source files
	File srcDirTemp = new File(binDir, "facs_src")
	srcDirTemp.mkdir()

	def facsArgs = [
		"monitor":monitor,
		"sourceDirectory":srcDir,
		"imageDirectory":new File(srcDir, "img"),
		"outputDirectory":srcDirTemp,
		"element":paginationElement,
		"attribute":"facs",
		"prefix":mediaPath]
	Binding facsBinding = new Binding();
	facsBinding.setVariable("args", facsArgs);
	gse.run("org/txm/macro/edition/AddFacsAttributeMacro.groovy", facsBinding)

	// the following steps read the rewritten sources
	println "switching srcDir to $srcDirTemp"
	srcDir = srcDirTemp;
}
206 | 321 | mdecorde | |
// copy xml+dtd files
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
List<File> srcfiles = srcDir.listFiles();
if (srcfiles != null) {
	// check XML format, and copy file into txmDir
	for (Iterator<File> iter = srcfiles.iterator(); iter.hasNext();) {
		File f = iter.next()
		String name = f.getName()
		// import.xml, metadata.xxx / metadata.xxxx and *.properties are import
		// settings, not texts: they are dropped from the source file list
		if (name.equals("import.xml") || name.matches("metadata\\.....?") || name.endsWith(".properties")) {
			iter.remove();
			continue;
		}
		if (ValidateXml.test(f)) {
			FileCopy.copy(f, new File(txmDir, name));
		} else {
			println "Won't process file "+f;
		}
	}
}

// listFiles() yields an empty array (not null) for an existing empty
// directory, so emptiness must be tested as well
File[] copiedFiles = txmDir.listFiles()
if (copiedFiles == null || copiedFiles.length == 0) {
	println "No txm file to process"
	return;
}
229 | 321 | mdecorde | |
//get metadatas values from CSV
Metadatas metadatas; // text metadatas; stays null when no metadata file is available

println "Trying to read metadatas from: "+allMetadataFile
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
// the null test avoids a NullPointerException if the lookup found no file
if (allMetadataFile != null && allMetadataFile.exists()) {
	// the metadata are read from a copy stored in the binary corpus directory
	File copy = new File(binDir, allMetadataFile.getName())
	if (!FileCopy.copy(allMetadataFile, copy)) {
		println "Error: could not create a copy of metadata file "+allMetadataFile.getAbsoluteFile();
		return;
	}
	metadatas = new Metadatas(copy, Toolbox.getMetadataEncoding(), Toolbox.getMetadataColumnSeparator(), Toolbox.getMetadataTextSeparator(), 1)
} else {
	println "no metadata file: "+allMetadataFile
}
245 | 321 | mdecorde | |
if (MONITOR?.isCanceled()) { return MONITOR.done(); }
MONITOR?.worked(5, "IMPORTER")
println "-- IMPORTER - Reading source files"

// Importer step: validation and tokenization are both switched on here
def xmlImporter = new importer();
xmlImporter.doValidation(true) // change this to not validate xml
xmlImporter.doTokenize(true) // change this, to not tokenize xml
xmlImporter.setStopIfMalformed(stopIfMalformed);

boolean imported = xmlImporter.run(srcDir, binDir, txmDir, basename, ignoredElements, lang)
if (!imported) {
	println "import process stopped";
	return;
}
257 | 321 | mdecorde | |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
if (MONITOR != null) MONITOR.worked(20, "INJECTING METADATA")
if (metadatas != null) {
	// the files that are rewritten are those of txmDir
	println("-- INJECTING METADATA - "+metadatas.getHeadersList()+" in texts of directory "+txmDir)
	for (File infile : txmDir.listFiles()) {
		print "."
		// the result is written next to the text, then swapped with it
		File outfile = File.createTempFile("temp", ".xml", infile.getParentFile());
		if (!metadatas.injectMetadatasInXml(infile, outfile, "text", null)) {
			outfile.delete();
			continue;
		}
		boolean replaced = infile.delete() && outfile.renameTo(infile)
		if (!replaced) {
			println "Warning can't rename file "+outfile+" to "+infile
			// when the original text is still there, drop the temporary file:
			// left in txmDir it would be processed as an extra text later on
			if (infile.exists()) outfile.delete()
		}
		if (!infile.exists()) {
			println "Error: could not replace $infile by $outfile"
			return false;
		}
	}
	println ""
}

// Nothing left to annotate/compile: stop silently
List<File> files = txmDir.listFiles()
if (files == null || files.size() == 0) {
	return;
}
281 | 321 | mdecorde | |
if (MONITOR?.isCanceled()) { return MONITOR.done(); }
MONITOR?.worked(20, "ANNOTATE")
println "-- ANNOTATE - Running NLP tools"
// Annotate runs only when annotation is requested in import.xml; the flag
// records whether it ran and reported success. The model file name is <lang>.par
boolean annotationSuccess = annotate && new Annotate().run(binDir, txmDir, model+".par")
289 | 321 | mdecorde | |
if (MONITOR?.isCanceled()) { return MONITOR.done(); }
MONITOR?.worked(25, "COMPILING")
println "-- COMPILING - Building Search Engine indexes"

// Configure the compiler with the import options, then run it on the XML-TXM files
def corpusCompiler = new compiler();
if (debug) {
	corpusCompiler.setDebug();
}
//corpusCompiler.setCwbPath("~/TXM/cwb/bin");
corpusCompiler.setOptions(textSortAttribute, normalizeMetadata);
corpusCompiler.setAnnotationSuccess(annotationSuccess)
corpusCompiler.setLang(lang);

boolean compiled = corpusCompiler.run(binDir, txmDir, corpusname, null, srcfiles, metadatas)
if (!compiled) {
	println "import process stopped";
	return;
}
303 | 321 | mdecorde | |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
if (MONITOR != null) MONITOR.worked(25, "EDITION")
println "-- EDITION - Building edition"

// Start from an empty HTML directory; the pages of the "default" edition go to outdir
File htmlDir = new File(binDir,"HTML/$corpusname")
htmlDir.deleteDir();
htmlDir.mkdirs();
File outdir = new File(binDir,"/HTML/$corpusname/default/");
outdir.mkdirs();
List<File> filelist = txmDir.listFiles();
Collections.sort(filelist);

// The spacing rules depend only on the corpus language: fetched once for all texts
List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);

println "Paginating texts: "+filelist
for (File txmFile : filelist) {
	print "."
	// the text name is the file name without its last extension
	String txtname = txmFile.getName();
	int dot = txtname.lastIndexOf(".");
	if (dot > 0) txtname = txtname.substring(0, dot);

	Element text = params.addText(corpusElem, txtname, txmFile);

	def ed = new pager(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, 30000, basename, paginationElement);
	Element edition = params.addEdition(text, "default", outdir.getAbsolutePath(), "html");

	// Declare one page per page file; pages are numbered from 1 and each is
	// registered with the entry of ed.getIdx() at the same position
	int pageCount = ed.getPageFiles().size()
	for (int page = 0 ; page < pageCount ; page++) {
		String wordid = ed.getIdx().get(page);
		params.addPage(edition, ""+(page + 1), wordid);
	}
}
336 | 321 | mdecorde | |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
if (MONITOR != null) MONITOR.worked(20, "FINALIZING")
// Save the completed import parameters in the binary corpus directory
File paramFile = new File(binDir, "import.xml");
DomUtils.save(params.root.getOwnerDocument(), paramFile);
// readyToLoad is not declared in this script: the assignment goes to the script binding
readyToLoad = true;

// Call FacsEditionBuilder
// Arguments: editionName=facsEditionName, tag=pagination element, attribute=facs
if (synoptic) {
	println "-- Building the '$facsEditionName' image edition..."
	// declared locally so that the binding object does not end up in the script binding
	Binding facsEditionBinding = new Binding();
	facsEditionBinding.setVariable("args", ["monitor":monitor,"params":params, "binDirectory":binDir, "txmDirectory":txmDir,"corpus":srcDirectory, "editionName":facsEditionName, "tag":paginationElement, "attribute":"facs"]);
	gse.run "org/txm/macro/edition/FacsEditionBuilderMacro.groovy", facsEditionBinding

	//copy images into the binary corpus
	if (mediaPath.length() > 0 && !mediaPath.startsWith("http")) {
		// NOTE(review): this path uses $basename while the other HTML
		// directories of this script use $corpusname - confirm this is intended
		File binImageDirectory = new File(binDir,"HTML/$basename/$facsEditionName/$mediaPath")
		println "copying images into binary corpus: $binImageDirectory ..."
		FileCopy.copyFiles(new File(srcDirectory, "img"), binImageDirectory)
		println "copy done"
	} else {
		println "Note: the $facsEditionName edition images are not embedded in the binary corpus"
	}
}
360 | 321 | mdecorde | |
// Call EditionUpdater
// corpus=srcDirectory, xslEdition=txm-edition-xtz.xsl, xslPages=txm-edition-page-split.xsl,
// editionname=default, useTokenizedDirectory=false
if (SpannedEdition) {
	println "-- Building Spanned edition..."

	def updaterArgs = [
		"monitor":monitor,
		"params":params,
		"binDirectory":binDir,
		"txmDirectory":txmDir,
		"corpus":srcDirectory,
		"xslEdition":"txm-edition-xtz.xsl",
		"xslPages":"txm-edition-page-split.xsl",
		"editionname":"default",
		"useTokenizedDirectory":false]
	Binding updaterBinding = new Binding();
	updaterBinding.setVariable("args", updaterArgs);
	gse.run("org/txm/macro/edition/EditionUpdaterMacro.groovy", updaterBinding)
}
// Disabled step, kept for reference: creation of the ".txm" binary corpus file
// // create the ".txm" file
// try {
// 	File exportzip = new File(binDir.getParent(), binDir.getName()+".txm")
// 	println "creating the $exportzip binary corpus file..."
// 	Zip.compress(binDir, exportzip);
// 	println "done."
// } catch (IOException e) {
// 	println "Error during binary file creation: $e"
// }

// load corpus
println "Loading corpus..."
try {
	LoadBinaryCorpus.loadBase(binDir)
} catch (Exception e777) {
	// LoadBinaryCorpus does not exist if TXM version is < 0.7.7
	AddBases.loadBase(binDir, monitor)
}

Toolbox.restartWorkspace();
Toolbox.restartSearchEngine();

// The views are reloaded through the monitor's syncExec
Runnable reloadCorporaView = new Runnable() {
	@Override
	public void run() {
		println "Reloading corpora view..."
		RestartTXM.reloadViews();
		println "import done."
	}
};
monitor.syncExec(reloadCorporaView);