Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / xtz / XTZPager.groovy @ 2126

History | View | Annotate | Download (16.1 kB)

1 986 mdecorde
package org.txm.scripts.importer.xtz
2 321 mdecorde
3 321 mdecorde
import java.io.File;
4 321 mdecorde
import java.util.ArrayList;
5 321 mdecorde
6 321 mdecorde
import org.txm.objects.BaseParameters
7 321 mdecorde
import org.w3c.dom.Element
8 1217 mdecorde
9 986 mdecorde
import org.txm.scripts.importer.*
10 1613 mdecorde
import org.txm.utils.ConsoleProgressBar
11 321 mdecorde
import org.txm.utils.BundleUtils;
12 479 mdecorde
import org.txm.utils.io.FileCopy;
13 321 mdecorde
import org.txm.utils.i18n.*
14 1000 mdecorde
import org.txm.importer.xtz.*
15 321 mdecorde
import javax.xml.stream.*
16 1137 mdecorde
import org.txm.objects.*
17 1196 mdecorde
import org.txm.importer.ApplyXsl2
18 321 mdecorde
19 321 mdecorde
class XTZPager extends Pager {
20 321 mdecorde
21 1137 mdecorde
        Project project;
22 321 mdecorde
23 321 mdecorde
        Element corpusElem;
24 321 mdecorde
        String lang;
25 321 mdecorde
        String page_element;
26 321 mdecorde
        String wordTag;
27 321 mdecorde
        int wordsPerPage;
28 321 mdecorde
29 321 mdecorde
        File cssDirectory, jsDirectory, imagesDirectory;
30 321 mdecorde
31 321 mdecorde
        /**
         * Creates the pager of an XTZ import and caches the settings it needs
         * from the project of the import module.
         *
         * @param module the import module; it supplies the project and the source directory
         */
        public XTZPager(ImportModule module) {
                super(module, "default");

                this.project = module.getProject()

                // settings read from the project and from its "default" edition definition
                this.lang = this.project.getLang();
                this.wordsPerPage = this.project.getEditionDefinition("default").getWordsPerPage()
                this.page_element = this.project.getEditionDefinition("default").getPageElement()
                this.wordTag = this.project.getTokenizerWordElement()

                // resource directories looked up in the source directory of the import
                this.cssDirectory = new File(module.getSourceDirectory(), "css")
                this.jsDirectory = new File(module.getSourceDirectory(), "js")
                this.imagesDirectory = new File(module.getSourceDirectory(), "images")
        }
45 321 mdecorde
46 321 mdecorde
        /**
         * Runs the pager steps in order: 'default' edition, 'facs' edition, then the
         * XSL editions. Stops at the first step that returns false; isSuccessFul is
         * set to true only when all steps succeeded.
         *
         * @param files the files to process; when null, the content of inputDirectory is listed and sorted
         */
        public void process(ArrayList<File> files) {
                super.process(files);

                if (files == null) {
                        files = inputDirectory.listFiles();
                        if (files != null) Collections.sort(files);
                }

                if (!doDefaultEditionStep()) return;
                if (!doFacsEditionStep()) return;

                // remove extra XSL editions -> they will be recreated by the doPostEditionXSLStep call
                // The definitions to delete are collected first: deleting while iterating over the
                // collection returned by the project could invalidate the iteration.
                def extraEditionDefinitions = project.getEditionDefinitions().findAll { eDef ->
                        eDef.getName() != "facs" && eDef.getName() != "default"
                }
                for (EditionDefinition eDef : extraEditionDefinitions) {
                        eDef.delete();
                }

                if (!doPostEditionXSLStep()) return;

                isSuccessFul = true;
                println ""
        }
68 321 mdecorde
69 321 mdecorde
        /**
         * Builds the 'default' HTML edition of every file of the files list, then copies
         * the TXM default stylesheet and the css/js/images source directories (when they
         * exist) into the output directory.
         *
         * A text whose first page is at least as recent as its TXM file is skipped. A
         * failure on one text is reported on the console and does not stop the others.
         *
         * @return always true (also when the 'default' edition is not to be built)
         */
        public boolean doDefaultEditionStep() {

                // nothing to do when the 'default' edition is disabled
                if (!project.getEditionDefinition("default").getBuildEdition()) {
                        return true;
                }

                println "-- Building 'default' edition of  ${files.size()} texts..."

                def css = ["css/txm.css", "css/${corpusname}.css"] // default CSS inclusion

                // scan existing css files that must be declared in each HTML page
                if (cssDirectory.exists()) {
                        def candidates = cssDirectory.listFiles();
                        if (candidates != null) {
                                for (File candidate : candidates) {
                                        boolean isStylesheet = candidate.isFile() && !candidate.isHidden() && candidate.getName().endsWith(".css")
                                        if (isStylesheet) {
                                                css << "css/"+candidate.getName();
                                        }
                                }
                        }
                }

                ConsoleProgressBar cpb = new ConsoleProgressBar(files.size())
                for (File txmFile : files) {
                        try {
                                cpb.tick()

                                // text name = file name without its last extension
                                String textname = txmFile.getName();
                                int dot = textname.lastIndexOf(".");
                                if (dot > 0) {
                                        textname = textname.substring(0, dot);
                                }

                                // skip the text when its first page is already up to date
                                File firstHTMLPageFile = new File(outputDirectory, textname+"_1.html");
                                boolean upToDate = firstHTMLPageFile.exists() && firstHTMLPageFile.lastModified() >= txmFile.lastModified()
                                if (upToDate) {
                                        continue;
                                }

                                List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
                                List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);

                                // fetch or create the Text of the project
                                Text t = project.getText(textname);
                                if (t == null) {
                                        t = new Text(project);
                                }
                                t.setName(textname);
                                t.setSourceFile(txmFile)
                                t.setTXMFile(txmFile)

                                def ed = new XTZDefaultPagerStep(this, txmFile, textname, NoSpaceBefore, NoSpaceAfter, css);
                                if (!ed.process()) {
                                        println "Fail to build 'default' edition for text: $txmFile"
                                        continue;
                                }

                                // fetch or create the 'default' Edition of the text
                                Edition edition = t.getEdition("default")
                                if (edition == null) {
                                        edition = new Edition(t);
                                }
                                edition.setName("default");
                                edition.setIndex(outputDirectory.getAbsolutePath());

                                // declare one page per produced file; pages are named "1", "2", ...
                                int pageCount = ed.getPageFiles().size()
                                for (int p = 0 ; p < pageCount ; p++) {
                                        String wordid = "w_0"; // used when no word id is known for the page
                                        if (p < ed.getIdx().size()) {
                                                wordid = ed.getIdx().get(p);
                                        }
                                        edition.addPage(""+(p + 1), wordid);
                                }
                        } catch(Exception e) {
                                println "Error: could not create $txmFile 'default' edition: "+e
                                e.printStackTrace()
                        }
                }

                // copy default TXM css file in the "default" edition directory
                File csshtmlDirectory = new File(outputDirectory, "css")
                csshtmlDirectory.mkdirs()
                BundleUtils.copyFiles("org.txm.core", "res", "org/txm/css", "txm.css", csshtmlDirectory);

                // copy CSS/JS/Images sources files in the "default" edition directory
                if (cssDirectory.exists()) {
                        FileCopy.copyFiles(cssDirectory, csshtmlDirectory)
                }
                if (jsDirectory.exists()) {
                        FileCopy.copyFiles(jsDirectory, new File(outputDirectory, "js"))
                }
                if (imagesDirectory.exists()) {
                        FileCopy.copyFiles(imagesDirectory, new File(outputDirectory, "images"))
                }

                println ""
                return true;
        }
160 321 mdecorde
161 321 mdecorde
        /**
         * Builds the 'facs' edition of every file of the files list in the "facs"
         * sub-directory of htmlDirectory, then copies the images (when they are local),
         * the viewer scripts, the TXM default stylesheet and the css/js/images source
         * directories into that edition directory.
         *
         * A text whose first page is at least as recent as its TXM file is skipped.
         *
         * @return true when the edition is not to be built or when it was built;
         *         false as soon as an exception is raised while processing a text
         */
        public boolean doFacsEditionStep() {

                boolean mustBuildFacsEdition = project.getEditionDefinition("facs").getBuildEdition()
                if (!mustBuildFacsEdition) return true;

                String imageDirectoryPath = project.getEditionDefinition("facs").getImagesDirectory();
                File imageDirectory = null
                boolean remoteImages = false // true when the images location is an "http..." address

                if (imageDirectoryPath != null) {
                        imageDirectoryPath = imageDirectoryPath.trim()
                        remoteImages = imageDirectoryPath.startsWith("http")
                        imageDirectory = new File(imageDirectoryPath)
                        // NOTE(review): as written this test only succeeds for an empty path; it looks like
                        // "empty OR missing OR not a directory" was intended -- confirm before changing it,
                        // since it decides what is handed to XTZFacsPagerStep.
                        if (!remoteImages && imageDirectoryPath.length()== 0 && !imageDirectory.exists() && !imageDirectory.isDirectory()) {
                                imageDirectory = null;
                        }
                }

                println "-- Building 'facs' edition of ${files.size()} texts..."
                File newEditionDirectory = new File(htmlDirectory, "facs");
                newEditionDirectory.mkdir();

                ConsoleProgressBar cpb = new ConsoleProgressBar(files.size())
                for (File txmFile : files) {
                        cpb.tick()

                        // text name = file name without its last extension
                        String txtname = txmFile.getName();
                        int dot = txtname.lastIndexOf(".");
                        if (dot > 0) txtname = txtname.substring(0, dot);

                        // skip the text when its first page is already up to date
                        File firstHTMLPageFile = new File(newEditionDirectory, txtname+"_1.html");
                        if (firstHTMLPageFile.exists() && firstHTMLPageFile.lastModified() >= txmFile.lastModified()) continue;

                        // fetch or create the Text of the project
                        Text t = project.getText(txtname);
                        if (t == null) {
                                t = new Text(project);
                        }
                        t.setName(txtname);
                        t.setSourceFile(txmFile)
                        t.setTXMFile(txmFile)

                        // fetch or create the 'facs' Edition of the text
                        Edition edition = t.getEdition("facs")
                        if (edition == null) {
                                edition = new Edition(t);
                        }
                        edition.setName("facs");
                        edition.setIndex(outputDirectory.getAbsolutePath());

                        try {
                                def ed = new XTZFacsPagerStep(txmFile, newEditionDirectory, imageDirectory, txtname, corpusname, "pb", "facs", wordTag, debug);
                                if (!ed.process()) {
                                        println "Fail to build 'facs' edition for text: $txmFile"
                                        continue;
                                }

                                // each entry of pages is indexed as [page file, word id]
                                def pages = ed.getPageFiles()
                                for (int p = 0 ; p < pages.size() ; p++) {
                                        String wordid = pages[p][1]
                                        //TODO replace '""+(p + 1)' with something that fetch/findout the page 'name'
                                        // TODO or move the Edition and Page corpus declaration in the XTZDefaultPagerStep
                                        edition.addPage(""+(p + 1), wordid);
                                }
                        } catch (Exception e) {
                                println "Error while processing $txmFile text: "+e
                                e.printStackTrace();
                                return false;
                        }
                }

                // copy files only if local; imageDirectory is tested first because the path is null
                // when no images directory is configured, and a missing directory has nothing to copy
                if (imageDirectory != null && !remoteImages && imageDirectory.isDirectory()) {
                        File editionImagesDirectory = new File(newEditionDirectory, "res/images/"+corpusname+"/facs");
                        editionImagesDirectory.mkdirs();
                        FileCopy.copyFiles(imageDirectory, editionImagesDirectory);
                }

                // copy SimpleViewer files in the "facs" edition directory
                File jshtmlDirectory = new File(newEditionDirectory, "js")
                jshtmlDirectory.mkdirs()
                BundleUtils.copyFiles("org.txm.core", "res", "org/txm/js", "viewer", jshtmlDirectory);
                BundleUtils.copyFiles("org.txm.core", "res", "org/txm/", "images", newEditionDirectory);

                // copy default TXM css file in the "facs" edition directory
                File csshtmlDirectory = new File(newEditionDirectory, "css")
                csshtmlDirectory.mkdirs()
                BundleUtils.copyFiles("org.txm.core", "res", "org/txm/css", "txm.css", csshtmlDirectory);

                // copy CSS/JS/Images sources files in the "facs" edition directory
                if (cssDirectory.exists()) {
                        FileCopy.copyFiles(cssDirectory, csshtmlDirectory)
                }
                if (jsDirectory.exists()) {
                        FileCopy.copyFiles(jsDirectory, jshtmlDirectory)
                }
                if (imagesDirectory.exists()) {
                        File imageshtmlDirectory = new File(newEditionDirectory, "images")
                        FileCopy.copyFiles(imagesDirectory, imageshtmlDirectory)
                }

                project.setDefaultEditionName("default,facs");

                println ""
                return true;
        }
269 321 mdecorde
270 321 mdecorde
        /**
         * Applies the edition stylesheets found in the "xsl/4-edition" directory of the
         * sources. Nothing is done when that directory does not exist.
         *
         * Stylesheets must be named '{Number}-{editionName}-{step}.xsl' (Number: 1 to 999)
         * and are applied in increasing Number order. The first stylesheet of an edition
         * reads the files of inputDirectory and (re)creates the edition directory in
         * htmlDirectory; the following ones transform the files of that directory. When
         * the step is "pager" the transformed input files are deleted afterwards.
         * Finally the pages of each created edition are declared (see getFirstWordIDs)
         * and its edition definition is updated.
         *
         * @return false when a stylesheet could not be applied (reason is set), true otherwise
         */
        public boolean doPostEditionXSLStep() {

                File xslDirectory = new File(module.getSourceDirectory(), "xsl/4-edition")
                if (!xslDirectory.exists()) {
                        return true; // no XSL edition to build
                }

                // prepare XSL parameters
                def xslParams = project.getXsltParameters()
                String s = project.getEditionDefinition("default").getWordsPerPage();

                // shared XSL parameters
                if (s != null && s.length() > 0) {
                        xslParams["number-words-per-page"] = Integer.parseInt(s);
                }
                xslParams["pagination-element"] = project.getEditionDefinition("default").getPageElement();
                xslParams["import-xml-path"] = project.getProjectDirectory()
                //println "XSL PARAMS: "+xslParams

                def xslFiles = xslDirectory.listFiles()
                if (xslFiles == null) { // listFiles() returns null when the path is not a readable directory
                        println "Warning: could not list the XSL files of $xslDirectory"
                        return true;
                }
                // order by numeric prefix; names without a numeric prefix get -1 (they are skipped below anyway)
                xslFiles = xslFiles.sort() { f ->
                        try {
                                return Integer.parseInt(f.getName().substring(0, f.getName().indexOf("-")))
                        } catch(Exception ignored) {
                                return -1;
                        }
                }

                def editionsCreated = [:] // edition name -> first XSL file applied for this edition
                for (File xslFile : xslFiles) {
                        String xslFileName = xslFile.getName()
                        if (xslFile.isDirectory() || xslFile.isHidden() || !xslFileName.endsWith(".xsl")) continue;
                        // 1 to 3 digits without leading zero; prefixes such as "10-" must be accepted too
                        if (!xslFileName.matches("[1-9][0-9]{0,2}-.+")) continue;

                        // remove the whole "1-", "12-", etc. prefix, whatever its number of digits
                        String xslName = xslFileName.substring(xslFileName.indexOf("-") + 1);
                        int idx2 = xslName.indexOf(".")
                        if (idx2 > 0) xslName = xslName.substring(0, idx2)
                        else {
                                println "$xslFile is not a '.xsl' file"
                                continue;
                        }
                        int idx3 = xslName.indexOf("-")
                        if (idx3 < 0) {
                                println "$xslFile file does not follow the '{Number}-{editionName}-{step}.xsl' name pattern"
                                continue;
                        }
                        String pagerStep = xslName.substring(idx3 + 1);
                        String editionName = xslName.substring(0, idx3);

                        println "-- Building '$editionName' XSL edition with step '$pagerStep'..."

                        File newEditionDirectory = new File(htmlDirectory, editionName);
                        xslParams["output-directory"] = newEditionDirectory.toURI().toString()

                        if (editionsCreated[editionName] == null) { // first XSL, replace an edition
                                editionsCreated[editionName] = xslFile
                                //TODO: optimisation if update is enable,
                                newEditionDirectory.deleteDir(); // delete previous edition if any
                                newEditionDirectory.mkdir()

                                boolean deleteOutputFiles = "pager" == pagerStep;
                                if (ApplyXsl2.processImportSources(xslFile, inputDirectory, newEditionDirectory, xslParams, deleteOutputFiles)) {
                                        println ""
                                } else {
                                        reason = "Fail to apply edition XSL: $xslFile"
                                        return false;
                                }

                                // copy CSS/JS/Images sources files in the new edition directory
                                if (cssDirectory.exists()) {
                                        File csshtmlDirectory = new File(newEditionDirectory, "css")
                                        FileCopy.copyFiles(cssDirectory, csshtmlDirectory)
                                }
                                if (jsDirectory.exists()) {
                                        File jshtmlDirectory = new File(newEditionDirectory, "js")
                                        FileCopy.copyFiles(jsDirectory, jshtmlDirectory)
                                }
                                if (imagesDirectory.exists()) {
                                        File imageshtmlDirectory = new File(newEditionDirectory, "images")
                                        FileCopy.copyFiles(imagesDirectory, imageshtmlDirectory)
                                }
                        } else { // N+1 XSL working with HTML files
                                def htmlFiles = newEditionDirectory.listFiles()
                                if (htmlFiles == null) htmlFiles = new File[0] // directory vanished or unreadable
                                htmlFiles.sort()

                                if (ApplyXsl2.processImportSources(xslFile, htmlFiles, xslParams)) {
                                        if ("pager".equals(pagerStep)) {
                                                // delete the one page HTML files only if the XSL step is "pager"
                                                for (File f : htmlFiles) f.delete();
                                        }
                                } else {
                                        reason = "Fail to apply edition XSL: $xslFile"
                                        return false;
                                }
                        }
                }

                // UPDATE import.xml: for each XML-TXM file, we must retrieve the first word ID from the XSL output files
                println "-- Fetching page word IDs..."
                ConsoleProgressBar cpb = new ConsoleProgressBar(editionsCreated.keySet().size())
                for (String editionName : editionsCreated.keySet()) {
                        cpb.tick()

                        File newEditionDirectory = new File(htmlDirectory, editionName);
                        File xslFile = editionsCreated[editionName]
                        for (File txmFile : inputDirectory.listFiles()) {
                                if (txmFile.isDirectory()) continue;
                                // NOTE(review): the name is cut at the FIRST dot here while the 'default' and 'facs'
                                // steps cut at the LAST dot -- confirm which one matches the XSL output file names
                                String textName = txmFile.getName()
                                int idx4 = textName.indexOf(".")
                                if (idx4 > 0) textName = textName.substring(0, idx4);

                                getFirstWordIDs(textName, editionName, newEditionDirectory, xslFile, txmFile);
                        }

                        def editionDeclaration = project.getEditionDefinition(editionName); // create the edition definition
                        editionDeclaration.setBuildEdition(true)
                        editionDeclaration.setPageBreakTag(project.getEditionDefinition("default").getPageElement())
                        editionDeclaration.setWordsPerPage(project.getEditionDefinition("default").getWordsPerPage())
                }
                println ""
                return true;
        }
397 321 mdecorde
398 1137 mdecorde
        /**
         * Declares the pages of one edition of one text from the files found in the
         * edition directory. A page file is a regular file named
         * '{textName}_{pageName}.html...'; pages are added in increasing order of the
         * number found between the last "_" and the last "." of the file name, and the
         * word id of each page is read with getMetaContent.
         *
         * The Text and the Edition are created when they do not exist yet; the pages of
         * an existing edition are reset first.
         *
         * @param textName name of the text
         * @param editionName name of the edition
         * @param newEditionDirectory directory containing the page files of the edition
         * @param xslFile first stylesheet of the edition (not used by this method)
         * @param txmFile TXM file of the text, set as source and TXM file of the Text
         */
        private void getFirstWordIDs(String textName, String editionName, File newEditionDirectory, File xslFile, File txmFile) {
                Text t = project.getText(textName);
                if (t == null) {
                        t = new Text(project);
                }
                t.setName(textName);
                t.setSourceFile(txmFile)
                t.setTXMFile(txmFile)

                Edition edition = t.getEdition(editionName)
                if (edition == null) { // new edition
                        edition = new Edition(t);
                } else { // replacing existing edition
                        edition.resetPages()
                }
                edition.setName(editionName);
                edition.setIndex(outputDirectory.getAbsolutePath());

                // Keep only the page files of this text BEFORE sorting: the directory also holds the
                // pages of the other texts and possibly unrelated files, whose names must not be parsed.
                String pagePrefix = textName+"_"
                def pageFiles = []
                newEditionDirectory.eachFile() { File candidate ->
                        String name = candidate.getName()
                        if (candidate.isFile() && name.startsWith(pagePrefix) && name.indexOf(".html") > pagePrefix.length()) {
                                pageFiles << candidate
                        }
                }

                // number between the last "_" and the last "."; names without a number are sorted last
                def pageNumber = { File page ->
                        String name = page.getName()
                        try {
                                return Integer.parseInt(name.substring(name.lastIndexOf("_")+1, name.lastIndexOf(".")))
                        } catch (Exception ignored) {
                                return Integer.MAX_VALUE
                        }
                }
                pageFiles.sort() { f1, f2 -> pageNumber(f1) <=> pageNumber(f2) }

                for (File f : pageFiles) {
                        String pagename = f.getName();
                        String firstWordID = getMetaContent(f);
                        // page name = what stands between '{textName}_' and '.html'
                        pagename = pagename.substring(pagePrefix.length(), pagename.indexOf(".html"))
                        edition.addPage(pagename, firstWordID)
                }
        }
438 321 mdecorde
439 321 mdecorde
        /**
         * Reads the value of the 'content' attribute of the first
         * &lt;meta name="txm:first-word-id"&gt; element of an XML/XHTML file.
         *
         * The search stops at the first 'body' element or at the end of the document.
         * DTD support is disabled, so the DTD declared in a doctype is ignored.
         *
         * @param f the file to read
         * @return the content value, or "" when the meta element is not found before
         *         'body' or when it has no 'content' attribute
         */
        public static String getMetaContent(File f) {
                String META = "meta"
                String BODY = "body"
                String NAME = "name"
                String DESCRIPTION = "txm:first-word-id"
                String CONTENT = "content"

                def factory = XMLInputFactory.newInstance();
                factory.setProperty("javax.xml.stream.supportDTD", false); // ignore the DTD declared in doctype

                def inputData = f.toURI().toURL().openStream();
                def parser = null
                try {
                        parser = factory.createXMLStreamReader(inputData);

                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                if (event != XMLStreamConstants.START_ELEMENT) continue;

                                if (META.equals(parser.getLocalName())) {
                                        // the attributes are read per element so that the content of a
                                        // previous, unrelated meta element can never be returned
                                        String desc = "";
                                        String content = "";
                                        for (int i = 0 ; i < parser.getAttributeCount(); i++) { // scan attributes
                                                if (NAME.equals(parser.getAttributeLocalName(i))) { // found @name
                                                        desc = parser.getAttributeValue(i)
                                                } else if (CONTENT.equals(parser.getAttributeLocalName(i))) { // found @content
                                                        content = parser.getAttributeValue(i)
                                                }
                                        }
                                        if (DESCRIPTION.equals(desc)) { // stop now
                                                return content;
                                        }
                                } else if (BODY.equals(parser.getLocalName())) { // no need to go further, the meta was not found
                                        break;
                                }
                        }
                        return "";
                } finally {
                        // always release the parser and the stream, also when the parsing fails
                        if (parser != null) {
                                try {
                                        parser.close();
                                } catch (XMLStreamException ignored) {
                                        // closing is best-effort; the stream is closed below anyway
                                }
                        }
                        inputData.close();
                }
        }
481 321 mdecorde
482 321 mdecorde
        /**
         * Manual check of getMetaContent: prints the value read from one page
         * file of a developer workstation (hard-coded path).
         *
         * @param args not used
         */
        public static void main(def args) {
                String editionDirectoryPath = "/home/mdecorde/TXM/corpora/QGRAALXTZ/HTML/QGRAALXTZ/default"
                String pageFileName = "qgraal_cm_test201510_page_160_2.html"
                File pageFile = new File(editionDirectoryPath, pageFileName)
                println "RESULT: "+getMetaContent(pageFile)
        }
485 321 mdecorde
}