root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / xtz / XTZPager.groovy @ 2126
History | View | Annotate | Download (16.1 kB)
1 | 986 | mdecorde | package org.txm.scripts.importer.xtz
|
---|---|---|---|
2 | 321 | mdecorde | |
3 | 321 | mdecorde | import java.io.File; |
4 | 321 | mdecorde | import java.util.ArrayList; |
5 | 321 | mdecorde | |
6 | 321 | mdecorde | import org.txm.objects.BaseParameters |
7 | 321 | mdecorde | import org.w3c.dom.Element |
8 | 1217 | mdecorde | |
9 | 986 | mdecorde | import org.txm.scripts.importer.* |
10 | 1613 | mdecorde | import org.txm.utils.ConsoleProgressBar |
11 | 321 | mdecorde | import org.txm.utils.BundleUtils; |
12 | 479 | mdecorde | import org.txm.utils.io.FileCopy; |
13 | 321 | mdecorde | import org.txm.utils.i18n.* |
14 | 1000 | mdecorde | import org.txm.importer.xtz.* |
15 | 321 | mdecorde | import javax.xml.stream.* |
16 | 1137 | mdecorde | import org.txm.objects.* |
17 | 1196 | mdecorde | import org.txm.importer.ApplyXsl2 |
18 | 321 | mdecorde | |
19 | 321 | mdecorde | class XTZPager extends Pager { |
20 | 321 | mdecorde | |
/** Project being imported; taken from the import module in the constructor. */
Project project;

/** Corpus DOM element; no code visible in this file assigns it. */
Element corpusElem;

/** Project language, used to look up the LangFormater spacing rules. */
String lang;

/** Page element name read from the "default" edition definition. */
String page_element;

/** Word element name returned by project.getTokenizerWordElement(). */
String wordTag;

/** Words-per-page setting read from the "default" edition definition. */
int wordsPerPage;

/** "css" sub-directory of the import source directory. */
File cssDirectory;

/** "js" sub-directory of the import source directory. */
File jsDirectory;

/** "images" sub-directory of the import source directory. */
File imagesDirectory;
30 | 321 | mdecorde | |
/**
 * Creates the pager for the "default" edition and caches the project
 * settings and source sub-directories used by the edition steps.
 *
 * @param module import module giving access to the project and to the source directory
 */
public XTZPager(ImportModule module) {
	super(module, "default");

	this.project = module.getProject();
	this.lang = this.project.getLang();

	// pagination settings come from the "default" edition definition
	def defaultEdition = this.project.getEditionDefinition("default");
	this.wordsPerPage = defaultEdition.getWordsPerPage();
	this.page_element = defaultEdition.getPageElement();
	this.wordTag = this.project.getTokenizerWordElement();

	// optional resources shipped next to the sources
	def sourceDirectory = module.getSourceDirectory();
	this.cssDirectory = new File(sourceDirectory, "css");
	this.jsDirectory = new File(sourceDirectory, "js");
	this.imagesDirectory = new File(sourceDirectory, "images");
}
45 | 321 | mdecorde | |
/**
 * Runs the edition steps in order: "default" edition, "facs" edition, then
 * the XSL editions. Stops at the first step returning false; isSuccessFul
 * is set only when all three steps succeed.
 *
 * @param files the files to process; when null the content of inputDirectory is listed and sorted
 */
public void process(ArrayList<File> files) {
	super.process(files);

	// NOTE(review): the reassigned parameter is not read again in this method;
	// the steps below read 'files' without receiving it - confirm the fallback is effective.
	if (files == null) {
		files = inputDirectory.listFiles();
		if (files != null) {
			Collections.sort(files);
		}
	}

	if (!(doDefaultEditionStep() && doFacsEditionStep())) {
		return;
	}

	// remove extra XSL editions -> they will be recreated by the doPostEditionXSLStep call
	for (EditionDefinition definition : project.getEditionDefinitions()) {
		String definitionName = definition.getName();
		boolean builtIn = (definitionName == "facs" || definitionName == "default");
		if (!builtIn) {
			definition.delete();
		}
	}

	if (doPostEditionXSLStep()) {
		isSuccessFul = true;
		println ""
	}
}
68 | 321 | mdecorde | |
/**
 * Builds the "default" HTML edition of every file of 'files' with
 * XTZDefaultPagerStep, declares the produced pages in the text's "default"
 * Edition, then copies the CSS/JS/images resources next to the pages.
 *
 * A text is skipped when its first page ({textname}_1.html) exists and is at
 * least as recent as the source file. A failing text is reported on the
 * console and does not stop the loop.
 *
 * @return true in every case, including when the edition is disabled
 */
public boolean doDefaultEditionStep() {

	if (!project.getEditionDefinition("default").getBuildEdition()) {
		return true; // the edition is not requested
	}

	println "-- Building 'default' edition of ${files.size()} texts..."

	// style sheets declared in each HTML page: defaults first...
	def css = ["css/txm.css", "css/${corpusname}.css"]

	// ...then every visible *.css file found in the source "css" directory
	if (cssDirectory.exists()) {
		def candidates = cssDirectory.listFiles();
		if (candidates != null) {
			for (File candidate : candidates) {
				boolean isStyleSheet = candidate.isFile() && !candidate.isHidden() && candidate.getName().endsWith(".css");
				if (isStyleSheet) {
					css << "css/"+candidate.getName();
				}
			}
		}
	}

	ConsoleProgressBar progress = new ConsoleProgressBar(files.size())
	for (File txmFile : files) {
		try {
			progress.tick()

			// text name = file name without its last extension
			String textname = txmFile.getName();
			int dot = textname.lastIndexOf(".");
			if (dot > 0) {
				textname = textname.substring(0, dot);
			}

			// nothing to do when the first page is up to date
			File firstPage = new File(outputDirectory, textname+"_1.html");
			boolean upToDate = firstPage.exists() && firstPage.lastModified() >= txmFile.lastModified();
			if (upToDate) {
				continue;
			}

			List<String> noSpaceBefore = LangFormater.getNoSpaceBefore(lang);
			List<String> noSpaceAfter = LangFormater.getNoSpaceAfter(lang);

			// fetch or declare the text
			Text text = project.getText(textname);
			if (text == null) {
				text = new Text(project);
			}
			text.setName(textname);
			text.setSourceFile(txmFile)
			text.setTXMFile(txmFile)

			def step = new XTZDefaultPagerStep(this, txmFile, textname, noSpaceBefore, noSpaceAfter, css);
			if (!step.process()) {
				println "Fail to build 'default' edition for text: $txmFile"
				continue;
			}

			// fetch or declare the edition
			Edition edition = text.getEdition("default")
			if (edition == null) {
				edition = new Edition(text);
			}
			edition.setName("default");
			edition.setIndex(outputDirectory.getAbsolutePath());

			// pages are named "1", "2", ...; "w_0" is used when the step gives no word id for a page
			for (int page = 0 ; page < step.getPageFiles().size() ; page++) {
				def firstWordIds = step.getIdx();
				String wordid = (page < firstWordIds.size()) ? firstWordIds.get(page) : "w_0";
				edition.addPage(""+(page + 1), wordid);
			}
		} catch(Exception e) {
			println "Error: could not create $txmFile 'default' edition: "+e
			e.printStackTrace()
		}
	}

	// copy default TXM css file in the "default" edition directory
	File csshtmlDirectory = new File(outputDirectory, "css")
	csshtmlDirectory.mkdirs()
	BundleUtils.copyFiles("org.txm.core", "res", "org/txm/css", "txm.css", csshtmlDirectory);

	// copy the source CSS/JS/images files in the "default" edition directory
	if (cssDirectory.exists()) {
		FileCopy.copyFiles(cssDirectory, csshtmlDirectory)
	}
	if (jsDirectory.exists()) {
		FileCopy.copyFiles(jsDirectory, new File(outputDirectory, "js"))
	}
	if (imagesDirectory.exists()) {
		FileCopy.copyFiles(imagesDirectory, new File(outputDirectory, "images"))
	}

	println ""
	return true;
}
160 | 321 | mdecorde | |
/**
 * Builds the "facs" edition of every file of 'files' with XTZFacsPagerStep,
 * declares the produced pages in the text's "facs" Edition, then copies the
 * images (local directories only), the viewer and the CSS/JS/images resources
 * in the "facs" sub-directory of htmlDirectory.
 *
 * A text is skipped when its first page ({textname}_1.html) exists and is at
 * least as recent as the source file.
 *
 * @return true when the edition is disabled or when the step finished;
 *         false as soon as one text raises an exception
 */
public boolean doFacsEditionStep() {

	boolean mustBuildFacsEdition = project.getEditionDefinition("facs").getBuildEdition()
	if (!mustBuildFacsEdition) return true;

	String imageDirectoryPath = project.getEditionDefinition("facs").getImagesDirectory();
	File imageDirectory = null
	// computed inside the null check so that no later test dereferences a null path
	boolean remoteImages = false

	if (imageDirectoryPath != null) {
		imageDirectoryPath = imageDirectoryPath.trim()
		remoteImages = imageDirectoryPath.startsWith("http")
		imageDirectory = new File(imageDirectoryPath)
		// NOTE(review): with '&&' everywhere this only discards an empty local path;
		// a non-empty local path that does not exist is kept - confirm '||' was not intended.
		if (!remoteImages && imageDirectoryPath.length()== 0 && !imageDirectory.exists() && !imageDirectory.isDirectory()) {
			imageDirectory = null;
		}
	}

	println "-- Building 'facs' edition of ${files.size()} texts..."
	File newEditionDirectory = new File(htmlDirectory, "facs");
	newEditionDirectory.mkdir();

	ConsoleProgressBar cpb = new ConsoleProgressBar(files.size())
	for (File txmFile : files) {
		cpb.tick()

		// text name = file name without its last extension
		String txtname = txmFile.getName();
		int dot = txtname.lastIndexOf(".");
		if (dot > 0) txtname = txtname.substring(0, dot);

		// nothing to do when the first page is up to date
		File firstHTMLPageFile = new File(newEditionDirectory, txtname+"_1.html");
		if (firstHTMLPageFile.exists() && firstHTMLPageFile.lastModified() >= txmFile.lastModified()) continue;

		List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
		List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);

		// fetch or declare the text
		Text t = project.getText(txtname);
		if (t == null) {
			t = new Text(project);
		}
		t.setName(txtname);
		t.setSourceFile(txmFile)
		t.setTXMFile(txmFile)

		// fetch or declare the edition
		Edition edition = t.getEdition("facs")
		if (edition == null) {
			edition = new Edition(t);
		}
		edition.setName("facs");
		edition.setIndex(outputDirectory.getAbsolutePath());

		try {
			def ed = new XTZFacsPagerStep(txmFile, newEditionDirectory, imageDirectory, txtname, corpusname, "pb", "facs", wordTag, debug);
			if (!ed.process()) {
				println "Fail to build 'facs' edition for text: $txmFile"
				continue;
			}

			// each entry is read as [page file, first word id]
			def pages = ed.getPageFiles()
			for (int p = 0 ; p < pages.size() ; p++) {
				String wordid = pages[p][1]
				//TODO replace '""+(p + 1)' with something that fetch/findout the page 'name'
				// TODO or move the Edition and Page corpus declaration in the XTZDefaultPagerStep
				edition.addPage(""+(p + 1), wordid);
			}
		} catch (Exception e) {
			println "Error while processing $txmFile text: "+e
			e.printStackTrace();
			return false;
		}
	}

	// copy the images only if they are local; the previous test dereferenced
	// imageDirectoryPath first and failed when no images directory was configured
	if (imageDirectory != null && !remoteImages) {
		File editionImagesDirectory = new File(newEditionDirectory, "res/images/"+corpusname+"/facs");
		editionImagesDirectory.mkdirs();
		FileCopy.copyFiles(imageDirectory, editionImagesDirectory);
	}

	// copy SimpleViewer files in the "facs" edition directory
	File jshtmlDirectory = new File(newEditionDirectory, "js")
	jshtmlDirectory.mkdirs()
	BundleUtils.copyFiles("org.txm.core", "res", "org/txm/js", "viewer", jshtmlDirectory);
	BundleUtils.copyFiles("org.txm.core", "res", "org/txm/", "images", newEditionDirectory);

	// copy default TXM css file in the "facs" edition directory
	File csshtmlDirectory = new File(newEditionDirectory, "css")
	csshtmlDirectory.mkdirs()
	BundleUtils.copyFiles("org.txm.core", "res", "org/txm/css", "txm.css", csshtmlDirectory);

	// copy CSS/JS/Images sources files in the "facs" edition directory
	if (cssDirectory.exists()) {
		FileCopy.copyFiles(cssDirectory, csshtmlDirectory)
	}
	if (jsDirectory.exists()) {
		FileCopy.copyFiles(jsDirectory, jshtmlDirectory)
	}
	if (imagesDirectory.exists()) {
		File imageshtmlDirectory = new File(newEditionDirectory, "images")
		FileCopy.copyFiles(imagesDirectory, imageshtmlDirectory)
	}

	project.setDefaultEditionName("default,facs");

	println ""
	return true;
}
269 | 321 | mdecorde | |
/**
 * Applies the stylesheets of the "xsl/4-edition" source directory to build
 * extra editions, then declares the pages of each edition created.
 *
 * Only visible "*.xsl" files whose name matches "[1-9]{1,3}-.+" are used,
 * sorted by their numeric prefix; the expected name pattern is
 * '{Number}-{editionName}-{step}.xsl'. The first stylesheet of an edition is
 * applied to inputDirectory and replaces the edition directory; the next
 * ones are applied to the files already present in the edition directory.
 *
 * @return true when there is no "xsl/4-edition" directory or when all
 *         stylesheets were applied; false (with 'reason' set) when one fails
 */
public boolean doPostEditionXSLStep() {

	File xslDirectory = new File(module.getSourceDirectory(), "xsl/4-edition")
	if (!xslDirectory.exists()) {
		return true; // no edition stylesheet to apply
	}

	// prepare the XSL parameters shared by all stylesheets
	def xslParams = project.getXsltParameters()
	String s = project.getEditionDefinition("default").getWordsPerPage();
	if (s != null && s.length() > 0) {
		xslParams["number-words-per-page"] = Integer.parseInt(s);
	}
	xslParams["pagination-element"] = project.getEditionDefinition("default").getPageElement();
	xslParams["import-xml-path"] = project.getProjectDirectory()
	//println "XSL PARAMS: "+xslParams

	// listFiles() returns null when the path cannot be listed
	def xslFiles = xslDirectory.listFiles()
	if (xslFiles == null) {
		xslFiles = new File[0]
	}
	// order by numeric prefix; names without one get -1
	xslFiles = xslFiles.sort() { f ->
		try {
			return Integer.parseInt(f.getName().substring(0, f.getName().indexOf("-")))
		} catch(Exception e) {
			// no usable numeric prefix: fall through to -1
		}
		return -1;
	}

	def editionsCreated = [:] // edition name -> first stylesheet of the edition
	for (File xslFile : xslFiles) {
		String fileName = xslFile.getName()
		if (xslFile.isDirectory() || xslFile.isHidden() || !fileName.endsWith(".xsl")) continue;
		// NOTE(review): '[1-9]' rejects prefixes containing a 0 such as "10-" - confirm this is wanted
		if (!fileName.matches("[1-9]{1,3}-.+")) continue;

		// remove the whole "{Number}-" prefix; a fixed 2-character cut broke
		// the 2- and 3-digit prefixes accepted by the pattern above
		String xslName = fileName.substring(fileName.indexOf("-") + 1);
		int idx2 = xslName.indexOf(".")
		if (idx2 > 0) xslName = xslName.substring(0, idx2)
		else {
			println "$xslFile is not a '.xsl' file"
			continue;
		}
		int idx3 = xslName.indexOf("-")
		if (idx3 < 0) {
			println "$xslFile file does not follow the '{Number}-{editionName}-{step}.xsl' name pattern"
			continue;
		}
		String pagerStep = xslName.substring(idx3 + 1);
		String editionName = xslName.substring(0, idx3);

		int idx = editionName.indexOf(".")
		if (idx > 0) editionName = editionName.substring(0, idx);
		println "-- Building '$editionName' XSL edition with step '$pagerStep'..."

		File newEditionDirectory = new File(htmlDirectory, editionName);
		xslParams["output-directory"] = newEditionDirectory.toURI().toString()

		if (editionsCreated[editionName] == null) { // first XSL, replace an edition
			editionsCreated[editionName] = xslFile
			//if (!importModule.isUpdatingCorpus()) {
			//TODO: optimisation if update is enable,
			newEditionDirectory.deleteDir(); // delete previous edition if any
			//}
			newEditionDirectory.mkdir()

			boolean deleteOutputFiles = "pager" == pagerStep;
			if (ApplyXsl2.processImportSources(xslFile, inputDirectory, newEditionDirectory, xslParams, deleteOutputFiles)) {
				println ""
			} else {
				reason = "Fail to apply edition XSL: $xslFile"
				return false;
			}

			// copy the source CSS/JS/images files in the new edition directory
			if (cssDirectory.exists()) {
				File csshtmlDirectory = new File(newEditionDirectory, "css")
				FileCopy.copyFiles(cssDirectory, csshtmlDirectory)
			}
			if (jsDirectory.exists()) {
				File jshtmlDirectory = new File(newEditionDirectory, "js")
				FileCopy.copyFiles(jsDirectory, jshtmlDirectory)
			}
			if (imagesDirectory.exists()) {
				File imageshtmlDirectory = new File(newEditionDirectory, "images")
				FileCopy.copyFiles(imagesDirectory, imageshtmlDirectory)
			}
		} else { // N+1 XSL working with HTML files
			// NOTE(review): this listing also contains the css/js/images sub-directories
			// copied above when they exist - confirm ApplyXsl2 copes with them.
			def htmlFiles = newEditionDirectory.listFiles()
			htmlFiles.sort()

			if (ApplyXsl2.processImportSources(xslFile, htmlFiles, xslParams)) {
				if ("pager".equals(pagerStep)) {
					// delete the one page HTML files only if the XSL step is "pager"
					for (File f : htmlFiles) f.delete();
				}
				// println ""
			} else {
				reason = "Fail to apply edition XSL: $xslFile"
				return false;
			}
		}
	}

	// for each XML-TXM file, retrieve the first word ID of each page from the XSL output files
	//println "retrieve word ids from $inputDirectory"
	println "-- Fetching page word IDs..."
	ConsoleProgressBar cpb = new ConsoleProgressBar(editionsCreated.keySet().size())
	for (String editionName : editionsCreated.keySet()) {
		cpb.tick()

		File newEditionDirectory = new File(htmlDirectory, editionName);
		File xslFile = editionsCreated[editionName]
		for (File txmFile : inputDirectory.listFiles()) {
			if (txmFile.isDirectory()) continue;
			// text name = file name up to its first dot
			String textName = txmFile.getName()
			int idx4 = textName.indexOf(".")
			if (idx4 > 0) textName = textName.substring(0, idx4);

			getFirstWordIDs(textName, editionName, newEditionDirectory, xslFile, txmFile);
		}

		// the edition definition inherits the pagination settings of the "default" edition
		def editionDeclaration = project.getEditionDefinition(editionName); // create the edition definition
		editionDeclaration.setBuildEdition(true)
		editionDeclaration.setPageBreakTag(project.getEditionDefinition("default").getPageElement())
		editionDeclaration.setWordsPerPage(project.getEditionDefinition("default").getWordsPerPage())
	}
	println ""

	return true;
}
397 | 321 | mdecorde | |
/**
 * Declares, in the 'editionName' Edition of the text, one page per
 * "{textName}_{page}.html" file of the edition directory. The first word id
 * of a page is read from the file with getMetaContent().
 *
 * Pages are added by increasing trailing number (the integer between the
 * last "_" and the last "."); files without such a number come last, by
 * name. An existing edition has its pages reset first. A directory that
 * cannot be listed yields no page.
 *
 * @param textName name of the text; also the prefix of its page files
 * @param editionName name of the edition to create or replace
 * @param newEditionDirectory directory containing the HTML pages
 * @param xslFile stylesheet that created the edition (not used here)
 * @param txmFile XML-TXM file registered as source and TXM file of the text
 */
private void getFirstWordIDs(String textName, String editionName, File newEditionDirectory, File xslFile, File txmFile) {
	// println "call getFirstWordIDs textName=$textName editionName=$editionName dir=$newEditionDirectory xsl=$xslFile"
	Text t = project.getText(textName);
	if (t == null) {
		t = new Text(project);
	}
	t.setName(textName);
	t.setSourceFile(txmFile)
	t.setTXMFile(txmFile)

	Edition edition = t.getEdition(editionName)
	if (edition == null) { // new edition
		edition = new Edition(t);
	} else { // replacing existing edition
		edition.resetPages()
	}
	edition.setName(editionName);
	edition.setIndex(outputDirectory.getAbsolutePath());

	String pagePrefix = textName + "_"
	String pageExtension = ".html"

	// keep only the page files of this text BEFORE sorting, so that unrelated
	// files of the directory cannot break the numeric comparison below
	// NOTE(review): a text named "a" still matches the pages of a text named "a_b"
	def pageFiles = []
	File[] children = newEditionDirectory.listFiles()
	if (children != null) {
		for (File child : children) {
			String name = child.getName()
			if (child.isFile() && name.startsWith(pagePrefix) && name.indexOf(pageExtension, pagePrefix.length()) >= 0) {
				pageFiles << child
			}
		}
	}

	// trailing page number of a file name, or null when there is none
	def pageNumber = { String name ->
		int from = name.lastIndexOf("_") + 1
		int to = name.lastIndexOf(".")
		if (to < from) return null
		try {
			return Integer.valueOf(name.substring(from, to))
		} catch (NumberFormatException e) {
			return null
		}
	}

	pageFiles.sort { File f1, File f2 ->
		Integer n1 = pageNumber(f1.getName())
		Integer n2 = pageNumber(f2.getName())
		if (n1 != null && n2 != null) return Integer.compare(n1, n2) // no overflow, unlike n1 - n2
		if (n1 == null && n2 == null) return f1.getName().compareTo(f2.getName())
		return (n1 == null) ? 1 : -1 // numbered pages first
	}

	for (File f : pageFiles) {
		String fileName = f.getName()
		String firstWordID = getMetaContent(f);
		// page name = what stands between "{textName}_" and ".html"
		String pagename = fileName.substring(pagePrefix.length(), fileName.indexOf(pageExtension, pagePrefix.length()))
		edition.addPage(pagename, firstWordID)
	}
}
438 | 321 | mdecorde | |
/**
 * Reads the first word id declared in the head of an HTML page, i.e. the
 * 'content' attribute of the first &lt;meta name="txm:first-word-id"&gt; element.
 *
 * Parsing stops at that element or at the first &lt;body&gt; element. The
 * parser and the stream are closed even when parsing fails.
 *
 * @param f the HTML page to read; must be well-formed XML
 * @return the word id; "" when the meta element is not found before
 *         &lt;body&gt; or the end of the document, or has no 'content' attribute
 */
public static String getMetaContent(File f) {
	final String META = "meta"
	final String BODY = "body"
	final String NAME = "name"
	final String DESCRIPTION = "txm:first-word-id"
	final String CONTENT = "content"

	def inputData = f.toURI().toURL().openStream();
	def parser = null
	try {
		def factory = XMLInputFactory.newInstance();
		factory.setProperty("javax.xml.stream.supportDTD", false); // ignore the DTD declared in doctype
		parser = factory.createXMLStreamReader(inputData);

		while (parser.hasNext()) {
			if (parser.next() != XMLStreamConstants.START_ELEMENT) continue;

			String localName = parser.getLocalName()
			if (META.equals(localName)) {
				// values are local to this element so that the 'content'
				// of a previous meta element is never returned by mistake
				String desc = ""
				String content = ""
				for (int i = 0 ; i < parser.getAttributeCount() ; i++) { // scan attributes
					String attribute = parser.getAttributeLocalName(i)
					if (NAME.equals(attribute)) { // found @name
						desc = parser.getAttributeValue(i)
					} else if (CONTENT.equals(attribute)) { // found @content
						content = parser.getAttributeValue(i)
					}
				}
				if (DESCRIPTION.equals(desc)) { // stop now
					return content
				}
			} else if (BODY.equals(localName)) { // no need to go further, the meta element was not found
				return ""
			}
		}
		return ""
	} finally {
		// closing the parser does not close the stream it reads from
		try {
			if (parser != null) parser.close();
		} finally {
			inputData.close();
		}
	}
}
481 | 321 | mdecorde | |
/**
 * Manual check: prints the first word id read from a sample page located
 * at a hard-coded developer path.
 *
 * @param args not used
 */
public static void main(def args) {
	File samplePage = new File("/home/mdecorde/TXM/corpora/QGRAALXTZ/HTML/QGRAALXTZ/default", "qgraal_cm_test201510_page_160_2.html")
	String firstWordId = getMetaContent(samplePage)
	println "RESULT: "+firstWordId
}
485 | 321 | mdecorde | } |