Révision 3715
TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZDefaultPagerStep.groovy (revision 3715) | ||
---|---|---|
13 | 13 |
import org.txm.importer.xtz.* |
14 | 14 |
|
15 | 15 |
public class XTZDefaultPagerStep { |
16 |
|
|
16 |
|
|
17 | 17 |
List<String> NoSpaceBefore; |
18 |
|
|
18 |
|
|
19 | 19 |
/** Words (or word-final characters) after which no whitespace is inserted before the next word. */ |
20 | 20 |
List<String> NoSpaceAfter; |
21 |
|
|
21 |
|
|
22 | 22 |
/** The wordcount. */ |
23 | 23 |
int wordcount = 0; |
24 |
|
|
24 |
|
|
25 | 25 |
/** The pagecount. */ |
26 | 26 |
int pagecount = 0; |
27 |
|
|
27 |
|
|
28 | 28 |
/** The wordmax. */ |
29 | 29 |
int wordmax = 0; |
30 |
|
|
30 |
|
|
31 | 31 |
/** The basename. */ |
32 | 32 |
String basename = ""; |
33 | 33 |
String txtname = ""; |
34 | 34 |
File outdir; |
35 |
|
|
35 |
|
|
36 | 36 |
/** The wordid. */ |
37 | 37 |
String wordid; |
38 |
|
|
38 |
|
|
39 | 39 |
/** The first word. */ |
40 | 40 |
boolean firstWord = true; |
41 |
|
|
41 |
|
|
42 | 42 |
boolean enableCollapsibles = false; |
43 |
|
|
43 |
|
|
44 | 44 |
/** The wordvalue. */ |
45 | 45 |
String wordvalue = ""; |
46 |
|
|
46 |
|
|
47 | 47 |
/** The interpvalue. */ |
48 | 48 |
String interpvalue = ""; |
49 |
|
|
49 |
|
|
50 | 50 |
/** The lastword. */ |
51 | 51 |
String lastword = " "; |
52 |
|
|
52 |
|
|
53 | 53 |
/** The wordtype. */ |
54 | 54 |
String wordtype; |
55 |
|
|
55 |
|
|
56 | 56 |
/** The flagform. */ |
57 | 57 |
boolean flagform = false; |
58 |
|
|
58 |
|
|
59 | 59 |
/** The flaginterp. */ |
60 | 60 |
boolean flaginterp = false; |
61 |
|
|
61 |
|
|
62 | 62 |
/** The url. */ |
63 | 63 |
private def url; |
64 |
|
|
64 |
|
|
65 | 65 |
/** The input data. */ |
66 | 66 |
private def inputData; |
67 |
|
|
67 |
|
|
68 | 68 |
/** The factory. */ |
69 | 69 |
private def factory; |
70 |
|
|
70 |
|
|
71 | 71 |
/** The parser. */ |
72 | 72 |
private XMLStreamReader parser; |
73 |
|
|
73 |
|
|
74 | 74 |
/** The writer. */ |
75 | 75 |
OutputStreamWriter writer; |
76 |
|
|
76 |
|
|
77 | 77 |
/** The pagedWriter. */ |
78 | 78 |
StaxStackWriter pagedWriter = null; |
79 |
|
|
79 |
|
|
80 | 80 |
/** The infile. */ |
81 | 81 |
File infile; |
82 |
|
|
82 |
|
|
83 | 83 |
/** The outfile. */ |
84 | 84 |
File outfile; |
85 |
|
|
85 |
|
|
86 | 86 |
/** The pages. */ |
87 | 87 |
//TODO enhance this to store the page name/id as well |
88 | 88 |
ArrayList<File> pages = new ArrayList<File>(); |
89 |
|
|
89 |
|
|
90 | 90 |
/** The idxstart. */ |
91 | 91 |
ArrayList<String> idxstart = new ArrayList<String>(); |
92 | 92 |
String paginationElement; |
... | ... | |
96 | 96 |
def noteElements = new HashSet<String>(); |
97 | 97 |
def outOfTextElements = new HashSet<String>(); |
98 | 98 |
XTZPager pager; |
99 |
|
|
99 |
|
|
100 | 100 |
/** |
101 | 101 |
* Instantiates a new pager. |
102 | 102 |
* |
... | ... | |
123 | 123 |
this.wordTag= pager.wordTag; |
124 | 124 |
outdir.mkdirs() |
125 | 125 |
this.enableCollapsibles = pager.getImportModule().getProject().getEditionDefinition("default").getEnableCollapsibleMetadata(); |
126 |
|
|
126 |
|
|
127 | 127 |
inputData = new BufferedInputStream(url.openStream()); |
128 | 128 |
factory = XMLInputFactory.newInstance(); |
129 | 129 |
parser = factory.createXMLStreamReader(inputData); |
130 |
|
|
130 |
|
|
131 | 131 |
String notesListString = pager.getImportModule().getProject().getTextualPlan("Note") |
132 | 132 |
if (notesListString != null) for (def s : notesListString.split(",")) noteElements << s; |
133 |
|
|
133 |
|
|
134 | 134 |
String elems = pager.getImportModule().getProject().getTextualPlan("OutSideTextTagsAndKeepContent") |
135 | 135 |
if (elems != null) for (def s : elems.split(",")) outOfTextElements << s; |
136 |
|
|
136 |
|
|
137 | 137 |
//process(); |
138 | 138 |
} |
139 |
|
|
139 |
|
|
140 | 140 |
public String getAttributeValue(def parser, String ns, String name) { |
141 | 141 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) { |
142 | 142 |
if (name == parser.getAttributeLocalName(i)) { |
... | ... | |
145 | 145 |
} |
146 | 146 |
return null; |
147 | 147 |
} |
148 |
|
|
148 |
|
|
149 | 149 |
private def closeMultiWriter() { |
150 | 150 |
if (pagedWriter != null) { |
151 | 151 |
def tags = [] |
152 | 152 |
tags.addAll(pagedWriter.getTagStack()) |
153 |
|
|
153 |
|
|
154 | 154 |
//println "CLOSING... STACK="+pagedWriter.getTagStack() |
155 | 155 |
// def stack = Thread.currentThread().getStackTrace(); |
156 | 156 |
// int m = Math.min(15, stack.size()-1) |
157 | 157 |
// for (def s : stack[1..m]) println s |
158 | 158 |
// println "FILE ="+outfile |
159 | 159 |
if (firstWord) { // there were no words |
160 |
pagedWriter.writeCharacters("");
|
|
160 |
pagedWriter.writeCharacters("") |
|
161 | 161 |
this.idxstart.add("w_0") |
162 |
pagedWriter.write("<span id=\"w_0\"/>");
|
|
162 |
pagedWriter.write("<span id=\"w_0\"/>") |
|
163 | 163 |
} |
164 |
|
|
165 |
// write notes before closing all tags
|
|
164 |
|
|
165 |
// write notes before closing all tags |
|
166 | 166 |
if (notes.size() > 0) { |
167 |
pagedWriter.writeStartElement("hr", ["id":"notes", "width":"20%", "align":"left"]);
|
|
167 |
pagedWriter.writeStartElement("hr", ["id":"notes", "width":"20%", "align":"left"]) |
|
168 | 168 |
pagedWriter.writeEndElement() // </hr> |
169 | 169 |
//pagedWriter.writeStartElement("ol"); |
170 | 170 |
int i = 1; |
171 | 171 |
for (String note : notes) { |
172 | 172 |
//pagedWriter.writeStartElement("li"); |
173 |
pagedWriter.writeStartElement("a", ["href":"#noteref_"+i, "name":"note_"+i]);
|
|
173 |
pagedWriter.writeStartElement("a", ["href":"#noteref_"+i, "name":"note_"+i]) |
|
174 | 174 |
pagedWriter.writeStartElement("sup") |
175 | 175 |
pagedWriter.writeCharacters(""+i) |
176 | 176 |
pagedWriter.writeEndElement() // </sup> |
... | ... | |
181 | 181 |
} |
182 | 182 |
notes.clear() |
183 | 183 |
} |
184 |
|
|
184 |
|
|
185 | 185 |
pagedWriter.writeEndElements(); |
186 |
|
|
186 |
|
|
187 | 187 |
pagedWriter.close(); |
188 |
|
|
188 |
|
|
189 | 189 |
//println "STACK TO REWRITE: $tags" |
190 | 190 |
int removedDiv = 0; |
191 | 191 |
for (int i = 0 ; i < tags.size() ; i++) { |
... | ... | |
203 | 203 |
return []; |
204 | 204 |
} |
205 | 205 |
} |
206 |
|
|
206 |
|
|
207 | 207 |
/** |
208 | 208 |
* Creates the next output. |
209 | 209 |
* |
... | ... | |
213 | 213 |
wordcount = 0; |
214 | 214 |
try { |
215 | 215 |
def tags = closeMultiWriter() |
216 |
|
|
216 |
|
|
217 | 217 |
outfile = new File(outdir, txtname+"_"+(++pagecount)+".html") |
218 | 218 |
pages.add(outfile) |
219 | 219 |
firstWord = true; // waiting for next word |
220 |
|
|
220 |
|
|
221 | 221 |
pagedWriter = new StaxStackWriter(outfile, "UTF-8") |
222 |
|
|
222 |
|
|
223 | 223 |
//pagedWriter.writeStartDocument() |
224 | 224 |
pagedWriter.writeDTD("<!DOCTYPE html>") |
225 | 225 |
pagedWriter.writeCharacters("\n") |
226 | 226 |
pagedWriter.writeStartElement("html") |
227 |
|
|
227 |
|
|
228 | 228 |
pagedWriter.writeCharacters("\n\t") |
229 | 229 |
pagedWriter.writeEmptyElement("meta", ["http-equiv":"Content-Type", "content":"text/html","charset":"UTF-8"]) |
230 |
|
|
231 |
|
|
230 |
|
|
231 |
|
|
232 | 232 |
for (String css : cssList) { |
233 | 233 |
pagedWriter.writeCharacters("\t\n") |
234 | 234 |
pagedWriter.writeEmptyElement("link", ["rel":"stylesheet", "type":"text/css","href":"$css"]) |
235 |
|
|
236 | 235 |
} |
236 |
|
|
237 | 237 |
pagedWriter.writeCharacters("\t\n") |
238 | 238 |
pagedWriter.writeStartElement("head") |
239 | 239 |
pagedWriter.writeCharacters("\t\t\n") |
... | ... | |
252 | 252 |
pagedWriter.writeCharacters("\t\t\n") |
253 | 253 |
pagedWriter.writeStartElement("div", ["class": "txmeditionpage"]) //<div> |
254 | 254 |
pagedWriter.writeCharacters("\n") |
255 |
|
|
256 |
// println "NEW HTML: "+outfile |
|
257 |
// println "TAGS: "+tags |
|
255 |
|
|
256 |
// println "NEW HTML: "+outfile
|
|
257 |
// println "TAGS: "+tags
|
|
258 | 258 |
pagedWriter.writeStartElements(tags) |
259 | 259 |
return true; |
260 | 260 |
} catch (Exception e) { |
... | ... | |
263 | 263 |
return false; |
264 | 264 |
} |
265 | 265 |
} |
266 |
|
|
266 |
|
|
267 | 267 |
/** |
268 | 268 |
* Creates the output. |
269 | 269 |
* |
... | ... | |
278 | 278 |
return false; |
279 | 279 |
} |
280 | 280 |
} |
281 |
|
|
281 |
|
|
282 | 282 |
/** |
283 | 283 |
* Gets the page files. |
284 | 284 |
* |
... | ... | |
287 | 287 |
public ArrayList<File> getPageFiles() { |
288 | 288 |
// Returns the internal list itself (not a copy); one HTML File is appended to it each time a new page is opened.
return pages; |
289 | 289 |
} |
290 |
|
|
290 |
|
|
291 | 291 |
/** |
292 | 292 |
* Gets the idx. |
293 | 293 |
* |
... | ... | |
296 | 296 |
public ArrayList<String> getIdx() { |
297 | 297 |
// Returns the internal list itself (not a copy); it receives the id of the first word written on each page, or "w_0" when a page is closed without any word.
return idxstart; |
298 | 298 |
} |
299 |
|
|
299 |
|
|
300 | 300 |
/** |
301 | 301 |
* Go to text. |
302 | 302 |
*/ |
303 | 303 |
private void goToText() { |
304 |
|
|
304 | 305 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) { |
305 |
if (event == XMLStreamConstants.END_ELEMENT) |
|
306 |
if (parser.getLocalName().matches("teiHeader")) |
|
306 |
if (event == XMLStreamConstants.END_ELEMENT) {
|
|
307 |
if (parser.getLocalName().matches("teiHeader")) {
|
|
307 | 308 |
return; |
309 |
} |
|
310 |
} |
|
308 | 311 |
} |
309 | 312 |
} |
310 |
|
|
313 |
|
|
311 | 314 |
def notes = [] |
312 | 315 |
def currentOutOfTextElements = [] // stack of the element names opened while inside out-of-text content (pushed on start tag, popped on end tag) |
313 | 316 |
def writeOutOfTextToEditText = false |
... | ... | |
315 | 318 |
* Process. |
316 | 319 |
*/ |
317 | 320 |
public boolean process() { |
318 |
|
|
321 |
|
|
319 | 322 |
try { |
320 | 323 |
def anaValues = [:] |
321 | 324 |
def anaType = "" |
322 | 325 |
def anaResp = "" |
323 | 326 |
def anaValue = new StringBuilder() |
324 |
|
|
327 |
|
|
325 | 328 |
boolean flagNote = false |
326 | 329 |
boolean flagW = false |
327 |
|
|
330 |
|
|
328 | 331 |
boolean allTags = true |
329 | 332 |
boolean ignoreUnmanagedTags = true |
330 | 333 |
// unmanagedElementsPolicyCombo.setItems("ignore", "keep as is", "rename to span"); |
... | ... | |
342 | 345 |
String noteType = "" |
343 | 346 |
String rend = "" |
344 | 347 |
goToText(); |
345 |
|
|
348 |
|
|
346 | 349 |
String localname = "" |
347 | 350 |
if (!createNextOutput()) { |
348 | 351 |
return false; |
349 | 352 |
} |
350 |
|
|
353 |
|
|
351 | 354 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) { |
352 | 355 |
rend = ""; |
353 | 356 |
switch (event) { |
... | ... | |
359 | 362 |
} else if (currentOutOfTextElements.size() > 0) { |
360 | 363 |
currentOutOfTextElements << localname |
361 | 364 |
} |
362 |
|
|
365 |
|
|
363 | 366 |
if (localname == paginationElement) { |
364 | 367 |
if (paginate) { |
365 | 368 |
createNextOutput() |
366 | 369 |
} |
367 |
|
|
370 |
|
|
368 | 371 |
wordcount = 0; |
369 | 372 |
pagedWriter.write("\n") |
370 | 373 |
if (getAttributeValue(parser, null,"n") != null) { |
371 | 374 |
pagedWriter.writeElement("p", ["class":"txmeditionpb", "align":"center"], getAttributeValue(parser, null,"n")) |
372 | 375 |
} |
373 | 376 |
} |
374 |
|
|
377 |
|
|
375 | 378 |
rend = getAttributeValue(parser, null, "rend") |
376 |
//if (rend == null) rend = localname;
|
|
377 |
|
|
379 |
//if (rend == null) rend = localname; |
|
380 |
|
|
378 | 381 |
switch (localname) { |
379 | 382 |
case "text": |
380 | 383 |
LinkedHashMap attributes = new LinkedHashMap(); |
381 | 384 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) { |
382 | 385 |
attributes[parser.getAttributeLocalName(i)] = parser.getAttributeValue(i).toString() |
383 | 386 |
} |
384 |
|
|
387 |
|
|
385 | 388 |
pagedWriter.write("\n") |
386 | 389 |
pagedWriter.writeStartElement("p") |
387 | 390 |
pagedWriter.writeAttribute("class", rend) |
388 | 391 |
if (attributes.containsKey("id")) { |
389 | 392 |
pagedWriter.writeElement("h3", attributes["id"]) |
390 | 393 |
} |
391 |
|
|
392 |
if (enableCollapsibles && parser.getAttributeCount() > 2) { |
|
393 |
pagedWriter.writeStartElement("button"); |
|
394 |
pagedWriter.writeAttribute("class", "collapsible"); |
|
395 |
pagedWriter.writeAttribute("onclick", "onCollapsibleClicked(this)"); |
|
396 |
pagedWriter.writeCharacters("➕"); |
|
397 |
pagedWriter.writeEndElement() |
|
398 |
pagedWriter.writeCharacters("\n") |
|
399 |
} |
|
400 |
pagedWriter.writeStartElement("table"); |
|
401 |
if (enableCollapsibles && parser.getAttributeCount() > 2) { |
|
402 |
pagedWriter.writeAttribute("class", "collapsiblecontent") |
|
403 |
pagedWriter.writeAttribute("style", "display:none;") |
|
404 |
} else { |
|
405 |
pagedWriter.writeAttribute("class", "metadata"); |
|
406 |
} |
|
407 |
|
|
394 |
|
|
395 |
if (enableCollapsibles && parser.getAttributeCount() > 2) {
|
|
396 |
pagedWriter.writeStartElement("button");
|
|
397 |
pagedWriter.writeAttribute("class", "collapsible");
|
|
398 |
pagedWriter.writeAttribute("onclick", "onCollapsibleClicked(this)");
|
|
399 |
pagedWriter.writeCharacters("➕");
|
|
400 |
pagedWriter.writeEndElement()
|
|
401 |
pagedWriter.writeCharacters("\n")
|
|
402 |
}
|
|
403 |
pagedWriter.writeStartElement("table");
|
|
404 |
if (enableCollapsibles && parser.getAttributeCount() > 2) {
|
|
405 |
pagedWriter.writeAttribute("class", "collapsiblecontent")
|
|
406 |
pagedWriter.writeAttribute("style", "display:none;")
|
|
407 |
} else {
|
|
408 |
pagedWriter.writeAttribute("class", "metadata");
|
|
409 |
}
|
|
410 |
|
|
408 | 411 |
for (String k : attributes.keySet()) { |
409 | 412 |
if (k == "id") continue; |
410 | 413 |
if (k == "rend") continue; |
411 |
|
|
414 |
|
|
412 | 415 |
pagedWriter.writeStartElement("tr") |
413 | 416 |
pagedWriter.writeAttribute("class", "metadata-line") |
414 | 417 |
pagedWriter.writeElement("td", ["class": "metadata-cell"], k) |
... | ... | |
416 | 419 |
pagedWriter.writeEndElement() //tr |
417 | 420 |
} |
418 | 421 |
pagedWriter.writeEndElement() // table |
419 |
|
|
422 |
|
|
420 | 423 |
pagedWriter.writeEndElement() // p |
421 | 424 |
pagedWriter.writeCharacters("\n") |
422 | 425 |
break; |
... | ... | |
425 | 428 |
pagedWriter.writeAttribute("class", "sync") |
426 | 429 |
if (parser.getAttributeValue(null,"time") != null) { |
427 | 430 |
pagedWriter.writeCharacters(parser.getAttributeValue(null,"time")) |
428 |
|
|
431 |
|
|
429 | 432 |
writeMediaAccess(parser.getAttributeValue(null,"time"), corpus, txtname) |
430 | 433 |
} |
431 | 434 |
break; |
... | ... | |
438 | 441 |
break; |
439 | 442 |
case "sp": |
440 | 443 |
pagedWriter.writeStartElement("p", ["class":"turn", "type":localname]) |
441 |
|
|
444 |
|
|
442 | 445 |
if (parser.getAttributeValue(null,"speaker") != null) { |
443 | 446 |
pagedWriter.writeStartElement("span") |
444 | 447 |
pagedWriter.writeAttribute("class", "spk") |
445 | 448 |
pagedWriter.writeCharacters(parser.getAttributeValue(null,"speaker")+": ") |
446 | 449 |
pagedWriter.writeEndElement() // span@class=spk |
447 | 450 |
} |
448 |
|
|
451 |
|
|
449 | 452 |
break; |
450 | 453 |
case "cb": |
451 | 454 |
pagedWriter.write("\n") |
... | ... | |
545 | 548 |
if (paginate && wordcount >= wordmax) { |
546 | 549 |
createNextOutput(); |
547 | 550 |
} |
548 |
|
|
551 |
|
|
549 | 552 |
if (firstWord) { |
550 | 553 |
firstWord = false; |
551 | 554 |
this.idxstart.add(wordid); |
... | ... | |
573 | 576 |
break; |
574 | 577 |
case XMLStreamConstants.END_ELEMENT: |
575 | 578 |
localname = parser.getLocalName(); |
576 |
|
|
579 |
|
|
577 | 580 |
if (currentOutOfTextElements.size() > 0) currentOutOfTextElements.pop() |
578 |
|
|
581 |
|
|
579 | 582 |
writeOutOfTextToEditText = currentOutOfTextElements.size() > 0 |
580 |
|
|
583 |
|
|
581 | 584 |
if (localname == paginationElement) { |
582 | 585 |
break; // element already processed in the START_ELEMENT event |
583 | 586 |
} |
584 |
|
|
587 |
|
|
585 | 588 |
switch (localname) { |
586 | 589 |
case "text": |
587 | 590 |
break; |
... | ... | |
600 | 603 |
break; |
601 | 604 |
case "lb": |
602 | 605 |
break; |
603 |
|
|
606 |
|
|
604 | 607 |
case "body": |
605 | 608 |
case "div": |
606 | 609 |
case "div1": |
... | ... | |
659 | 662 |
if (l > 0) { |
660 | 663 |
endOfLastWord = lastword.subSequence(l-1, l) |
661 | 664 |
} |
662 |
|
|
665 |
|
|
663 | 666 |
String interpvalue = null; |
664 | 667 |
def tooltipProperties = pager.project.getEditionDefinition("default").get(TBXPreferences.EDITION_DEFINITION_TOOLTIP_PROPERTIES, "*"); |
665 | 668 |
if (tooltipProperties.equals("*")) { |
... | ... | |
671 | 674 |
interpvalue += "- "+wordid |
672 | 675 |
} |
673 | 676 |
|
674 |
if (NoSpaceBefore.contains(wordvalue) || |
|
675 |
NoSpaceAfter.contains(lastword) || |
|
676 |
wordvalue.startsWith("-") || |
|
677 |
NoSpaceAfter.contains(endOfLastWord)) { |
|
678 |
pagedWriter.writeStartElement("span", ["title":interpvalue, "id":wordid]) |
|
677 |
if (!flagNote) { // don't write words of the note elements |
|
678 |
if (NoSpaceBefore.contains(wordvalue) || |
|
679 |
NoSpaceAfter.contains(lastword) || |
|
680 |
wordvalue.startsWith("-") || |
|
681 |
NoSpaceAfter.contains(endOfLastWord)) { |
|
682 |
pagedWriter.writeStartElement("span", ["title":interpvalue, "id":wordid]) |
|
683 |
} else { |
|
684 |
pagedWriter.writeCharacters("\n") |
|
685 |
pagedWriter.writeStartElement("span", ["title":interpvalue, "id":wordid]) |
|
686 |
} |
|
687 |
|
|
688 |
pagedWriter.writeCharacters(wordvalue) |
|
689 |
pagedWriter.writeEndElement() |
|
679 | 690 |
} else { |
680 |
pagedWriter.writeCharacters("\n") |
|
681 |
pagedWriter.writeStartElement("span", ["title":interpvalue, "id":wordid]) |
|
691 |
|
|
682 | 692 |
} |
683 |
|
|
684 |
pagedWriter.writeCharacters(wordvalue) |
|
685 |
pagedWriter.writeEndElement() |
|
686 | 693 |
//pagedWriter.writeComment("\n") |
687 | 694 |
lastword=wordvalue; |
688 | 695 |
wordvalue="" // reset |
... | ... | |
744 | 751 |
} |
745 | 752 |
return true; |
746 | 753 |
} |
747 |
|
|
754 |
|
|
748 | 755 |
private void writeMediaAccess(def time) { |
749 |
|
|
756 |
|
|
750 | 757 |
pagedWriter.writeCharacters(" "); |
751 | 758 |
pagedWriter.writeStartElement("a"); |
752 | 759 |
pagedWriter.writeAttribute("onclick", "txmcommand('id', 'org.txm.backtomedia.commands.function.BackToMedia', 'corpus', '"+basename+"', 'text', '"+txtname+"', 'time', '"+time+"')"); |
Formats disponibles : Unified diff