Révision 2268
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/transcriberLoader.groovy (revision 2268) | ||
---|---|---|
112 | 112 |
Toolbox.getMetadataColumnSeparator(), |
113 | 113 |
Toolbox.getMetadataTextSeparator(), 1) |
114 | 114 |
} |
115 |
else |
|
115 |
else {
|
|
116 | 116 |
println "no metadata file: "+allMetadataFile |
117 |
} |
|
117 | 118 |
|
118 | 119 |
File propertyFile = new File(srcDir, "import.properties")//default |
119 | 120 |
Properties props = new Properties(); |
... | ... | |
289 | 290 |
cpb.done() |
290 | 291 |
|
291 | 292 |
//copy transcriber.css |
292 |
File cssfile = new File(Toolbox.getTxmHomePath(), "css/transcriber.css") |
|
293 |
File cssfile = new File(Toolbox.getTxmHomePath(), "css/transcriber.css") |
|
294 |
File cssTXMFile = new File(Toolbox.getTxmHomePath(), "css/txm.css") |
|
293 | 295 |
if (cssfile.exists() && htmlDir.exists()) { |
294 |
FileCopy.copy(cssfile, new File(htmlDir, "transcriber.css"));
|
|
295 |
FileCopy.copy(cssfile, new File(htmlDir, "onepage/transcriber.css"));
|
|
296 |
FileCopy.copy(cssfile, new File(htmlDir, "onepage/transcriber.css"));
|
|
297 |
FileCopy.copy(cssfile, new File(htmlDir, "default/txm.css"));
|
|
296 | 298 |
FileCopy.copy(cssfile, new File(htmlDir, "default/transcriber.css")); |
297 | 299 |
} |
298 | 300 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/pager.groovy (revision 2268) | ||
---|---|---|
44 | 44 |
* |
45 | 45 |
*/ |
46 | 46 |
class pager { |
47 |
|
|
47 |
|
|
48 | 48 |
boolean SIMPLE_TOOLTIP = false; // show less properties in word tooltips |
49 | 49 |
String ENQ_HIGHLIGHT_ELEMENT = "b" |
50 |
|
|
50 |
|
|
51 | 51 |
List<String> NoSpaceBefore; |
52 |
|
|
52 |
|
|
53 | 53 |
/** The No space after. */ |
54 | 54 |
List<String> NoSpaceAfter; |
55 |
|
|
55 |
|
|
56 | 56 |
/** The pages. */ |
57 | 57 |
def pages = []; |
58 | 58 |
def indexes = []; |
59 |
|
|
59 |
|
|
60 | 60 |
/** The wordcount. */ |
61 | 61 |
int wordcount = 0; |
62 |
|
|
62 |
|
|
63 | 63 |
/** The pagecount. */ |
64 | 64 |
int pagecount = 0; |
65 |
|
|
65 |
|
|
66 | 66 |
/** The wordmax. */ |
67 | 67 |
int wordmax = 10; |
68 |
|
|
68 |
|
|
69 | 69 |
/** The wordid. */ |
70 | 70 |
String wordid; |
71 |
|
|
71 |
|
|
72 | 72 |
/** The first word. */ |
73 | 73 |
boolean firstWord = true; |
74 |
|
|
74 |
|
|
75 | 75 |
/** The wordvalue. */ |
76 | 76 |
String wordvalue; |
77 |
|
|
77 |
|
|
78 | 78 |
/** The interpvalue. */ |
79 | 79 |
String interpvalue; |
80 |
|
|
80 |
|
|
81 | 81 |
/** The lastword. */ |
82 | 82 |
String lastword = " "; |
83 |
|
|
83 |
|
|
84 | 84 |
/** The wordtype. */ |
85 | 85 |
String wordtype; |
86 |
|
|
86 |
|
|
87 | 87 |
/** The flagform. */ |
88 | 88 |
boolean flagform = false; |
89 |
|
|
89 |
|
|
90 | 90 |
/** The flaginterp. */ |
91 | 91 |
boolean flaginterp = false; |
92 |
|
|
92 |
|
|
93 | 93 |
boolean flagcomment = false; |
94 |
|
|
94 |
|
|
95 | 95 |
/** The url. */ |
96 | 96 |
private def url; |
97 |
|
|
97 |
|
|
98 | 98 |
/** The input data. */ |
99 | 99 |
private def inputData; |
100 |
|
|
100 |
|
|
101 | 101 |
/** The factory. */ |
102 | 102 |
private def factory; |
103 |
|
|
103 |
|
|
104 | 104 |
/** The parser. */ |
105 | 105 |
private XMLStreamReader parser; |
106 |
|
|
106 |
|
|
107 | 107 |
/** The writer. */ |
108 | 108 |
XMLStreamWriter writer; |
109 | 109 |
BufferedOutputStream output; |
110 |
|
|
110 |
|
|
111 | 111 |
File txmfile; |
112 |
|
|
112 |
|
|
113 | 113 |
File outfile; |
114 |
|
|
114 |
|
|
115 | 115 |
String corpusname =""; |
116 | 116 |
String cuttingTag = "pb" |
117 | 117 |
String txtname; |
118 | 118 |
File htmlDir; |
119 | 119 |
File defaultDir; |
120 | 120 |
Metadatas metadatas; |
121 |
|
|
121 |
|
|
122 | 122 |
def interviewers = []; |
123 | 123 |
def eventTranslations = ["^^":"mot inconnu", "?":"orthographe incertaine", |
124 | 124 |
"()":"rupture de syntaxe", "b":"bruit indéterminé", |
... | ... | |
160 | 160 |
this.txmfile = txmfile; |
161 | 161 |
this.htmlDir = htmlDir; |
162 | 162 |
this.txtname = txtname; |
163 |
|
|
163 |
|
|
164 | 164 |
inputData = url.openStream(); |
165 | 165 |
factory = XMLInputFactory.newInstance(); |
166 | 166 |
parser = factory.createXMLStreamReader(inputData); |
167 |
|
|
167 |
|
|
168 | 168 |
defaultDir = new File(htmlDir, "default") |
169 | 169 |
defaultDir.mkdir() |
170 | 170 |
new File(htmlDir, "onepage").mkdir() |
171 | 171 |
outfile = new File(htmlDir, "onepage/${txtname}.html"); |
172 | 172 |
createOutput(outfile) |
173 |
|
|
173 |
|
|
174 | 174 |
try { |
175 | 175 |
process(); |
176 | 176 |
} catch(Exception e) { |
... | ... | |
181 | 181 |
} |
182 | 182 |
} |
183 | 183 |
} |
184 |
|
|
184 |
|
|
185 | 185 |
/** |
186 | 186 |
* Creates the output. |
187 | 187 |
* |
... | ... | |
194 | 194 |
XMLOutputFactory outfactory = XMLOutputFactory.newInstance(); |
195 | 195 |
output = new BufferedOutputStream(new FileOutputStream(outfile)) |
196 | 196 |
writer = outfactory.createXMLStreamWriter(output, "UTF-8");//create a new file |
197 |
|
|
197 |
|
|
198 | 198 |
return true; |
199 | 199 |
} catch (Exception e) { |
200 | 200 |
System.out.println(e.getLocalizedMessage()); |
201 | 201 |
return false; |
202 | 202 |
} |
203 | 203 |
} |
204 |
|
|
204 |
|
|
205 | 205 |
/** The events. */ |
206 | 206 |
List<String> events = []; |
207 | 207 |
String previousEvent = "", nextEvent = ""; |
... | ... | |
209 | 209 |
* Process. |
210 | 210 |
*/ |
211 | 211 |
void process() { |
212 |
|
|
212 |
|
|
213 | 213 |
String previousElem = ""; |
214 | 214 |
boolean parolesRaportees = false; |
215 | 215 |
boolean firstWord = true; |
... | ... | |
221 | 221 |
ArrayList<String> whos = []; |
222 | 222 |
HashMap<String, String> speakers = new HashMap<String, String>(); |
223 | 223 |
HashMap<String, String> topics = new HashMap<String, String>(); |
224 |
|
|
224 |
|
|
225 | 225 |
writer.writeStartDocument("UTF-8","1.0"); |
226 | 226 |
writer.writeStartElement("html"); |
227 | 227 |
//<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> |
... | ... | |
231 | 231 |
writer.writeAttribute("charset", "UTF-8"); |
232 | 232 |
writer.writeEndElement(); |
233 | 233 |
writer.writeStartElement("head"); |
234 |
|
|
234 |
|
|
235 | 235 |
//<link rel="stylesheet" type="text/css" href="class.css" /> |
236 | 236 |
writer.writeStartElement("link"); |
237 | 237 |
writer.writeAttribute("rel", "stylesheet"); |
... | ... | |
239 | 239 |
writer.writeAttribute("href", "transcriber.css"); |
240 | 240 |
writer.writeEndElement(); |
241 | 241 |
writer.writeEndElement(); |
242 |
|
|
242 |
|
|
243 | 243 |
nbBreak++ |
244 | 244 |
writer.writeStartElement("body"); |
245 |
writer.writeAttribute("class", "txmeditionpage") |
|
245 | 246 |
writer.writeEmptyElement("pb"); |
246 | 247 |
writer.writeAttribute("id", ""+nbBreak); |
247 | 248 |
pages << new File(defaultDir, "${txtname}_${nbBreak}.html") |
248 |
|
|
249 |
|
|
249 | 250 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) { |
250 | 251 |
switch (event) { |
251 | 252 |
case XMLStreamConstants.START_ELEMENT: |
252 | 253 |
localname = parser.getLocalName(); |
253 | 254 |
switch (localname) { |
254 | 255 |
case "text": |
256 |
|
|
255 | 257 |
writer.writeStartElement("h2"); |
256 |
writer.writeAttribute("class","titre");
|
|
258 |
writer.writeAttribute("class","title");
|
|
257 | 259 |
String title = parser.getAttributeValue(null, "title"); |
260 |
|
|
258 | 261 |
if (title != null) { |
259 | 262 |
writer.writeCharacters(title); |
260 | 263 |
} else { |
261 | 264 |
writer.writeCharacters("Transcription "+txmfile.getName().substring(0, txmfile.getName().length() - 4)); |
262 | 265 |
} |
263 |
writer.writeEndElement(); |
|
264 |
if(metadatas != null) { |
|
266 |
|
|
267 |
writer.writeEmptyElement("br"); |
|
268 |
writer.writeStartElement("a"); |
|
269 |
writer.writeAttribute("onclick", "txmcommand('id', 'org.txm.backtomedia.commands.function.BackToMedia', 'corpus', '"+corpusname+"', 'text', '"+txtname+"', 'time', '0.0')"); |
|
270 |
writer.writeAttribute("style", "cursor: pointer;") |
|
271 |
writer.writeAttribute("class", "play-media") |
|
272 |
writer.writeCharacters(" ♪♪"); |
|
273 |
writer.writeEndElement(); // a |
|
274 |
|
|
275 |
writer.writeEndElement(); // h2 |
|
276 |
|
|
277 |
String subtitle = parser.getAttributeValue(null, "subtitle"); |
|
278 |
if (subtitle != null && subtitle.length() > 0) { |
|
279 |
writer.writeStartElement("h3"); |
|
280 |
writer.writeAttribute("class", "subtitle"); |
|
281 |
writer.writeCharacters(subtitle); |
|
282 |
writer.writeEndElement(); // h3 |
|
283 |
} |
|
284 |
|
|
285 |
// println "metadatas != null: "+(metadatas != null) |
|
286 |
// if (metadatas != null) { |
|
265 | 287 |
writer.writeStartElement("table"); |
266 | 288 |
boolean grey = false; |
267 |
for (String name : metadatas.getPropertyNames()) { |
|
268 |
if ("title" == name) continue; // ignore "title" metadata |
|
289 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) { |
|
290 |
String name = parser.getAttributeName(i); |
|
291 |
String value = parser.getAttributeValue(i); |
|
292 |
|
|
293 |
if ("title" == name) { |
|
294 |
continue; // ignore "title" metadata |
|
295 |
} |
|
296 |
|
|
269 | 297 |
grey = !grey; |
270 | 298 |
writer.writeStartElement("tr"); |
271 |
if (grey) |
|
299 |
if (grey) {
|
|
272 | 300 |
writer.writeAttribute("style","background-color:lightgrey;") |
273 |
String value = parser.getAttributeValue(null, name); |
|
301 |
} |
|
302 |
|
|
274 | 303 |
if (value != null) { |
275 | 304 |
writer.writeStartElement("td"); |
276 | 305 |
writer.writeCharacters(name); |
... | ... | |
286 | 315 |
writer.writeEndElement(); |
287 | 316 |
} |
288 | 317 |
writer.writeEndElement(); |
289 |
} |
|
318 |
// }
|
|
290 | 319 |
break; |
291 | 320 |
case "Topics": |
292 | 321 |
/*writer.writeStartElement("h2"); |
... | ... | |
347 | 376 |
flagcomment = true; |
348 | 377 |
break; |
349 | 378 |
case "div": |
350 |
writer.writeStartElement("div"); |
|
351 |
writer.writeAttribute("class", "section"); |
|
352 |
String type = parser.getAttributeValue(null,"type"); |
|
353 |
String desc = parser.getAttributeValue(null,"topic"); |
|
354 |
String metadata = parser.getAttributeValue(null,"metadata"); |
|
379 |
|
|
380 |
nbBreak++ |
|
381 |
writer.writeEmptyElement("pb"); |
|
382 |
writer.writeAttribute("id", ""+nbBreak); |
|
383 |
writer.writeCharacters("\n"); |
|
384 |
|
|
385 |
pages << new File(defaultDir, "${txtname}_${nbBreak}.html") |
|
386 |
indexes << wordid |
|
387 |
|
|
388 |
wordcount = 0; |
|
389 |
shouldBreak = false; |
|
390 |
|
|
391 |
writer.writeStartElement("div") |
|
392 |
writer.writeAttribute("class", "section") |
|
393 |
|
|
394 |
String type = parser.getAttributeValue(null, "type") |
|
395 |
writer.writeAttribute("type", ""+type) |
|
396 |
String desc = parser.getAttributeValue(null, "topic") |
|
397 |
|
|
355 | 398 |
if (type != null || desc != null) { |
356 |
writer.writeStartElement("h3"); |
|
357 |
if (type != null || type.length() ==0) { |
|
399 |
writer.writeStartElement("h2"); |
|
400 |
writer.writeAttribute("class", "section-title") |
|
401 |
if (type != null || type.length() == 0) { |
|
358 | 402 |
writer.writeCharacters(type+": "+desc); |
359 | 403 |
} else { |
360 |
writer.writeCharacters(desc);
|
|
404 |
writer.writeCharacters(desc) |
|
361 | 405 |
} |
362 |
writer.writeEndElement(); // h3 |
|
363 | 406 |
|
364 |
if (metadata != null && metadata.length() > 0) { // the metadata to show |
|
365 |
writer.writeStartElement("ul"); |
|
366 |
for (def m : metadata.split("\t")) { |
|
367 |
writer.writeStartElement("li"); |
|
368 |
writer.writeCharacters(m); |
|
369 |
writer.writeEndElement(); // li |
|
407 |
if (parser.getAttributeValue(null,"startTime") != null) { |
|
408 |
writer.writeEmptyElement("br"); |
|
409 |
writer.writeStartElement("a") |
|
410 |
writer.writeAttribute("onclick", "txmcommand('id', 'org.txm.backtomedia.commands.function.BackToMedia', 'corpus', '"+corpusname+"', 'text', '"+txtname+"', 'time', '"+parser.getAttributeValue(null,"startTime")+"')"); |
|
411 |
writer.writeAttribute("style", "cursor: pointer;") |
|
412 |
writer.writeAttribute("class", "play-media") |
|
413 |
writer.writeCharacters(" ♪♪") |
|
414 |
writer.writeEndElement() // a |
|
415 |
} |
|
416 |
|
|
417 |
writer.writeEndElement(); // h2 |
|
418 |
} |
|
419 |
|
|
420 |
String metadata = parser.getAttributeValue(null, "metadata") |
|
421 |
if (metadata != null && metadata.length() > 0) { // the metadata to show |
|
422 |
writer.writeStartElement("ul") |
|
423 |
//println "metadata=$metadata" |
|
424 |
for (def m : metadata.split("<li>")) { |
|
425 |
writer.writeStartElement("li") |
|
426 |
writer.writeCharacters(m) |
|
427 |
writer.writeEndElement() // li |
|
428 |
} |
|
429 |
writer.writeEndElement() // ul |
|
430 |
} else if (parser.getAttributeCount() > 1) { // process all attributes |
|
431 |
writer.writeStartElement("ul") |
|
432 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) { |
|
433 |
String name = parser.getAttributeLocalName(i) |
|
434 |
if (!"type".equals(name) |
|
435 |
&& !"topic".equals(name) |
|
436 |
&& !"startTime".equals(name) |
|
437 |
&& !"endTime".equals(name)) { |
|
438 |
writer.writeStartElement("li") |
|
439 |
writer.writeCharacters(""+name+": "+parser.getAttributeValue(i)) |
|
440 |
writer.writeEndElement() // li |
|
370 | 441 |
} |
371 |
writer.writeEndElement(); // ul |
|
372 |
} else if (parser.getAttributeCount() > 1) { // process all attributes |
|
373 |
writer.writeStartElement("ul"); |
|
374 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) { |
|
375 |
String name = parser.getAttributeLocalName(i); |
|
376 |
if (!"type".equals(name) |
|
377 |
&& !"topic".equals(name) |
|
378 |
&& !"startTime".equals(name) |
|
379 |
&& !"endTime".equals(name)) { |
|
380 |
writer.writeStartElement("li"); |
|
381 |
writer.writeCharacters(""+name+": "+parser.getAttributeValue(i)); |
|
382 |
writer.writeEndElement(); // li |
|
383 |
} |
|
384 |
} |
|
385 |
writer.writeEndElement(); // ul |
|
386 | 442 |
} |
443 |
writer.writeEndElement(); // ul |
|
387 | 444 |
} |
445 |
|
|
388 | 446 |
break; |
389 | 447 |
case "sp": |
390 | 448 |
endBoldIfNeeded() |
... | ... | |
392 | 450 |
firstWho = true; |
393 | 451 |
spokenTurn = false; |
394 | 452 |
overlapping = false |
395 |
|
|
453 |
|
|
396 | 454 |
writer.writeStartElement("p"); |
397 | 455 |
writer.writeAttribute("class", "turn"); |
398 |
|
|
456 |
|
|
399 | 457 |
overlapping = ("true" == parser.getAttributeValue(null,"overlap")) |
400 | 458 |
String spid = parser.getAttributeValue(null,"speaker"); |
459 |
|
|
401 | 460 |
whos = [] |
402 | 461 |
if (overlapping) { |
403 | 462 |
writer.writeEmptyElement("br"); |
404 | 463 |
writeSpeaker(parser.getAttributeValue(null,"speaker"), false) |
405 |
|
|
464 |
|
|
406 | 465 |
writer.writeEmptyElement("br"); |
407 | 466 |
whos = spid.split(" ") |
408 | 467 |
} |
409 |
|
|
468 |
|
|
410 | 469 |
break; |
411 | 470 |
case "u": |
412 | 471 |
writer.writeCharacters("\n"); |
413 | 472 |
this.currentTime = parser.getAttributeValue(null,"time"); |
414 |
|
|
473 |
|
|
415 | 474 |
if (previousElem == "u" && writenLength == 0) { // if previous u had no words, it was a silence |
416 | 475 |
writer.writeStartElement("span"); |
417 | 476 |
writer.writeAttribute("class", "event"); |
... | ... | |
419 | 478 |
writer.writeEndElement(); // span |
420 | 479 |
writer.writeEmptyElement("br"); |
421 | 480 |
} |
422 |
|
|
481 |
|
|
423 | 482 |
String spk = parser.getAttributeValue(null, "spk") |
424 | 483 |
if (spk != null && spk != previousSPK) { |
425 | 484 |
endBoldIfNeeded() |
... | ... | |
427 | 486 |
writeSpeaker(parser.getAttributeValue(null, "spk"), overlapping) |
428 | 487 |
startBoldIfNeeded() |
429 | 488 |
} |
430 |
|
|
489 |
|
|
431 | 490 |
writeCurrentTime() |
432 | 491 |
previousSPK = spk |
433 |
|
|
492 |
|
|
434 | 493 |
// writenLength = 0; |
435 | 494 |
/*writer.writeStartElement("span"); |
436 | 495 |
writer.writeAttribute("class", "sync"); |
437 | 496 |
writer.writeCharacters("["+parser.getAttributeValue(null,"time")+"]"); |
438 | 497 |
writer.writeEndElement();*/ |
439 |
|
|
498 |
|
|
440 | 499 |
break; |
441 | 500 |
case "event": |
442 | 501 |
spokenTurn = true; |
... | ... | |
458 | 517 |
events.remove(events.size()-1) |
459 | 518 |
} |
460 | 519 |
else if (parser.getAttributeValue(null, "extent") == "begin") { |
461 |
|
|
520 |
|
|
462 | 521 |
writer.writeCharacters(" ["+desc+"> "); |
463 | 522 |
events.add(desc) |
464 | 523 |
} |
... | ... | |
484 | 543 |
wordid = (parser.getAttributeValue(i)); |
485 | 544 |
break; |
486 | 545 |
} |
487 |
|
|
546 |
|
|
488 | 547 |
wordcount++; |
489 | 548 |
if (wordcount >= wordmax) { |
490 | 549 |
shouldBreak = true; |
491 | 550 |
} |
492 |
|
|
551 |
|
|
493 | 552 |
if (firstWord) { |
494 | 553 |
indexes << wordid |
495 | 554 |
firstWord = false; |
496 | 555 |
} |
497 |
|
|
556 |
|
|
498 | 557 |
break; |
499 |
|
|
558 |
|
|
500 | 559 |
case "ana": |
501 |
|
|
560 |
|
|
502 | 561 |
String type = parser.getAttributeValue(null,"type").substring(1); |
503 | 562 |
if (SIMPLE_TOOLTIP) { |
504 | 563 |
if (type.contains("lemma") || type.contains("pos")) { |
... | ... | |
510 | 569 |
interpvalue+=", "+type+"=" |
511 | 570 |
} |
512 | 571 |
break; |
513 |
|
|
572 |
|
|
514 | 573 |
case "form": |
515 | 574 |
wordvalue=""; |
516 | 575 |
interpvalue =""; |
... | ... | |
536 | 595 |
break; |
537 | 596 |
case "Speaker": |
538 | 597 |
break; |
539 |
|
|
598 |
|
|
540 | 599 |
case "div": |
541 | 600 |
//writer.writeCharacters("}"); |
542 |
|
|
601 |
|
|
543 | 602 |
writer.writeEndElement(); // div |
544 | 603 |
writer.writeCharacters("\n"); |
545 | 604 |
break; |
... | ... | |
553 | 612 |
writer.writeEndElement(); |
554 | 613 |
writer.writeEmptyElement("br"); |
555 | 614 |
} |
556 |
|
|
615 |
|
|
557 | 616 |
writer.writeEndElement(); // p |
558 |
|
|
617 |
|
|
559 | 618 |
if (shouldBreak) { |
560 | 619 |
nbBreak++ |
561 | 620 |
writer.writeEmptyElement("pb"); |
562 | 621 |
writer.writeAttribute("id", ""+nbBreak); |
563 | 622 |
writer.writeCharacters("\n"); |
564 |
|
|
623 |
|
|
565 | 624 |
pages << new File(defaultDir, "${txtname}_${nbBreak}.html") |
566 | 625 |
indexes << wordid |
567 |
|
|
626 |
|
|
568 | 627 |
wordcount = 0; |
569 | 628 |
shouldBreak = false; |
570 | 629 |
} |
... | ... | |
590 | 649 |
String endOfLastWord = ""; |
591 | 650 |
if(l > 0) |
592 | 651 |
endOfLastWord = lastword.subSequence(l-1, l); |
593 |
|
|
652 |
|
|
594 | 653 |
if(interpvalue != null) |
595 | 654 |
interpvalue = interpvalue.replace("\"","&quot;"); |
596 | 655 |
if(events.size() > 0) |
597 | 656 |
interpvalue = interpvalue.replace("event=", "event="+events.toString().replace("\"","&quot;")); // remove ", " |
598 |
|
|
657 |
|
|
599 | 658 |
if(nextEvent.length() > 0) |
600 | 659 |
{ |
601 | 660 |
interpvalue = interpvalue.replace("event=", "event="+nextEvent+", ") |
... | ... | |
617 | 676 |
// println " SPACE" |
618 | 677 |
writer.writeCharacters(" "); |
619 | 678 |
} |
620 |
|
|
679 |
|
|
621 | 680 |
if (interpvalue.contains("rapp1")) { |
622 | 681 |
writer.writeCharacters(" «"); |
623 | 682 |
} else if (wordvalue == "\"") { |
... | ... | |
642 | 701 |
writer.writeCharacters("_[!]"); |
643 | 702 |
writer.writeEndElement(); |
644 | 703 |
} |
645 |
|
|
704 |
|
|
646 | 705 |
if (interpvalue.contains("rapp2")) { |
647 | 706 |
writer.writeCharacters(" » "); |
648 | 707 |
} |
649 |
|
|
708 |
|
|
650 | 709 |
lastword=wordvalue; |
651 | 710 |
break; |
652 | 711 |
} |
653 |
|
|
712 |
|
|
654 | 713 |
break; |
655 |
|
|
714 |
|
|
656 | 715 |
case XMLStreamConstants.CHARACTERS: |
657 | 716 |
if(flagform) |
658 | 717 |
if(parser.getText().length() > 0) |
... | ... | |
664 | 723 |
} |
665 | 724 |
} |
666 | 725 |
writer.writeEndElement(); |
667 |
|
|
726 |
|
|
668 | 727 |
writer.writeEmptyElement("pb"); |
669 | 728 |
nbBreak++ |
670 | 729 |
writer.writeAttribute("id", ""+nbBreak); |
671 |
|
|
730 |
|
|
672 | 731 |
writer.writeEndElement(); |
673 | 732 |
writer.close(); |
674 | 733 |
output.close(); |
675 | 734 |
if (parser != null) parser.close(); |
676 | 735 |
if (inputData != null) inputData.close(); |
677 |
|
|
736 |
|
|
678 | 737 |
File txmhome = new File(org.txm.Toolbox.getTxmHomePath()); |
679 | 738 |
File xlsDir = new File(txmhome, "xsl"); |
680 | 739 |
File xslfile = new File(xlsDir,"breakByMilestone.xsl"); |
... | ... | |
685 | 744 |
// println "html: "+outfile |
686 | 745 |
// println "pages: "+pages |
687 | 746 |
// println "words: "+indexes |
688 |
|
|
689 |
|
|
747 |
|
|
748 |
|
|
690 | 749 |
if (pages.size() > 1) { |
691 | 750 |
for (int i = 1 ; i < nbBreak ; i++) { |
692 | 751 |
ApplyXsl2 a = new ApplyXsl2(xslfile.getAbsolutePath()); |
693 |
String[] params = ["pbval1", i,"pbval2", i+1]; |
|
694 |
|
|
752 |
String[] params = ["pbval1", i, "pbval2", i+1];
|
|
753 |
|
|
695 | 754 |
File resultfile = pages[i-1] |
696 | 755 |
//println "BBmilestones: "+i+" "+(i+1)+" in file "+resultfile |
697 | 756 |
//println "process $outfile -> $resultfile" |
... | ... | |
702 | 761 |
FileCopy.copy(outfile, page) |
703 | 762 |
} |
704 | 763 |
} |
705 |
|
|
706 |
private void writeCurrentTime() |
|
707 |
{ |
|
764 |
|
|
765 |
private void writeCurrentTime() { |
|
708 | 766 |
writer.writeStartElement("span"); |
709 | 767 |
writer.writeAttribute("class", "sync"); |
710 | 768 |
writer.writeCharacters(currentTime); |
769 |
|
|
770 |
writer.writeStartElement("a"); |
|
771 |
writer.writeAttribute("onclick", "txmcommand('id', 'org.txm.backtomedia.commands.function.BackToMedia', 'corpus', '"+corpusname+"', 'text', '"+txtname+"', 'time', '"+currentTime+"')"); |
|
772 |
writer.writeAttribute("style", "cursor: pointer;") |
|
773 |
writer.writeAttribute("class", "play-media") |
|
774 |
writer.writeCharacters(" ♪♪"); |
|
775 |
writer.writeEndElement(); // a |
|
776 |
|
|
711 | 777 |
writer.writeEndElement(); |
712 | 778 |
} |
713 |
|
|
779 |
|
|
714 | 780 |
private void writeSpeaker(String spk, boolean overlapping) { |
715 |
|
|
781 |
|
|
716 | 782 |
writer.writeStartElement("span"); |
717 | 783 |
writer.writeAttribute("class", "spk"); |
718 | 784 |
if(interviewers.contains(spk)) |
... | ... | |
721 | 787 |
bold = false; |
722 | 788 |
spk = spk.replaceAll('^([^0-9]*)([0-9]+)$', '$1 $2'); |
723 | 789 |
if (overlapping) writer.writeCharacters("// ") |
790 |
|
|
724 | 791 |
writer.writeCharacters(spk+": ") |
792 |
|
|
725 | 793 |
writer.writeEndElement(); // span@class=spk |
726 | 794 |
} |
727 |
|
|
795 |
|
|
728 | 796 |
private String translateEvent(String desc) { |
729 | 797 |
if(eventTranslations.containsKey(desc)) |
730 | 798 |
return eventTranslations.get(desc); |
731 | 799 |
else |
732 | 800 |
return desc; |
733 | 801 |
} |
734 |
|
|
802 |
|
|
735 | 803 |
boolean boldOpenned = false; |
736 | 804 |
private void startBoldIfNeeded() { |
737 | 805 |
if (bold) { |
... | ... | |
739 | 807 |
boldOpenned = true; |
740 | 808 |
} |
741 | 809 |
} |
742 |
|
|
810 |
|
|
743 | 811 |
private endBoldIfNeeded() { |
744 | 812 |
if (boldOpenned) { |
745 | 813 |
// println "CLOSE BOLD" |
... | ... | |
747 | 815 |
boldOpenned = false; |
748 | 816 |
} |
749 | 817 |
} |
750 |
|
|
818 |
|
|
751 | 819 |
// private String formatTime(float time, boolean doshort) |
752 | 820 |
// { |
753 | 821 |
// String rez = " "; |
... | ... | |
787 | 855 |
// // } |
788 | 856 |
// return rez; |
789 | 857 |
// } |
790 |
|
|
858 |
|
|
791 | 859 |
/** |
792 | 860 |
* Gets the page files. |
793 | 861 |
* |
... | ... | |
796 | 864 |
public ArrayList<File> getPageFiles() { |
797 | 865 |
return pages; |
798 | 866 |
} |
799 |
|
|
867 |
|
|
800 | 868 |
/** |
801 | 869 |
* Gets the idx. |
802 | 870 |
* |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/AddSections.groovy (revision 2268) | ||
---|---|---|
10 | 10 |
File trsDirectory = new File("/home/mdecorde/TEMP/ANTRACT/AF/trs") |
11 | 11 |
def idTRSColumn = "Lien notice principale" |
12 | 12 |
def typeColumns = ["Identifiant de la notice"] |
13 |
def topicColumns = ["Titre propre", "Genre", "antract_debut", "antract_fin"]
|
|
13 |
def topicColumns = ["Titre propre", "Notes du titre"]
|
|
14 | 14 |
def startTimeColumn = "antract_debut" |
15 | 15 |
def endTimeColumn = "antract_fin" |
16 | 16 |
//def metadataColumns = ["Identifiant de la notice", "Titre propre", "antract_debut", "antract_fin"] |
... | ... | |
21 | 21 |
return |
22 | 22 |
} |
23 | 23 |
|
24 |
|
|
24 |
println "Loading data from $metadataFile..." |
|
25 | 25 |
CsvReader reader = new CsvReader(metadataFile.getAbsolutePath(), "\t".charAt(0), Charset.forName("UTF-8")) |
26 | 26 |
reader.readHeaders() |
27 | 27 |
def header = reader.getHeaders() |
... | ... | |
80 | 80 |
} |
81 | 81 |
|
82 | 82 |
def sectionGroupsToInsert = [:] |
83 |
println "Reading data..." |
|
83 | 84 |
while (reader.readRecord()) { |
84 | 85 |
String id = reader.get(idTRSColumn).trim() |
85 | 86 |
if (id.endsWith(".mp4")) id = id.substring(0, id.length()-4) |
... | ... | |
95 | 96 |
|
96 | 97 |
def m = [:] |
97 | 98 |
|
98 |
for (def todo : ["metadata":metadataColumns, "topic":topicColumns, "type":typeColumns]) {
|
|
99 |
for (def todo : ["topic":topicColumns, "type":typeColumns]) { |
|
99 | 100 |
def data = [] |
100 | 101 |
for (def col : todo.value) { |
101 | 102 |
if (reader.get(col).trim().length() > 0) { |
... | ... | |
104 | 105 |
} |
105 | 106 |
m[todo.key] = data.join("\t") |
106 | 107 |
} |
108 |
for (def col : metadataColumns) { |
|
109 |
m[AsciiUtils.buildAttributeId(col)] = reader.get(col) |
|
110 |
} |
|
107 | 111 |
|
108 | 112 |
m["startTime"] = strTotime(reader.get(startTimeColumn)) |
109 | 113 |
m["endTime"] = strTotime(reader.get(endTimeColumn)) |
... | ... | |
113 | 117 |
} |
114 | 118 |
} |
115 | 119 |
|
116 |
println "N sections: "+sectionGroupsToInsert.size()
|
|
120 |
println "Inserting sections... "+sectionGroupsToInsert.size()
|
|
117 | 121 |
|
118 | 122 |
ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size()) |
119 | 123 |
for (String id : sectionGroupsToInsert.keySet()) { |
... | ... | |
181 | 185 |
|
182 | 186 |
currentSection = found |
183 | 187 |
currentNode = new Node(trsEpisode, "Section", currentSection[2]) |
184 |
//trsEpisode.appendNode(currentNode) |
|
185 | 188 |
} |
186 | 189 |
} |
187 | 190 |
trsSection.remove(turn) |
... | ... | |
201 | 204 |
} |
202 | 205 |
} |
203 | 206 |
cpb.done() |
207 |
println "Done." |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/importer.groovy (revision 2268) | ||
---|---|---|
176 | 176 |
//TOKENIZE |
177 | 177 |
println "Tokenizing "+files.length+" files from $txmDir" |
178 | 178 |
File tokenizedDir = new File(binDir, "tokenized") |
179 |
tokenizedDir.deleteDir() |
|
179 | 180 |
tokenizedDir.mkdir() |
180 | 181 |
cpb = new ConsoleProgressBar(files.length) |
181 | 182 |
for (File pfile : files) { |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZDefaultPagerStep.groovy (revision 2268) | ||
---|---|---|
323 | 323 |
|
324 | 324 |
rend = getAttributeValue(parser, null, "rend") |
325 | 325 |
if (rend == null) rend = ""; |
326 |
|
|
326 | 327 |
switch (localname) { |
327 | 328 |
case "text": |
328 | 329 |
LinkedHashMap attributes = new LinkedHashMap(); |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/office/Table2CorpusMacro.groovy (revision 2268) | ||
---|---|---|
1 |
package org.txm.macro.csv |
|
2 |
|
|
3 |
import org.kohsuke.args4j.* |
|
4 |
import groovy.transform.Field |
|
5 |
import java.nio.charset.Charset |
|
6 |
import org.txm.rcp.swt.widget.parameters.* |
|
7 |
import org.txm.utils.* |
|
8 |
import javax.xml.stream.* |
|
9 |
import java.net.URL |
|
10 |
|
|
11 |
@Field @Option(name="inputFile", usage="CSV File", widget="File", required=false, def="file.xlsx") |
|
12 |
File inputFile; |
|
13 |
|
|
14 |
@Field @Option(name="outputDirectory", usage="output directory", widget="File", required=false, def="directory") |
|
15 |
File outputDirectory; |
|
16 |
|
|
17 |
@Field @Option(name="textSelector", usage="column_to_test=regexp", widget="String", required=false, def="Type de notice=Notice sommaire") |
|
18 |
def textSelector; |
|
19 |
|
|
20 |
@Field @Option(name="structureSelector", usage="column_to_test=regexp", widget="String", required=false, def="Type de notice=Notice sujet") |
|
21 |
def structureSelector; |
|
22 |
|
|
23 |
@Field @Option(name="textIDColumn", usage="text id column", widget="String", required=false, def="Identifiant de la notice") |
|
24 |
def textIDColumn; |
|
25 |
|
|
26 |
@Field @Option(name="joinColumn", usage="jointure column, values should point to the textIDColumn values", widget="String", required=false, def="Lien notice principale") |
|
27 |
def joinColumn; |
|
28 |
|
|
29 |
@Field @Option(name="textMetadataColumnList", usage="text metadata columns", widget="String", required=false, def="Identifiant de la notice,Date de diffusion,Durée,Genre,Identifiant Matériels (info.)") |
|
30 |
def textMetadataColumnList; |
|
31 |
|
|
32 |
@Field @Option(name="textContentColumnList", usage="text content columns", widget="String", required=false, def="Titre propre,Résumé,Descripteurs (Aff. Lig.),Descripteurs (Aff. Col.),Séquences") |
|
33 |
def textContentColumnList; |
|
34 |
|
|
35 |
@Field @Option(name="structureTag", usage="structure metadata columns", widget="String", required=false, def="div") |
|
36 |
def structureTag; |
|
37 |
|
|
38 |
@Field @Option(name="structureMetadataColumnList", usage="structure metadata columns", widget="String", required=false, def="Identifiant de la notice,Date de diffusion,Durée,Genre,Identifiant Matériels (info.)") |
|
39 |
def structureMetadataColumnList; |
|
40 |
|
|
41 |
@Field @Option(name="structureContentColumnList", usage="structure content columns", widget="String", required=false, def="Titre propre,Résumé,Descripteurs (Aff. Lig.),Descripteurs (Aff. Col.),Séquences") |
|
42 |
def structureContentColumnList; |
|
43 |
|
|
44 |
@Field @Option(name="metadataDateColumnList", usage="metadata columns of type=Date", widget="String", required=false, def="") |
|
45 |
def metadataDateColumnList; |
|
46 |
@Field @Option(name="prefixContentColumnList", usage="metadata columns of type=Date", widget="String", required=false, def="") |
|
47 |
def prefixContentColumnList; |
|
48 |
@Field @Option(name="listContentColumnList", usage="metadata columns of type=Date", widget="String", required=false, def="") |
|
49 |
def listContentColumnList; |
|
50 |
|
|
51 |
|
|
52 |
//@Field @Option(name="structureOrderColumn", usage="structure column coding structure order", widget="String", required=false, def="Titre propre,Résumé,Descripteurs (Aff. Lig.),Descripteurs (Aff. Col.),Séquences") |
|
53 |
// def structureOrderColumn; |
|
54 |
|
|
55 |
if (!ParametersDialog.open(this)) return; |
|
56 |
|
|
57 |
textMetadataColumnList = textMetadataColumnList.split(",") |
|
58 |
textContentColumnList = textContentColumnList.split(",") |
|
59 |
structureMetadataColumnList = structureMetadataColumnList.split(",") |
|
60 |
structureContentColumnList = structureContentColumnList.split(",") |
|
61 |
metadataDateColumnList = metadataDateColumnList.split(",") |
|
62 |
prefixContentColumnList = prefixContentColumnList.split(",") |
|
63 |
listContentColumnList = listContentColumnList.split(",") |
|
64 |
int ti = textSelector.indexOf("=") |
|
65 |
String p1 = textSelector.substring(0, ti) |
|
66 |
String p2 = textSelector.substring(ti+1) |
|
67 |
textSelector = [p1, p2] |
|
68 |
structureSelector = [structureSelector.substring(0, structureSelector.indexOf("=")), structureSelector.substring(structureSelector.indexOf("=")+1)] |
|
69 |
|
|
70 |
|
|
71 |
println "textIDColumn, joinColumn=$textIDColumn, $joinColumn" |
|
72 |
println "textMetadataColumnList columns: $textMetadataColumnList" |
|
73 |
println "textContentColumnList columns: $textContentColumnList" |
|
74 |
println "structureMetadataColumnList columns: $structureMetadataColumnList" |
|
75 |
println "structureContentColumnList columns: $structureContentColumnList" |
|
76 |
|
|
77 |
println "text selector="+textSelector |
|
78 |
println "structure selector="+structureSelector |
|
79 |
println "structureTag="+structureTag |
|
80 |
|
|
81 |
// Open the input table and make sure every column named in the parameters exists
// before any output is produced.
TableReader reader = new TableReader(inputFile);
if (!reader.readHeaders()) {
	println "Error: no header"
	reader.close() // release the input file before aborting
	return
}
def headers = Arrays.asList(reader.getHeaders())
println "table columns: $headers"

def ok = true
// Iterating a map yields entries: key = parameter name (for the message),
// value = the column names that parameter refers to.
for (def list : ["selection":[textIDColumn, joinColumn], "textMetadataColumnList":textMetadataColumnList,
		"textContentColumnList":textContentColumnList, "structureMetadataColumnList":structureMetadataColumnList,
		"structureContentColumnList":structureContentColumnList]) {
	for (String m : list.value) {
		m = m.trim()
		if (!headers.contains(m)) {
			// keep going so that all missing columns are reported in one run
			println "Error: missing ${list.key} column: $m"
			ok = false
		}
	}
}
if (!ok) {
	reader.close() // release the input file before aborting
	return
}
|
102 |
|
|
103 |
// group by text: texts maps a text id to its records, the text record first
// (inserted at index 0) followed by the structure records in reading order.
def texts = new LinkedHashMap()
def nRecord = 0
while (reader.readRecord()) {
	nRecord++

	// Null-safe trim: the null checks below are only meaningful if trimming
	// itself cannot fail on a missing value.
	String textSelectorValue = reader.get(textSelector[0])?.trim()
	String structureSelectorValue = reader.get(structureSelector[0])?.trim()
	if (textSelectorValue != null && textSelectorValue.matches(textSelector[1])) {
		String id = reader.get(textIDColumn).trim()
		if (!texts.containsKey(id)) texts[id] = []
		texts[id].add(0, reader.getRecord())
	} else if (structureSelectorValue != null && structureSelectorValue.matches(structureSelector[1])) {
		// structure records are attached to their text through the join column
		String join = reader.get(joinColumn).trim()
		if (!texts.containsKey(join)) texts[join] = []
		texts[join].add(reader.getRecord())
	} else {
		// neither a text nor a structure line: ignore
	}
}
println "N lines: "+nRecord
println "N groups: "+texts.size()
if (texts.size() == 0) {
	println "No text found. Aborting."
	reader.close() // release the input file before aborting
	return
}
// mkdirs() also creates missing parent directories, which mkdir() does not
outputDirectory.mkdirs()
|
131 |
|
|
132 |
// Write one XML file per text group: <TEI><teiHeader/><text id=...> holding the
// text-level metadata and content, then one <pb/> + structureTag element per
// structure record of the group.
XMLOutputFactory factory = XMLOutputFactory.newInstance()
for (def id : texts.keySet()) {
	def toWrite = texts[id]
	def text = toWrite[0]
	String textSelectorValue = text.get(textSelector[0])?.trim()
	if (textSelectorValue == null || !textSelectorValue.matches(textSelector[1])) {
		// the first record of the group is not a text line: only structures were found
		println "ERROR: '$id' text group with no text line"
		continue
	}
	println "Processing text: $id"

	File outputfile = new File(outputDirectory, id+".xml")
	FileOutputStream output = new FileOutputStream(outputfile)
	XMLStreamWriter writer = null
	try {
		writer = factory.createXMLStreamWriter(output, "UTF-8")

		writer.writeStartDocument("UTF-8","1.0")
		writer.writeCharacters("\n") // simple XML formating
		writer.writeStartElement("TEI")
		writer.writeCharacters("\n") // simple XML formating
		writer.writeStartElement("teiHeader")
		writer.writeEndElement() // teiHeader
		writer.writeCharacters("\n") // simple XML formating
		writer.writeStartElement("text")
		writer.writeAttribute("id", id)

		// text metadata as attributes of <text>
		for (String att : textMetadataColumnList) {
			if (att in metadataDateColumnList) {
				writeMetadataDate(text, att, writer)
			} else {
				writer.writeAttribute(AsciiUtils.buildAttributeId(att), text.get(att)) // struct
			}
		}

		writer.writeCharacters("\n") // simple XML formating

		// the same metadata again, as a readable list inside the text
		writer.writeStartElement("metadata")
		writer.writeStartElement("list")
		writer.writeAttribute("type", "unordered")
		writer.writeCharacters("\n")
		textMetadataColumnList.each { att ->
			writer.writeStartElement("item")
			writer.writeCharacters(att+" : "+text.get(att).replaceAll("\n", ";"))
			writer.writeEndElement() // item
			writer.writeCharacters("\n")
		}
		writer.writeEndElement() // list
		writer.writeEndElement() // metadata
		writer.writeCharacters("\n")

		// text content columns
		for (String att : textContentColumnList) {
			if (att in prefixContentColumnList) {
				writeContentPrefix(text, att, writer)
			} else if (att in listContentColumnList) {
				writeContentList(text, att, writer)
			} else {
				writer.writeStartElement("p");
				writer.writeAttribute("type", att.trim())
				writer.writeCharacters("\n") // simple XML formating

				writer.writeStartElement("head")
				writer.writeStartElement("hi")
				writer.writeCharacters(att+" : ")
				writer.writeEndElement() // hi
				writer.writeEndElement() // head

				writer.writeCharacters(text.get(att)) // get textColumnList content
				writer.writeEndElement() // p
				writer.writeCharacters("\n") // simple XML formating
			}
		}

		// structure records start at index 1, index 0 being the text record
		int pb_n = 1;
		for (int i = 1 ; i < toWrite.size() ; i++) {
			def record = toWrite[i]

			writer.writeEmptyElement("pb") // <pb/>
			writer.writeAttribute("n", ""+pb_n++)

			writer.writeStartElement(structureTag)

			// structure metadata as attributes of the structure element
			for (String att : structureMetadataColumnList) {
				if (att in metadataDateColumnList) {
					writeMetadataDate(record, att, writer)
				} else {
					writer.writeAttribute(AsciiUtils.buildAttributeId(att), record.get(att)) // struct
				}
			}
			writer.writeCharacters("\n")

			writer.writeStartElement("metadata")
			writer.writeStartElement("list")
			writer.writeAttribute("type", "unordered")
			writer.writeCharacters("\n")
			structureMetadataColumnList.each { att ->
				writer.writeStartElement("item")
				writer.writeCharacters(att+" : "+record.get(att).replaceAll("\n", ";"))
				writer.writeEndElement() // item
				writer.writeCharacters("\n")
			}
			writer.writeEndElement() // list
			writer.writeEndElement() // metadata
			writer.writeCharacters("\n")

			// structure content columns: values are read from the structure
			// record itself, not from the text record of the group
			for (String att : structureContentColumnList) {
				if (att in prefixContentColumnList) {
					writeContentPrefix(record, att, writer)
				} else if (att in listContentColumnList) {
					writeContentList(record, att, writer)
				} else {
					writer.writeStartElement("p");
					writer.writeAttribute("type", att.trim())
					writer.writeAttribute("id", record.get(textIDColumn))
					writer.writeCharacters("\n") // simple XML formating

					writer.writeStartElement("head")
					writer.writeStartElement("hi")
					writer.writeCharacters(att+" : ")
					writer.writeEndElement() // hi
					writer.writeEndElement() // head

					writer.writeCharacters(record.get(att)) // get structureContentColumnList content
					writer.writeEndElement() // p
					writer.writeCharacters("\n") // simple XML formating
				}
			}

			writer.writeEndElement() // struct
			writer.writeCharacters("\n") // simple XML formating
		}

		writer.writeEndElement() // text
		writer.writeCharacters("\n") // simple XML formating
		writer.writeEndElement() // TEI
	} finally {
		// XMLStreamWriter.close() does not close the underlying stream, so both
		// are closed, and the stream is closed even if the writer close fails.
		try {
			if (writer != null) writer.close()
		} finally {
			output.close()
		}
	}
}
// all records were loaded in memory while grouping; close the table once, after all texts
reader.close()
|
270 |
|
|
271 |
/**
 * Writes the value of a column as paragraphs.
 *
 * When the value spans several lines, each non-empty line becomes a
 * <p rend="list"> element, including a last line that has no trailing newline.
 * Otherwise the whole value is written in a single <p rend="no-list">.
 *
 * @param record the record to read; only record.get(att) is used
 * @param att the name of the column to write
 * @param writer the XML stream writer receiving the elements
 */
def writeContentList(def record, def att, def writer) {
	writer.writeCharacters("\n")
	String value = record.get(att) ?: "" // a missing value is written as an empty paragraph
	def items = value.split("\n").findAll { it.length() > 0 }
	if (value.contains("\n") && !items.isEmpty()) {
		for (String item : items) {
			writer.writeCharacters("\t")
			writer.writeStartElement("p")
			writer.writeAttribute("rend", "list")
			writer.writeCharacters(item)
			writer.writeEndElement() // p
			writer.writeCharacters("\n")
		}
	} else {
		writer.writeCharacters("\t")
		writer.writeStartElement("p")
		writer.writeAttribute("rend", "no-list")
		writer.writeCharacters(value)
		writer.writeEndElement() // p
		writer.writeCharacters("\n")
	}
}
|
294 |
|
|
295 |
/**
 * Writes the value of a column as a <list rend="prefixes" type="unordered">.
 *
 * The value is scanned for entries made of a three-uppercase-letter prefix, an
 * optional colon, a label, at least one space and a ";" or newline terminator.
 * Each entry becomes an <item type="PREFIX"> holding the prefix in a <span>
 * followed by the label. When no entry is found the raw value is written as
 * the text of the list.
 *
 * @param record the record to read; only record.get(att) is used
 * @param att the name of the column to write
 * @param writer the XML stream writer receiving the elements
 */
def writeContentPrefix(def record, def att, def writer) {
	writer.writeCharacters("\n")
	writer.writeStartElement("list")
	writer.writeAttribute("rend", "prefixes")
	writer.writeAttribute("type", "unordered")
	writer.writeCharacters("\n")

	String value = record.get(att) ?: "" // a missing value yields an empty list
	boolean found = false
	// single pass: group 1 is the prefix, group 2 the label
	def matcher = (value =~ /(?s)([A-Z]{3}):? *([^;\n]+?) +[;\n]/)
	while (matcher.find()) {
		found = true
		String prefix = matcher.group(1)
		writer.writeCharacters("\t")
		writer.writeStartElement("item")
		writer.writeAttribute("type", prefix)
		writer.writeStartElement("span")
		writer.writeCharacters(prefix+" ")
		writer.writeEndElement() // span
		writer.writeCharacters(matcher.group(2))
		writer.writeEndElement() // item
		writer.writeCharacters("\n")
	}
	if (!found) {
		writer.writeCharacters(value)
	}
	writer.writeEndElement() // list
	writer.writeCharacters("\n")
}
|
322 |
|
|
323 |
/**
 * Writes a dd/MM/yyyy date column as five attributes of the current element.
 * The attribute names are the normalized column name followed by "jour" (day),
 * "joursemaine" (weekday name), "mois" (month), "annee" (year) and "tri"
 * (yyyy-MM-dd sort key).
 *
 * When the value holds no dd/MM/yyyy date, a warning is printed and the five
 * attributes are written empty instead of failing the whole import.
 * NOTE(review): confirm that empty date attributes are acceptable downstream.
 *
 * @param record the record to read; only record.get(att) is used
 * @param att the name of the date column
 * @param writer the XML stream writer, positioned on a start element
 */
def writeMetadataDate(def record, def att, def writer) {
	String value = record.get(att)
	String att_normalized = AsciiUtils.buildAttributeId(att)

	String day = "", weekday = "", month = "", year = "", sortKey = ""
	def matcher = ((value ?: "") =~ /([0-9]{2})\/([0-9]{2})\/([0-9]{4})/)
	if (matcher.find()) {
		day = matcher.group(1)
		month = matcher.group(2)
		year = matcher.group(3)
		// parse only the matched date so that surrounding text in the cell cannot break parsing
		def parsed = new java.text.SimpleDateFormat("dd/MM/yyyy").parse(matcher.group(0))
		weekday = new java.text.SimpleDateFormat('EEEE').format(parsed)
		sortKey = year+"-"+month+"-"+day
	} else {
		println "Warning: no dd/MM/yyyy date found in column '$att': $value"
	}

	writer.writeAttribute(att_normalized+"jour", day)
	writer.writeAttribute(att_normalized+"joursemaine", weekday)
	writer.writeAttribute(att_normalized+"mois", month)
	writer.writeAttribute(att_normalized+"annee", year)
	writer.writeAttribute(att_normalized+"tri", sortKey)
}
|
333 |
|
|
334 |
/* |
|
335 |
String name = inputFile.getName() |
|
336 |
int idx = name.lastIndexOf(".") |
|
337 |
if (idx > 0) name = name.substring(0, idx) |
|
338 |
*/ |
Formats disponibles : Unified diff