Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / transcriber / pager.groovy @ 2361

History | View | Annotate | Download (26.7 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
//
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
//
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
//
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate:$
25
// $LastChangedRevision:$
26
// $LastChangedBy:$
27
//
28
package org.txm.scripts.importer.transcriber
29

    
30
import java.io.File;
31
import java.util.ArrayList;
32

    
33
import javax.xml.stream.*
34

    
35
import org.txm.importer.ApplyXsl2
36
import org.txm.metadatas.MetadataGroup
37
import org.txm.metadatas.Metadatas
38
import org.txm.utils.io.FileCopy;
39

    
40

    
41
// TODO: Auto-generated Javadoc
42
/** Builds the simple edition of a Discours corpus from an XML-TEI file.
43
 * 
44
 *  @author mdecorde
45
 *  
46
 */
47
class pager {
48
        
49
        boolean SIMPLE_TOOLTIP = false; // show less properties in word tooltips

        /** NOTE(review): presumably the HTML element used to highlight interviewers ("enq") — usage is not visible here, confirm. */
        String ENQ_HIGHLIGHT_ELEMENT = "b"

        /** The No space before list, as received by the constructor. */
        List<String> NoSpaceBefore;

        /** The No space after list, as received by the constructor. */
        List<String> NoSpaceAfter;

        /** The page files; process() appends one File per page break. */
        def pages = [];

        /** The value of wordid recorded at each "div" page break. */
        def indexes = [];

        /** The wordcount; reset to 0 at each "div" page break. */
        int wordcount = 0;

        /** The pagecount. */
        int pagecount = 0;

        /** The wordmax; overwritten by the constructor's "max" argument. */
        int wordmax = 10;

        /** The wordid. */
        String wordid;

        /** The first word. */
        boolean firstWord = true;

        /** The wordvalue. */
        String wordvalue;

        /** The interpvalue. */
        String interpvalue;

        /** The lastword. */
        String lastword = " ";

        /** The wordtype. */
        String wordtype;

        /** The flagform. */
        boolean flagform = false;

        /** The flaginterp. */
        boolean flaginterp = false;

        /** Set to true when a "Comment" element is met. */
        boolean flagcomment = false;

        /** The url of the XML-TXM file, built by the constructor. */
        private def url;

        /** The input data: stream opened on {@link #url}. */
        private def inputData;

        /** The factory used to create {@link #parser}. */
        private def factory;

        /** The parser reading {@link #inputData}. */
        private XMLStreamReader parser;

        /** The writer; created by createOutput() on top of {@link #output}. */
        XMLStreamWriter writer;

        /** The byte stream the writer writes into; created by createOutput(). */
        BufferedOutputStream output;

        /** The XML-TXM source file. */
        File txmfile;

        /** The "onepage" HTML file written by this pager. */
        File outfile;

        /** The corpus name; also used as the base name of the second CSS link. */
        String corpusname ="";

        /** The cutting tag name received by the constructor. */
        String cuttingTag = "pb"

        /** The text name, used to build the HTML file names. */
        String txtname;

        /** The directory receiving the "default" and "onepage" sub-directories. */
        File htmlDir;

        /** The "default" sub-directory of {@link #htmlDir}. */
        File defaultDir;

        /** The metadatas received by the constructor. */
        Metadatas metadatas;

        /** Values of the "text" attributes whose name starts with "enq". */
        def interviewers = [];

        /** Maps a transcription event code to its (French) label. */
        def eventTranslations = ["^^":"mot inconnu", "?":"orthographe incertaine",
                "()":"rupture de syntaxe", "b":"bruit indéterminé",
                "*":"mot corrigé",
                "bb":"bruit de bouche", "bg":"bruit de gorge",
                "ch":"voix chuchotée", "conv":"conversations de fond",
                "e":"expiration", "i":"inspiration",
                "mic":"bruits micro", "n":"reniflement",
                "nontrant":"non transcrit", "pap":"froissement de papiers",
                "pf":"souffle", "pi":"inintelligible",
                "pif":"inaudible", "r":"respiration",
                "rire":"rire du locuteur", "shh":"souffle électrique", // label typo fixed: was "soufle"
                "sif":"sifflement du locuteur", "tx":"toux"];

        /** Value of the "time" attribute of the last "u" element read. */
        String currentTime = "";

        /** NOTE(review): presumably true while bold markup is open (see endBoldIfNeeded) — confirm. */
        boolean bold = false;

        /** Incremented when content is written; a "u" following a "u" with 0 is rendered as a silence. */
        int writenLength = 0;

        /** Reset to false on "sp", set to true when a "Comment" is written. */
        boolean spokenTurn = false;

        /** Set to true at the start of each "sp" element. */
        boolean firstSync = false;

        /** Set to true at the start of each "sp" element. */
        boolean firstWho = false;
142
        /**
         * Instantiates a new pager and immediately runs {@link #process()}.
         *
         * The constructor opens the XML-TXM file, creates the "default" and
         * "onepage" sub-directories of htmlDir, opens
         * "onepage/&lt;txtname&gt;.html" for writing and processes the input.
         * Exceptions thrown by process() are logged, not propagated.
         *
         * @param txmfile the XML-TXM file to read
         * @param htmlDir the directory receiving the HTML files
         * @param txtname the text name, used to build the HTML file names
         * @param NoSpaceBefore the no space before
         * @param NoSpaceAfter the no space after
         * @param max the max (stored in wordmax)
         * @param corpusname the corpus name
         * @param cuttingTag the cutting tag name
         * @param metadatas the metadatas
         */
        pager(File txmfile, File htmlDir, String txtname, List<String> NoSpaceBefore,
        List<String> NoSpaceAfter, int max, String corpusname, String cuttingTag, Metadatas metadatas) {
                this.metadatas = metadatas
                this.wordmax = max;
                this.cuttingTag = cuttingTag;
                this.corpusname = corpusname;
                this.NoSpaceBefore = NoSpaceBefore;
                this.NoSpaceAfter = NoSpaceAfter;
                this.url = txmfile.toURI().toURL();
                this.txmfile = txmfile;
                this.htmlDir = htmlDir;
                this.txtname = txtname;

                // Best-effort release of everything opened so far. Each resource is
                // closed independently so that one failing close() does not prevent
                // the others from being released.
                def closeAllQuietly = {
                        for (def resource : [writer, output, parser, inputData]) {
                                if (resource == null) continue;
                                try {
                                        resource.close();
                                } catch (Exception ignored) {
                                        // already on a failure path: nothing more can be done
                                }
                        }
                }

                inputData = url.openStream();
                try {
                        factory = XMLInputFactory.newInstance();
                        parser = factory.createXMLStreamReader(inputData);
                } catch (Exception e) {
                        // do not leak the stream when the parser cannot be created
                        closeAllQuietly();
                        throw e;
                }

                defaultDir = new File(htmlDir, "default")
                defaultDir.mkdir()
                new File(htmlDir, "onepage").mkdir()
                outfile = new File(htmlDir, "onepage/${txtname}.html");

                if (createOutput(outfile)) {
                        try {
                                // NOTE(review): on success the streams are left as process() leaves them;
                                // the end of process() is not visible here — confirm it closes them.
                                process();
                        } catch(Exception e) {
                                org.txm.utils.logger.Log.printStackTrace(e);
                                closeAllQuietly();
                        }
                } else {
                        // No writer available (createOutput already reported the error):
                        // running process() would only fail on a null writer.
                        closeAllQuietly();
                }
        }
185
        
186
        /**
         * Creates the output: opens outfile and builds an UTF-8 XML stream
         * writer on top of it.
         *
         * The {@link #output} and {@link #writer} fields are only updated when
         * both steps succeed; on failure the newly opened stream is closed and
         * the fields keep their previous values.
         *
         * @param outfile the outfile
         * @return true, if successful
         */
        private boolean createOutput(File outfile) {
                BufferedOutputStream newOutput = null;
                try {
                        //println "write html in : "+outfile
                        XMLOutputFactory outfactory = XMLOutputFactory.newInstance();
                        newOutput = new BufferedOutputStream(new FileOutputStream(outfile))
                        XMLStreamWriter newWriter = outfactory.createXMLStreamWriter(newOutput, "UTF-8");//create a new file

                        // publish both fields together so they always describe the same file
                        output = newOutput;
                        writer = newWriter;
                        return true;
                } catch (Exception e) {
                        System.out.println(e.getLocalizedMessage());
                        if (newOutput != null) {
                                // the file was opened but the writer could not be created: release it
                                try {
                                        newOutput.close();
                                } catch (IOException ignored) {
                                        // the original failure has already been reported
                                }
                        }
                        return false;
                }
        }
205
        
206
        /** The events; starts empty. NOTE(review): filled and read outside the visible part of this file — confirm usage. */
207
        List<String> events = [];
208
        /** Previous and next event values, both initialised to the empty string. NOTE(review): usage not visible here — confirm. */
        String previousEvent = "", nextEvent = "";
209
        /**
210
         * Process.
211
         */
212
        void process() {
213
                
214
                String previousElem = "";
215
                boolean parolesRaportees = false;
216
                boolean firstWord = true;
217
                boolean shouldBreak = false;
218
                boolean overlapping = false;
219
                int nbBreak = 0;
220
                String previousSPK;
221
                String localname = "";
222
                ArrayList<String> whos = [];
223
                HashMap<String, String> speakers = new HashMap<String, String>();
224
                HashMap<String, String> topics = new HashMap<String, String>();
225
                
226
                writer.writeStartDocument("UTF-8","1.0");
227
                writer.writeStartElement("html");
228
                //<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
229
                writer.writeStartElement("meta");
230
                writer.writeAttribute("http-equiv", "Content-Type");
231
                writer.writeAttribute("content", "text/html");
232
                writer.writeAttribute("charset", "UTF-8");
233
                writer.writeEndElement(); // meta
234
                writer.writeStartElement("head");
235
                //<link rel="stylesheet" type="text/css" href="class.css" />
236
                writer.writeStartElement("link");
237
                writer.writeAttribute("rel", "stylesheet");
238
                writer.writeAttribute("type", "text/css");
239
                writer.writeAttribute("href", "transcriber.css");
240
                writer.writeEndElement(); // link
241
                writer.writeStartElement("link");
242
                writer.writeAttribute("rel", "stylesheet");
243
                writer.writeAttribute("type", "text/css");
244
                writer.writeAttribute("href", corpusname+".css");
245
                writer.writeEndElement(); // link
246
                writer.writeEndElement(); // head
247
                
248
                nbBreak++
249
                writer.writeStartElement("body");
250
                writer.writeAttribute("class", "txmeditionpage")
251
                writer.writeEmptyElement("pb");
252
                writer.writeAttribute("id", ""+nbBreak);
253
                pages << new File(defaultDir, "${txtname}_${nbBreak}.html")
254
                
255
                for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
256
                        switch (event) {
257
                                case XMLStreamConstants.START_ELEMENT:
258
                                        localname = parser.getLocalName();
259
                                        switch (localname) {
260
                                                case "text":
261
                                                
262
                                                        writer.writeStartElement("h1");
263
                                                        writer.writeAttribute("class", "title");
264
                                                        String title = parser.getAttributeValue(null, "title");
265
                                                
266
                                                        if (title != null) {
267
                                                                writer.writeCharacters(title);
268
                                                        } else {
269
                                                                writer.writeCharacters("Transcription "+txmfile.getName().substring(0, txmfile.getName().length() - 4));
270
                                                        }
271
                                                
272
                                                        writeMediaAccess("0.0")
273
                                                
274
                                                        writer.writeEndElement(); // h1
275
                                                
276
                                                        String subtitle = parser.getAttributeValue(null, "subtitle");
277
                                                        if (subtitle != null && subtitle.length() > 0) {
278
                                                                writer.writeStartElement("h2");
279
                                                                writer.writeAttribute("class", "subtitle");
280
                                                                writer.writeCharacters(subtitle);
281
                                                                writer.writeEndElement(); // h2
282
                                                        }
283
                                                
284
                                                        writer.writeStartElement("table");
285
                                                        writer.writeAttribute("class", "transcription-table");
286
                                                        boolean grey = false;
287
                                                        for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
288
                                                                String name = parser.getAttributeName(i);
289
                                                                String value = parser.getAttributeValue(i);
290
                                                                
291
                                                                if ("title" == name) {
292
                                                                        continue; // ignore "title" metadata
293
                                                                }
294
                                                                
295
                                                                grey = !grey;
296
                                                                writer.writeStartElement("tr");
297
                                                                if (grey) {
298
                                                                        writer.writeAttribute("style","background-color:lightgrey;")
299
                                                                }
300
                                                                
301
                                                                if (value != null) {
302
                                                                        writer.writeStartElement("td");
303
                                                                        writer.writeCharacters(name);
304
                                                                        writer.writeEndElement(); // td
305
                                                                        writer.writeStartElement("td");
306
                                                                        writer.writeCharacters(value);
307
                                                                        writer.writeEndElement(); // td
308
                                                                }
309
                                                                //get enqueteur to style their names
310
                                                                if (name.startsWith("enq")) {
311
                                                                        interviewers.add(value)
312
                                                                }
313
                                                                writer.writeEndElement(); // tr
314
                                                        }
315
                                                        writer.writeEndElement(); // table
316
                                                //                                                        }
317
                                                        break;
318
                                                case "Topics":
319
                                                /*writer.writeStartElement("h2");
320
                                         writer.writeCharacters("Topics");
321
                                         writer.writeEndElement();
322
                                         writer.writeStartElement("ul");
323
                                         */
324
                                                        break;
325
                                                case "Topic":
326
                                                        topics.put(parser.getAttributeValue(null,"id"), parser.getAttributeValue(null,"desc"))
327
                                                /*writer.writeStartElement("li");
328
                                         writer.writeCharacters(parser.getAttributeValue(null,"desc"));
329
                                         writer.writeStartElement("ul");
330
                                         for(int i = 0 ; i < parser.getAttributeCount() ; i++)
331
                                         {
332
                                         if(parser.getAttributeLocalName(i) != "desc")
333
                                         {
334
                                         writer.writeStartElement("li");
335
                                         writer.writeCharacters(parser.getAttributeLocalName(i)+": "+parser.getAttributeValue(i));
336
                                         writer.writeEndElement();
337
                                         }
338
                                         }
339
                                         writer.writeEndElement();
340
                                         writer.writeEndElement();
341
                                         */
342
                                                        break;
343
                                                case "Speakers":
344
                                                /*writer.writeStartElement("h2");
345
                                         writer.writeCharacters("Speakers");
346
                                         writer.writeEndElement();
347
                                         writer.writeStartElement("ul");*/
348
                                                        break;
349
                                                case "Speaker":
350
                                                        whos.add(parser.getAttributeValue(null,"name"));
351
                                                        speakers.put(parser.getAttributeValue(null,"id"), parser.getAttributeValue(null,"name"))
352
                                                /*writer.writeStartElement("li");
353
                                         writer.writeStartElement("ul");
354
                                         writer.writeCharacters(parser.getAttributeValue(null,"name"));
355
                                         for(int i = 0 ; i < parser.getAttributeCount() ; i++)
356
                                         {
357
                                         if(parser.getAttributeLocalName(i) != "name")
358
                                         {
359
                                         writer.writeStartElement("li");
360
                                         writer.writeCharacters(parser.getAttributeLocalName(i)+": "+parser.getAttributeValue(i));
361
                                         writer.writeEndElement();
362
                                         }
363
                                         }
364
                                         writer.writeEndElement();
365
                                         writer.writeEndElement();*/
366
                                                        break;
367
                                                case "Comment":
368
                                                        spokenTurn = true;
369
                                                        writenLength++;
370
                                                        writer.writeStartElement("span");
371
                                                        writer.writeAttribute("class", "comment");
372
                                                        writer.writeCharacters(" ["+parser.getAttributeValue(0)+"] ");
373
                                                        writer.writeEndElement();
374
                                                        flagcomment = true;
375
                                                        break;
376
                                                case "div":
377
                                                
378
                                                        nbBreak++
379
                                                        writer.writeEmptyElement("pb");
380
                                                        writer.writeAttribute("id", ""+nbBreak);
381
                                                        writer.writeCharacters("\n");
382
                                                
383
                                                        pages << new File(defaultDir, "${txtname}_${nbBreak}.html")
384
                                                        indexes << wordid
385
                                                
386
                                                        wordcount = 0;
387
                                                        shouldBreak = false;
388
                                                
389
                                                        writer.writeStartElement("div")
390
                                                        writer.writeAttribute("class", "section")
391
                                                
392
                                                        String type = parser.getAttributeValue(null, "type")
393
                                                        writer.writeAttribute("type", ""+type)
394
                                                
395
                                                        String desc = parser.getAttributeValue(null, "topic")
396
                                                
397
                                                        if (type != null && type.length() > 0) {
398
                                                                writer.writeStartElement("h1");
399
                                                                writer.writeAttribute("class", "section-title")
400
                                                                writer.writeCharacters(type);
401
                                                                
402
                                                                if (parser.getAttributeValue(null,"startTime") != null) {
403
                                                                        writeMediaAccess(parser.getAttributeValue(null,"startTime"))
404
                                                                }
405
                                                                
406
                                                                writer.writeEndElement(); // h1
407
                                                        }
408
                                                
409
                                                        if (desc != null && desc.length() > 0) {
410
                                                                writer.writeStartElement("h2");
411
                                                                writer.writeAttribute("class", "section-desc")
412
                                                                writer.writeCharacters(desc)
413
                                                                writer.writeEndElement(); // h2
414
                                                        }
415
                                                
416
                                                        def metadata = new LinkedHashMap<String, String>() // temp to store attributes
417
                                                        def metadataGroups = ["metadata":[]] // default metadata group
418
                                                        def metadataDeclared = false
419
                                                        if (parser.getAttributeValue(null, "metadata") != null && parser.getAttributeValue(null, "metadata_groups") != null) {
420
                                                                def l1 = parser.getAttributeValue(null, "metadata").split("\\|");
421
                                                                def l2 = parser.getAttributeValue(null, "metadata_groups").split("\\|");
422
                                                                for (int i = 0 ; i < l1.size() ; i++) {
423
                                                                        def m = l1[i]
424
                                                                        def g = l2[i]
425
                                                                        metadata[m] = "" // forcing order of metadata by pre-declaring
426
                                                                        if (!metadataGroups.containsKey(g)) metadataGroups[g] = []
427
                                                                        metadataGroups[g] << m // declaring a metadata type
428
                                                                }
429
                                                                metadataDeclared = true
430
                                                        }
431
                                                
432
                                                        for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
433
                                                                String name = parser.getAttributeLocalName(i)
434
                                                                if (!"type".equals(name)
435
                                                                && !"topic".equals(name)
436
                                                                && !"startTime".equals(name)
437
                                                                && !"endTime".equals(name)) {
438
                                                                        if (metadataDeclared && !metadata.containsKey(name)) {
439
                                                                                continue; // ignoring metadata since not in declared metadata
440
                                                                        } else {
441
                                                                                metadataGroups["metadata"] << name
442
                                                                        }
443
                                                                        
444
                                                                        metadata[name] = parser.getAttributeValue(i)
445
                                                                }
446
                                                        }
447
                                                
448
                                                        writer.writeStartElement("p")
449
                                                        writer.writeAttribute("class", "section-all-metadata");
450
                                                        for (String groupName : metadataGroups.keySet()) {
451
                                                                def group = metadataGroups[groupName]
452
                                                                if (group.size() > 0) {
453
                                                                        if (groupName.equals("text")) {
454
                                                                                writer.writeStartElement("p")
455
                                                                                writer.writeAttribute("class", "section-"+groupName);
456
                                                                                for (String k : group) {
457
                                                                                        writer.writeStartElement("p")
458
                                                                                        writer.writeAttribute("class", ""+groupName)
459
                                                                                        writer.writeStartElement("h4")
460
                                                                                        writer.writeCharacters(k)
461
                                                                                        writer.writeEndElement() // h4
462
                                                                                        writer.writeCharacters(metadata[k])
463
                                                                                        writer.writeEndElement() // p
464
                                                                                }
465
                                                                        } else {
466
                                                                                writer.writeStartElement("ul")
467
                                                                                writer.writeAttribute("class", "section-"+groupName);
468
                                                                                for (String k : group) {
469
                                                                                        writer.writeStartElement("li")
470
                                                                                        writer.writeAttribute("class", ""+groupName)
471
                                                                                        writer.writeCharacters(""+k+": "+metadata[k])
472
                                                                                        writer.writeEndElement() // li
473
                                                                                }
474
                                                                        }
475
                                                                        
476
                                                                        writer.writeEndElement(); // ul or p
477
                                                                }
478
                                                        }
479
                                                        writer.writeEndElement(); // p
480
                                                
481
                                                        break;
482
                                                case "sp":
483
                                                        endBoldIfNeeded()
484
                                                        firstSync = true;
485
                                                        firstWho = true;
486
                                                        spokenTurn = false;
487
                                                        overlapping = false
488
                                                
489
                                                        writer.writeStartElement("p");
490
                                                        writer.writeAttribute("class", "turn");
491
                                                
492
                                                        overlapping = ("true" == parser.getAttributeValue(null,"overlap"))
493
                                                        String spid = parser.getAttributeValue(null,"speaker");
494
                                                
495
                                                        whos = []
496
                                                        if (overlapping) {
497
                                                                writer.writeEmptyElement("br");
498
                                                                writeSpeaker(parser.getAttributeValue(null,"speaker"), false)
499
                                                                
500
                                                                writer.writeEmptyElement("br");
501
                                                                whos = spid.split(" ")
502
                                                        }
503
                                                
504
                                                        break;
505
                                                case "u":
506
                                                        writer.writeCharacters("\n");
507
                                                        this.currentTime = parser.getAttributeValue(null,"time");
508
                                                
509
                                                        if (previousElem == "u" && writenLength == 0) { // if previous u had no words, it was a silence
510
                                                                writer.writeStartElement("span");
511
                                                                writer.writeAttribute("class", "event");
512
                                                                writer.writeCharacters("[silence]");
513
                                                                writer.writeEndElement(); // span
514
                                                                writer.writeEmptyElement("br");
515
                                                        }
516
                                                
517
                                                        String spk = parser.getAttributeValue(null, "spk")
518
                                                        if (spk != null && spk != previousSPK) {
519
                                                                endBoldIfNeeded()
520
                                                                writer.writeEmptyElement("br");
521
                                                                writeSpeaker(parser.getAttributeValue(null, "spk"), overlapping)
522
                                                                startBoldIfNeeded()
523
                                                        }
524
                                                
525
                                                        writeCurrentTime()
526
                                                        previousSPK = spk
527
                                                
528
                                                //                                                        writenLength = 0;
529
                                                /*writer.writeStartElement("span");
530
                                         writer.writeAttribute("class", "sync");
531
                                         writer.writeCharacters("["+parser.getAttributeValue(null,"time")+"]");
532
                                         writer.writeEndElement();*/
533
                                                
534
                                                        break;
535
                                                case "event":
536
                                                        spokenTurn = true;
537
                                                        writenLength++;
538
                                                        String desc = parser.getAttributeValue(null,"desc");
539
                                                        desc = translateEvent(desc);
540
                                                        String type = parser.getAttributeValue(null,"type");
541
                                                        if (desc.equals("paroles rapportées")) {
542
                                                                if (parser.getAttributeValue(null, "extent") == "end")
543
                                                                        writer.writeCharacters("» ");
544
                                                                else if (parser.getAttributeValue(null, "extent") == "begin")
545
                                                                        writer.writeCharacters(" «");
546
                                                        } else {
547
                                                                writer.writeStartElement("span");
548
                                                                writer.writeAttribute("class", "event");
549
                                                                if (parser.getAttributeValue(null, "extent") == "end") {
550
                                                                        writer.writeCharacters(" <"+desc+"] ");
551
                                                                        if(events.size() > 0)
552
                                                                                events.remove(events.size()-1)
553
                                                                }
554
                                                                else if (parser.getAttributeValue(null, "extent") == "begin")         {
555
                                                                        
556
                                                                        writer.writeCharacters(" ["+desc+"> ");
557
                                                                        events.add(desc)
558
                                                                }
559
                                                                else if (parser.getAttributeValue(null, "extent") == "previous") {
560
                                                                        if(parser.getAttributeValue(null, "type") == "pronounce")
561
                                                                                writer.writeCharacters("_["+desc+"] ");
562
                                                                        else
563
                                                                                writer.writeCharacters("_["+desc+"] ");
564
                                                                        previousEvent = desc;
565
                                                                }
566
                                                                else if (parser.getAttributeValue(null, "extent") == "next") {
567
                                                                        writer.writeCharacters(" ["+desc+"]_");
568
                                                                        nextEvent = desc
569
                                                                }
570
                                                                else
571
                                                                        writer.writeCharacters(" ["+desc+"] ");
572
                                                                writer.writeEndElement(); // span@class=event
573
                                                        }
574
                                                        break;
575
                                                case "w":
576
                                                        for(int i = 0 ; i < parser.getAttributeCount() ; i++)
577
                                                                if(parser.getAttributeLocalName(i) == "id") {
578
                                                                        wordid = (parser.getAttributeValue(i));
579
                                                                        break;
580
                                                                }
581
                                                
582
                                                        wordcount++;
583
                                                        if (wordcount >= wordmax) {
584
                                                                shouldBreak = true;
585
                                                        }
586
                                                
587
                                                        if (firstWord) {
588
                                                                indexes << wordid
589
                                                                firstWord = false;
590
                                                        }
591
                                                
592
                                                        break;
593
                                                
594
                                                case "ana":
595
                                                
596
                                                        String type = parser.getAttributeValue(null,"type").substring(1);
597
                                                        if (SIMPLE_TOOLTIP) {
598
                                                                if (type.contains("lemma") || type.contains("pos")) {
599
                                                                        flaginterp=true;
600
                                                                        interpvalue+=", ";
601
                                                                }
602
                                                        } else {
603
                                                                flaginterp=true;
604
                                                                interpvalue+=", "+type+"="
605
                                                        }
606
                                                        break;
607
                                                
608
                                                case "form":
609
                                                        wordvalue="";
610
                                                        interpvalue ="";
611
                                                        flagform=true;
612
                                                        break;
613
                                        }
614
                                        previousElem = localname;
615
                                        break;
616
                                
617
                                case XMLStreamConstants.END_ELEMENT:
618
                                        localname = parser.getLocalName();
619
                                        switch(localname) {
620
                                                case "text":
621
                                                        break;
622
                                                case "Topics":
623
                                                //writer.writeEndElement();
624
                                                        break;
625
                                                case "Topic":
626
                                                        break;
627
                                                case "Speakers":
628
                                                //println "Speakers: "+speakers
629
                                                //writer.writeEndElement();
630
                                                        break;
631
                                                case "Speaker":
632
                                                        break;
633
                                                
634
                                                case "div":
635
                                                //writer.writeCharacters("}");
636
                                                
637
                                                        writer.writeEndElement(); // div
638
                                                        writer.writeCharacters("\n");
639
                                                        break;
640
                                                case "sp":
641
                                                //println "CLOSING: "+parser.getLocalName()
642
                                                        endBoldIfNeeded()
643
                                                        if (!spokenTurn) {
644
                                                                writer.writeStartElement("span");
645
                                                                writer.writeAttribute("class", "event");
646
                                                                writer.writeCharacters("[silence]");
647
                                                                writer.writeEndElement();
648
                                                                writer.writeEmptyElement("br");
649
                                                        }
650
                                                
651
                                                        writer.writeEndElement(); // p
652
                                                
653
                                                        if (shouldBreak) {
654
                                                                nbBreak++
655
                                                                writer.writeEmptyElement("pb");
656
                                                                writer.writeAttribute("id", ""+nbBreak);
657
                                                                writer.writeCharacters("\n");
658
                                                                
659
                                                                pages << new File(defaultDir, "${txtname}_${nbBreak}.html")
660
                                                                indexes << wordid
661
                                                                
662
                                                                wordcount = 0;
663
                                                                shouldBreak = false;
664
                                                        }
665
                                                        writer.writeCharacters("\n");
666
                                                        break;
667
                                                case "u":
668
                                                //writer.writeEndElement() // span@class=u
669
                                                //writer.writeEmptyElement("br");
670
                                                //if (overlapping) writer.writeEndElement(); // b
671
                                                        break;
672
                                                case "event":
673
                                                        break;
674
                                                case "form":
675
                                                        flagform = false
676
                                                        break;
677
                                                case "ana":
678
                                                        flaginterp = false
679
                                                        break;
680
                                                case "w":
681
                                                        writenLength++;
682
                                                        spokenTurn = true;
683
                                                        int l = lastword.length();
684
                                                        String endOfLastWord = "";
685
                                                        if(l > 0)
686
                                                                endOfLastWord = lastword.subSequence(l-1, l);
687
                                                
688
                                                        if(interpvalue != null)
689
                                                                interpvalue = interpvalue.replace("\"","&quot;");
690
                                                        if(events.size() > 0)
691
                                                                interpvalue = interpvalue.replace("event=", "event="+events.toString().replace("\"","&quot;")); // remove ", "
692
                                                
693
                                                        if(nextEvent.length() > 0)
694
                                                        {
695
                                                                interpvalue = interpvalue.replace("event=", "event="+nextEvent+", ")
696
                                                                nextEvent = ""
697
                                                        }
698
                                                        interpvalue = interpvalue.replace("=, ","='', "); // add '' to empty interp value
699
                                                        if (interpvalue.startsWith(", ")) interpvalue = interpvalue.substring(2)
700
                                                //                                                        println "** SPACE TEST"
701
                                                //                                                        println "NoSpaceBefore: "+NoSpaceBefore+" contains ? "+wordvalue
702
                                                //                                                        println "NoSpaceAfter: "+NoSpaceAfter+" contains ? "+lastword
703
                                                //                                                        println "wordvalue starts with '-' ? "+wordvalue
704
                                                //                                                        println "NoSpaceAfter: "+NoSpaceAfter+" contains endOfLastWord ? "+endOfLastWord
705
                                                        if(NoSpaceBefore.contains(wordvalue) ||
706
                                                        NoSpaceAfter.contains(lastword) ||
707
                                                        wordvalue.startsWith("-") ||
708
                                                        NoSpaceAfter.contains(endOfLastWord)) {
709
                                                                //                                                                println " NO SPACE"
710
                                                        } else {
711
                                                                //                                                                println " SPACE"
712
                                                                writer.writeCharacters(" ");
713
                                                        }
714
                                                
715
                                                        if (interpvalue.contains("rapp1")) {
716
                                                                writer.writeCharacters(" «");
717
                                                        } else if (wordvalue == "\"") {
718
                                                                // don't write this char
719
                                                        } else {
720
                                                                writer.writeStartElement("span");
721
                                                                writer.writeAttribute("class", "word");
722
                                                                writer.writeAttribute("title", interpvalue);
723
                                                                writer.writeAttribute("id", wordid);
724
                                                                writer.writeCharacters(wordvalue);
725
                                                                writer.writeEndElement();
726
                                                        }
727
                                                        if (interpvalue.contains("orth")) {
728
                                                                writer.writeStartElement("span");
729
                                                                writer.writeAttribute("class", "event");
730
                                                                writer.writeCharacters("_[?]");
731
                                                                writer.writeEndElement();
732
                                                        }
733
                                                        if (interpvalue.contains("corr")) {
734
                                                                writer.writeStartElement("span");
735
                                                                writer.writeAttribute("class", "event");
736
                                                                writer.writeCharacters("_[!]");
737
                                                                writer.writeEndElement();
738
                                                        }
739
                                                
740
                                                        if (interpvalue.contains("rapp2")) {
741
                                                                writer.writeCharacters(" » ");
742
                                                        }
743
                                                
744
                                                        lastword=wordvalue;
745
                                                        break;
746
                                        }
747
                                
748
                                        break;
749
                                
750
                                case XMLStreamConstants.CHARACTERS:
751
                                        if(flagform)
752
                                                if(parser.getText().length() > 0)
753
                                                        wordvalue+=(parser.getText().trim());
754
                                        if(flaginterp)
755
                                                if(parser.getText().length() > 0)
756
                                                        interpvalue+=(parser.getText().trim());
757
                                        break;
758
                        }
759
                }
760
                writer.writeEndElement(); // body
761
                
762
                writer.writeEmptyElement("pb");
763
                nbBreak++
764
                writer.writeAttribute("id", ""+nbBreak);
765
                
766
                writer.writeEndElement(); // html
767
                writer.close();
768
                output.close();
769
                if (parser != null) parser.close();
770
                if (inputData != null) inputData.close();
771
                
772
                File txmhome = new File(org.txm.Toolbox.getTxmHomePath());
773
                File xlsDir  = new File(txmhome, "xsl");
774
                File xslfile = new File(xlsDir,"breakByMilestone.xsl");
775
                if (!xslfile.exists()) {
776
                        println ""
777
                }
778
                //                println "xsl: "+xslfile
779
                //                println "html: "+outfile
780
                //                println "pages: "+pages
781
                //                println "words: "+indexes
782
                
783
                
784
                if (pages.size() > 1) {
785
                        for (int i = 1 ; i < nbBreak ; i++) {
786
                                ApplyXsl2 a = new ApplyXsl2(xslfile.getAbsolutePath());
787
                                String[] params = ["pbval1", i, "pbval2", i+1];
788
                                
789
                                File resultfile = pages[i-1]
790
                                //println "BBmilestones: "+i+" "+(i+1)+" in file "+resultfile
791
                                //println "process $outfile -> $resultfile"
792
                                a.process(outfile.getAbsolutePath(), resultfile.getAbsolutePath(), params);
793
                        }
794
                } else {
795
                        File page = pages[0]
796
                        FileCopy.copy(outfile, page)
797
                }
798
        }
799
        
800
        /**
         * Writes the current synchronisation time as a span@class=sync element.
         *
         * The span contains the value of the currentTime field as text, followed
         * by the media access link produced by writeMediaAccess().
         */
        private void writeCurrentTime() {
                def syncTime = currentTime

                writer.writeStartElement("span")
                writer.writeAttribute("class", "sync")
                writer.writeCharacters(syncTime)

                // the media link is nested inside the sync span, after the time text
                writeMediaAccess(syncTime)

                writer.writeEndElement() // span@class=sync
        }
809
        
810
        /**
         * Writes a one-space separator followed by an a@class=play-media element
         * whose onclick attribute calls txmcommand(...) for the
         * 'org.txm.backtomedia.commands.function.BackToMedia' command, passing the
         * corpus name, the text name and the given time.
         *
         * The three values are embedded in single-quoted JavaScript string
         * literals, so backslashes and apostrophes are escaped first; otherwise a
         * name such as "l'entretien" would terminate the literal early and break
         * the handler. Values without those characters are written unchanged.
         *
         * @param time the time value passed as the 'time' argument of the command;
         *             it is converted to text by string concatenation
         */
        private void writeMediaAccess(def time) {
                // escape a value for use inside a single-quoted JavaScript string literal
                // (backslash first, so the backslashes added for apostrophes are not doubled)
                def jsQuote = { value -> ("" + value).replace("\\", "\\\\").replace("'", "\\'") }

                writer.writeCharacters(" ");
                writer.writeStartElement("a");
                writer.writeAttribute("onclick", "txmcommand('id', 'org.txm.backtomedia.commands.function.BackToMedia', 'corpus', '"+jsQuote(corpusname)+"', 'text', '"+jsQuote(txtname)+"', 'time', '"+jsQuote(time)+"')");
                writer.writeAttribute("style", "cursor: pointer;")
                writer.writeAttribute("class", "play-media")
                // NOTE(review): the empty text node presumably forces <a></a> instead of a self-closed <a/> - confirm
                writer.writeCharacters("");
                writer.writeEndElement(); // a
        }
819
        
820
        /**
         * Writes a speaker label as a span@class=spk element and updates the
         * bold flag that startBoldIfNeeded() reads.
         *
         * @param spk the speaker identifier; when it ends with digits, a space is
         *            inserted before them in the displayed label (e.g. "spk1" is shown as "spk 1")
         * @param overlapping when true, the label is prefixed with "// "
         */
        private void writeSpeaker(String spk, boolean overlapping) {
                writer.writeStartElement("span")
                writer.writeAttribute("class", "spk")

                // the flag is set from the raw identifier, before it is reformatted for display
                bold = interviewers.contains(spk)

                String label = spk.replaceAll('^([^0-9]*)([0-9]+)$', '$1 $2')
                if (overlapping) {
                        writer.writeCharacters("// ")
                }
                writer.writeCharacters(label + ": ")

                writer.writeEndElement() // span@class=spk
        }
835
        
836
        /**
         * Looks up an event description in eventTranslations.
         *
         * @param desc the event description to translate
         * @return the value mapped to desc when eventTranslations contains that key,
         *         otherwise desc itself
         */
        private String translateEvent(String desc) {
                return eventTranslations.containsKey(desc) ? eventTranslations.get(desc) : desc
        }
842
        
843
        // true while a highlight element opened by startBoldIfNeeded() has not yet been closed by endBoldIfNeeded()
        boolean boldOpenned = false;
844
        /**
         * Opens the highlight element (ENQ_HIGHLIGHT_ELEMENT) when the bold flag
         * is set, and records that it is open in boldOpenned.
         * Does nothing when the flag is not set.
         */
        private void startBoldIfNeeded() {
                if (!bold) return

                writer.writeStartElement(ENQ_HIGHLIGHT_ELEMENT)
                boldOpenned = true
        }
850
        
851
        /**
         * Closes the highlight element previously opened by startBoldIfNeeded(),
         * if one is still open, and resets boldOpenned.
         * Does nothing when no highlight element is open.
         */
        private endBoldIfNeeded() {
                if (!boldOpenned) return

                writer.writeEndElement() // closes the highlight element
                boldOpenned = false
        }
858
        
859
        //        private String formatTime(float time, boolean doshort)
860
        //        {
861
        //                String rez = " ";
862
        //                //                if(time >= 3600) // >= 1h
863
        //                //                {
864
        //                float h = time / 3600;
865
        //                time = time%3600;
866
        //                float min = (time%3600) / 60;
867
        //                int sec = (int)time%60;
868
        //
869
        //                if(min < 10)
870
        //                        rez = ""+(int)h+":0"+(int)min;//+":"+time%60;
871
        //                else
872
        //                        rez = ""+(int)h+":"+(int)min;//+":"+time%60;
873
        //                //if (!doshort)
874
        //                if (sec > 9)
875
        //                        rez += ":"+(int)time%60;
876
        //                else
877
        //                        rez += ":0"+(int)time%60;
878
        //                //                }
879
        //                //                else if(time >= 60) // >= 1min
880
        //                //                {
881
        //                //                        int min = time/60;
882
        //                //                        if(min < 10)
883
        //                //                                rez = "00:0"+min;//+":"+time%60;
884
        //                //                        else
885
        //                //                                rez = "00:"+min;//+":"+time%60;
886
        //                //                        if(!doshort)
887
        //                //                                rez += ":"+(int)time%60;
888
        //                //                }
889
        //                //                else // < 60
890
        //                //                {
891
        //                //                        if(time < 10)
892
        //                //                                return " 0:0"+time;
893
        //                //                        else
894
        //                //                                return " 0:"+time;
895
        //                //                }
896
        //                return rez;
897
        //        }
898
        
899
        /**
         * Gives access to the page files registered in the pages list.
         *
         * The list itself is returned, not a copy.
         *
         * @return the list of page files
         */
        public ArrayList<File> getPageFiles() {
                return this.pages
        }
907
        
908
        /**
         * Gives access to the word identifiers recorded in the indexes list.
         *
         * The list itself is returned, not a copy.
         *
         * @return the list of recorded word identifiers
         */
        public ArrayList<String> getIdx() {
                return this.indexes
        }
916
}