Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / transcriber / pager.groovy @ 2369

History | View | Annotate | Download (26.9 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
//
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
//
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
//
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate:$
25
// $LastChangedRevision:$
26
// $LastChangedBy:$
27
//
28
package org.txm.scripts.importer.transcriber
29

    
30
import java.io.File;
31
import java.util.ArrayList;
32

    
33
import javax.xml.stream.*
34

    
35
import org.txm.importer.ApplyXsl2
36
import org.txm.metadatas.MetadataGroup
37
import org.txm.metadatas.Metadatas
38
import org.txm.utils.io.FileCopy;
39

    
40

    
41
// TODO: Auto-generated Javadoc
42
/** Builds the HTML edition of a transcription from its XML (TXM) file.
43
 * 
44
 *  @author mdecorde
45
 *  
46
 */
47
class pager {
48
        
49
        /** When true, word tooltips show fewer properties. */
        boolean SIMPLE_TOOLTIP = false; // show less properties in word tooltips
50
        /** Name of an HTML element ("b" by default); presumably used to highlight interviewers -- TODO confirm against process(). */
        String ENQ_HIGHLIGHT_ELEMENT = "b"
51
        
52
        /** Word forms received from the constructor; presumably forms that take no space before them -- TODO confirm. */
        List<String> NoSpaceBefore;
53
        
54
        /** Word forms received from the constructor; presumably forms that take no space after them -- TODO confirm. */
        List<String> NoSpaceAfter;
56
        
57
        /** Page files: process() appends one File of the "default" directory per page break. */
        def pages = [];
59
        /** Value of wordid recorded by process() at each "div" page break. */
        def indexes = [];
60
        
61
        /** Word counter; process() resets it to 0 at each "div". */
        int wordcount = 0;
63
        
64
        /** Page counter (not used in the visible part of the class). */
        int pagecount = 0;
66
        
67
        /** Word limit; set from the constructor's max argument (presumably words per page -- TODO confirm). */
        int wordmax = 10;
69
        
70
        /** Current word id; appended to indexes at each "div". */
        String wordid;
72
        
73
        /** First-word flag. NOTE: process() declares a local variable of the same name that shadows this field. */
        boolean firstWord = true;
75
        
76
        /** Current word value (assignment not visible in this part of the file). */
        String wordvalue;
78
        
79
        /** Current interp value (assignment not visible in this part of the file). */
        String interpvalue;
81
        
82
        /** Last word; starts as a single space. */
        String lastword = " ";
84
        
85
        /** Current word type (assignment not visible in this part of the file). */
        String wordtype;
87
        
88
        /** Presumably true while inside a "form" element -- TODO confirm. */
        boolean flagform = false;
90
        
91
        /** Presumably true while inside an "interp" element -- TODO confirm. */
        boolean flaginterp = false;
93
        
94
        /** Set to true by process() when a "Comment" element starts. */
        boolean flagcomment = false;
95
        
96
        /** URL of txmfile, computed in the constructor. */
        private def url;
98
        
99
        /** Input stream opened on url in the constructor. */
        private def inputData;
101
        
102
        /** XMLInputFactory used to create the parser. */
        private def factory;
104
        
105
        /** StAX reader over inputData. */
        private XMLStreamReader parser;
107
        
108
        /** StAX writer created by createOutput(); writes the HTML to output. */
        XMLStreamWriter writer;
110
        /** Buffered file stream underlying writer; opened by createOutput(). */
        BufferedOutputStream output;
111
        
112
        /** Source XML file given to the constructor. */
        File txmfile;
113
        
114
        /** One-page HTML result: "onepage/<txtname>.html" under htmlDir. */
        File outfile;
115
        
116
        /** Corpus name; process() links the stylesheet "<corpusname>.css". */
        String corpusname ="";
117
        /** Tag name received from the constructor ("pb" by default); presumably the page-cutting element -- TODO confirm. */
        String cuttingTag = "pb"
118
        /** Text name used to build the HTML file names. */
        String txtname;
119
        /** Root directory of the HTML output. */
        File htmlDir;
120
        /** "default" sub-directory of htmlDir; the files listed in pages live there. */
        File defaultDir;
121
        /** Metadata declaration received from the constructor. */
        Metadatas metadatas;
122
        
123
        /** Values of the "text" attributes whose name starts with "enq" (the interviewers). */
        def interviewers = [];
124
        /** Maps a transcription event code to its French label. */
        def eventTranslations = ["^^":"mot inconnu", "?":"orthographe incertaine",
125
                "()":"rupture de syntaxe", "b":"bruit indéterminé",
126
                "*":"mot corrigé",
127
                "bb":"bruit de bouche", "bg":"bruit de gorge",
128
                "ch":"voix chuchotée", "conv":"conversations de fond",
129
                "e":"expiration", "i":"inspiration",
130
                "mic":"bruits micro", "n":"reniflement",
131
                "nontrant":"non transcrit", "pap":"froissement de papiers",
132
                "pf":"souffle", "pi":"inintelligible",
133
                "pif":"inaudible", "r":"respiration",
134
                "rire":"rire du locuteur", "shh":"soufle électrique",
135
                "sif":"sifflement du locuteur", "tx":"toux"];
136
        /** Value of the "time" attribute of the current "u" element. */
        String currentTime = "";
137
        /** Bold state; presumably true while a bold span is open (see endBoldIfNeeded) -- TODO confirm. */
        boolean bold = false;
138
        /** Amount written for the current "u"; when still 0 at the next "u", process() writes "[silence]". */
        int writenLength = 0;
139
        /** Reset to false at each "sp"; set to true when a "Comment" is written. */
        boolean spokenTurn = false;
140
        /** Set to true at each "sp"; presumably cleared at the first sync of the turn -- TODO confirm. */
        boolean firstSync = false;
141
        /** Set to true at each "sp"; presumably cleared at the first speaker change of the turn -- TODO confirm. */
        boolean firstWho = false;
142
        /**
         * Instantiates a new pager and immediately builds the one-page HTML edition.
         *
         * Opens the XML file, creates the "default" and "onepage" sub-directories of
         * htmlDir, opens "onepage/<txtname>.html" for writing and runs process().
         * Errors raised by process() are logged, not rethrown.
         *
         * @param txmfile the XML file to read
         * @param htmlDir the directory receiving the "default" and "onepage" sub-directories
         * @param txtname the text name, used to build the HTML file names
         * @param NoSpaceBefore the no space before list
         * @param NoSpaceAfter the no space after list
         * @param max the value stored in wordmax
         * @param corpusname the corpus name, used for the "<corpusname>.css" stylesheet link
         * @param cuttingTag the value stored in cuttingTag
         * @param metadatas the metadatas
         */
        pager(File txmfile, File htmlDir, String txtname, List<String> NoSpaceBefore,
        List<String> NoSpaceAfter, int max, String corpusname, String cuttingTag, Metadatas metadatas) {
                this.metadatas = metadatas
                this.wordmax = max;
                this.cuttingTag = cuttingTag;
                this.corpusname = corpusname;
                this.NoSpaceBefore = NoSpaceBefore;
                this.NoSpaceAfter = NoSpaceAfter;
                this.url = txmfile.toURI().toURL();
                this.txmfile = txmfile;
                this.htmlDir = htmlDir;
                this.txtname = txtname;

                inputData = url.openStream();
                factory = XMLInputFactory.newInstance();
                parser = factory.createXMLStreamReader(inputData);

                defaultDir = new File(htmlDir, "default")
                defaultDir.mkdir()
                new File(htmlDir, "onepage").mkdir()
                outfile = new File(htmlDir, "onepage/${txtname}.html");

                // Without a writer process() can only fail on its first call, so skip it
                // (createOutput has already reported the cause).
                boolean failed = !createOutput(outfile)
                if (!failed) {
                        try {
                                process();
                        } catch(Exception e) {
                                org.txm.utils.logger.Log.printStackTrace(e);
                                failed = true
                        }
                }

                if (failed) {
                        // Release everything opened so far, writer first so that it can still
                        // flush into the output stream. Each close is attempted independently:
                        // one failing close must neither escape the constructor nor prevent
                        // the remaining resources from being released.
                        for (def resource : [writer, output, parser, inputData]) {
                                try {
                                        resource?.close()
                                } catch (Exception closeError) {
                                        org.txm.utils.logger.Log.printStackTrace(closeError);
                                }
                        }
                }
        }
185
        
186
        /**
         * Opens the given file for writing and creates the XML stream writer on it.
         *
         * On success the fields output and writer point to the new stream and
         * writer. On failure the error is reported, the stream opened by this
         * call (if any) is closed again, and writer is left untouched.
         *
         * @param outfile the HTML file to create (an existing file is overwritten)
         * @return true, if successful
         */
        private boolean createOutput(File outfile) {
                BufferedOutputStream opened = null
                try {
                        XMLOutputFactory outfactory = XMLOutputFactory.newInstance();
                        opened = new BufferedOutputStream(new FileOutputStream(outfile))
                        output = opened
                        writer = outfactory.createXMLStreamWriter(output, "UTF-8");

                        return true;
                } catch (Exception e) {
                        // getLocalizedMessage() may be null, so name the file and keep the stack trace
                        System.out.println("Could not create HTML output " + outfile + ": " + e.getLocalizedMessage());
                        org.txm.utils.logger.Log.printStackTrace(e);
                        if (opened != null) {
                                // the writer could not be created: do not keep the file handle open
                                try {
                                        opened.close()
                                } catch (IOException ignored) {
                                        // best effort, the original failure has already been reported
                                }
                        }
                        return false;
                }
        }
205
        
206
        /** Event list; presumably the events currently open while parsing -- TODO confirm against the rest of process(). */
207
        List<String> events = [];
208
        /** Previous and next event markers (use not visible in this part of the file); both start empty. */
        String previousEvent = "", nextEvent = "";
209
        /**
210
         * Process.
211
         */
212
        void process() {
213
                
214
                String previousElem = "";
215
                boolean parolesRaportees = false;
216
                boolean firstWord = true;
217
                boolean shouldBreak = false;
218
                boolean overlapping = false;
219
                int nbBreak = 0;
220
                String previousSPK;
221
                String localname = "";
222
                ArrayList<String> whos = [];
223
                HashMap<String, String> speakers = new HashMap<String, String>();
224
                HashMap<String, String> topics = new HashMap<String, String>();
225
                
226
                writer.writeStartDocument("UTF-8","1.0");
227
                writer.writeStartElement("html");
228
                //<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
229
                writer.writeStartElement("meta");
230
                writer.writeAttribute("http-equiv", "Content-Type");
231
                writer.writeAttribute("content", "text/html");
232
                writer.writeAttribute("charset", "UTF-8");
233
                writer.writeEndElement(); // meta
234
                writer.writeStartElement("head");
235
                //<link rel="stylesheet" type="text/css" href="class.css" />
236
                writer.writeStartElement("link");
237
                writer.writeAttribute("rel", "stylesheet");
238
                writer.writeAttribute("type", "text/css");
239
                writer.writeAttribute("href", "transcriber.css");
240
                writer.writeEndElement(); // link
241
                writer.writeStartElement("link");
242
                writer.writeAttribute("rel", "stylesheet");
243
                writer.writeAttribute("type", "text/css");
244
                writer.writeAttribute("href", corpusname+".css");
245
                writer.writeEndElement(); // link
246
                writer.writeEndElement(); // head
247
                
248
                nbBreak++
249
                writer.writeStartElement("body");
250
                writer.writeAttribute("class", "txmeditionpage")
251
                writer.writeEmptyElement("pb");
252
                writer.writeAttribute("id", ""+nbBreak);
253
                pages << new File(defaultDir, "${txtname}_${nbBreak}.html")
254
                
255
                for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
256
                        switch (event) {
257
                                case XMLStreamConstants.START_ELEMENT:
258
                                        localname = parser.getLocalName();
259
                                        switch (localname) {
260
                                                case "text":
261
                                                
262
                                                        writer.writeStartElement("h1");
263
                                                        writer.writeAttribute("class", "title");
264
                                                        String title = parser.getAttributeValue(null, "title");
265
                                                
266
                                                        if (title != null) {
267
                                                                writer.writeCharacters(title);
268
                                                        } else {
269
                                                                writer.writeCharacters("Transcription "+txmfile.getName().substring(0, txmfile.getName().length() - 4));
270
                                                        }
271
                                                
272
                                                        writeMediaAccess("0.0")
273
                                                
274
                                                        writer.writeEndElement(); // h1
275
                                                
276
                                                        String subtitle = parser.getAttributeValue(null, "subtitle");
277
                                                        if (subtitle != null && subtitle.length() > 0) {
278
                                                                writer.writeStartElement("h2");
279
                                                                writer.writeAttribute("class", "subtitle");
280
                                                                writer.writeCharacters(subtitle);
281
                                                                writer.writeEndElement(); // h2
282
                                                        }
283
                                                
284
                                                        writer.writeStartElement("table");
285
                                                        writer.writeAttribute("class", "transcription-table");
286
                                                        boolean grey = false;
287
                                                        for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
288
                                                                String name = parser.getAttributeName(i);
289
                                                                String value = parser.getAttributeValue(i);
290
                                                                
291
                                                                if ("title" == name) {
292
                                                                        continue; // ignore "title" metadata
293
                                                                }
294
                                                                
295
                                                                grey = !grey;
296
                                                                writer.writeStartElement("tr");
297
                                                                if (grey) {
298
                                                                        writer.writeAttribute("style","background-color:lightgrey;")
299
                                                                }
300
                                                                
301
                                                                if (value != null) {
302
                                                                        writer.writeStartElement("td");
303
                                                                        writer.writeCharacters(name);
304
                                                                        writer.writeEndElement(); // td
305
                                                                        writer.writeStartElement("td");
306
                                                                        writer.writeCharacters(value);
307
                                                                        writer.writeEndElement(); // td
308
                                                                }
309
                                                                //get enqueteur to style their names
310
                                                                if (name.startsWith("enq")) {
311
                                                                        interviewers.add(value)
312
                                                                }
313
                                                                writer.writeEndElement(); // tr
314
                                                        }
315
                                                        writer.writeEndElement(); // table
316
                                                //                                                        }
317
                                                        break;
318
                                                case "Topics":
319
                                                /*writer.writeStartElement("h2");
320
                                         writer.writeCharacters("Topics");
321
                                         writer.writeEndElement();
322
                                         writer.writeStartElement("ul");
323
                                         */
324
                                                        break;
325
                                                case "Topic":
326
                                                        topics.put(parser.getAttributeValue(null,"id"), parser.getAttributeValue(null,"desc"))
327
                                                /*writer.writeStartElement("li");
328
                                         writer.writeCharacters(parser.getAttributeValue(null,"desc"));
329
                                         writer.writeStartElement("ul");
330
                                         for(int i = 0 ; i < parser.getAttributeCount() ; i++)
331
                                         {
332
                                         if(parser.getAttributeLocalName(i) != "desc")
333
                                         {
334
                                         writer.writeStartElement("li");
335
                                         writer.writeCharacters(parser.getAttributeLocalName(i)+": "+parser.getAttributeValue(i));
336
                                         writer.writeEndElement();
337
                                         }
338
                                         }
339
                                         writer.writeEndElement();
340
                                         writer.writeEndElement();
341
                                         */
342
                                                        break;
343
                                                case "Speakers":
344
                                                /*writer.writeStartElement("h2");
345
                                         writer.writeCharacters("Speakers");
346
                                         writer.writeEndElement();
347
                                         writer.writeStartElement("ul");*/
348
                                                        break;
349
                                                case "Speaker":
350
                                                        whos.add(parser.getAttributeValue(null,"name"));
351
                                                        speakers.put(parser.getAttributeValue(null,"id"), parser.getAttributeValue(null,"name"))
352
                                                /*writer.writeStartElement("li");
353
                                         writer.writeStartElement("ul");
354
                                         writer.writeCharacters(parser.getAttributeValue(null,"name"));
355
                                         for(int i = 0 ; i < parser.getAttributeCount() ; i++)
356
                                         {
357
                                         if(parser.getAttributeLocalName(i) != "name")
358
                                         {
359
                                         writer.writeStartElement("li");
360
                                         writer.writeCharacters(parser.getAttributeLocalName(i)+": "+parser.getAttributeValue(i));
361
                                         writer.writeEndElement();
362
                                         }
363
                                         }
364
                                         writer.writeEndElement();
365
                                         writer.writeEndElement();*/
366
                                                        break;
367
                                                case "Comment":
368
                                                        spokenTurn = true;
369
                                                        writenLength++;
370
                                                        writer.writeStartElement("span");
371
                                                        writer.writeAttribute("class", "comment");
372
                                                        writer.writeCharacters(" ["+parser.getAttributeValue(0)+"] ");
373
                                                        writer.writeEndElement();
374
                                                        flagcomment = true;
375
                                                        break;
376
                                                case "div":
377
                                                
378
                                                        nbBreak++
379
                                                        writer.writeEmptyElement("pb");
380
                                                        writer.writeAttribute("id", ""+nbBreak);
381
                                                        writer.writeCharacters("\n");
382
                                                
383
                                                        pages << new File(defaultDir, "${txtname}_${nbBreak}.html")
384
                                                        indexes << wordid
385
                                                
386
                                                        wordcount = 0;
387
                                                        shouldBreak = false;
388
                                                
389
                                                        writer.writeStartElement("div")
390
                                                        writer.writeAttribute("class", "section")
391
                                                
392
                                                        String type = parser.getAttributeValue(null, "type")
393
                                                        writer.writeAttribute("type", ""+type)
394
                                                
395
                                                        String desc = parser.getAttributeValue(null, "topic")
396
                                                
397
                                                        if (type != null && type.length() > 0) {
398
                                                                writer.writeStartElement("h2");
399
                                                                writer.writeAttribute("class", "section-title")
400
                                                                writer.writeCharacters(type);
401
                                                                
402
                                                                if (parser.getAttributeValue(null,"startTime") != null) {
403
                                                                        writeMediaAccess(parser.getAttributeValue(null,"startTime"))
404
                                                                }
405
                                                                
406
                                                                writer.writeEndElement(); // h1
407
                                                        }
408
                                                
409
                                                        if (desc != null && desc.length() > 0) {
410
                                                                writer.writeStartElement("h2");
411
                                                                writer.writeAttribute("class", "section-desc")
412
                                                                writer.writeCharacters(desc)
413
                                                                writer.writeEndElement(); // h2
414
                                                        }
415
                                                
416
                                                        def metadata = new LinkedHashMap<String, String>() // temp to store attributes
417
                                                        def metadataGroups = ["metadata":[]] // default metadata group
418
                                                        def metadataDeclared = false
419
                                                        if (parser.getAttributeValue(null, "metadata") != null && parser.getAttributeValue(null, "metadata_groups") != null) {
420
                                                                def l1 = parser.getAttributeValue(null, "metadata").split("\\|");
421
                                                                def l2 = parser.getAttributeValue(null, "metadata_groups").split("\\|");
422
                                                                for (int i = 0 ; i < l1.size() ; i++) {
423
                                                                        def m = l1[i]
424
                                                                        def g = l2[i]
425
                                                                        metadata[m] = "" // forcing order of metadata by pre-declaring
426
                                                                        if (!metadataGroups.containsKey(g)) metadataGroups[g] = []
427
                                                                        metadataGroups[g] << m // declaring a metadata type
428
                                                                }
429
                                                                metadataDeclared = true
430
                                                        }
431
                                                
432
                                                        for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
433
                                                                String name = parser.getAttributeLocalName(i)
434
                                                                if (!"type".equals(name)
435
                                                                && !"topic".equals(name)
436
                                                                && !"startTime".equals(name)
437
                                                                && !"endTime".equals(name)) {
438
                                                                        if (metadataDeclared && !metadata.containsKey(name)) {
439
                                                                                continue; // ignoring metadata since not in declared metadata
440
                                                                        } else {
441
                                                                                metadataGroups["metadata"] << name
442
                                                                        }
443
                                                                        
444
                                                                        metadata[name] = parser.getAttributeValue(i)
445
                                                                }
446
                                                        }
447
                                                
448
                                                        if (metadataGroups.keySet().size() > 0) {
449
                                                                writer.writeStartElement("p")
450
                                                                writer.writeAttribute("class", "section-all-metadata");
451
                                                                for (String groupName : metadataGroups.keySet()) {
452
                                                                        def group = metadataGroups[groupName]
453
                                                                        if (group.size() > 0) {
454
                                                                                if (groupName.equals("text")) {
455
                                                                                        writer.writeStartElement("p")
456
                                                                                        writer.writeAttribute("class", "section-"+groupName);
457
                                                                                        for (String k : group) {
458
                                                                                                writer.writeStartElement("p")
459
                                                                                                writer.writeAttribute("class", ""+groupName)
460
                                                                                                writer.writeStartElement("h4")
461
                                                                                                writer.writeCharacters(k)
462
                                                                                                writer.writeEndElement() // h4
463
                                                                                                writer.writeCharacters(metadata[k])
464
                                                                                                writer.writeEndElement() // p
465
                                                                                        }
466
                                                                                } else {
467
                                                                                        writer.writeStartElement("ul")
468
                                                                                        writer.writeAttribute("class", "section-"+groupName);
469
                                                                                        for (String k : group) {
470
                                                                                                writer.writeStartElement("li")
471
                                                                                                writer.writeAttribute("class", ""+groupName)
472
                                                                                                writer.writeCharacters(""+k+": "+metadata[k])
473
                                                                                                writer.writeEndElement() // li
474
                                                                                        }
475
                                                                                }
476
                                                                                
477
                                                                                writer.writeEndElement(); // ul or p
478
                                                                        }
479
                                                                }
480
                                                                writer.writeEndElement(); // p
481
                                                                writer.writeEmptyElement("hr")
482
                                                        }
483
                                                
484
                                                        break;
485
                                                case "sp":
486
                                                        endBoldIfNeeded()
487
                                                        firstSync = true;
488
                                                        firstWho = true;
489
                                                        spokenTurn = false;
490
                                                        overlapping = false
491
                                                
492
                                                        writer.writeStartElement("p");
493
                                                        writer.writeAttribute("class", "turn");
494
                                                
495
                                                        overlapping = ("true" == parser.getAttributeValue(null,"overlap"))
496
                                                        String spid = parser.getAttributeValue(null,"speaker");
497
                                                
498
                                                        whos = []
499
                                                        if (overlapping) {
500
                                                                writer.writeEmptyElement("br");
501
                                                                writeSpeaker(parser.getAttributeValue(null,"speaker"), false)
502
                                                                
503
                                                                writer.writeEmptyElement("br");
504
                                                                whos = spid.split(" ")
505
                                                        }
506
                                                
507
                                                        break;
508
                                                case "u":
509
                                                        writer.writeCharacters("\n");
510
                                                        this.currentTime = parser.getAttributeValue(null,"time");
511
                                                
512
                                                        if (previousElem == "u" && writenLength == 0) { // if previous u had no words, it was a silence
513
                                                                writer.writeStartElement("span");
514
                                                                writer.writeAttribute("class", "event");
515
                                                                writer.writeCharacters("[silence]");
516
                                                                writer.writeEndElement(); // span
517
                                                                writer.writeEmptyElement("br");
518
                                                        }
519
                                                
520
                                                        String spk = parser.getAttributeValue(null, "spk")
521
                                                        if (spk != null && spk != previousSPK) {
522
                                                                endBoldIfNeeded()
523
                                                                writer.writeEmptyElement("br");
524
                                                                writeSpeaker(parser.getAttributeValue(null, "spk"), overlapping)
525
                                                                startBoldIfNeeded()
526
                                                        }
527
                                                
528
                                                        writeCurrentTime()
529
                                                        previousSPK = spk
530
                                                
531
                                                //                                                        writenLength = 0;
532
                                                /*writer.writeStartElement("span");
533
                                         writer.writeAttribute("class", "sync");
534
                                         writer.writeCharacters("["+parser.getAttributeValue(null,"time")+"]");
535
                                         writer.writeEndElement();*/
536
                                                
537
                                                        break;
538
                                                case "event":
539
                                                        spokenTurn = true;
540
                                                        writenLength++;
541
                                                        String desc = parser.getAttributeValue(null,"desc");
542
                                                        desc = translateEvent(desc);
543
                                                        String type = parser.getAttributeValue(null,"type");
544
                                                        if (desc.equals("paroles rapportées")) {
545
                                                                if (parser.getAttributeValue(null, "extent") == "end")
546
                                                                        writer.writeCharacters("» ");
547
                                                                else if (parser.getAttributeValue(null, "extent") == "begin")
548
                                                                        writer.writeCharacters(" «");
549
                                                        } else {
550
                                                                writer.writeStartElement("span");
551
                                                                writer.writeAttribute("class", "event");
552
                                                                if (parser.getAttributeValue(null, "extent") == "end") {
553
                                                                        writer.writeCharacters(" <"+desc+"] ");
554
                                                                        if(events.size() > 0)
555
                                                                                events.remove(events.size()-1)
556
                                                                }
557
                                                                else if (parser.getAttributeValue(null, "extent") == "begin")         {
558
                                                                        
559
                                                                        writer.writeCharacters(" ["+desc+"> ");
560
                                                                        events.add(desc)
561
                                                                }
562
                                                                else if (parser.getAttributeValue(null, "extent") == "previous") {
563
                                                                        if(parser.getAttributeValue(null, "type") == "pronounce")
564
                                                                                writer.writeCharacters("_["+desc+"] ");
565
                                                                        else
566
                                                                                writer.writeCharacters("_["+desc+"] ");
567
                                                                        previousEvent = desc;
568
                                                                }
569
                                                                else if (parser.getAttributeValue(null, "extent") == "next") {
570
                                                                        writer.writeCharacters(" ["+desc+"]_");
571
                                                                        nextEvent = desc
572
                                                                }
573
                                                                else
574
                                                                        writer.writeCharacters(" ["+desc+"] ");
575
                                                                writer.writeEndElement(); // span@class=event
576
                                                        }
577
                                                        break;
578
                                                case "w":
579
                                                        for(int i = 0 ; i < parser.getAttributeCount() ; i++)
580
                                                                if(parser.getAttributeLocalName(i) == "id") {
581
                                                                        wordid = (parser.getAttributeValue(i));
582
                                                                        break;
583
                                                                }
584
                                                
585
                                                        wordcount++;
586
                                                        if (wordcount >= wordmax) {
587
                                                                shouldBreak = true;
588
                                                        }
589
                                                
590
                                                        if (firstWord) {
591
                                                                indexes << wordid
592
                                                                firstWord = false;
593
                                                        }
594
                                                
595
                                                        break;
596
                                                
597
                                                case "ana":
598
                                                
599
                                                        String type = parser.getAttributeValue(null,"type").substring(1);
600
                                                        if (SIMPLE_TOOLTIP) {
601
                                                                if (type.contains("lemma") || type.contains("pos")) {
602
                                                                        flaginterp=true;
603
                                                                        interpvalue+=", ";
604
                                                                }
605
                                                        } else {
606
                                                                flaginterp=true;
607
                                                                interpvalue+=", "+type+"="
608
                                                        }
609
                                                        break;
610
                                                
611
                                                case "form":
612
                                                        wordvalue="";
613
                                                        interpvalue ="";
614
                                                        flagform=true;
615
                                                        break;
616
                                        }
617
                                        previousElem = localname;
618
                                        break;
619
                                
620
                                case XMLStreamConstants.END_ELEMENT:
621
                                        localname = parser.getLocalName();
622
                                        switch(localname) {
623
                                                case "text":
624
                                                        break;
625
                                                case "Topics":
626
                                                //writer.writeEndElement();
627
                                                        break;
628
                                                case "Topic":
629
                                                        break;
630
                                                case "Speakers":
631
                                                //println "Speakers: "+speakers
632
                                                //writer.writeEndElement();
633
                                                        break;
634
                                                case "Speaker":
635
                                                        break;
636
                                                
637
                                                case "div":
638
                                                //writer.writeCharacters("}");
639
                                                
640
                                                        writer.writeEndElement(); // div
641
                                                        writer.writeCharacters("\n");
642
                                                        break;
643
                                                case "sp":
644
                                                //println "CLOSING: "+parser.getLocalName()
645
                                                        endBoldIfNeeded()
646
                                                        if (!spokenTurn) {
647
                                                                writer.writeStartElement("span");
648
                                                                writer.writeAttribute("class", "event");
649
                                                                writer.writeCharacters("[silence]");
650
                                                                writer.writeEndElement();
651
                                                                writer.writeEmptyElement("br");
652
                                                        }
653
                                                
654
                                                        writer.writeEndElement(); // p
655
                                                
656
                                                        if (shouldBreak) {
657
                                                                nbBreak++
658
                                                                writer.writeEmptyElement("pb");
659
                                                                writer.writeAttribute("id", ""+nbBreak);
660
                                                                writer.writeCharacters("\n");
661
                                                                
662
                                                                pages << new File(defaultDir, "${txtname}_${nbBreak}.html")
663
                                                                indexes << wordid
664
                                                                
665
                                                                wordcount = 0;
666
                                                                shouldBreak = false;
667
                                                        }
668
                                                        writer.writeCharacters("\n");
669
                                                        break;
670
                                                case "u":
671
                                                //writer.writeEndElement() // span@class=u
672
                                                //writer.writeEmptyElement("br");
673
                                                //if (overlapping) writer.writeEndElement(); // b
674
                                                        break;
675
                                                case "event":
676
                                                        break;
677
                                                case "form":
678
                                                        flagform = false
679
                                                        break;
680
                                                case "ana":
681
                                                        flaginterp = false
682
                                                        break;
683
                                                case "w":
684
                                                        writenLength++;
685
                                                        spokenTurn = true;
686
                                                        int l = lastword.length();
687
                                                        String endOfLastWord = "";
688
                                                        if(l > 0)
689
                                                                endOfLastWord = lastword.subSequence(l-1, l);
690
                                                
691
                                                        if(interpvalue != null)
692
                                                                interpvalue = interpvalue.replace("\"","&quot;");
693
                                                        if(events.size() > 0)
694
                                                                interpvalue = interpvalue.replace("event=", "event="+events.toString().replace("\"","&quot;")); // remove ", "
695
                                                
696
                                                        if(nextEvent.length() > 0)
697
                                                        {
698
                                                                interpvalue = interpvalue.replace("event=", "event="+nextEvent+", ")
699
                                                                nextEvent = ""
700
                                                        }
701
                                                        interpvalue = interpvalue.replace("=, ","='', "); // add '' to empty interp value
702
                                                        if (interpvalue.startsWith(", ")) interpvalue = interpvalue.substring(2)
703
                                                //                                                        println "** SPACE TEST"
704
                                                //                                                        println "NoSpaceBefore: "+NoSpaceBefore+" contains ? "+wordvalue
705
                                                //                                                        println "NoSpaceAfter: "+NoSpaceAfter+" contains ? "+lastword
706
                                                //                                                        println "wordvalue starts with '-' ? "+wordvalue
707
                                                //                                                        println "NoSpaceAfter: "+NoSpaceAfter+" contains endOfLastWord ? "+endOfLastWord
708
                                                        if(NoSpaceBefore.contains(wordvalue) ||
709
                                                        NoSpaceAfter.contains(lastword) ||
710
                                                        wordvalue.startsWith("-") ||
711
                                                        NoSpaceAfter.contains(endOfLastWord)) {
712
                                                                //                                                                println " NO SPACE"
713
                                                        } else {
714
                                                                //                                                                println " SPACE"
715
                                                                writer.writeCharacters(" ");
716
                                                        }
717
                                                
718
                                                        if (interpvalue.contains("rapp1")) {
719
                                                                writer.writeCharacters(" «");
720
                                                        } else if (wordvalue == "\"") {
721
                                                                // don't write this char
722
                                                        } else {
723
                                                                writer.writeStartElement("span");
724
                                                                writer.writeAttribute("class", "word");
725
                                                                writer.writeAttribute("title", interpvalue);
726
                                                                writer.writeAttribute("id", wordid);
727
                                                                writer.writeCharacters(wordvalue);
728
                                                                writer.writeEndElement();
729
                                                        }
730
                                                        if (interpvalue.contains("orth")) {
731
                                                                writer.writeStartElement("span");
732
                                                                writer.writeAttribute("class", "event");
733
                                                                writer.writeCharacters("_[?]");
734
                                                                writer.writeEndElement();
735
                                                        }
736
                                                        if (interpvalue.contains("corr")) {
737
                                                                writer.writeStartElement("span");
738
                                                                writer.writeAttribute("class", "event");
739
                                                                writer.writeCharacters("_[!]");
740
                                                                writer.writeEndElement();
741
                                                        }
742
                                                
743
                                                        if (interpvalue.contains("rapp2")) {
744
                                                                writer.writeCharacters(" » ");
745
                                                        }
746
                                                
747
                                                        lastword=wordvalue;
748
                                                        break;
749
                                        }
750
                                
751
                                        break;
752
                                
753
                                case XMLStreamConstants.CHARACTERS:
754
                                        if(flagform)
755
                                                if(parser.getText().length() > 0)
756
                                                        wordvalue+=(parser.getText().trim());
757
                                        if(flaginterp)
758
                                                if(parser.getText().length() > 0)
759
                                                        interpvalue+=(parser.getText().trim());
760
                                        break;
761
                        }
762
                }
763
                writer.writeEndElement(); // body
764
                
765
                writer.writeEmptyElement("pb");
766
                nbBreak++
767
                writer.writeAttribute("id", ""+nbBreak);
768
                
769
                writer.writeEndElement(); // html
770
                writer.close();
771
                output.close();
772
                if (parser != null) parser.close();
773
                if (inputData != null) inputData.close();
774
                
775
                File txmhome = new File(org.txm.Toolbox.getTxmHomePath());
776
                File xlsDir  = new File(txmhome, "xsl");
777
                File xslfile = new File(xlsDir,"breakByMilestone.xsl");
778
                if (!xslfile.exists()) {
779
                        println ""
780
                }
781
                //                println "xsl: "+xslfile
782
                //                println "html: "+outfile
783
                //                println "pages: "+pages
784
                //                println "words: "+indexes
785
                
786
                
787
                if (pages.size() > 1) {
788
                        for (int i = 1 ; i < nbBreak ; i++) {
789
                                ApplyXsl2 a = new ApplyXsl2(xslfile.getAbsolutePath());
790
                                String[] params = ["pbval1", i, "pbval2", i+1];
791
                                
792
                                File resultfile = pages[i-1]
793
                                //println "BBmilestones: "+i+" "+(i+1)+" in file "+resultfile
794
                                //println "process $outfile -> $resultfile"
795
                                a.process(outfile.getAbsolutePath(), resultfile.getAbsolutePath(), params);
796
                        }
797
                } else {
798
                        File page = pages[0]
799
                        FileCopy.copy(outfile, page)
800
                }
801
                outfile.delete() // onepage edition -> no more needed
802
        }
803
        
804
        /**
         * Writes the current synchronisation point as a
         * {@code span} of class "sync": the value of {@code currentTime}
         * as text, followed by the media-access link produced by
         * {@link #writeMediaAccess}.
         */
        private void writeCurrentTime() {
                def stamp = currentTime

                writer.writeStartElement("span");
                writer.writeAttribute("class", "sync");
                writer.writeCharacters(stamp);

                // the play link is nested inside the sync span
                writeMediaAccess(stamp)

                writer.writeEndElement() // span@class=sync
        }
813
        
814
        /**
         * Writes a single space followed by an {@code a} element of class
         * "play-media" whose {@code onclick} attribute calls
         * {@code txmcommand(...)} for the BackToMedia command with the corpus
         * name, the text name and the given time.
         *
         * The three values are embedded in single-quoted JavaScript string
         * literals, so backslashes and single quotes are escaped first;
         * otherwise a name such as "l'entretien" would terminate the literal
         * early and break the handler.
         *
         * @param time the value passed as the 'time' argument of the command
         */
        private void writeMediaAccess(def time) {
                String corpusArg = escapeForJsString(corpusname)
                String textArg = escapeForJsString(txtname)
                String timeArg = escapeForJsString(time)

                writer.writeCharacters(" ");
                writer.writeStartElement("a");
                writer.writeAttribute("onclick", "txmcommand('id', 'org.txm.backtomedia.commands.function.BackToMedia', 'corpus', '"+corpusArg+"', 'text', '"+textArg+"', 'time', '"+timeArg+"')");
                writer.writeAttribute("style", "cursor: pointer;")
                writer.writeAttribute("class", "play-media")
                // NOTE(review): the empty text node presumably keeps the element from
                // being serialized as a self-closing tag - confirm before removing
                writer.writeCharacters("");
                writer.writeEndElement(); // a
        }

        /**
         * Escapes a value for inclusion in a single-quoted JavaScript string
         * literal: backslashes are doubled and single quotes are prefixed with
         * a backslash. A null value is rendered as the text "null", as plain
         * string concatenation would.
         */
        private static String escapeForJsString(def value) {
                return (""+value).replace("\\", "\\\\").replace("'", "\\'");
        }
823
        
824
        /**
         * Writes a speaker label as a {@code span} of class "spk".
         *
         * As a side effect, {@code bold} is set to whether {@code spk} is
         * contained in {@code interviewers}. In the displayed label a space
         * is inserted between a non-digit prefix and the trailing digits of
         * the name, and the label is followed by ": ".
         *
         * @param spk the speaker name as found in the source
         * @param overlapping when true, the label is preceded by "// "
         */
        private void writeSpeaker(String spk, boolean overlapping) {
                writer.writeStartElement("span");
                writer.writeAttribute("class", "spk");

                // remember whether the following speech must be highlighted
                bold = interviewers.contains(spk)

                String label = spk.replaceAll('^([^0-9]*)([0-9]+)$', '$1 $2');

                if (overlapping) {
                        writer.writeCharacters("// ")
                }
                writer.writeCharacters(label+": ")

                writer.writeEndElement(); // span@class=spk
        }
839
        
840
        /**
         * Looks up an event description in {@code eventTranslations}.
         *
         * @param desc the event description to translate
         * @return the mapped value when {@code desc} is a key of
         *         {@code eventTranslations}, otherwise {@code desc} itself
         */
        private String translateEvent(String desc) {
                return eventTranslations.containsKey(desc) ? eventTranslations.get(desc) : desc;
        }
846
        
847
        /** True while a highlight element opened by startBoldIfNeeded() is still open. */
        boolean boldOpenned = false;

        /**
         * Opens the highlight element ({@code ENQ_HIGHLIGHT_ELEMENT}) when
         * {@code bold} is set, and records that it is open so that
         * endBoldIfNeeded() can close it later.
         */
        private void startBoldIfNeeded() {
                if (!bold) {
                        return;
                }
                writer.writeStartElement(ENQ_HIGHLIGHT_ELEMENT);
                boldOpenned = true;
        }
854
        
855
        /**
         * Closes the highlight element if one is currently open
         * ({@code boldOpenned} is true) and clears the flag; does nothing
         * otherwise.
         */
        private endBoldIfNeeded() {
                if (!boldOpenned) {
                        return
                }
                writer.writeEndElement(); // highlight element
                boldOpenned = false;
        }
862
        
863
        //        private String formatTime(float time, boolean doshort)
864
        //        {
865
        //                String rez = " ";
866
        //                //                if(time >= 3600) // >= 1h
867
        //                //                {
868
        //                float h = time / 3600;
869
        //                time = time%3600;
870
        //                float min = (time%3600) / 60;
871
        //                int sec = (int)time%60;
872
        //
873
        //                if(min < 10)
874
        //                        rez = ""+(int)h+":0"+(int)min;//+":"+time%60;
875
        //                else
876
        //                        rez = ""+(int)h+":"+(int)min;//+":"+time%60;
877
        //                //if (!doshort)
878
        //                if (sec > 9)
879
        //                        rez += ":"+(int)time%60;
880
        //                else
881
        //                        rez += ":0"+(int)time%60;
882
        //                //                }
883
        //                //                else if(time >= 60) // >= 1min
884
        //                //                {
885
        //                //                        int min = time/60;
886
        //                //                        if(min < 10)
887
        //                //                                rez = "00:0"+min;//+":"+time%60;
888
        //                //                        else
889
        //                //                                rez = "00:"+min;//+":"+time%60;
890
        //                //                        if(!doshort)
891
        //                //                                rez += ":"+(int)time%60;
892
        //                //                }
893
        //                //                else // < 60
894
        //                //                {
895
        //                //                        if(time < 10)
896
        //                //                                return " 0:0"+time;
897
        //                //                        else
898
        //                //                                return " 0:"+time;
899
        //                //                }
900
        //                return rez;
901
        //        }
902
        
903
        /**
         * Gives access to the page files of the edition.
         *
         * @return the {@code pages} list, i.e. the HTML page files
         *         registered while the edition was built
         */
        public ArrayList<File> getPageFiles() {
                return this.pages
        }
911
        
912
        /**
         * Gives access to the word index of the edition.
         *
         * @return the {@code indexes} list, i.e. the word ids recorded
         *         while the edition was built
         */
        public ArrayList<String> getIdx() {
                return this.indexes
        }
920
}