Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / transcriber / pager.groovy @ 1688

History | View | Annotate | Download (22.1 kB)

1 321 mdecorde
// Copyright © 2010-2013 ENS de Lyon.
2 321 mdecorde
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 321 mdecorde
// Lyon 2, University of Franche-Comté, University of Nice
4 321 mdecorde
// Sophia Antipolis, University of Paris 3.
5 321 mdecorde
//
6 321 mdecorde
// The TXM platform is free software: you can redistribute it
7 321 mdecorde
// and/or modify it under the terms of the GNU General Public
8 321 mdecorde
// License as published by the Free Software Foundation,
9 321 mdecorde
// either version 2 of the License, or (at your option) any
10 321 mdecorde
// later version.
11 321 mdecorde
//
12 321 mdecorde
// The TXM platform is distributed in the hope that it will be
13 321 mdecorde
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 321 mdecorde
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 321 mdecorde
// PURPOSE. See the GNU General Public License for more
16 321 mdecorde
// details.
17 321 mdecorde
//
18 321 mdecorde
// You should have received a copy of the GNU General
19 321 mdecorde
// Public License along with the TXM platform. If not, see
20 321 mdecorde
// http://www.gnu.org/licenses.
21 321 mdecorde
//
22 321 mdecorde
//
23 321 mdecorde
//
24 321 mdecorde
// $LastChangedDate:$
25 321 mdecorde
// $LastChangedRevision:$
26 321 mdecorde
// $LastChangedBy:$
27 321 mdecorde
//
28 986 mdecorde
package org.txm.scripts.importer.transcriber
29 321 mdecorde
30 321 mdecorde
import java.io.File;
31 321 mdecorde
import java.util.ArrayList;
32 321 mdecorde
33 321 mdecorde
import javax.xml.stream.*
34 321 mdecorde
35 1000 mdecorde
import org.txm.importer.ApplyXsl2
36 321 mdecorde
import org.txm.metadatas.Metadatas
37 321 mdecorde
import org.txm.utils.io.FileCopy;
38 321 mdecorde
39 321 mdecorde
40 321 mdecorde
// TODO: Auto-generated Javadoc
41 321 mdecorde
/** Builds the simple HTML edition of a Transcriber corpus text from its XML-TXM file.
42 321 mdecorde
 *
43 321 mdecorde
 *  @author mdecorde
44 321 mdecorde
 *
45 321 mdecorde
 */
46 321 mdecorde
class pager {
47 479 mdecorde
48 321 mdecorde
        boolean SIMPLE_TOOLTIP = false; // show less properties in word tooltips
49 321 mdecorde
        String ENQ_HIGHLIGHT_ELEMENT = "b"
50 479 mdecorde
51 321 mdecorde
        List<String> NoSpaceBefore;
52 321 mdecorde
53 321 mdecorde
        /** The No space after. */
54 321 mdecorde
        List<String> NoSpaceAfter;
55 321 mdecorde
56 321 mdecorde
        /** The pages. */
57 321 mdecorde
        def pages = [];
58 321 mdecorde
        def indexes = [];
59 321 mdecorde
60 321 mdecorde
        /** The wordcount. */
61 321 mdecorde
        int wordcount = 0;
62 321 mdecorde
63 321 mdecorde
        /** The pagecount. */
64 321 mdecorde
        int pagecount = 0;
65 321 mdecorde
66 321 mdecorde
        /** The wordmax. */
67 321 mdecorde
        int wordmax = 10;
68 321 mdecorde
69 321 mdecorde
        /** The wordid. */
70 321 mdecorde
        String wordid;
71 321 mdecorde
72 321 mdecorde
        /** The first word. */
73 321 mdecorde
        boolean firstWord = true;
74 321 mdecorde
75 321 mdecorde
        /** The wordvalue. */
76 321 mdecorde
        String wordvalue;
77 321 mdecorde
78 321 mdecorde
        /** The interpvalue. */
79 321 mdecorde
        String interpvalue;
80 321 mdecorde
81 321 mdecorde
        /** The lastword. */
82 321 mdecorde
        String lastword = " ";
83 321 mdecorde
84 321 mdecorde
        /** The wordtype. */
85 321 mdecorde
        String wordtype;
86 321 mdecorde
87 321 mdecorde
        /** The flagform. */
88 321 mdecorde
        boolean flagform = false;
89 321 mdecorde
90 321 mdecorde
        /** The flaginterp. */
91 321 mdecorde
        boolean flaginterp = false;
92 321 mdecorde
93 321 mdecorde
        boolean flagcomment = false;
94 321 mdecorde
95 321 mdecorde
        /** The url. */
96 321 mdecorde
        private def url;
97 321 mdecorde
98 321 mdecorde
        /** The input data. */
99 321 mdecorde
        private def inputData;
100 321 mdecorde
101 321 mdecorde
        /** The factory. */
102 321 mdecorde
        private def factory;
103 321 mdecorde
104 321 mdecorde
        /** The parser. */
105 321 mdecorde
        private XMLStreamReader parser;
106 321 mdecorde
107 321 mdecorde
        /** The writer. */
108 321 mdecorde
        XMLStreamWriter writer;
109 321 mdecorde
        BufferedOutputStream output;
110 321 mdecorde
111 321 mdecorde
        File txmfile;
112 321 mdecorde
113 321 mdecorde
        File outfile;
114 321 mdecorde
115 321 mdecorde
        String corpusname ="";
116 321 mdecorde
        String cuttingTag = "pb"
117 321 mdecorde
        String txtname;
118 321 mdecorde
        File htmlDir;
119 321 mdecorde
        File defaultDir;
120 321 mdecorde
        Metadatas metadatas;
121 321 mdecorde
122 321 mdecorde
        def interviewers = [];
123 321 mdecorde
        def eventTranslations = ["^^":"mot inconnu", "?":"orthographe incertaine",
124 321 mdecorde
                "()":"rupture de syntaxe", "b":"bruit indéterminé",
125 321 mdecorde
                "*":"mot corrigé",
126 321 mdecorde
                "bb":"bruit de bouche", "bg":"bruit de gorge",
127 321 mdecorde
                "ch":"voix chuchotée", "conv":"conversations de fond",
128 321 mdecorde
                "e":"expiration", "i":"inspiration",
129 321 mdecorde
                "mic":"bruits micro", "n":"reniflement",
130 321 mdecorde
                "nontrant":"non transcrit", "pap":"froissement de papiers",
131 321 mdecorde
                "pf":"souffle", "pi":"inintelligible",
132 321 mdecorde
                "pif":"inaudible", "r":"respiration",
133 321 mdecorde
                "rire":"rire du locuteur", "shh":"soufle électrique",
134 321 mdecorde
                "sif":"sifflement du locuteur", "tx":"toux"];
135 321 mdecorde
        String currentTime = "";
136 321 mdecorde
        boolean bold = false;
137 321 mdecorde
        int writenLength = 0;
138 321 mdecorde
        boolean spokenTurn = false;
139 321 mdecorde
        boolean firstSync = false;
140 321 mdecorde
        boolean firstWho = false;
141 321 mdecorde
        /**
         * Instantiates a new pager and immediately builds the HTML edition.
         *
         * Opens {@code txmfile} with a StAX reader, creates the "default" and
         * "onepage" sub-directories of {@code htmlDir} (the result of mkdir() is
         * not checked), opens "onepage/&lt;txtname&gt;.html" for writing and then
         * runs process(). An exception raised by process() is logged and not
         * rethrown; every resource opened by this constructor is then closed.
         *
         * @param txmfile the XML file to read
         * @param htmlDir the directory receiving the "default" and "onepage" sub-directories
         * @param txtname the text name, used to build the names of the HTML files
         * @param NoSpaceBefore the no space before list
         * @param NoSpaceAfter the no space after list
         * @param max the value stored in wordmax
         * @param corpusname the corpus name
         * @param cuttingTag the value stored in cuttingTag
         * @param metadatas the metadatas; process() skips the metadata table when null
         */
        pager(File txmfile, File htmlDir, String txtname, List<String> NoSpaceBefore,
        List<String> NoSpaceAfter, int max, String corpusname, String cuttingTag, Metadatas metadatas) {
                this.metadatas = metadatas
                this.wordmax = max;
                this.cuttingTag = cuttingTag;
                this.corpusname = corpusname;
                this.NoSpaceBefore = NoSpaceBefore;
                this.NoSpaceAfter = NoSpaceAfter;
                this.url = txmfile.toURI().toURL();
                this.txmfile = txmfile;
                this.htmlDir = htmlDir;
                this.txtname = txtname;

                inputData = url.openStream();
                factory = XMLInputFactory.newInstance();
                parser = factory.createXMLStreamReader(inputData);

                defaultDir = new File(htmlDir, "default")
                defaultDir.mkdir()
                new File(htmlDir, "onepage").mkdir()
                outfile = new File(htmlDir, "onepage/${txtname}.html");
                createOutput(outfile)

                try {
                        process();
                } catch(Exception e) {
                        org.txm.utils.logger.Log.printStackTrace(e);
                        // Release everything opened above. Each resource is closed on its
                        // own so that a null reference or a failing close() cannot prevent
                        // the remaining ones from being released. The writer goes first:
                        // closing an XMLStreamWriter or XMLStreamReader does not close the
                        // underlying stream, so both streams are closed explicitly too.
                        for (def resource : [writer, output, parser, inputData]) {
                                if (resource == null) continue
                                try {
                                        resource.close();
                                } catch (Exception closeError) {
                                        org.txm.utils.logger.Log.printStackTrace(closeError);
                                }
                        }
                }
        }
184 321 mdecorde
185 321 mdecorde
        /**
186 321 mdecorde
         * Creates the output.
187 321 mdecorde
         *
188 321 mdecorde
         * @param outfile the outfile
189 321 mdecorde
         * @return true, if successful
190 321 mdecorde
         */
191 321 mdecorde
        /**
         * Opens {@code outfile} for writing and installs it as the current output.
         *
         * On success the {@code output} and {@code writer} fields point to the new
         * stream and to a UTF-8 XMLStreamWriter built on top of it. On failure the
         * message of the exception is printed, both fields keep their previous
         * values and the stream opened by this call, if any, is closed so that no
         * file handle is leaked.
         *
         * @param outfile the file to write
         * @return true if the stream and the writer were created, false otherwise
         */
        private boolean createOutput(File outfile) {
                BufferedOutputStream stream = null;
                try {
                        //println "write html in : "+outfile
                        XMLOutputFactory outfactory = XMLOutputFactory.newInstance();
                        stream = new BufferedOutputStream(new FileOutputStream(outfile))
                        XMLStreamWriter created = outfactory.createXMLStreamWriter(stream, "UTF-8");//create a new file

                        // publish the fields only once both objects exist
                        output = stream
                        writer = created
                        return true;
                } catch (Exception e) {
                        System.out.println(e.getLocalizedMessage());
                        if (stream != null) {
                                // the file was opened but the writer could not be built
                                try {
                                        stream.close();
                                } catch (IOException closeError) {
                                        System.out.println(closeError.getLocalizedMessage());
                                }
                        }
                        return false;
                }
        }
204 479 mdecorde
205 321 mdecorde
        /**
         * Descriptions of the events currently open. process() appends the
         * description of an event whose extent is "begin" and removes the last
         * entry when it meets an event whose extent is "end"; events whose
         * description is "paroles rapportées" are not tracked here.
         */
206 321 mdecorde
        List<String> events = [];
207 321 mdecorde
        // Description of the last event process() met with extent "previous" and
        // with extent "next" respectively ("paroles rapportées" events excluded).
        String previousEvent = "", nextEvent = "";
208 321 mdecorde
        /**
209 321 mdecorde
         * Process.
210 321 mdecorde
         */
211 321 mdecorde
        void process() {
212 321 mdecorde
213 321 mdecorde
                String previousElem = "";
214 321 mdecorde
                boolean parolesRaportees = false;
215 321 mdecorde
                boolean firstWord = true;
216 321 mdecorde
                boolean shouldBreak = false;
217 321 mdecorde
                boolean overlapping = false;
218 321 mdecorde
                int nbBreak = 0;
219 321 mdecorde
                String previousSPK;
220 321 mdecorde
                String localname = "";
221 321 mdecorde
                ArrayList<String> whos = [];
222 321 mdecorde
                HashMap<String, String> speakers = new HashMap<String, String>();
223 321 mdecorde
                HashMap<String, String> topics = new HashMap<String, String>();
224 321 mdecorde
225 321 mdecorde
                writer.writeStartDocument("UTF-8","1.0");
226 321 mdecorde
                writer.writeStartElement("html");
227 321 mdecorde
                //<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
228 321 mdecorde
                writer.writeStartElement("meta");
229 321 mdecorde
                writer.writeAttribute("http-equiv", "Content-Type");
230 321 mdecorde
                writer.writeAttribute("content", "text/html");
231 321 mdecorde
                writer.writeAttribute("charset", "UTF-8");
232 321 mdecorde
                writer.writeEndElement();
233 321 mdecorde
                writer.writeStartElement("head");
234 321 mdecorde
235 321 mdecorde
                //<link rel="stylesheet" type="text/css" href="class.css" />
236 321 mdecorde
                writer.writeStartElement("link");
237 321 mdecorde
                writer.writeAttribute("rel", "stylesheet");
238 321 mdecorde
                writer.writeAttribute("type", "text/css");
239 321 mdecorde
                writer.writeAttribute("href", "transcriber.css");
240 321 mdecorde
                writer.writeEndElement();
241 321 mdecorde
                writer.writeEndElement();
242 321 mdecorde
243 321 mdecorde
                nbBreak++
244 321 mdecorde
                writer.writeStartElement("body");
245 321 mdecorde
                writer.writeEmptyElement("pb");
246 321 mdecorde
                writer.writeAttribute("id", ""+nbBreak);
247 321 mdecorde
                pages << new File(defaultDir, "${txtname}_${nbBreak}.html")
248 321 mdecorde
249 321 mdecorde
                for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
250 321 mdecorde
                        switch (event) {
251 321 mdecorde
                                case XMLStreamConstants.START_ELEMENT:
252 321 mdecorde
                                        localname = parser.getLocalName();
253 321 mdecorde
                                        switch (localname) {
254 321 mdecorde
                                                case "text":
255 321 mdecorde
                                                        writer.writeStartElement("h2");
256 321 mdecorde
                                                        writer.writeAttribute("class","titre");
257 321 mdecorde
                                                        String title = parser.getAttributeValue(null, "title");
258 321 mdecorde
                                                        if (title != null) {
259 321 mdecorde
                                                                writer.writeCharacters(title);
260 321 mdecorde
                                                        } else {
261 321 mdecorde
                                                                writer.writeCharacters("Transcription "+txmfile.getName().substring(0, txmfile.getName().length() - 4));
262 321 mdecorde
                                                        }
263 321 mdecorde
                                                        writer.writeEndElement();
264 321 mdecorde
                                                        if(metadatas != null) {
265 321 mdecorde
                                                                writer.writeStartElement("table");
266 321 mdecorde
                                                                boolean grey = false;
267 321 mdecorde
                                                                for (String name : metadatas.getPropertyNames()) {
268 321 mdecorde
                                                                        if ("title" == name) continue; // ignore "title" metadata
269 321 mdecorde
                                                                        grey = !grey;
270 321 mdecorde
                                                                        writer.writeStartElement("tr");
271 321 mdecorde
                                                                        if (grey)
272 321 mdecorde
                                                                                writer.writeAttribute("style","background-color:lightgrey;")
273 321 mdecorde
                                                                        String value = parser.getAttributeValue(null, name);
274 321 mdecorde
                                                                        if (value != null) {
275 321 mdecorde
                                                                                writer.writeStartElement("td");
276 321 mdecorde
                                                                                writer.writeCharacters(name);
277 321 mdecorde
                                                                                writer.writeEndElement();
278 321 mdecorde
                                                                                writer.writeStartElement("td");
279 321 mdecorde
                                                                                writer.writeCharacters(value);
280 321 mdecorde
                                                                                writer.writeEndElement();
281 321 mdecorde
                                                                        }
282 321 mdecorde
                                                                        //get enqueteur to style their names
283 321 mdecorde
                                                                        if (name.startsWith("enq")) {
284 321 mdecorde
                                                                                interviewers.add(value)
285 321 mdecorde
                                                                        }
286 321 mdecorde
                                                                        writer.writeEndElement();
287 321 mdecorde
                                                                }
288 321 mdecorde
                                                                writer.writeEndElement();
289 321 mdecorde
                                                        }
290 321 mdecorde
                                                        break;
291 321 mdecorde
                                                case "Topics":
292 321 mdecorde
                                                /*writer.writeStartElement("h2");
293 321 mdecorde
                                         writer.writeCharacters("Topics");
294 321 mdecorde
                                         writer.writeEndElement();
295 321 mdecorde
                                         writer.writeStartElement("ul");
296 321 mdecorde
                                         */
297 321 mdecorde
                                                        break;
298 321 mdecorde
                                                case "Topic":
299 321 mdecorde
                                                        topics.put(parser.getAttributeValue(null,"id"), parser.getAttributeValue(null,"desc"))
300 321 mdecorde
                                                /*writer.writeStartElement("li");
301 321 mdecorde
                                         writer.writeCharacters(parser.getAttributeValue(null,"desc"));
302 321 mdecorde
                                         writer.writeStartElement("ul");
303 321 mdecorde
                                         for(int i = 0 ; i < parser.getAttributeCount() ; i++)
304 321 mdecorde
                                         {
305 321 mdecorde
                                         if(parser.getAttributeLocalName(i) != "desc")
306 321 mdecorde
                                         {
307 321 mdecorde
                                         writer.writeStartElement("li");
308 321 mdecorde
                                         writer.writeCharacters(parser.getAttributeLocalName(i)+": "+parser.getAttributeValue(i));
309 321 mdecorde
                                         writer.writeEndElement();
310 321 mdecorde
                                         }
311 321 mdecorde
                                         }
312 321 mdecorde
                                         writer.writeEndElement();
313 321 mdecorde
                                         writer.writeEndElement();
314 321 mdecorde
                                         */
315 321 mdecorde
                                                        break;
316 321 mdecorde
                                                case "Speakers":
317 321 mdecorde
                                                /*writer.writeStartElement("h2");
318 321 mdecorde
                                         writer.writeCharacters("Speakers");
319 321 mdecorde
                                         writer.writeEndElement();
320 321 mdecorde
                                         writer.writeStartElement("ul");*/
321 321 mdecorde
                                                        break;
322 321 mdecorde
                                                case "Speaker":
323 321 mdecorde
                                                        whos.add(parser.getAttributeValue(null,"name"));
324 321 mdecorde
                                                        speakers.put(parser.getAttributeValue(null,"id"), parser.getAttributeValue(null,"name"))
325 321 mdecorde
                                                /*writer.writeStartElement("li");
326 321 mdecorde
                                         writer.writeStartElement("ul");
327 321 mdecorde
                                         writer.writeCharacters(parser.getAttributeValue(null,"name"));
328 321 mdecorde
                                         for(int i = 0 ; i < parser.getAttributeCount() ; i++)
329 321 mdecorde
                                         {
330 321 mdecorde
                                         if(parser.getAttributeLocalName(i) != "name")
331 321 mdecorde
                                         {
332 321 mdecorde
                                         writer.writeStartElement("li");
333 321 mdecorde
                                         writer.writeCharacters(parser.getAttributeLocalName(i)+": "+parser.getAttributeValue(i));
334 321 mdecorde
                                         writer.writeEndElement();
335 321 mdecorde
                                         }
336 321 mdecorde
                                         }
337 321 mdecorde
                                         writer.writeEndElement();
338 321 mdecorde
                                         writer.writeEndElement();*/
339 321 mdecorde
                                                        break;
340 321 mdecorde
                                                case "Comment":
341 321 mdecorde
                                                        spokenTurn = true;
342 321 mdecorde
                                                        writenLength++;
343 321 mdecorde
                                                        writer.writeStartElement("span");
344 321 mdecorde
                                                        writer.writeAttribute("class", "comment");
345 321 mdecorde
                                                        writer.writeCharacters(" ["+parser.getAttributeValue(0)+"] ");
346 321 mdecorde
                                                        writer.writeEndElement();
347 321 mdecorde
                                                        flagcomment = true;
348 321 mdecorde
                                                        break;
349 321 mdecorde
                                                case "div":
350 321 mdecorde
                                                        writer.writeStartElement("div");
351 321 mdecorde
                                                        writer.writeAttribute("class", "section");
352 321 mdecorde
                                                        String type = parser.getAttributeValue(null,"type");
353 321 mdecorde
                                                        String desc = parser.getAttributeValue(null,"topic");
354 321 mdecorde
                                                        if (type != null && desc != null) {
355 321 mdecorde
                                                                writer.writeStartElement("h3");
356 321 mdecorde
                                                                writer.writeCharacters(type+": "+desc);
357 321 mdecorde
                                                                for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
358 321 mdecorde
                                                                        String name = parser.getAttributeLocalName(i);
359 321 mdecorde
                                                                        if (!"type".equals(name)
360 321 mdecorde
                                                                        && !"topic".equals(name)
361 321 mdecorde
                                                                        && !"startTime".equals(name)
362 321 mdecorde
                                                                        && !"endTime".equals(name)) {
363 321 mdecorde
                                                                                writer.writeCharacters(", "+name+": "+parser.getAttributeValue(i));
364 321 mdecorde
                                                                        }
365 321 mdecorde
                                                                }
366 321 mdecorde
                                                                writer.writeEndElement(); // h3
367 321 mdecorde
                                                        }
368 321 mdecorde
                                                        break;
369 321 mdecorde
                                                case "sp":
370 321 mdecorde
                                                        endBoldIfNeeded()
371 321 mdecorde
                                                        firstSync = true;
372 321 mdecorde
                                                        firstWho = true;
373 321 mdecorde
                                                        spokenTurn = false;
374 321 mdecorde
                                                        overlapping = false
375 321 mdecorde
376 321 mdecorde
                                                        writer.writeStartElement("p");
377 321 mdecorde
                                                        writer.writeAttribute("class", "turn");
378 321 mdecorde
379 321 mdecorde
                                                        overlapping = ("true" == parser.getAttributeValue(null,"overlap"))
380 321 mdecorde
                                                        String spid = parser.getAttributeValue(null,"speaker");
381 321 mdecorde
                                                        whos = []
382 321 mdecorde
                                                        if (overlapping) {
383 321 mdecorde
                                                                writer.writeEmptyElement("br");
384 321 mdecorde
                                                                writeSpeaker(parser.getAttributeValue(null,"speaker"), false)
385 479 mdecorde
386 321 mdecorde
                                                                writer.writeEmptyElement("br");
387 321 mdecorde
                                                                whos = spid.split(" ")
388 321 mdecorde
                                                        }
389 321 mdecorde
390 321 mdecorde
                                                        break;
391 321 mdecorde
                                                case "u":
392 321 mdecorde
                                                        writer.writeCharacters("\n");
393 321 mdecorde
                                                        this.currentTime = parser.getAttributeValue(null,"time");
394 479 mdecorde
395 321 mdecorde
                                                        if (previousElem == "u" && writenLength == 0) { // if previous u had no words, it was a silence
396 321 mdecorde
                                                                writer.writeStartElement("span");
397 321 mdecorde
                                                                writer.writeAttribute("class", "event");
398 321 mdecorde
                                                                writer.writeCharacters("[silence]");
399 321 mdecorde
                                                                writer.writeEndElement(); // span
400 321 mdecorde
                                                                writer.writeEmptyElement("br");
401 321 mdecorde
                                                        }
402 479 mdecorde
403 321 mdecorde
                                                        String spk = parser.getAttributeValue(null, "spk")
404 321 mdecorde
                                                        if (spk != null && spk != previousSPK) {
405 321 mdecorde
                                                                endBoldIfNeeded()
406 321 mdecorde
                                                                writer.writeEmptyElement("br");
407 321 mdecorde
                                                                writeSpeaker(parser.getAttributeValue(null, "spk"), overlapping)
408 321 mdecorde
                                                                startBoldIfNeeded()
409 321 mdecorde
                                                        }
410 321 mdecorde
411 321 mdecorde
                                                        writeCurrentTime()
412 321 mdecorde
                                                        previousSPK = spk
413 321 mdecorde
414 321 mdecorde
                                                //                                                        writenLength = 0;
415 321 mdecorde
                                                /*writer.writeStartElement("span");
416 321 mdecorde
                                         writer.writeAttribute("class", "sync");
417 321 mdecorde
                                         writer.writeCharacters("["+parser.getAttributeValue(null,"time")+"]");
418 321 mdecorde
                                         writer.writeEndElement();*/
419 321 mdecorde
420 321 mdecorde
                                                        break;
421 321 mdecorde
                                                case "event":
422 321 mdecorde
                                                        spokenTurn = true;
423 321 mdecorde
                                                        writenLength++;
424 321 mdecorde
                                                        String desc = parser.getAttributeValue(null,"desc");
425 321 mdecorde
                                                        desc = translateEvent(desc);
426 321 mdecorde
                                                        String type = parser.getAttributeValue(null,"type");
427 321 mdecorde
                                                        if (desc.equals("paroles rapportées")) {
428 321 mdecorde
                                                                if (parser.getAttributeValue(null, "extent") == "end")
429 321 mdecorde
                                                                        writer.writeCharacters("» ");
430 321 mdecorde
                                                                else if (parser.getAttributeValue(null, "extent") == "begin")
431 321 mdecorde
                                                                        writer.writeCharacters(" «");
432 321 mdecorde
                                                        } else {
433 321 mdecorde
                                                                writer.writeStartElement("span");
434 321 mdecorde
                                                                writer.writeAttribute("class", "event");
435 321 mdecorde
                                                                if (parser.getAttributeValue(null, "extent") == "end") {
436 321 mdecorde
                                                                        writer.writeCharacters(" <"+desc+"] ");
437 321 mdecorde
                                                                        if(events.size() > 0)
438 321 mdecorde
                                                                                events.remove(events.size()-1)
439 321 mdecorde
                                                                }
440 321 mdecorde
                                                                else if (parser.getAttributeValue(null, "extent") == "begin")         {
441 321 mdecorde
442 321 mdecorde
                                                                        writer.writeCharacters(" ["+desc+"> ");
443 321 mdecorde
                                                                        events.add(desc)
444 321 mdecorde
                                                                }
445 321 mdecorde
                                                                else if (parser.getAttributeValue(null, "extent") == "previous") {
446 321 mdecorde
                                                                        if(parser.getAttributeValue(null, "type") == "pronounce")
447 321 mdecorde
                                                                                writer.writeCharacters("_["+desc+"] ");
448 321 mdecorde
                                                                        else
449 321 mdecorde
                                                                                writer.writeCharacters("_["+desc+"] ");
450 321 mdecorde
                                                                        previousEvent = desc;
451 321 mdecorde
                                                                }
452 321 mdecorde
                                                                else if (parser.getAttributeValue(null, "extent") == "next") {
453 321 mdecorde
                                                                        writer.writeCharacters(" ["+desc+"]_");
454 321 mdecorde
                                                                        nextEvent = desc
455 321 mdecorde
                                                                }
456 321 mdecorde
                                                                else
457 321 mdecorde
                                                                        writer.writeCharacters(" ["+desc+"] ");
458 321 mdecorde
                                                                writer.writeEndElement(); // span@class=event
459 321 mdecorde
                                                        }
460 321 mdecorde
                                                        break;
461 321 mdecorde
                                                case "w":
462 321 mdecorde
                                                        for(int i = 0 ; i < parser.getAttributeCount() ; i++)
463 321 mdecorde
                                                                if(parser.getAttributeLocalName(i) == "id") {
464 321 mdecorde
                                                                        wordid = (parser.getAttributeValue(i));
465 321 mdecorde
                                                                        break;
466 321 mdecorde
                                                                }
467 479 mdecorde
468 479 mdecorde
                                                        wordcount++;
469 479 mdecorde
                                                        if (wordcount >= wordmax) {
470 479 mdecorde
                                                                shouldBreak = true;
471 479 mdecorde
                                                        }
472 479 mdecorde
473 321 mdecorde
                                                        if (firstWord) {
474 321 mdecorde
                                                                indexes << wordid
475 321 mdecorde
                                                                firstWord = false;
476 321 mdecorde
                                                        }
477 479 mdecorde
478 321 mdecorde
                                                        break;
479 321 mdecorde
480 321 mdecorde
                                                case "ana":
481 479 mdecorde
482 321 mdecorde
                                                        String type = parser.getAttributeValue(null,"type").substring(1);
483 321 mdecorde
                                                        if (SIMPLE_TOOLTIP) {
484 321 mdecorde
                                                                if (type.contains("lemma") || type.contains("pos")) {
485 321 mdecorde
                                                                        flaginterp=true;
486 321 mdecorde
                                                                        interpvalue+=", ";
487 321 mdecorde
                                                                }
488 321 mdecorde
                                                        } else {
489 321 mdecorde
                                                                flaginterp=true;
490 321 mdecorde
                                                                interpvalue+=", "+type+"="
491 321 mdecorde
                                                        }
492 321 mdecorde
                                                        break;
493 321 mdecorde
494 321 mdecorde
                                                case "form":
495 321 mdecorde
                                                        wordvalue="";
496 321 mdecorde
                                                        interpvalue ="";
497 321 mdecorde
                                                        flagform=true;
498 321 mdecorde
                                                        break;
499 321 mdecorde
                                        }
500 321 mdecorde
                                        previousElem = localname;
501 321 mdecorde
                                        break;
502 321 mdecorde
                                case XMLStreamConstants.END_ELEMENT:
503 321 mdecorde
                                        localname = parser.getLocalName();
504 321 mdecorde
                                        switch(localname)
505 321 mdecorde
                                        {
506 321 mdecorde
                                                case "text":
507 321 mdecorde
                                                        break;
508 321 mdecorde
                                                case "Topics":
509 321 mdecorde
                                                //writer.writeEndElement();
510 321 mdecorde
                                                        break;
511 321 mdecorde
                                                case "Topic":
512 321 mdecorde
                                                        break;
513 321 mdecorde
                                                case "Speakers":
514 321 mdecorde
                                                //println "Speakers: "+speakers
515 321 mdecorde
                                                //writer.writeEndElement();
516 321 mdecorde
                                                        break;
517 321 mdecorde
                                                case "Speaker":
518 321 mdecorde
                                                        break;
519 321 mdecorde
520 321 mdecorde
                                                case "div":
521 321 mdecorde
                                                //writer.writeCharacters("}");
522 479 mdecorde
523 321 mdecorde
                                                        writer.writeEndElement(); // div
524 321 mdecorde
                                                        writer.writeCharacters("\n");
525 321 mdecorde
                                                        break;
526 321 mdecorde
                                                case "sp":
527 479 mdecorde
                                                //println "CLOSING: "+parser.getLocalName()
528 321 mdecorde
                                                        endBoldIfNeeded()
529 321 mdecorde
                                                        if (!spokenTurn) {
530 321 mdecorde
                                                                writer.writeStartElement("span");
531 321 mdecorde
                                                                writer.writeAttribute("class", "event");
532 321 mdecorde
                                                                writer.writeCharacters("[silence]");
533 321 mdecorde
                                                                writer.writeEndElement();
534 321 mdecorde
                                                                writer.writeEmptyElement("br");
535 321 mdecorde
                                                        }
536 321 mdecorde
537 321 mdecorde
                                                        writer.writeEndElement(); // p
538 321 mdecorde
539 321 mdecorde
                                                        if (shouldBreak) {
540 321 mdecorde
                                                                nbBreak++
541 321 mdecorde
                                                                writer.writeEmptyElement("pb");
542 321 mdecorde
                                                                writer.writeAttribute("id", ""+nbBreak);
543 321 mdecorde
                                                                writer.writeCharacters("\n");
544 479 mdecorde
545 321 mdecorde
                                                                pages << new File(defaultDir, "${txtname}_${nbBreak}.html")
546 321 mdecorde
                                                                indexes << wordid
547 321 mdecorde
548 321 mdecorde
                                                                wordcount = 0;
549 321 mdecorde
                                                                shouldBreak = false;
550 321 mdecorde
                                                        }
551 321 mdecorde
                                                        writer.writeCharacters("\n");
552 321 mdecorde
                                                        break;
553 321 mdecorde
                                                case "u":
554 479 mdecorde
                                                //writer.writeEndElement() // span@class=u
555 479 mdecorde
                                                //writer.writeEmptyElement("br");
556 321 mdecorde
                                                //if (overlapping) writer.writeEndElement(); // b
557 321 mdecorde
                                                        break;
558 321 mdecorde
                                                case "event":
559 321 mdecorde
                                                        break;
560 321 mdecorde
                                                case "form":
561 321 mdecorde
                                                        flagform = false
562 321 mdecorde
                                                        break;
563 321 mdecorde
                                                case "ana":
564 321 mdecorde
                                                        flaginterp = false
565 321 mdecorde
                                                        break;
566 321 mdecorde
                                                case "w":
567 321 mdecorde
                                                        writenLength++;
568 321 mdecorde
                                                        spokenTurn = true;
569 321 mdecorde
                                                        int l = lastword.length();
570 321 mdecorde
                                                        String endOfLastWord = "";
571 321 mdecorde
                                                        if(l > 0)
572 321 mdecorde
                                                                endOfLastWord = lastword.subSequence(l-1, l);
573 321 mdecorde
574 321 mdecorde
                                                        if(interpvalue != null)
575 321 mdecorde
                                                                interpvalue = interpvalue.replace("\"","&quot;");
576 321 mdecorde
                                                        if(events.size() > 0)
577 321 mdecorde
                                                                interpvalue = interpvalue.replace("event=", "event="+events.toString().replace("\"","&quot;")); // remove ", "
578 321 mdecorde
579 321 mdecorde
                                                        if(nextEvent.length() > 0)
580 321 mdecorde
                                                        {
581 321 mdecorde
                                                                interpvalue = interpvalue.replace("event=", "event="+nextEvent+", ")
582 321 mdecorde
                                                                nextEvent = ""
583 321 mdecorde
                                                        }
584 321 mdecorde
                                                        interpvalue = interpvalue.replace("=, ","='', "); // add '' to empty interp value
585 321 mdecorde
                                                        if (interpvalue.startsWith(", ")) interpvalue = interpvalue.substring(2)
586 479 mdecorde
                                                //                                                        println "** SPACE TEST"
587 479 mdecorde
                                                //                                                        println "NoSpaceBefore: "+NoSpaceBefore+" contains ? "+wordvalue
588 479 mdecorde
                                                //                                                        println "NoSpaceAfter: "+NoSpaceAfter+" contains ? "+lastword
589 479 mdecorde
                                                //                                                        println "wordvalue starts with '-' ? "+wordvalue
590 479 mdecorde
                                                //                                                        println "NoSpaceAfter: "+NoSpaceAfter+" contains endOfLastWord ? "+endOfLastWord
591 321 mdecorde
                                                        if(NoSpaceBefore.contains(wordvalue) ||
592 321 mdecorde
                                                        NoSpaceAfter.contains(lastword) ||
593 321 mdecorde
                                                        wordvalue.startsWith("-") ||
594 321 mdecorde
                                                        NoSpaceAfter.contains(endOfLastWord)) {
595 479 mdecorde
                                                                //                                                                println " NO SPACE"
596 321 mdecorde
                                                        } else {
597 479 mdecorde
                                                                //                                                                println " SPACE"
598 321 mdecorde
                                                                writer.writeCharacters(" ");
599 321 mdecorde
                                                        }
600 321 mdecorde
601 321 mdecorde
                                                        if (interpvalue.contains("rapp1")) {
602 321 mdecorde
                                                                writer.writeCharacters(" «");
603 321 mdecorde
                                                        } else if (wordvalue == "\"") {
604 321 mdecorde
                                                                // don't write this char
605 321 mdecorde
                                                        } else {
606 321 mdecorde
                                                                writer.writeStartElement("span");
607 321 mdecorde
                                                                writer.writeAttribute("class", "word");
608 321 mdecorde
                                                                writer.writeAttribute("title", interpvalue);
609 321 mdecorde
                                                                writer.writeAttribute("id", wordid);
610 321 mdecorde
                                                                writer.writeCharacters(wordvalue);
611 321 mdecorde
                                                                writer.writeEndElement();
612 321 mdecorde
                                                        }
613 321 mdecorde
                                                        if (interpvalue.contains("orth")) {
614 321 mdecorde
                                                                writer.writeStartElement("span");
615 321 mdecorde
                                                                writer.writeAttribute("class", "event");
616 321 mdecorde
                                                                writer.writeCharacters("_[?]");
617 321 mdecorde
                                                                writer.writeEndElement();
618 321 mdecorde
                                                        }
619 321 mdecorde
                                                        if (interpvalue.contains("corr")) {
620 321 mdecorde
                                                                writer.writeStartElement("span");
621 321 mdecorde
                                                                writer.writeAttribute("class", "event");
622 321 mdecorde
                                                                writer.writeCharacters("_[!]");
623 321 mdecorde
                                                                writer.writeEndElement();
624 321 mdecorde
                                                        }
625 321 mdecorde
626 321 mdecorde
                                                        if (interpvalue.contains("rapp2")) {
627 321 mdecorde
                                                                writer.writeCharacters(" » ");
628 321 mdecorde
                                                        }
629 479 mdecorde
630 321 mdecorde
                                                        lastword=wordvalue;
631 321 mdecorde
                                                        break;
632 321 mdecorde
                                        }
633 479 mdecorde
634 321 mdecorde
                                        break;
635 321 mdecorde
636 321 mdecorde
                                case XMLStreamConstants.CHARACTERS:
637 321 mdecorde
                                        if(flagform)
638 321 mdecorde
                                                if(parser.getText().length() > 0)
639 321 mdecorde
                                                        wordvalue+=(parser.getText().trim());
640 321 mdecorde
                                        if(flaginterp)
641 321 mdecorde
                                                if(parser.getText().length() > 0)
642 321 mdecorde
                                                        interpvalue+=(parser.getText().trim());
643 321 mdecorde
                                        break;
644 321 mdecorde
                        }
645 321 mdecorde
                }
646 321 mdecorde
                writer.writeEndElement();
647 321 mdecorde
648 321 mdecorde
                writer.writeEmptyElement("pb");
649 321 mdecorde
                nbBreak++
650 321 mdecorde
                writer.writeAttribute("id", ""+nbBreak);
651 321 mdecorde
652 321 mdecorde
                writer.writeEndElement();
653 321 mdecorde
                writer.close();
654 321 mdecorde
                output.close();
655 1688 mdecorde
                if (parser != null) parser.close();
656 1688 mdecorde
                if (inputData != null) inputData.close();
657 321 mdecorde
658 878 sjacqu01
                File txmhome = new File(org.txm.Toolbox.getTxmHomePath());
659 321 mdecorde
                File xlsDir  = new File(txmhome, "xsl");
660 321 mdecorde
                File xslfile = new File(xlsDir,"breakByMilestone.xsl");
661 321 mdecorde
                if (!xslfile.exists()) {
662 321 mdecorde
                        println ""
663 321 mdecorde
                }
664 321 mdecorde
                //                println "xsl: "+xslfile
665 321 mdecorde
                //                println "html: "+outfile
666 321 mdecorde
                //                println "pages: "+pages
667 321 mdecorde
                //                println "words: "+indexes
668 321 mdecorde
669 321 mdecorde
670 479 mdecorde
                if (pages.size() > 1) {
671 479 mdecorde
                        for (int i = 1 ; i < nbBreak ; i++) {
672 479 mdecorde
                                ApplyXsl2 a = new ApplyXsl2(xslfile.getAbsolutePath());
673 479 mdecorde
                                String[] params = ["pbval1", i,"pbval2", i+1];
674 479 mdecorde
675 479 mdecorde
                                File resultfile = pages[i-1]
676 479 mdecorde
                                //println "BBmilestones: "+i+" "+(i+1)+" in file "+resultfile
677 479 mdecorde
                                //println "process $outfile -> $resultfile"
678 479 mdecorde
                                a.process(outfile.getAbsolutePath(), resultfile.getAbsolutePath(), params);
679 479 mdecorde
                        }
680 479 mdecorde
                } else {
681 479 mdecorde
                        File page = pages[0]
682 479 mdecorde
                        FileCopy.copy(outfile, page)
683 479 mdecorde
                }
684 321 mdecorde
        }
685 321 mdecorde
686 321 mdecorde
        /**
         * Writes the current time marker to the output as a
         * {@code <span class="sync">} element whose text is {@code currentTime}.
         */
        private void writeCurrentTime() {
                writer.writeStartElement("span")
                writer.writeAttribute("class", "sync")
                writer.writeCharacters(currentTime)
                writer.writeEndElement() // span@class=sync
        }
693 321 mdecorde
694 321 mdecorde
        /**
         * Writes a speaker label as a {@code <span class="spk">} element and
         * updates the {@code bold} flag for the turn.
         *
         * @param spk the speaker identifier; {@code bold} is set according to
         *        whether {@code interviewers} contains this raw identifier
         * @param overlapping when true, the label is prefixed with "// "
         */
        private void writeSpeaker(String spk, boolean overlapping) {
                writer.writeStartElement("span")
                writer.writeAttribute("class", "spk")

                // The membership test uses the identifier as received, before it is reformatted.
                bold = interviewers.contains(spk)

                // Insert a space between the non-digit prefix and a trailing number.
                String label = spk.replaceAll('^([^0-9]*)([0-9]+)$', '$1 $2')

                if (overlapping) {
                        writer.writeCharacters("// ")
                }
                writer.writeCharacters(label + ": ")
                writer.writeEndElement() // span@class=spk
        }
707 321 mdecorde
708 321 mdecorde
        /**
         * Looks up an event description in {@code eventTranslations}.
         *
         * @param desc the event description to translate
         * @return the mapped value when {@code desc} is a key of
         *         {@code eventTranslations}, otherwise {@code desc} unchanged
         */
        private String translateEvent(String desc) {
                return eventTranslations.containsKey(desc) ? eventTranslations.get(desc) : desc
        }
714 479 mdecorde
715 321 mdecorde
        // True while the highlight element opened by startBoldIfNeeded() has not yet been closed by endBoldIfNeeded().
        boolean boldOpenned = false;
716 321 mdecorde
        /**
         * Opens the highlight element ({@code ENQ_HIGHLIGHT_ELEMENT}) when the
         * {@code bold} flag is set, and records that it is open in
         * {@code boldOpenned}. Does nothing when {@code bold} is false.
         */
        private void startBoldIfNeeded() {
                if (!bold) {
                        return
                }
                writer.writeStartElement(ENQ_HIGHLIGHT_ELEMENT)
                boldOpenned = true
        }
722 479 mdecorde
723 321 mdecorde
        /**
         * Closes the highlight element if one is currently open (as tracked by
         * {@code boldOpenned}) and clears the flag. Does nothing otherwise.
         */
        private endBoldIfNeeded() {
                if (!boldOpenned) {
                        return
                }
                writer.writeEndElement() // b
                boldOpenned = false
        }
730 321 mdecorde
731 321 mdecorde
        //        private String formatTime(float time, boolean doshort)
732 321 mdecorde
        //        {
733 321 mdecorde
        //                String rez = " ";
734 321 mdecorde
        //                //                if(time >= 3600) // >= 1h
735 321 mdecorde
        //                //                {
736 321 mdecorde
        //                float h = time / 3600;
737 321 mdecorde
        //                time = time%3600;
738 321 mdecorde
        //                float min = (time%3600) / 60;
739 321 mdecorde
        //                int sec = (int)time%60;
740 321 mdecorde
        //
741 321 mdecorde
        //                if(min < 10)
742 321 mdecorde
        //                        rez = ""+(int)h+":0"+(int)min;//+":"+time%60;
743 321 mdecorde
        //                else
744 321 mdecorde
        //                        rez = ""+(int)h+":"+(int)min;//+":"+time%60;
745 321 mdecorde
        //                //if (!doshort)
746 321 mdecorde
        //                if (sec > 9)
747 321 mdecorde
        //                        rez += ":"+(int)time%60;
748 321 mdecorde
        //                else
749 321 mdecorde
        //                        rez += ":0"+(int)time%60;
750 321 mdecorde
        //                //                }
751 321 mdecorde
        //                //                else if(time >= 60) // >= 1min
752 321 mdecorde
        //                //                {
753 321 mdecorde
        //                //                        int min = time/60;
754 321 mdecorde
        //                //                        if(min < 10)
755 321 mdecorde
        //                //                                rez = "00:0"+min;//+":"+time%60;
756 321 mdecorde
        //                //                        else
757 321 mdecorde
        //                //                                rez = "00:"+min;//+":"+time%60;
758 321 mdecorde
        //                //                        if(!doshort)
759 321 mdecorde
        //                //                                rez += ":"+(int)time%60;
760 321 mdecorde
        //                //                }
761 321 mdecorde
        //                //                else // < 60
762 321 mdecorde
        //                //                {
763 321 mdecorde
        //                //                        if(time < 10)
764 321 mdecorde
        //                //                                return " 0:0"+time;
765 321 mdecorde
        //                //                        else
766 321 mdecorde
        //                //                                return " 0:"+time;
767 321 mdecorde
        //                //                }
768 321 mdecorde
        //                return rez;
769 321 mdecorde
        //        }
770 321 mdecorde
771 321 mdecorde
        /**
         * Returns the page files registered by this pager.
         *
         * @return the {@code pages} list itself (no copy is made)
         */
        public ArrayList<File> getPageFiles() {
                return pages
        }
779 321 mdecorde
780 321 mdecorde
        /**
         * Returns the word ids recorded while paging: the id appended for the
         * first word and the current word id appended at each page break.
         *
         * @return the {@code indexes} list itself (no copy is made)
         */
        public ArrayList<String> getIdx() {
                return indexes
        }
788 321 mdecorde
}