Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / importer / transcriber / ConcatTrs.groovy @ 187

History | View | Annotate | Download (11.2 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate:$
25
// $LastChangedRevision:$
26
// $LastChangedBy:$
27
//
28
package org.txm.importer.transcriber
29

    
30
import javax.xml.parsers.DocumentBuilder;
31
import javax.xml.parsers.DocumentBuilderFactory;
32
import javax.xml.parsers.ParserConfigurationException;
33
import javax.xml.transform.*;
34

    
35
import javax.xml.stream.*;
36
import java.io.File;
37
import java.net.URL;
38
import java.text.DecimalFormat;
39

    
40
/**
 * Concatenates two Transcriber (.trs) transcriptions into a single file.
 *
 * The work is done by the constructor, which reads each input several times:
 * <ol>
 * <li>the speakers declared in both headers are compared; if they differ the
 * concatenation is abandoned;</li>
 * <li>the "endTime" of the last "Turn" of each input is read; the value found in the
 * first input is the time shift applied to the second one;</li>
 * <li>the first input is copied without closing its "Trans" and "Episode" elements;</li>
 * <li>the second input is copied without its header elements, with every "time",
 * "startTime" and "endTime" attribute shifted; its own "Episode" and "Trans" end
 * tags close the elements left open by the first copy.</li>
 * </ol>
 *
 * Only start tags, attributes, end tags and character data are copied: the copy loops
 * do not handle comments, processing instructions or namespace declarations.
 */
class ConcatTrs {

	/** Elements whose start AND end tags are dropped when copying the second input. */
	private static final List<String> SECOND_PART_HEADER = ["Topics", "Topic", "Speakers", "Speaker"];

	/**
	 * Elements left open by the first copy: their end tags are not written for the first
	 * input and their start tags are not written for the second input.
	 */
	private static final List<String> SHARED_CONTAINERS = ["Trans", "Episode"];

	/** Attributes holding a time value that must be shifted in the second input. */
	private static final List<String> TIME_ATTRIBUTES = ["time", "endTime", "startTime"];

	/** Local name of the element most recently visited by one of the parsing loops. */
	String localname;

	/** Prefix of the element most recently visited by one of the parsing loops. */
	String prefix;

	/** Streams currently opened on the first and on the second input file. */
	InputStream inputData1, inputData2;

	/** StAX readers currently reading {@link #inputData1} and {@link #inputData2}. */
	XMLStreamReader parser1, parser2;

	/** Buffered stream writing to the result file. */
	BufferedOutputStream output;

	/** StAX writer producing the concatenated document on {@link #output}. */
	XMLStreamWriter writer;

	/** Factory used to create every reader. */
	XMLInputFactory inputfactory = XMLInputFactory.newInstance();

	/** Factory used to create the writer. */
	XMLOutputFactory factory = XMLOutputFactory.newInstance();

	/** Not used by this class; kept because it is part of the class' public properties. */
	HashMap<File, String> speakers = new HashMap<File, String>();

	/**
	 * Concatenates {@code trs1} and {@code trs2} into {@code result}.
	 *
	 * The result file is created (or truncated) before the speakers are compared, so it
	 * exists but is empty when the speakers differ. Every stream, reader and writer opened
	 * here is closed before the constructor returns, whatever the outcome.
	 *
	 * @param trs1 the transcription copied first
	 * @param trs2 the transcription appended after {@code trs1}, with shifted times
	 * @param result the file receiving the concatenation
	 */
	public ConcatTrs(File trs1, File trs2, File result)
	{
		try {
			openFirstInput(trs1);
			openSecondInput(trs2);

			output = new BufferedOutputStream(new FileOutputStream(result));
			writer = factory.createXMLStreamWriter(output, "UTF-8");

			// Both headers must declare the same speakers.
			if (!testSpeakers()) {
				println "stop"
				return;
			}

			// A StAX reader is forward-only: each pass needs a freshly opened input.
			openFirstInput(trs1);
			float shift = getLastTime(parser1);

			openSecondInput(trs2);
			float end = getLastTime(parser2);

			end += shift;
			println "shift: $shift ; end: $end"

			// Copy the first input, leaving "Trans" and "Episode" open.
			openFirstInput(trs1);
			writeFirstPartAndGetLastTime(parser1, end);

			// Append the body of the second input with shifted times.
			openSecondInput(trs2);
			writeSecondPartAndShiftTime(parser2, shift);

			writer.flush();
		} finally {
			closeAll();
		}
	}

	/**
	 * Closes whatever is currently opened on the first input, then opens {@code trs}
	 * again and stores the new stream and reader in {@link #inputData1} and {@link #parser1}.
	 *
	 * @param trs the first input file
	 */
	private void openFirstInput(File trs)
	{
		closeInput(parser1, inputData1);
		// Forget the closed objects so that a failure below does not leave them in the fields.
		parser1 = null;
		inputData1 = null;
		inputData1 = trs.toURI().toURL().openStream();
		parser1 = inputfactory.createXMLStreamReader(inputData1);
	}

	/**
	 * Closes whatever is currently opened on the second input, then opens {@code trs}
	 * again and stores the new stream and reader in {@link #inputData2} and {@link #parser2}.
	 *
	 * @param trs the second input file
	 */
	private void openSecondInput(File trs)
	{
		closeInput(parser2, inputData2);
		parser2 = null;
		inputData2 = null;
		inputData2 = trs.toURI().toURL().openStream();
		parser2 = inputfactory.createXMLStreamReader(inputData2);
	}

	/**
	 * Closes a reader and the stream it reads. The stream is closed explicitly because
	 * XMLStreamReader.close() does not close the underlying input source.
	 *
	 * @param reader the reader to close, may be null
	 * @param stream the stream to close, may be null
	 */
	private static void closeInput(XMLStreamReader reader, InputStream stream)
	{
		try {
			if (reader != null) reader.close();
		} finally {
			if (stream != null) stream.close();
		}
	}

	/**
	 * Closes the writer, the output stream and both inputs. Each close is attempted even
	 * when a previous one failed; failures are propagated, not swallowed.
	 */
	private void closeAll()
	{
		try {
			if (writer != null) writer.close();
		} finally {
			try {
				if (output != null) output.close();
			} finally {
				try {
					closeInput(parser1, inputData1);
				} finally {
					closeInput(parser2, inputData2);
				}
			}
		}
	}

	/**
	 * Builds the name passed to the writer for an element or an attribute.
	 *
	 * A missing prefix is accepted both as null and as the empty string, so that the
	 * result never starts with "null:" or ":".
	 *
	 * @param prefix the prefix reported by the reader, may be null or empty
	 * @param local the local name reported by the reader
	 * @return "prefix:local" when a prefix is present, {@code local} otherwise
	 */
	private static String qualifiedName(String prefix, String local)
	{
		if (prefix != null && prefix.length() > 0)
			return prefix + ":" + local;
		return local;
	}

	/**
	 * Writes the start tag of the element the reader is positioned on, using the values
	 * already stored in {@link #prefix} and {@link #localname}. Attributes are not written.
	 */
	private void writeCurrentStartElement()
	{
		writer.writeStartElement(qualifiedName(prefix, localname));
	}

	/**
	 * Copies every attribute of the current element to the writer.
	 *
	 * @param parser the reader positioned on a start element
	 * @param shift value added to the time attributes; ignored when {@code format} is null
	 * @param format formatter of the shifted times, or null to copy all values unchanged
	 */
	private void copyAttributes(XMLStreamReader parser, float shift, DecimalFormat format)
	{
		for (int i = 0; i < parser.getAttributeCount(); i++) {
			String name = parser.getAttributeLocalName(i);
			String value = parser.getAttributeValue(i);

			if (format != null && name in TIME_ATTRIBUTES) {
				// Computed in double: a float cannot hold millisecond precision for long recordings.
				double shifted = Double.parseDouble(value) + shift;
				// The formatter follows the default locale, which may use ',' as decimal separator.
				value = format.format(shifted).replace(",", ".");
			}

			writer.writeAttribute(qualifiedName(parser.getAttributePrefix(i), name), value);
		}
	}

	/**
	 * Compares the speakers declared by the documents read by {@link #parser1} and
	 * {@link #parser2}, and prints the outcome.
	 *
	 * Two documents in which no "Speakers" end tag is found both yield null and are
	 * therefore reported as having the same speakers.
	 *
	 * @return true if both speaker descriptions are equal
	 */
	protected boolean testSpeakers()
	{
		String speakers1 = getSpeaker(parser1);
		String speakers2 = getSpeaker(parser2);

		// Groovy '==' on Strings is a null-safe equals(), not an identity test.
		boolean ret = speakers1 == speakers2;

		if (ret) {
			println "speakers OK $speakers1 and $speakers2"
		} else {
			println "Speakers are differents : "
			println "  speakers1= $speakers1"
			println "  speakers2= $speakers2"
		}
		return ret;
	}

	/**
	 * Builds a description of the speakers declared in a document.
	 *
	 * Each "Speaker" element contributes "(id-name)"; the entries are sorted so that the
	 * declaration order does not matter. Reading stops at the "Speakers" end tag.
	 *
	 * @param parser the reader to consume
	 * @return the sorted list of "(id-name)" entries as a String, or null when the
	 * document ends before a "Speakers" end tag is found
	 */
	protected String getSpeaker(XMLStreamReader parser)
	{
		List<String> spksvalues = [];
		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
		{
			if (event == XMLStreamConstants.START_ELEMENT) {
				localname = parser.getLocalName();
				if (localname == "Speaker") {
					spksvalues << "(" + parser.getAttributeValue(null, "id") + "-" + parser.getAttributeValue(null, "name") + ")";
				}
			} else if (event == XMLStreamConstants.END_ELEMENT) {
				localname = parser.getLocalName();
				if (localname == "Speakers") {
					Collections.sort(spksvalues);
					return spksvalues.toString();
				}
			}
		}
		return null;
	}

	/**
	 * Reads a whole document and returns the "endTime" of its last "Turn" element that
	 * has one.
	 *
	 * @param parser the reader to consume
	 * @return the last "endTime" found, or 0 when no "Turn" carries this attribute
	 */
	protected float getLastTime(XMLStreamReader parser)
	{
		float timef = 0.0f

		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
		{
			if (event != XMLStreamConstants.START_ELEMENT)
				continue;

			localname = parser.getLocalName();
			prefix = parser.getPrefix();

			if (localname == "Turn") {
				String time = parser.getAttributeValue(null, "endTime")
				if (time != null)
					timef = Float.parseFloat(time)
			}
		}
		return timef;
	}

	/**
	 * Writes the XML declaration, the Transcriber DOCTYPE and a copy of the first
	 * document in which the "Trans" and "Episode" end tags are omitted, so that the
	 * second document can be appended inside them.
	 *
	 * Despite its name this method returns nothing.
	 *
	 * @param parser the reader positioned at the beginning of the first document
	 * @param end the end time of the concatenation; not used by the current implementation
	 */
	protected void writeFirstPartAndGetLastTime(XMLStreamReader parser, float end)
	{
		println "First part"

		writer.writeStartDocument("UTF-8", "1.0");
		writer.writeCharacters("\n")
		writer.writeDTD("<!DOCTYPE Trans SYSTEM \"trans-14.dtd\">")
		writer.writeCharacters("\n")

		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
		{
			switch (event)
			{
				case XMLStreamConstants.START_ELEMENT:
					localname = parser.getLocalName();
					prefix = parser.getPrefix();
					writeCurrentStartElement();
					copyAttributes(parser, 0.0f, null);
					break;

				case XMLStreamConstants.END_ELEMENT:
					localname = parser.getLocalName()
					// don't close the transcription here
					if (!(localname in SHARED_CONTAINERS))
						writer.writeEndElement();
					break;

				case XMLStreamConstants.CHARACTERS:
					writer.writeCharacters(parser.getText());
					break;
			}
		}
	}

	/**
	 * Appends the second document to the output.
	 *
	 * The start tags of "Trans", "Episode", "Topics", "Topic", "Speakers" and "Speaker"
	 * are skipped. The end tags of "Topics", "Topic", "Speakers" and "Speaker" are
	 * skipped too, whereas the end tags of "Episode" and "Trans" are written: they close
	 * the elements left open by {@link #writeFirstPartAndGetLastTime}. Whitespace-only
	 * character data is dropped. The "time", "startTime" and "endTime" attributes are
	 * increased by {@code shift} and written with three decimals.
	 *
	 * @param parser the reader positioned at the beginning of the second document
	 * @param shift the value added to every time attribute
	 */
	protected void writeSecondPartAndShiftTime(XMLStreamReader parser, float shift)
	{
		println "Second part"
		DecimalFormat f = new DecimalFormat("#.000"); //$NON-NLS-1$

		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
		{
			switch (event)
			{
				case XMLStreamConstants.START_ELEMENT:
					localname = parser.getLocalName();
					prefix = parser.getPrefix();

					// skip header
					if (localname in SHARED_CONTAINERS || localname in SECOND_PART_HEADER)
						break;

					writeCurrentStartElement();
					copyAttributes(parser, shift, f);
					break;

				case XMLStreamConstants.END_ELEMENT:
					localname = parser.getLocalName()
					if (localname in SECOND_PART_HEADER)
						break;
					writer.writeEndElement();
					break;

				case XMLStreamConstants.CHARACTERS:
					if (parser.getText().trim().length() > 0)
						writer.writeCharacters(parser.getText());
					break;
			}
		}
	}

	/**
	 * Copies a whole document to the writer without any modification, preceded by an
	 * XML declaration.
	 *
	 * The original no-argument form read an undeclared "parser" variable and could only
	 * fail; the reader is now an optional parameter.
	 * NOTE(review): defaulting to {@link #parser1} is an assumption about the intended
	 * input - confirm. Nothing in this class calls this method.
	 *
	 * @param parser the reader to copy from, {@link #parser1} when omitted
	 */
	protected void process(XMLStreamReader parser = parser1)
	{
		writer.writeStartDocument("UTF-8", "1.0");

		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
		{
			switch (event)
			{
				case XMLStreamConstants.START_ELEMENT:
					localname = parser.getLocalName();
					prefix = parser.getPrefix();
					writeCurrentStartElement();
					copyAttributes(parser, 0.0f, null);
					break;

				case XMLStreamConstants.END_ELEMENT:
					localname = parser.getLocalName()
					writer.writeEndElement();
					break;

				case XMLStreamConstants.CHARACTERS:
					writer.writeCharacters(parser.getText());
					break;
			}
		}
	}

	/**
	 * Concatenates two hard-coded files of the "xml/concattrs/new" directory of the user
	 * home, then checks the result with ValidateTRS.
	 *
	 * @param args the arguments, not used
	 */
	public static void main(String[] args)
	{
		File homedir = new File(System.getProperty("user.home"),"xml/concattrs/new");

		// Earlier file sets, kept for reference:
		//                File trs1 = new File(homedir, "int17A.trs")
		//                File trs2 = new File(homedir, "int17B.trs")
		//                File trs3 = new File(homedir, "int17C.trs")
		//                File trs4 = new File(homedir, "int17D.trs")
		//                File tmp1 = new File(homedir, "int17tmp1.trs")
		//                File tmp2 = new File(homedir, "int17tmp2.trs")
		//                File trs =  new File(homedir, "int17.trs")

		//                File trs1 = new File(homedir, "int24.trs")
		//                File trs2 = new File(homedir, "int24-2.trs")
		//                File trs3 = new File(homedir, "int24-3.trs")
		//                File trs4 = new File(homedir, "int24-4.trs")
		//                File trs5 = new File(homedir, "int24-5.trs")
		//                File trs6 = new File(homedir, "int24-6.trs")
		//                File tmp1 = new File(homedir, "int24tmp1.trs")
		//                File tmp2 = new File(homedir, "int24tmp2.trs")
		//                File tmp3 = new File(homedir, "int24tmp3.trs")
		//                File tmp4 = new File(homedir, "int24tmp4.trs")
		//                File trs =  new File(homedir, "int24-corr.trs")

		File trs1 = new File(homedir, "int14.trs")
		File trs2 = new File(homedir, "int14bis.trs")
		File trs =  new File(homedir, "int14-corr.trs")

		new ConcatTrs(trs1, trs2, trs)

		ValidateTRS.checkTRS(trs)
	}
}