Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / macro / transcription / FixTransanaTimings.groovy @ 187

History | View | Annotate | Download (6.5 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
//
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
//
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
//
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21

    
22
package org.txm.macro.transcription
23

    
24
import java.text.SimpleDateFormat;
25
import java.util.Date;
26

    
27
/**
 * Repairs the timing attributes of an XML transcription whose structure is
 * Trans / Episode / Section / Turn / Sync:
 * <ul>
 * <li>upper-cases the Turn speaker ids and declares them under the Speakers element,</li>
 * <li>fills the missing Sync "time" attributes by linear interpolation,</li>
 * <li>rebuilds the startTime/endTime attributes of the Turn and Section elements.</li>
 * </ul>
 * The result is written to the output file with a "trans-14.dtd" DOCTYPE.
 */
class FixTransanaTimings {

    /** XML transcription to read. */
    File trsInFile
    /** File receiving the repaired transcription. */
    File trsOutFile

    /**
     * @param trsInFile the XML transcription to fix
     * @param trsOutFile the file to write the fixed transcription to
     */
    public FixTransanaTimings(File trsInFile, File trsOutFile) {
        this.trsInFile = trsInFile;
        this.trsOutFile = trsOutFile;
    }

    /**
     * Parses the input file, runs every repair pass in order and writes the result.
     *
     * @return always true; parse and I/O failures are reported through exceptions
     */
    public boolean process() {
        def trs = new XmlParser().parse(trsInFile)

        declareSpeakers(trs)
        propagateTurnBoundaries(trs)
        interpolateSyncTimes(collectSyncs(trs))

        def turns = trs.Episode.Section.Turn
        alignTurnStarts(turns)
        fillTurnEnds(turns)
        alignSections(trs.Episode.Section)

        trsOutFile.withWriter("UTF-8") { writer ->
            writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
            new groovy.util.XmlNodePrinter(new PrintWriter(writer)).print(trs) }

        return true;
    }

    /** Tells whether a timing attribute value is absent (null) or empty. */
    private static boolean isMissing(def value) {
        return value == null || value.toString().length() == 0
    }

    /** Upper-cases the Turn speaker ids and declares each of them, plus "none", under every Speakers element. */
    private void declareSpeakers(def trs) {
        // sorted set: the Speaker declarations are written in a reproducible order
        def speakers = new TreeSet<String>()
        for (def turn : trs.Episode.Section.Turn) {
            def speaker = turn.@speaker
            if (speaker == null) continue // no speaker attribute: nothing to normalise
            String id = speaker.toUpperCase()
            speakers << id
            turn.@speaker = id
        }
        for (def speakersNode : trs.Speakers) {
            for (String spk : speakers) {
                new Node(speakersNode, 'Speaker',
                        [id:spk, name:spk, type:'unknown', check:'yes', dialect:'native', accent:'no', scope:'local'])
            }
            new Node(speakersNode, 'Speaker',
                    [id:"none", name:"none", type:'unknown', check:'yes', dialect:'native', accent:'no', scope:'local'])
        }
    }

    /**
     * First pass over the Turns: a Turn without startTime inherits the time of the
     * Sync that closed the previous Turn, otherwise it takes the time of its own
     * first child; its endTime is the time of its last child when that child is timed.
     */
    private void propagateTurnBoundaries(def trs) {
        String currentEnd = null;
        for (def turn : trs.Episode.Section.Turn) {
            def children = turn.children()
            if (children.size() == 0) continue // empty Turn: the pending end time is kept for the next one

            def first = children[0]
            if (!isMissing(currentEnd) && isMissing(turn.@startTime)) { // using last Sync
                turn.@startTime = currentEnd
                // the first child may be plain text, which has no time attribute to set
                if (first instanceof Node) setStartTime(first, currentEnd)
            } else if (first instanceof Node) { // no previous Sync, using next Sync
                def start = getStartTime(first)
                if (!isMissing(start)) turn.@startTime = start
            }
            currentEnd = null

            def last = children[children.size() - 1]
            if (last instanceof Node) { // this is the last Sync
                def end = getStartTime(last)
                if (!isMissing(end)) {
                    turn.@endTime = end
                    currentEnd = end;
                }
            }
        }
    }

    /** Collects, in document order, the Sync elements found directly in a Section or directly in one of its Turns. */
    private List collectSyncs(def trs) {
        def syncs = []
        for (def section : trs.Episode.Section) {
            for (def child : section.children()) {
                if (!(child instanceof Node)) continue // text content
                switch (child.name()) {
                    case "Turn":
                        syncs.addAll(child.Sync)
                        break;
                    case "Sync":
                        syncs << child
                        break;
                    default: break;
                }
            }
        }
        return syncs
    }

    /**
     * Fills every run of untimed Syncs by interpolating between the previous timed
     * Sync ("0.0" when there is none) and the next timed Sync. A run that is not
     * followed by any timed Sync cannot be interpolated: it is reported and left as is.
     */
    private void interpolateSyncTimes(List nodes) {
        int i = 0
        while (i < nodes.size()) {
            if (!isMissing(getStartTime(nodes[i]))) {
                i++
                continue
            }
            String previous = (i > 0) ? getStartTime(nodes[i - 1]) : "0.0"

            // look for the next timed Sync without running past the end of the list
            int j = i
            while (j < nodes.size() && isMissing(getStartTime(nodes[j]))) j++

            if (j < nodes.size()) {
                // the timed Sync closes the run so that fixSyncTimes ends exactly on its time
                fixSyncTimes(nodes[i..j], previous, getStartTime(nodes[j]))
            } else {
                println "ERROR: no end time in the transcription"
            }
            i = j + 1
        }
    }

    /** Sets the startTime of each Turn to the time of its first Sync. */
    private void alignTurnStarts(def turns) {
        for (def turn : turns) {
            def syncs = turn.Sync
            if (syncs.size() == 0) {
                println "Error: Turn with no Sync milestone"
                continue
            }
            def first = syncs[0].@time
            // an untimed Sync must not erase a start time found by an earlier pass
            if (!isMissing(first)) turn.@startTime = first
        }
    }

    /**
     * Gives each Turn without endTime the startTime of the next Turn. The last Turn
     * gets its startTime plus one unit per character of its text.
     */
    private void fillTurnEnds(def turns) {
        for (int i = 0 ; i < turns.size() ; i++) {
            def turn = turns[i]
            if (!isMissing(turn.@endTime)) continue

            if (i < turns.size() - 1) {
                def nextStart = turns[i + 1].@startTime
                if (!isMissing(nextStart)) turn.@endTime = nextStart
            } else if (isMissing(turn.@startTime)) {
                // nothing to approximate from; parsing an empty start time would throw
                println "Error: last Turn has neither start nor end time"
            } else { // last turn has no end time, approximate it
                double end = Double.parseDouble(turn.@startTime) + turn.text().length()
                turn.@endTime = "" + end
            }
        }
    }

    /** Sets each Section startTime/endTime from its first Turn startTime and last Turn endTime. */
    private void alignSections(def sections) {
        for (def section : sections) {
            def sectionTurns = section.Turn
            if (sectionTurns.size() == 0) continue;
            section.@startTime = sectionTurns[0].@startTime
            section.@endTime = sectionTurns[-1].@endTime
        }
    }

    /**
     * Reads the start time of an element.
     *
     * @param node a Turn or Section (startTime attribute) or a Sync (time attribute)
     * @return "0.0" when node is null or has no name; the attribute value for a
     * Turn, Section or Sync (null when the attribute is absent); null for anything
     * else, including text content
     */
    def getStartTime(def node) {
        if (node == null) return "0.0"
        if (!(node instanceof Node)) return null // text content carries no timing
        if (node.name() == null) return "0.0"

        def ret = null;
        switch (node.name()) {
            case "Turn":
            case "Section":
                ret = node.@startTime
                break
            case "Sync":
                ret = node.@time
                break
            default: break;
        }
        return ret
    }

    /**
     * Writes the start time of an element.
     *
     * @param node a Turn or Section (startTime attribute) or a Sync (time attribute)
     * @param value the time to store; it is converted to a String
     * @return the stored String, or null when node is not a Turn, Section or Sync
     * element and nothing was written
     */
    def setStartTime(def node, def value) {
        if (!(node instanceof Node)) return null
        String time = "" + value
        switch (node.name()) {
            case "Turn":
            case "Section":
                node.@startTime = time
                return time
            case "Sync":
                node.@time = time
                return time
            default:
                return null
        }
    }

    /**
     * Spreads the "time" attribute of the given elements evenly over ]start, end]:
     * element i receives start + (i+1) * (end - start) / size, and the last
     * element receives exactly end.
     *
     * @param list the elements to time, in order; nothing is done when null or empty
     * @param start the time preceding the first element, as a decimal String
     * @param end the time of the last element, as a decimal String
     * @throws NumberFormatException when start or end is not a decimal number
     */
    def fixSyncTimes(def list, def start, def end) {
        if (list == null || list.size() == 0) return

        double startf = Double.parseDouble(start)
        double endf = Double.parseDouble(end)
        int count = list.size()
        double delta = (endf - startf) / count
        for (int i = 0 ; i < count ; i++) {
            // computed from the index in double precision: no rounding error accumulates
            double time = (i == count - 1) ? endf : startf + delta * (i + 1)
            list[i].@time = "" + time
        }
    }
}