Statistics
| Revision:

ccc / projets / CMC2ELAN / src / FixMissingTimings.groovy @ 2

History | View | Annotate | Download (5.8 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
//
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
//
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
//
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21

    
22
import java.text.SimpleDateFormat;
23
import java.util.Date;
24

    
25
/**
 * Fills in missing timing attributes of a Transcriber-style XML transcription.
 *
 * The input document is expected to expose the element path
 * Episode/Section/Turn/Sync. A timing attribute (Turn@startTime, Turn@endTime,
 * Section@startTime, Section@endTime, Sync@time) is considered "missing" when
 * it is absent or an empty string.
 *
 * process() runs the following passes, in order:
 * 1. seed Turn start/end times from their first/last child and from the end of
 *    the previous Turn;
 * 2. interpolate missing Sync@time values linearly between the surrounding
 *    known times;
 * 3. align each Turn@startTime with its first Sync@time;
 * 4. fill Turn@endTime from the next Turn@startTime (the last Turn gets an
 *    estimate based on its text length);
 * 5. copy first/last Turn times to the enclosing Section;
 * 6. write the result to the output file.
 */
class FixMissingTimings {

    // Development placeholders; the constructor always overrides both fields.
    File trsInFile = new File("/home/mdecorde/xml/transana/out.xml")
    File trsOutFile = new File("/home/mdecorde/xml/transana/TrP1S8_08102010.trs")

    /**
     * @param trsInFile  the XML transcription to read
     * @param trsOutFile the file the fixed transcription is written to (UTF-8)
     */
    public FixMissingTimings(File trsInFile, File trsOutFile) {
        this.trsInFile = trsInFile;
        this.trsOutFile = trsOutFile;
    }

    /**
     * Parses trsInFile, repairs the missing timings and writes the result to
     * trsOutFile.
     *
     * @return always true; parse or I/O failures propagate as exceptions
     */
    public boolean process() {
        def trs = new XmlParser().parse(trsInFile)

        seedTurnTimes(trs)
        fillMissingSyncTimes(collectSyncs(trs))

        def turns = trs.Episode.Section.Turn
        alignTurnStartsWithFirstSync(turns)
        fillTurnEndTimes(turns)
        copyTurnTimesToSections(trs.Episode.Section)

        trsOutFile.withWriter("UTF-8") { writer ->
            writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
            def printWriter = new PrintWriter(writer)
            new groovy.util.XmlNodePrinter(printWriter).print(trs)
            printWriter.flush()
        }

        return true;
    }

    /** True when a timing value is absent (null) or an empty string. */
    private static boolean isMissing(def value) {
        return value == null || value.toString().length() == 0
    }

    /**
     * Pass 1: for each non-empty Turn, sets startTime from the end of the
     * previous Turn (when known and the Turn has none) or from its first child,
     * then sets endTime from its last child when that child carries a time.
     */
    private void seedTurnTimes(def trs) {
        String currentEnd = null
        for (def turn : trs.Episode.Section.Turn) {
            def children = turn.children()
            if (children.size() == 0) continue

            def first = children[0]
            def last = children[children.size() - 1]

            if (!isMissing(currentEnd) && isMissing(turn.@startTime)) {
                // reuse the time of the Sync that closed the previous Turn
                turn.@startTime = currentEnd
                // the first child may be a text node (String), which has no name()
                if (first instanceof Node) setStartTime(first, currentEnd)
            } else if (!(first instanceof String)) {
                // no usable previous end: use the time carried by the first child
                def start = getStartTime(first)
                if (!isMissing(start)) turn.@startTime = start
            }
            currentEnd = null

            if (!(last instanceof String)) {
                // the Turn ends on an element: its time closes the Turn
                def end = getStartTime(last)
                if (!isMissing(end)) {
                    turn.@endTime = end
                    currentEnd = end
                }
            }
        }
    }

    /**
     * Collects, in document order, the Sync elements that are direct children
     * of a Section or direct children of a Turn that is itself a direct child
     * of a Section.
     */
    private List collectSyncs(def trs) {
        def nodes = []
        for (def section : trs.Episode.Section) {
            for (def child : section.children()) {
                if (child instanceof String) continue // text node, not a tag
                switch (child.name()) {
                    case "Turn":
                        for (def sync : child.Sync) {
                            nodes << sync
                        }
                        break
                    case "Sync":
                        nodes << child
                        break
                    default:
                        break
                }
            }
        }
        return nodes
    }

    /**
     * Pass 2: every run of consecutive Sync elements without a time is
     * interpolated between the previous known time ("0.0" at the beginning of
     * the list) and the next known time. A run that reaches the end of the list
     * has no closing time: it is reported and left untouched.
     */
    private void fillMissingSyncTimes(List nodes) {
        int i = 0
        while (i < nodes.size()) {
            if (!isMissing(getStartTime(nodes[i]))) {
                i++
                continue
            }

            String previous = (i > 0) ? getStartTime(nodes[i - 1]) : "0.0"

            // gather the run of untimed Syncs, never reading past the end of the list
            def run = []
            int j = i
            while (j < nodes.size() && isMissing(getStartTime(nodes[j]))) {
                run << nodes[j]
                j++
            }

            if (j >= nodes.size()) {
                println "ERROR: no end time in the transcription"
                return
            }

            // the timed Sync closing the run is part of the list given to
            // fixSyncTimes: it is the element that receives the end time
            run << nodes[j]
            fixSyncTimes(run, previous, getStartTime(nodes[j]))
            i = j + 1
        }
    }

    /**
     * Pass 3: sets each Turn@startTime to the time of its first Sync child.
     * A first Sync that is still untimed does not overwrite the existing value.
     */
    private void alignTurnStartsWithFirstSync(def turns) {
        for (int i = 0; i < turns.size(); i++) {
            def turn = turns[i]
            def syncs = turn.Sync
            if (syncs.size() > 0) {
                def firstTime = syncs[0].@time
                if (!isMissing(firstTime)) turn.@startTime = firstTime
            } else {
                println "Error: Turn with no Sync milestone"
            }
        }
    }

    /**
     * Pass 4: a Turn without endTime takes the startTime of the next Turn.
     * The last Turn has no successor: its endTime is set to its startTime plus
     * one unit per character of its text.
     */
    private void fillTurnEndTimes(def turns) {
        for (int i = 0; i < turns.size(); i++) {
            def turn = turns[i]
            if (i < turns.size() - 1) {
                def nextStart = turns[i + 1].@startTime
                if (isMissing(turn.@endTime) && !isMissing(nextStart)) {
                    turn.@endTime = nextStart
                }
            } else {
                def start = turn.@startTime
                if (isMissing(start)) {
                    // nothing to estimate from; parsing an empty value would throw
                    println "Error: last Turn has no start time, end time not estimated"
                } else {
                    Float e = Float.parseFloat(start.toString())
                    e += turn.text().length() * 1.0f
                    turn.@endTime = e.toString()
                }
            }
        }
    }

    /**
     * Pass 5: each Section containing at least one Turn takes the startTime of
     * its first Turn and the endTime of its last Turn.
     */
    private void copyTurnTimesToSections(def sections) {
        for (int i = 0; i < sections.size(); i++) {
            def sectionTurns = sections[i].Turn
            if (sectionTurns.size() == 0) continue
            sections[i].@startTime = sectionTurns[0].@startTime
            sections[i].@endTime = sectionTurns[-1].@endTime
        }
    }

    /**
     * Reads the time carried by a node.
     *
     * @param node a Turn, Section or Sync element, or null
     * @return "0.0" when the node or its name is null; the startTime attribute
     *         of a Turn/Section; the time attribute of a Sync; null for any
     *         other element (the attribute value itself may be null or empty)
     */
    def getStartTime(def node) {
        if (node == null) return "0.0"
        if (node.name() == null) return "0.0"

        def ret = null
        switch (node.name()) {
            case "Turn":
            case "Section":
                ret = node.@startTime
                break
            case "Sync":
                ret = node.@time
                break
            default:
                break
        }
        return ret
    }

    /**
     * Writes the time carried by a node: startTime for a Turn/Section, time for
     * a Sync. Other elements and non-element values (e.g. text nodes) are left
     * untouched.
     *
     * @param node  the element to update
     * @param value the time to store; it is converted to a String
     * @return the stored String for a Sync, null otherwise
     */
    def setStartTime(def node, def value) {
        if (!(node instanceof Node)) return null

        switch (node.name()) {
            case "Turn":
            case "Section":
                node.@startTime = "" + value
                break
            case "Sync":
                return node.@time = "" + value
            default:
                break
        }
        return null
    }

    /**
     * Spreads the elements of list evenly over the interval ]start, end]: the
     * k-th element (1-based) receives start + k * (end - start) / list.size(),
     * so the last element receives end.
     *
     * @param list  the elements whose time attribute is written
     * @param start the time preceding the first element, as a decimal number
     * @param end   the time of the last element, as a decimal number
     */
    def fixSyncTimes(def list, def start, def end) {
        if (list == null || list.size() == 0) return null

        double startf = Double.parseDouble(start.toString())
        double endf = Double.parseDouble(end.toString())
        double step = (endf - startf) / list.size()

        int lastIndex = list.size() - 1
        for (int k = 0; k < lastIndex; k++) {
            // multiply instead of accumulating so rounding errors do not add up
            double t = startf + step * (k + 1)
            list[k].@time = "" + ((float) t)
        }
        // the last element sits exactly on the known end time: keep it verbatim
        list[lastIndex].@time = "" + end
        return null
    }
}