Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / importer / transcriber / FixTiming.groovy @ 187

History | View | Annotate | Download (6.6 kB)

1
// Copyright © - ENS de Lyon - http://textometrie.ens-lyon.fr
2
//
3
// This file is part of the TXM platform.
4
//
5
// The TXM platform is free software: you can redistribute it and/or modify
6
// it under the terms of the GNU General Public License as published by
7
// the Free Software Foundation, either version 3 of the License, or
8
// (at your option) any later version.
9
//
10
// The TXM platform is distributed in the hope that it will be useful,
11
// but WITHOUT ANY WARRANTY; without even the implied warranty of
12
// MERCHANTABILITY or FITNESS  FOR A PARTICULAR PURPOSE.  See the
13
// GNU General Public License for more details.
14
//
15
// You should have received a copy of the GNU General Public License
16
// along with the TXM platform.  If not, see <http://www.gnu.org/licenses/>.
17
//
18
// $LastChangedDate: 2011-11-01 16:12:36 +0100 (mar., 01 nov. 2011) $
19
// $LastChangedRevision: 2049 $
20
// $LastChangedBy: sheiden $
21
//
22

    
23
package org.txm.importer.transcriber
24
import java.text.DecimalFormat;
25
// parameters
26

    
27
// Script parameters: the transcription to repair and the file receiving the
// corrected copy, both located under the user's home directory.
String userdir = System.getProperty("user.home")
File homeDirectory = new File(userdir)
File infile = new File(homeDirectory, "xml/concattrs/Reçues/int23.trs")
File outfile = new File(homeDirectory, "xml/concattrs/Reçues/int23-corr.trs")
30

    
31
/**
 * Tells whether a speaker identifier is well-formed.
 *
 * @param spk the speaker identifier to check; may be null
 * @return true when spk matches ValidateTRS.spkPattern entirely,
 *         false otherwise (including when spk is null)
 */
public boolean checkSpk(String spk) {
        // null cannot match any pattern: report it as invalid rather than
        // failing with a NullPointerException
        return spk != null && spk.matches(ValidateTRS.spkPattern);
}
34

    
35
/**
 * Replaces topic identifiers by topic descriptions, for every topic whose
 * description is listed in ValidateTRS.okTopics.
 * The "topic" attribute of the Sections and the "id" attribute of the Topic
 * declarations are both rewritten; every other topic is left untouched.
 *
 * @param trs the root node of the transcription (fixTRS passes the result of XmlParser.parse)
 */
public fixTopics(def trs) {
        def allowedDescriptions = ValidateTRS.okTopics;

        // index the declared topics: id -> desc
        def descById = [:];
        trs.Topics.Topic.each { declaration ->
                descById.put(declaration.@id, declaration.@desc)
        }

        // Sections: reference the description instead of the id
        for (def sec : trs.Episode.Section) {
                def topicId = sec.@topic
                if (topicId == null) {
                        continue // Section without topic
                }
                def description = descById[topicId]
                if (allowedDescriptions.contains(description)) {
                        sec.@topic = description
                }
        }

        // Topic declarations: the description becomes the id
        for (def declaration : trs.Topics.Topic) {
                def description = descById[declaration.@id]
                if (allowedDescriptions.contains(description)) {
                        declaration.@id = declaration.@desc
                }
        }
}
62

    
63
/**
 * Repairs a Turn whose endTime is lower than its startTime by swapping the
 * two values, then realigns its neighbours on the new boundaries:
 * - the previous Turn (or the parent Section when there is no previous Turn)
 *   is aligned on the new start of the Turn;
 * - the next sibling (or the parent Section when the Turn is the last child)
 *   is aligned on the new end of the Turn.
 *
 * @param prevTurn the Turn processed just before this one, or null
 * @param turn the Turn to repair; its parent must be set
 */
def fixTurn(prevTurn, turn) // end < start
{
        def startS = turn.'@startTime'
        def endS = turn.'@endTime'
        // Parsed as double: a float cannot tell millisecond values apart once
        // the time exceeds a few hours, which made the test below unreliable.
        def end = Double.parseDouble(endS)

        // patch previous turn
        if (prevTurn != null) {
                def prevstart = Double.parseDouble(prevTurn.'@startTime')
                if (prevstart > end) {
                        // moving the end of the previous turn would make it end before it starts
                        println "** FixTiming: Warning: previous turn not updated ($prevstart > $end): $prevTurn"
                } else {
                        prevTurn.'@endTime' = endS;
                }
        } else {
                turn.parent().'@startTime' = endS
        }

        // patch current turn: swap start and end
        turn.'@startTime' = endS;
        turn.'@endTime' = startS;

        // patch next turn
        def siblings = turn.parent().children()
        def i = siblings.indexOf(turn)
        if (siblings.size() > i+1) {
                def nextTurn = siblings.get(i+1)
                if (nextTurn != null) {
                        // patched even if this makes nextTurn.end < nextTurn.start:
                        // the caller's loop goes on and patches the next turns
                        nextTurn.'@startTime' = startS;
                }
        } else {
                turn.parent().'@endTime' = startS
        }
        println "** FixTiming: fixed Turn: old time[$startS, $endS] to ["+turn.@startTime+", "+turn.@endTime+"]"
}
99

    
100
/**
 * Repairs a Turn whose startTime equals its endTime by moving its end
 * 10 * timeResolution later, then aligns the start of the next sibling
 * (or the end of the parent Section when the Turn is the last child)
 * on the new end.
 *
 * @param turn the Turn to repair; its parent must be set
 * @param timeResolution the time resolution, in the unit of the time attributes
 */
def fixTurn2(turn, timeResolution) // start == end
{
        DecimalFormat formater = DecimalFormat.getInstance(Locale.ENGLISH)
        formater.setMaximumFractionDigits(3)
        // Grouping is enabled by default: without this, 1234.51 is written
        // "1,234.51", which the parseFloat/parseDouble calls of this script reject.
        formater.setGroupingUsed(false)

        def startS = turn.'@startTime'
        def oldEndS = turn.'@endTime'
        def end = Double.parseDouble(oldEndS)+timeResolution*10
        def endS = formater.format(end)
        println "ENDS: "+endS

        // patch current turn
        turn.'@endTime' = endS;

        // patch next turn
        def siblings = turn.parent().children()
        def i = siblings.indexOf(turn)
        if (siblings.size() > i+1) {
                def nextTurn = siblings.get(i+1)
                if (nextTurn != null) {
                        // patched even if this makes nextTurn.end < nextTurn.start:
                        // the caller's loop goes on and patches the next turns
                        nextTurn.'@startTime' = endS;
                }
        } else {
                turn.parent().'@endTime' = endS
        }

        // report the end time the Turn had before the fix, not the new one
        println "** FixTiming: fixed Turn: equal old time[$startS, $oldEndS] to ["+turn.@startTime+", "+turn.@endTime+"]"
}
128

    
129
/**
 * Repairs the Sync children of a Turn.
 * The first Sync is aligned on the start of the Turn. Each following Sync is
 * valid when its time is strictly greater than the last valid time and
 * strictly lower than the end of the Turn; consecutive invalid Syncs are
 * collected and handed to fixBadSyncList together with the valid times
 * surrounding them.
 *
 * @param turn the Turn whose Sync children are checked
 */
def fixSync(turn)
{
        // Parsed as double: a float cannot tell millisecond values apart once
        // the time exceeds a few hours, so valid Syncs were flagged as bad.
        def start = Double.parseDouble(turn.'@startTime')
        def end = Double.parseDouble(turn.'@endTime')
        def badSyncList = []

        def lastValidSyncTime = start
        def firstSync = true

        for (def sync : turn.Sync) {
                if (firstSync) {
                        // the first Sync always takes the start time of its Turn
                        if (sync.@time != turn.'@startTime') {
                                println "** FixTiming: fixed first Sync: old time "+sync.@time+" to "+turn.'@startTime'
                        }
                        sync.@time = turn.'@startTime'
                        firstSync = false
                        continue
                }

                def time = Double.parseDouble(sync.@time)
                if (time > lastValidSyncTime && time < end) {
                        // valid Sync: spread the pending bad ones before it
                        fixBadSyncList(badSyncList, lastValidSyncTime, time)
                        lastValidSyncTime = time
                        badSyncList = []
                } else {
                        badSyncList << sync
                }
        }
        // bad Syncs left at the end of the Turn are spread up to the end of the Turn
        fixBadSyncList(badSyncList, lastValidSyncTime, end)
}
160

    
161
/**
 * Gives evenly spaced times, strictly between start and end, to a list of Syncs.
 * Nothing is changed (a message is printed instead) when start >= end or when
 * the resulting spacing would be lower than 0.001.
 *
 * @param list the Syncs to re-time, in document order
 * @param start the time preceding the first Sync of the list
 * @param end the time following the last Sync of the list
 */
def fixBadSyncList(list, start, end) {
        def timeResolution = 0.001

        if (start >= end) {
                println("** FixTiming: fixBadSyncList: start($start) >= end($end)")
                return
        }
        if (list.size() <= 0) {
                return // nothing to spread
        }

        def step = (end - start)/(list.size()+1)
        if (step < timeResolution) {
                println("** FixTiming: fixBadSyncList: dt($step) < $timeResolution seconds")
                return
        }

        // walk from start, one step per Sync
        def current = start
        list.each { sync ->
                def previous = sync.@time
                current += step
                sync.@time = ""+current
                println "** FixTiming: fixed Sync: old time $previous to $current"
        }
}
182

    
183
/**
 * Reads a transcription file, repairs it and writes the result:
 * 1. Turns ending before they start (fixTurn) or with equal start and end
 *    times (fixTurn2) are patched;
 * 2. the Syncs of every Turn are repaired (fixSync);
 * 3. the topic declarations are repaired (fixTopics);
 * 4. the tree is written to outfile in UTF-8, after an XML declaration and
 *    the "trans-14.dtd" DOCTYPE.
 *
 * @param infile the transcription file to read
 * @param outfile the file receiving the repaired transcription
 */
public def fixTRS(File infile, File outfile) {
        def timeResolution = 0.001

        // Open input file. The parser reads from the URI directly: the
        // InputStream formerly opened here was never read nor closed.
        def parser = new XmlParser();
        def trs = parser.parse(infile.toURI().toString())

        // First fix all <Turn>
        def prevTurn = null
        for (def section : trs.Episode.Section) {
                section.Turn.each{ turn ->
                        // Parsed as double: a float cannot tell millisecond values apart
                        // once the time exceeds a few hours, which reported correct
                        // Turns as having start == end.
                        def start = Double.parseDouble(turn.'@startTime')
                        def end = Double.parseDouble(turn.'@endTime')
                        if (end < start) {
                                fixTurn(prevTurn, turn)
                        } else if (start == end) {
                                fixTurn2(turn, timeResolution)
                        }
                        prevTurn = turn
                }
        }

        // Then fix all <Sync>s of Turns
        for (def section : trs.Episode.Section) {
                section.Turn.each{ turn ->
                        fixSync(turn)
                }
        }

        // Finally fix topics declarations
        fixTopics(trs);

        // blank line separating the messages printed above from what follows
        println ""
        outfile.withWriter("UTF-8"){ writer ->
                writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
                new groovy.util.XmlNodePrinter(new PrintWriter(writer)).print(trs) }
}
225

    
226
/// MAIN ///
227
// Script entry point: repair infile and write the corrected transcription to outfile.
fixTRS(infile, outfile);