Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / transcription / FixTransanaTimings.groovy @ 499

History | View | Annotate | Download (6.5 kB)

1 321 mdecorde
// Copyright © 2010-2013 ENS de Lyon.
2 321 mdecorde
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 321 mdecorde
// Lyon 2, University of Franche-Comté, University of Nice
4 321 mdecorde
// Sophia Antipolis, University of Paris 3.
5 321 mdecorde
//
6 321 mdecorde
// The TXM platform is free software: you can redistribute it
7 321 mdecorde
// and/or modify it under the terms of the GNU General Public
8 321 mdecorde
// License as published by the Free Software Foundation,
9 321 mdecorde
// either version 2 of the License, or (at your option) any
10 321 mdecorde
// later version.
11 321 mdecorde
//
12 321 mdecorde
// The TXM platform is distributed in the hope that it will be
13 321 mdecorde
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 321 mdecorde
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 321 mdecorde
// PURPOSE. See the GNU General Public License for more
16 321 mdecorde
// details.
17 321 mdecorde
//
18 321 mdecorde
// You should have received a copy of the GNU General
19 321 mdecorde
// Public License along with the TXM platform. If not, see
20 321 mdecorde
// http://www.gnu.org/licenses.
21 321 mdecorde
22 321 mdecorde
package org.txm.macro.transcription
23 321 mdecorde
24 321 mdecorde
import java.text.SimpleDateFormat;
25 321 mdecorde
import java.util.Date;
26 321 mdecorde
27 321 mdecorde
/**
 * Repairs the timing attributes of an XML transcription organised as
 * Trans / Episode / Section / Turn / Sync, then writes the result with a
 * "trans-14.dtd" DOCTYPE.
 *
 * The repair, performed by {@link #process()}, consists of:
 * <ol>
 * <li>upper-casing the Turn@speaker values and declaring them in every Speakers element;</li>
 * <li>seeding Turn@startTime / Turn@endTime from the first and last child of each Turn;</li>
 * <li>linearly interpolating the Sync@time values that are empty;</li>
 * <li>aligning Turn@startTime on the first Sync of the Turn;</li>
 * <li>filling the empty Turn@endTime values from the following Turn;</li>
 * <li>setting Section@startTime / Section@endTime from its first and last Turn.</li>
 * </ol>
 */
class FixTransanaTimings {

    /** Transcription file to read. */
    File trsInFile

    /** File the repaired transcription is written to. */
    File trsOutFile

    /**
     * @param trsInFile the XML transcription to repair
     * @param trsOutFile the file receiving the repaired transcription
     */
    public FixTransanaTimings(File trsInFile, File trsOutFile) {
        this.trsInFile = trsInFile
        this.trsOutFile = trsOutFile
    }

    /**
     * Parses {@link #trsInFile}, repairs speakers and timings in memory and
     * writes the document to {@link #trsOutFile} as UTF-8.
     *
     * Timing problems that cannot be repaired are reported on standard output
     * and do not stop the processing.
     *
     * @return always true; parsing and I/O failures are propagated as exceptions
     */
    public boolean process() {
        def trs = new XmlParser().parse(trsInFile)

        declareSpeakers(trs)
        propagateTurnTimes(trs)
        interpolateSyncTimes(collectSyncs(trs))

        def turns = trs.Episode.Section.Turn
        alignTurnStarts(turns)
        fillTurnEnds(turns)
        fixSectionBounds(trs)

        write(trs)
        return true
    }

    /** Tells whether an attribute value holds a time, i.e. is neither missing nor empty. */
    private static boolean hasTime(def value) {
        return value != null && value.toString().length() > 0
    }

    /** Upper-cases every Turn@speaker and declares each speaker, plus "none", in every Speakers element. */
    private void declareSpeakers(def trs) {
        def speakers = new HashSet<String>()
        for (def turn : trs.Episode.Section.Turn) {
            def speaker = turn.@speaker
            if (speaker == null) continue // Turn without speaker attribute: nothing to normalise

            String id = speaker.toUpperCase()
            speakers << id
            turn.@speaker = id
        }

        for (def speakersNode : trs.Speakers) {
            for (String spk : speakers) {
                // the Node constructor appends the new element to speakersNode
                new Node(speakersNode, 'Speaker',
                        [id:spk, name:spk, type:'unknown', check:'yes', dialect:'native', accent:'no', scope:'local'])
            }
            new Node(speakersNode, 'Speaker',
                    [id:"none", name:"none", type:'unknown', check:'yes', dialect:'native', accent:'no', scope:'local'])
        }
    }

    /**
     * Seeds Turn@startTime and Turn@endTime from the first and last child of
     * each Turn, reusing the end of the previous Turn when the Turn has no start.
     */
    private void propagateTurnTimes(def trs) {
        String currentEnd = null
        for (def turn : trs.Episode.Section.Turn) {
            def children = turn.children()
            if (children.size() == 0) continue // empty Turn: the previous end is kept for the next Turn

            def first = children[0]
            if (hasTime(currentEnd) && !hasTime(turn.@startTime)) {
                // no start of its own: begin where the previous Turn ended
                turn.@startTime = currentEnd
                setStartTime(first, currentEnd) // ignored when the first child is text
            } else if (!(first instanceof String)) {
                def start = getStartTime(first)
                if (hasTime(start)) turn.@startTime = start
            }

            currentEnd = null
            def last = children[children.size() - 1]
            if (!(last instanceof String)) {
                // the Turn finishes on an element: its time closes the Turn
                def end = getStartTime(last)
                if (hasTime(end)) {
                    turn.@endTime = end
                    currentEnd = end
                }
            }
        }
    }

    /** Lists, in document order, the Sync elements found directly in a Section or in its Turns. */
    private List collectSyncs(def trs) {
        def nodes = []
        for (def section : trs.Episode.Section) {
            for (def child : section.children()) {
                if (child instanceof String) continue // text content

                switch (child.name()) {
                    case "Turn":
                        for (def sync : child.Sync) nodes << sync
                        break
                    case "Sync":
                        nodes << child
                        break
                    default:
                        break
                }
            }
        }
        return nodes
    }

    /**
     * Fills every run of Sync elements without time by interpolating between
     * the previous timed Sync ("0.0" at the beginning) and the next timed one.
     * A run that reaches the end of the transcription has no upper bound: it is
     * reported and left untouched instead of being interpolated toward 0.
     */
    private void interpolateSyncTimes(List nodes) {
        int i = 0
        while (i < nodes.size()) {
            if (hasTime(getStartTime(nodes[i]))) {
                i++
                continue
            }

            String previous = (i > 0) ? getStartTime(nodes[i - 1]) : "0.0"

            // look for the next Sync that has a time
            int j = i
            while (j < nodes.size() && !hasTime(getStartTime(nodes[j]))) j++

            if (j < nodes.size()) {
                // the timed Sync is part of the list so that the last step lands on it
                fixSyncTimes(nodes[i..j], previous, getStartTime(nodes[j]))
            } else {
                println "ERROR: no end time in the transcription"
            }
            i = j + 1
        }
    }

    /** Sets Turn@startTime to the time of the first Sync of the Turn, when that Sync has one. */
    private void alignTurnStarts(def turns) {
        for (def turn : turns) {
            def syncs = turn.Sync
            if (syncs.size() > 0) {
                def time = syncs[0].@time
                if (hasTime(time)) turn.@startTime = time // never erase a start with an empty time
            } else {
                println "Error: Turn with no Sync milestone"
            }
        }
    }

    /**
     * Fills the empty Turn@endTime values with the start of the following Turn.
     * The last Turn has no follower: its end is approximated as its start plus
     * one second per character of text.
     */
    private void fillTurnEnds(def turns) {
        int lastIndex = turns.size() - 1
        for (int i = 0; i <= lastIndex; i++) {
            def turn = turns[i]
            if (hasTime(turn.@endTime)) continue

            if (i < lastIndex) {
                def nextStart = turns[i + 1].@startTime
                if (hasTime(nextStart)) turn.@endTime = nextStart
                continue
            }

            String start = turn.@startTime
            if (!hasTime(start)) {
                println "ERROR: cannot approximate the end time of the last Turn: no start time"
                continue
            }
            try {
                // BigDecimal keeps the decimals of the start time exactly
                turn.@endTime = (new BigDecimal(start.trim()) + turn.text().length()).toPlainString()
            } catch (NumberFormatException e) {
                println "ERROR: cannot approximate the end time of the last Turn: invalid start time '" + start + "'"
            }
        }
    }

    /** Sets Section@startTime / Section@endTime from the first and last Turn of each Section. */
    private void fixSectionBounds(def trs) {
        for (def section : trs.Episode.Section) {
            def sectionTurns = section.Turn
            if (sectionTurns.size() == 0) continue

            section.@startTime = sectionTurns[0].@startTime
            section.@endTime = sectionTurns[-1].@endTime
        }
    }

    /** Writes the XML declaration, the DOCTYPE and the document to {@link #trsOutFile} as UTF-8. */
    private void write(def trs) {
        trsOutFile.withWriter("UTF-8") { writer ->
            writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
            def out = new PrintWriter(writer)
            new groovy.util.XmlNodePrinter(out).print(trs)
            out.flush() // make sure nothing stays in the wrapper before the writer is closed
        }
    }

    /**
     * Reads the time carried by a node.
     *
     * @param node a Turn or Section (startTime attribute) or a Sync (time attribute)
     * @return "0.0" for a null node or a node without name, the attribute value
     *         for Turn/Section/Sync (null when the attribute is absent), and
     *         null for text content or any other element
     */
    def getStartTime(def node) {
        if (node == null) return "0.0"
        if (!(node instanceof Node)) return null // text content carries no time
        if (node.name() == null) return "0.0"

        switch (node.name()) {
            case "Turn":
            case "Section":
                return node.@startTime
            case "Sync":
                return node.@time
            default:
                return null
        }
    }

    /**
     * Writes the time carried by a node: startTime for a Turn or Section, time
     * for a Sync. Text content and other elements are left untouched.
     *
     * @param node the node to update
     * @param value the time, stored as a String
     * @return the stored String, or null when nothing was written
     */
    def setStartTime(def node, def value) {
        if (!(node instanceof Node)) return null // null or text content: nothing to update

        String time = "" + value
        switch (node.name()) {
            case "Turn":
            case "Section":
                node.@startTime = time
                return time
            case "Sync":
                node.@time = time
                return time
            default:
                return null
        }
    }

    /**
     * Spreads the nodes of the list evenly over ]start, end]: node k (0-based)
     * of n receives start + (end - start) * (k + 1) / n and the last node
     * receives exactly end.
     *
     * Each time is computed directly in double precision rather than by
     * accumulating a float step, so rounding errors do not drift along the
     * list nor alter the final anchor time.
     *
     * @param list the nodes whose time attribute is overwritten
     * @param start the time preceding the first node, as a decimal String
     * @param end the time of the last node, as a decimal String
     */
    def fixSyncTimes(def list, def start, def end) {
        int n = list.size()
        if (n == 0) return

        double startf = Double.parseDouble(start)
        double endf = Double.parseDouble(end)
        double span = endf - startf

        for (int k = 0; k < n; k++) {
            double t = (k == n - 1) ? endf : startf + span * (k + 1) / n
            list[k].@time = "" + t
        }
    }
}