Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / transcriber / TRS2SRT.groovy @ 1688

History | View | Annotate | Download (5.7 kB)

1
package org.txm.scripts.importer.transcriber
2
/**
3
 * Converts a file from Transcriber XML (TRS) to an SRT (SubRip) subtitle file
4
 * 
5
 * @author mdecorde
6
 *
7
 */
8

    
9

    
10
import java.util.ArrayList
11

    
12
import javax.xml.parsers.*
13
import javax.xml.stream.*
14

    
15
import java.net.URL
16

    
17
class TRS2SRT {

	// Unused in this class; kept so the declared members stay unchanged.
	private def url
	// Stream opened on the TRS file by the constructor, closed by process().
	private def inputData
	private def factory
	private XMLStreamReader parser
	// Writer on the SRT result file, opened by process().
	def writer

	// Colour palette; each declared Speaker gets the next colour (wrapping around).
	def colors = ["white", "red", "blue", "green", "yellow", "grey", "violet", "pink", "orange", "brown"];
	def icolor = 0;

	File trsFile;
	boolean debug = false

	/**
	 * Opens the Transcriber file and creates the StAX reader used by {@link #process(File)}.
	 *
	 * @param trsFile the Transcriber XML file to convert
	 */
	public TRS2SRT(File trsFile) {
		inputData = trsFile.toURI().toURL().openStream()
		try {
			factory = XMLInputFactory.newInstance()
			parser = factory.createXMLStreamReader(inputData)
		} catch (Exception e) {
			// do not leak the stream when the reader cannot be created
			inputData.close()
			throw e
		}

		this.trsFile = trsFile
	}

	/**
	 * Transforms the trsFile to the SRT format.
	 *
	 * The writer, the XML reader and the input stream are closed when the
	 * method returns, whether the conversion succeeded or not. The reader
	 * is consumed, so an instance can only process its file once.
	 *
	 * @param srtFile result file, written in UTF-8
	 */
	public void process(File srtFile) {
		try {
			writer = srtFile.newWriter("UTF-8")
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
				switch (event) {
					case XMLStreamConstants.START_ELEMENT:
						processStartElement()
						break;
					case XMLStreamConstants.END_ELEMENT:
						processEndElement()
						break;
					case XMLStreamConstants.CHARACTERS:
						processText()
						break;
				}
			}
		} finally {
			// nested so that a failing close() does not prevent the next ones
			try {
				if (writer != null) writer.close()
			} finally {
				try {
					if (parser != null) parser.close()
				} finally {
					if (inputData != null) inputData.close()
				}
			}
		}
	}

	// Speaker id -> [name, color, count]; filled from the Speaker elements.
	def speakers = [:]
	// Speaker ids of the current Turn (Turn@speaker split on whitespace).
	def currentSpeakers = []
	def currentSpeaker = "";
	// Text accumulated since the last subtitle was written.
	def currentSpeach = ""
	def currentStartTime = ""
	def currentEndTime = ""
	def inTurn = false
	// Sequence number of the next subtitle.
	def nTurn = 1
	def syncCounter = 0;
	def whoCounter = 0;

	/**
	 * Handles the start tag the parser is currently positioned on.
	 *
	 * Speaker registers a speaker, Turn resets the per-turn state, Event
	 * appends its description to the current speech, Sync and Who flush the
	 * speech accumulated so far as a subtitle.
	 */
	protected void processStartElement() {
		String localname = parser.getLocalName()
		switch (localname) {
			case "Speaker":
				def info = [:]
				speakers[parser.getAttributeValue(null, "id")] = info
				info["name"] = parser.getAttributeValue(null, "name")
				// wrap around: a Groovy list returns null past its end
				info["color"] = colors[icolor % colors.size()]
				info["count"] = 0;
				icolor++;
				break;
			case "Episode":
				if (debug) println "speakers $speakers"
				break;
			case "Turn":
				if (debug) println "Turn!"
				String speakerAttr = parser.getAttributeValue(null, "speaker")
				if (speakerAttr == null || speakerAttr.trim().length() == 0) {
					// Turn without speaker: no ids, writeSRTTurn() handles the unknown speaker
					currentSpeakers = new String[0]
					currentSpeaker = ""
				} else {
					currentSpeakers = speakerAttr.trim().split("\\s+")
					currentSpeaker = currentSpeakers[0]
				}
				currentSpeach = ""
				String s = parser.getAttributeValue(null, "startTime")
				String e = parser.getAttributeValue(null, "endTime")
				currentStartTime = Float.parseFloat(s)
				if (e != null) currentEndTime = Float.parseFloat(e)
				// temporary fix; forced back to float so that formatTime(float) accepts the value
				else currentEndTime = (currentStartTime + 3.0f) as float
				inTurn = true
				syncCounter = 0;
				whoCounter = 0;
				break;
			case "Event":
				if (debug) println "Event!"
				currentSpeach += " ["+parser.getAttributeValue(null, "desc")+"] "
				break;
			case "Sync": // cut a Turn, the Turn@endTime must be replaced with Sync@time
				if (debug) println "Sync!"
				syncCounter++;
				float syncTime = Float.parseFloat(parser.getAttributeValue(null, "time"))
				if (syncCounter > 1) { // ignore first 'Sync', there is no speech to write
					def end = currentEndTime
					currentEndTime = syncTime
					writeSRTTurn() // write previous speech, so currentEndTime is the Sync@time
					currentEndTime = end; // restore Turn@endTime
				}
				// the following speech starts at this Sync, even if nothing was written
				currentStartTime = syncTime
				break;
			case "Who":
				if (debug) println "Who!"
				whoCounter++;
				if (whoCounter > 1) { // ignore first 'Who', there is no speech to write
					String nb = parser.getAttributeValue(null, "nb")
					// Who@nb is 1-based; -1 marks a missing or non numeric value
					int n = (nb != null && nb.isInteger()) ? nb.toInteger() - 1 : -1
					if (n < 0 || n >= currentSpeakers.size()) {
						println "'Who@nb' Error at "+parser.getLocation()
						break;
					}

					// NOTE: the end time used here is still the Turn (or fallback) end time,
					// the time of the next Sync is not known yet.
					writeSRTTurn() // write previous speech
					// switch current speaker
					currentSpeaker = currentSpeakers[n]
				}
				break;
		}
	}

	/**
	 * Handles the end tag the parser is currently positioned on: the end
	 * of a Turn writes the remaining speech as a subtitle.
	 */
	protected void processEndElement() {
		String localname = parser.getLocalName()
		switch (localname) {
			case "Speaker":
				break;
			case "Turn":
				inTurn = false
				writeSRTTurn()
				break;
			case "Sync":
				break;
			case "Who":
				break;
		}
	}

	/**
	 * Writes the accumulated speech as one SRT subtitle (number, time range,
	 * coloured text) and resets the speech buffer. Does nothing when the
	 * trimmed speech is empty.
	 *
	 * The speaker name is prepended to the first two subtitles of each
	 * declared speaker. A speaker that was not declared gets the first
	 * colour of the palette and no name.
	 *
	 * The start time is not modified here: it is moved forward by the
	 * Sync and Turn handling, so speeches separated by a Who share the
	 * same start time.
	 */
	protected writeSRTTurn() {
		currentSpeach = currentSpeach.trim()

		if (currentSpeach.length() == 0) return; // nothing to write

		//println "Writing Turn of '$currentSpeaker': "+speakers[currentSpeaker]
		def info = speakers[currentSpeaker] // null when the Turn has no (declared) speaker
		def color = (info != null) ? info["color"] : colors[0]

		if (info != null && info["count"] < 2) {
			info["count"] = info["count"] + 1;
			currentSpeach = info["name"]+"\t"+currentSpeach
		}

		currentSpeach = "<font color=\"$color\">$currentSpeach</font>".replaceAll("\n", " ")

		def s = formatTime(currentStartTime)
		def e = formatTime(currentEndTime)
		writer.println """
$nTurn
$s --> $e
$currentSpeach"""
		nTurn++
		currentSpeach = "" // reset speech
	}

	/**
	 * Appends the current text event to the speech buffer when inside a Turn.
	 */
	protected void processText() {
		String txt = parser.getText();
		if (inTurn) {
			currentSpeach += txt
		}
	}

	/**
	 * Formats a time as an SRT timecode: HH:MM:SS,mmm with zero padded fields.
	 *
	 * @param time time in seconds; negative values are formatted as zero
	 * @return the timecode, e.g. 3.05 gives "00:00:03,050"
	 */
	private String formatTime(float time) {
		// work on whole milliseconds: rounding avoids 1.05f being truncated to 49 ms
		long totalMs = Math.round((double) time * 1000d)
		if (totalMs < 0) totalMs = 0

		long hours = totalMs.intdiv(3600000L)
		long minutes = (totalMs % 3600000L).intdiv(60000L)
		long seconds = (totalMs % 60000L).intdiv(1000L)
		long millis = totalMs % 1000L

		return String.format("%02d:%02d:%02d,%03d", hours, minutes, seconds, millis)
	}

	/**
	 * Converts each given TRS file to an SRT file saved next to it, with
	 * the same base name and the ".srt" extension.
	 *
	 * @param args paths of the TRS files to convert
	 */
	public static void main(String[] args) {
		if (args.length == 0) {
			println "Usage:"
			println "java -jar TRS2SRT.jar file1 file2 file3"
			println "\nResult files are saved in the save directory as fileN files."
		}
	//	args = ["/home/mdecorde/CCC/RECETTE/TRS2SRT/victor/pazinterior_ok.trs"]
		for (String path : args) {
			println "Processing path=$path ..."

			File trsFile = new File(path)
			String name = trsFile.getName()
			// strip the extension if there is one; lastIndexOf returns -1 without a dot
			int dot = name.lastIndexOf(".")
			if (dot > 0) name = name.substring(0, dot)
			File srtFile = new File(trsFile.getParentFile(), name+".srt")
			TRS2SRT t = new TRS2SRT(trsFile)
			t.process(srtFile)
		}
	}
}