Statistics
| Revision:

ccc / projets / TRS2SRT-standalone / src / org / txm / importer / transcriber / TRS2SRT.groovy @ 4

History | View | Annotate | Download (5.6 kB)

1 1 mdecorde
package org.txm.importer.transcriber
2 1 mdecorde
/**
3 1 mdecorde
 * Converts a file from Transcriber XML to an SRT (SubRip) subtitle file
4 1 mdecorde
 *
5 1 mdecorde
 * @author mdecorde
6 1 mdecorde
 *
7 1 mdecorde
 */
8 1 mdecorde
9 1 mdecorde
10 1 mdecorde
import java.util.ArrayList
11 1 mdecorde
12 1 mdecorde
import javax.xml.parsers.*
13 1 mdecorde
import javax.xml.stream.*
14 1 mdecorde
15 1 mdecorde
import java.net.URL
16 1 mdecorde
17 1 mdecorde
/**
 * Converts a Transcriber XML transcription (.trs) into a SubRip (.srt) subtitle file.
 *
 * The input is read with a StAX stream reader. Each Turn is cut into segments at
 * its Sync elements; every segment produces one cue per speaker that said
 * something during it. Cues of known speakers are wrapped in a
 * {@code <font color="...">} tag and the first two cues of each speaker are
 * prefixed with the speaker name.
 *
 * An instance is single-use: the constructor opens the input and
 * {@link #process(File)} consumes and closes it.
 */
class TRS2SRT {

    /** Length in seconds given to a Turn that has no usable end time. */
    private static final float DEFAULT_CUE_DURATION = 3.0f

    private def url // not used by this class
    private def inputData
    private def factory
    private XMLStreamReader parser
    def writer

    /** Colours handed out to speakers in declaration order (cycled when exhausted). */
    def colors = ["white", "red", "blue", "green", "yellow", "grey", "violet", "pink", "orange", "brown"]
    def icolor = 0

    File trsFile
    boolean debug = false

    /** Speaker id -> [name, color, count]; count = number of cues already prefixed with the name. */
    def speakers = [:]
    /** Speaker ids declared by the current Turn (Turn@speaker split on whitespace). */
    def currentSpeakers = []
    /** Id of the speaker the incoming text belongs to; null when the Turn declares no speaker. */
    def currentSpeaker = ""
    /** Text accumulated for the current speaker in the current segment. */
    def currentSpeach = ""
    def currentStartTime = ""
    def currentEndTime = ""
    def inTurn = false
    /** Index of the next cue to write (SRT cues are numbered from 1). */
    def nTurn = 1
    def syncCounter = 0
    def whoCounter = 0

    /** Speech of earlier speakers of the current segment, waiting for the segment end time. */
    private List pendingSpeeches = []

    /**
     * Opens the Transcriber file and prepares the XML stream reader.
     *
     * @param trsFile the Transcriber XML file to convert
     */
    public TRS2SRT(File trsFile) {
        this.trsFile = trsFile
        inputData = trsFile.toURI().toURL().openStream()
        try {
            factory = XMLInputFactory.newInstance()
            parser = factory.createXMLStreamReader(inputData)
        } catch (Exception e) {
            // do not leak the stream when the reader cannot be created
            closeQuietly(inputData)
            throw e
        }
    }

    /**
     * Transforms the trsFile to SRT format.
     *
     * The writer, the XML reader and the input stream are closed when the
     * method returns, whether or not the conversion succeeded.
     *
     * @param srtFile the result file, written in UTF-8
     */
    public void process(File srtFile) {
        writer = srtFile.newWriter("UTF-8")
        try {
            for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                switch (event) {
                    case XMLStreamConstants.START_ELEMENT:
                        processStartElement()
                        break
                    case XMLStreamConstants.END_ELEMENT:
                        processEndElement()
                        break
                    case XMLStreamConstants.CHARACTERS:
                        processText()
                        break
                }
            }
            // flush explicitly so that a write failure is reported instead of
            // being swallowed by the quiet close below
            writer.flush()
        } finally {
            closeQuietly(writer)
            closeQuietly(parser)
            closeQuietly(inputData)
        }
    }

    /** Dispatches the start tag the parser is currently positioned on. */
    protected void processStartElement() {
        String localname = parser.getLocalName()
        switch (localname) {
            case "Speaker":
                startSpeaker()
                break
            case "Episode":
                if (debug) println "speakers $speakers"
                break
            case "Turn":
                startTurn()
                break
            case "Event":
                if (debug) println "Event!"
                currentSpeach += " ["+parser.getAttributeValue(null, "desc")+"] "
                break
            case "Sync":
                startSync()
                break
            case "Who":
                startWho()
                break
        }
    }

    /** Registers a speaker declaration and assigns it the next colour. */
    private void startSpeaker() {
        def info = [:]
        info["name"] = parser.getAttributeValue(null, "name")
        // cycle through the palette so that speakers beyond its size still get a colour
        info["color"] = colors[icolor % colors.size()]
        info["count"] = 0
        speakers[parser.getAttributeValue(null, "id")] = info
        icolor++
    }

    /** Resets the per-Turn state from the Turn attributes. */
    private void startTurn() {
        if (debug) println "Turn!"

        // Turn@speaker may be absent; the Turn then has no current speaker
        String declared = parser.getAttributeValue(null, "speaker")?.trim()
        currentSpeakers = declared ? declared.split("\\s+") : new String[0]
        currentSpeaker = currentSpeakers.size() > 0 ? currentSpeakers[0] : null
        currentSpeach = ""
        pendingSpeeches.clear()

        String start = parser.getAttributeValue(null, "startTime")
        String end = parser.getAttributeValue(null, "endTime")
        currentStartTime = Float.parseFloat(start)
        if (end != null) {
            currentEndTime = Float.parseFloat(end)
        } else {
            // no end time: fall back to a fixed duration, kept as a float so
            // that it still matches formatTime(float)
            currentEndTime = (currentStartTime + DEFAULT_CUE_DURATION) as float
        }

        inTurn = true
        syncCounter = 0
        whoCounter = 0
    }

    /**
     * A Sync cuts the Turn: the speech collected so far ends at Sync@time and
     * the following speech starts there.
     */
    private void startSync() {
        if (debug) println "Sync!"
        syncCounter++
        if (syncCounter > 1) { // ignore first 'Sync', there is no speech to write
            def turnEnd = currentEndTime
            currentEndTime = Float.parseFloat(parser.getAttributeValue(null, "time"))
            writeSRTTurn() // the segment being closed ends at Sync@time
            // the next segment starts here even when the closed one was empty
            currentStartTime = currentEndTime
            currentEndTime = turnEnd // restore Turn@endTime
            if (currentEndTime < currentStartTime) {
                // only possible with an estimated or inconsistent Turn end time
                currentEndTime = (currentStartTime + DEFAULT_CUE_DURATION) as float
            }
        }
    }

    /**
     * A Who switches the current speaker to the Who@nb-th speaker of the Turn.
     * The speech of the previous speaker is kept aside and written together
     * with the rest of the segment, once the segment end time is known.
     */
    private void startWho() {
        if (debug) println "Who!"
        whoCounter++

        String nb = parser.getAttributeValue(null, "nb")?.trim()
        int index = (nb != null && nb.isInteger()) ? nb.toInteger() - 1 : -1
        if (index < 0 || index >= currentSpeakers.size()) {
            println "'Who@nb' Error at "+parser.getLocation()
            return
        }

        holdCurrentSpeech()
        currentSpeaker = currentSpeakers[index]
    }

    /** Closing a Turn writes whatever speech is still waiting. */
    protected void processEndElement() {
        String localname = parser.getLocalName()
        if (localname == "Turn") {
            inTurn = false
            writeSRTTurn()
        }
    }

    /**
     * Writes the speech collected for the current segment: one cue per speaker,
     * all of them running from currentStartTime to currentEndTime. Nothing is
     * written when no speech was collected.
     */
    protected writeSRTTurn() {
        holdCurrentSpeech()
        if (pendingSpeeches.isEmpty()) return // nothing to write

        String start = formatTime(currentStartTime as float)
        String end = formatTime(currentEndTime as float)
        for (def speech : pendingSpeeches) {
            writeCue(start, end, speech["speaker"], speech["text"])
        }
        pendingSpeeches.clear()
    }

    /** Moves the non-blank current speech to the pending list and resets it. */
    private void holdCurrentSpeech() {
        String text = currentSpeach.toString().trim()
        if (text.length() > 0) {
            pendingSpeeches << [speaker: currentSpeaker, text: text]
        }
        currentSpeach = "" // reset speach
    }

    /**
     * Writes one SRT cue preceded by an empty line.
     *
     * @param start formatted start time
     * @param end formatted end time
     * @param speakerId id of the speaker, may be null or undeclared
     * @param text the cue text
     */
    private void writeCue(String start, String end, speakerId, String text) {
        def info = speakers[speakerId]
        if (info != null) {
            if (info["count"] < 2) { // name the speaker on its first two cues only
                info["count"] = info["count"] + 1
                text = info["name"]+"\t"+text
            }
            text = "<font color=\""+info["color"]+"\">"+text+"</font>"
        }
        // a cue must not contain line breaks coming from the XML layout
        text = text.replaceAll("\n", " ")

        writer.println("\n"+nTurn+"\n"+start+" --> "+end+"\n"+text)
        nTurn++
    }

    /** Appends character data to the current speech while inside a Turn. */
    protected void processText() {
        if (inTurn) {
            currentSpeach += parser.getText()
        }
    }

    /**
     * Formats a time as an SRT timestamp, {@code HH:MM:SS,mmm}.
     *
     * The time is rounded to the nearest millisecond before being split, so
     * the fields always carry correctly and are zero-padded.
     *
     * @param time a time in seconds; negative values are treated as 0
     * @return the formatted timestamp
     */
    private String formatTime(float time) {
        long totalMillis = Math.max(0L, Math.round(time * 1000.0d))
        long totalSeconds = totalMillis.intdiv(1000)

        long millis = totalMillis % 1000
        long seconds = totalSeconds % 60
        long minutes = totalSeconds.intdiv(60) % 60
        long hours = totalSeconds.intdiv(3600)

        // Locale.ROOT: the digits must stay ASCII whatever the default locale
        return String.format(Locale.ROOT, "%02d:%02d:%02d,%03d", hours, minutes, seconds, millis)
    }

    /** Closes a resource, ignoring failures so that they cannot mask an earlier error. */
    private static void closeQuietly(resource) {
        try {
            resource?.close()
        } catch (Exception ignored) {
            // best effort only
        }
    }

    /**
     * Converts every file given on the command line; each result is written
     * next to its input with the extension replaced by ".srt".
     *
     * @param args paths of the Transcriber files to convert
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            println "Usage:"
            println "java -jar TRS2SRT.jar file1 file2 file3"
            println "\nResult files are saved in the save directory as fileN files."
            return
        }

        for (String path : args) {
            println "Processing path=$path ..."

            File trsFile = new File(path)
            String name = trsFile.getName()
            // strip the last extension only; names without one are kept whole
            int dot = name.lastIndexOf(".")
            if (dot > 0) name = name.substring(0, dot)

            // resolve against the absolute path: a bare relative file name has no parent
            File srtFile = new File(trsFile.getAbsoluteFile().getParentFile(), name+".srt")
            TRS2SRT t = new TRS2SRT(trsFile)
            t.process(srtFile)
        }
    }
}