|
1 |
package org.txm.importer.transcriber
|
|
2 |
/**
|
|
3 |
* Converts a Transcriber XML (TRS) file to an SRT (SubRip) subtitle file
|
|
4 |
*
|
|
5 |
* @author mdecorde
|
|
6 |
*
|
|
7 |
*/
|
|
8 |
|
|
9 |
|
|
10 |
import java.util.ArrayList
|
|
11 |
|
|
12 |
import javax.xml.parsers.*
|
|
13 |
import javax.xml.stream.*
|
|
14 |
|
|
15 |
import java.net.URL
|
|
16 |
|
|
17 |
/**
 * Streaming converter from a Transcriber XML transcription (TRS) to an SRT
 * (SubRip) subtitle file.
 *
 * The TRS file is read once with a StAX parser. Every piece of speech found in
 * a Turn (cut at each Sync and at each speaker change signalled by Who) is
 * written as one SRT cue, wrapped in a font tag whose color identifies the
 * speaker.
 *
 * An instance is single-use: {@link #process(File)} consumes the parser and
 * closes the input when it returns.
 */
class TRS2SRT {

    // Not used by this class; kept so the declared members stay unchanged.
    private def url
    private def inputData
    private def factory
    private XMLStreamReader parser
    def writer

    /** Palette used to give each declared speaker its own font color. */
    def colors = ["white", "red", "blue", "green", "yellow", "grey", "violet", "pink", "orange", "brown"];
    /** Number of speakers declared so far; selects the next color. */
    def icolor = 0;

    File trsFile;
    boolean debug = false

    /**
     * Opens the TRS file and prepares the StAX parser.
     *
     * @param trsFile the Transcriber XML file to read
     */
    public TRS2SRT(File trsFile) {
        inputData = trsFile.toURI().toURL().openStream()
        try {
            factory = XMLInputFactory.newInstance()
            // The input is an arbitrary user file: do not process its DTD and
            // do not resolve external entities.
            factory.setProperty(XMLInputFactory.SUPPORT_DTD, false)
            factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false)
            parser = factory.createXMLStreamReader(inputData)
        } catch (Exception e) {
            // do not leak the stream when the parser cannot be created
            inputData.close()
            throw e
        }

        this.trsFile = trsFile
    }

    /**
     * Transforms the trsFile to SRT format.
     *
     * The writer, the parser and the input stream are closed when this method
     * returns, whether it succeeds or throws.
     *
     * @param srtFile the result file, written in UTF-8
     */
    public void process(File srtFile) {
        writer = srtFile.newWriter("UTF-8")
        try {
            for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                switch (event) {
                    case XMLStreamConstants.START_ELEMENT:
                        processStartElement()
                        break;
                    case XMLStreamConstants.END_ELEMENT:
                        processEndElement()
                        break;
                    case XMLStreamConstants.CHARACTERS:
                        processText()
                        break;
                }
            }
        } finally {
            try {
                writer.close()
            } finally {
                closeInput()
            }
        }
    }

    /** Releases the parser and its underlying stream; close failures are not actionable here. */
    private void closeInput() {
        try {
            parser.close()
        } catch (XMLStreamException ignored) {
            // nothing more can be done with a parser that fails to close
        }
        try {
            inputData.close()
        } catch (IOException ignored) {
            // the stream was only read from
        }
    }

    /** Speaker id -> [name, color, count] where count is the number of cues already prefixed with the name. */
    def speakers = [:]
    /** Speaker ids listed by the current Turn@speaker attribute. */
    def currentSpeakers = []
    def currentSpeaker = "";
    /** Text accumulated since the last written cue. */
    def currentSpeach = ""
    def currentStartTime = ""
    def currentEndTime = ""
    def inTurn = false
    /** Index of the next SRT cue. */
    def nTurn = 1
    def syncCounter = 0;
    def whoCounter = 0;

    /** Dispatches on the name of the element that was just opened. */
    protected void processStartElement() {
        String localname = parser.getLocalName()
        switch (localname) {
            case "Speaker":
                registerSpeaker()
                break;
            case "Episode":
                if (debug) println "speakers $speakers"
                break;
            case "Turn":
                startTurn()
                break;
            case "Event":
                if (debug) println "Event!"
                currentSpeach += " ["+parser.getAttributeValue(null, "desc")+"] "
                break;
            case "Sync":
                cutAtSync()
                break;
            case "Who":
                switchSpeaker()
                break;
        }
    }

    /** Records a declared speaker and assigns it the next palette color. */
    private void registerSpeaker() {
        def info = [:]
        speakers[parser.getAttributeValue(null, "id")] = info
        info["name"] = parser.getAttributeValue(null, "name")
        // cycle through the palette so that speakers beyond the palette size still get a color
        info["color"] = colors[icolor % colors.size()]
        info["count"] = 0;
        icolor++;
    }

    /** Resets the per-turn state from the attributes of a Turn element. */
    private void startTurn() {
        if (debug) println "Turn!"
        // Turn@speaker may be missing; tokenize() also tolerates repeated spaces
        String speakerAttr = parser.getAttributeValue(null, "speaker")
        currentSpeakers = speakerAttr?.tokenize() ?: []
        currentSpeaker = currentSpeakers.size() > 0 ? currentSpeakers[0] : null
        currentSpeach = ""
        currentStartTime = Double.parseDouble(parser.getAttributeValue(null, "startTime"))
        currentEndTime = Double.parseDouble(parser.getAttributeValue(null, "endTime"))
        inTurn = true
        syncCounter = 0;
        whoCounter = 0;
    }

    /**
     * A Sync cuts the Turn: the pending speech ends at Sync@time and the
     * following speech starts there.
     */
    private void cutAtSync() {
        if (debug) println "Sync!"
        syncCounter++;
        if (syncCounter <= 1) return; // ignore first 'Sync', there is no speech to write

        double syncTime = Double.parseDouble(parser.getAttributeValue(null, "time"))
        def turnEnd = currentEndTime
        currentEndTime = syncTime
        writeSRTTurn()
        currentEndTime = turnEnd // restore Turn@endTime
        currentStartTime = syncTime // the next cue must not overlap the one just written
    }

    /** A Who element switches the current speaker inside a multi-speaker Turn. */
    private void switchSpeaker() {
        if (debug) println "Who!"
        whoCounter++;
        if (whoCounter <= 1) return; // ignore first 'Who', there is no speech to write

        int n
        try {
            n = Integer.parseInt(parser.getAttributeValue(null, "nb")) - 1
        } catch (NumberFormatException ignored) {
            n = -1 // missing or malformed Who@nb is reported below
        }
        if (n < 0 || n >= currentSpeakers.size()) {
            println "'Who@nb' Error at "+parser.getLocation()
            return;
        }

        writeSRTTurn() // write previous speech
        currentSpeaker = currentSpeakers[n]
    }

    /** Writes the pending speech when a Turn is closed; other elements need no action. */
    protected void processEndElement() {
        String localname = parser.getLocalName()
        if ("Turn" == localname) {
            inTurn = false
            writeSRTTurn()
        }
    }

    /**
     * Writes the pending speech as one SRT cue (index, time range, colored
     * text, blank separator line) and resets it. Does nothing when the pending
     * speech is blank.
     *
     * The speaker name is prepended to the first two cues of each speaker.
     * Speech whose speaker is missing or undeclared is written without a name,
     * using the first palette color.
     */
    protected writeSRTTurn() {
        currentSpeach = currentSpeach.trim()

        if (currentSpeach.length() == 0) return; // nothing to write

        def info = speakers[currentSpeaker]
        def color = info != null ? info["color"] : colors[0]

        if (info != null && info["count"] < 2) {
            info["count"] = info["count"] + 1;
            currentSpeach = (info["name"] ?: currentSpeaker)+"\t"+currentSpeach
        }

        // a cue must stay on one line: an empty line would terminate it
        String text = "<font color=\"$color\">$currentSpeach</font>".replaceAll("[\r\n]+", " ")

        def s = formatTime(currentStartTime)
        def e = formatTime(currentEndTime)
        String cue = "" + nTurn + "\n" + s + " --> " + e + "\n" + text + "\n\n"
        writer.write(cue)
        nTurn++
        currentSpeach = "" // reset speech
    }

    /** Accumulates character data found inside a Turn. */
    protected void processText() {
        String txt = parser.getText();
        if (inTurn) {
            currentSpeach += txt
        }
    }

    /**
     * Formats a time as an SRT timecode: HH:MM:SS,mmm with every field
     * zero-padded and a comma before the milliseconds.
     *
     * @param time a time in seconds; negative values are treated as 0
     * @return the SRT timecode, rounded to the nearest millisecond
     */
    private String formatTime(double time) {
        long totalMs = Math.round(Math.max(0d, time) * 1000d)
        long ms = totalMs % 1000
        long totalSec = totalMs.intdiv(1000)
        long sec = totalSec % 60
        long min = totalSec.intdiv(60) % 60
        long h = totalSec.intdiv(3600)
        return String.format(Locale.ROOT, "%02d:%02d:%02d,%03d", h, min, sec, ms)
    }

    /**
     * Converts every file given on the command line; each result is written
     * next to its source with the extension replaced by ".srt".
     *
     * @param args paths of the TRS files to convert
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            println "Usage:"
            println "java -jar TRS2SRT.jar file1 file2 file3"
            println "\nResult files are saved in the same directory as fileN.srt files."
        }

        for (String path : args) {
            println "Processing path=$path ..."

            File trsFile = new File(path)
            String name = trsFile.getName()
            // strip only the last extension, and cope with names that have none
            int dot = name.lastIndexOf(".")
            if (dot > 0) name = name.substring(0, dot)
            // resolve to an absolute path first: a bare relative file name has no parent
            File parentDir = trsFile.getAbsoluteFile().getParentFile()
            File srtFile = new File(parentDir, name+".srt")
            TRS2SRT t = new TRS2SRT(trsFile)
            t.process(srtFile)
        }
    }
}
|