5 |
5 |
import java.time.LocalTime
|
6 |
6 |
import java.time.format.DateTimeFormatter
|
7 |
7 |
import org.txm.utils.*
|
|
8 |
import org.txm.utils.logger.*
|
8 |
9 |
|
9 |
10 |
@Field @Option(name="metadataFile", usage="Tableau des metadonnées de sections", widget="FileOpen", required=true, def="")
|
10 |
11 |
File metadataFile;
|
... | ... | |
104 |
105 |
|
105 |
106 |
LocalTime time1 = LocalTime.parse(totalFrame, dateTimeFormatter)
|
106 |
107 |
totalFrame = (time1.getHour()*60*60) + (time1.getMinute()*60) + time1.getSecond()
|
|
108 |
|
|
109 |
def ret = totalFrame + (bonusFrame/25)
|
|
110 |
return ret
|
|
111 |
}
|
107 |
112 |
|
108 |
|
def ret = totalFrame + (bonusFrame/25)
|
109 |
|
return ret
|
110 |
|
}
|
111 |
|
|
112 |
|
def sectionGroupsToInsert = [:]
|
113 |
|
println "Reading data..."
|
114 |
|
while (reader.readRecord()) {
|
115 |
|
String id = reader.get(joinTRSColumn).trim()
|
116 |
|
if (id.endsWith(".mp4")) id = id.substring(0, id.length()-4)
|
117 |
|
if (id.length() == 0) continue;
|
|
113 |
try {
|
|
114 |
def sectionGroupsToInsert = [:]
|
|
115 |
println "Reading data..."
|
|
116 |
while (reader.readRecord()) {
|
|
117 |
String id = reader.get(joinTRSColumn).trim()
|
|
118 |
if (id.endsWith(".mp4")) id = id.substring(0, id.length()-4)
|
|
119 |
if (id.length() == 0) continue;
|
118 |
120 |
|
119 |
|
if (!sectionGroupsToInsert.containsKey(id)) {
|
120 |
|
sectionGroupsToInsert[id] = []
|
121 |
|
}
|
122 |
|
def section = sectionGroupsToInsert[id]
|
123 |
|
|
124 |
|
if (reader.get(startTimeColumn) != null && reader.get(startTimeColumn).length() > 0) { // ignore non timed sections
|
|
121 |
if (!sectionGroupsToInsert.containsKey(id)) {
|
|
122 |
sectionGroupsToInsert[id] = []
|
|
123 |
}
|
|
124 |
def section = sectionGroupsToInsert[id]
|
125 |
125 |
|
126 |
|
def m = [:]
|
127 |
|
|
128 |
|
for (def todo : ["topic":topicColumns, "type":typeColumns]) {
|
129 |
|
def data = []
|
130 |
|
for (def col : todo.value) {
|
131 |
|
if (reader.get(col).trim().length() > 0) {
|
132 |
|
data << reader.get(col).trim().replace("\n", "")
|
|
126 |
if (reader.get(startTimeColumn) != null && reader.get(startTimeColumn).length() > 0) { // ignore non timed sections
|
|
127 |
|
|
128 |
def m = [:]
|
|
129 |
|
|
130 |
// For each of the two keys "topic" and "type", collect the non-empty cell values of the
// associated column list from the current record and store them in m, joined with tabs.
for (def todo : ["topic":topicColumns, "type":typeColumns]) {
|
|
131 |
def data = []
|
|
132 |
for (def col : todo.value) {
|
|
133 |
if (reader.get(col).trim().length() > 0) { // skip cells that are empty once trimmed
|
|
134 |
data << reader.get(col).trim().replace("\n", "") // remove embedded newline characters
|
|
135 |
}
|
133 |
136 |
}
|
|
137 |
m[todo.key] = data.join("\t")
|
134 |
138 |
}
|
135 |
|
m[todo.key] = data.join("\t")
|
|
139 |
def metadataList = []
|
|
140 |
def metadataGroupList = []
|
|
141 |
// Copy every metadata column of the current record into m, keyed by the identifier
// returned by AsciiUtils.buildAttributeId, and record that identifier together with
// the entry of metadataColumnsGroups found at the same index.
// NOTE(review): assumes metadataColumnsGroups has at least as many entries as metadataColumns - confirm.
for (int i = 0 ; i < metadataColumns.size() ; i++) {
|
|
142 |
def col = metadataColumns[i]
|
|
143 |
String c = AsciiUtils.buildAttributeId(col)
|
|
144 |
m[c] = reader.get(col)
|
|
145 |
metadataList << c
|
|
146 |
metadataGroupList << metadataColumnsGroups[i]
|
|
147 |
}
|
|
148 |
m["metadata"] = metadataList.join("|")
|
|
149 |
m["metadata_groups"] = metadataGroupList.join("|")
|
|
150 |
|
|
151 |
m["startTime"] = strTotime(reader.get(startTimeColumn))
|
|
152 |
m["endTime"] = strTotime(reader.get(endTimeColumn))
|
|
153 |
m["synchronized"] = "true"
|
|
154 |
|
|
155 |
section << [m["startTime"], m["endTime"], m]
|
136 |
156 |
}
|
137 |
|
def metadataList = []
|
138 |
|
def metadataGroupList = []
|
139 |
|
// Copy every metadata column of the current record into m, keyed by the identifier
// returned by AsciiUtils.buildAttributeId, and record that identifier together with
// the entry of metadataColumnsGroups found at the same index.
// NOTE(review): assumes metadataColumnsGroups has at least as many entries as metadataColumns - confirm.
for (int i = 0 ; i < metadataColumns.size() ; i++) {
|
140 |
|
def col = metadataColumns[i]
|
141 |
|
String c = AsciiUtils.buildAttributeId(col)
|
142 |
|
m[c] = reader.get(col)
|
143 |
|
metadataList << c
|
144 |
|
metadataGroupList << metadataColumnsGroups[i]
|
145 |
|
}
|
146 |
|
m["metadata"] = metadataList.join("|")
|
147 |
|
m["metadata_groups"] = metadataGroupList.join("|")
|
148 |
|
|
149 |
|
m["startTime"] = strTotime(reader.get(startTimeColumn))
|
150 |
|
m["endTime"] = strTotime(reader.get(endTimeColumn))
|
151 |
|
m["synchronized"] = "true"
|
152 |
|
|
153 |
|
section << [m["startTime"], m["endTime"], m]
|
154 |
157 |
}
|
155 |
|
}
|
156 |
|
|
157 |
|
println "Inserting sections... "+sectionGroupsToInsert.size()
|
158 |
|
|
159 |
|
ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size())
|
160 |
|
for (String id : sectionGroupsToInsert.keySet()) {
|
161 |
|
cpb.tick()
|
162 |
|
File trsFile = new File(trsDirectory, id+".trs")
|
163 |
|
if (!trsFile.exists()) {
|
164 |
|
continue
|
165 |
|
}
|
166 |
|
//println "Processing $id..."
|
167 |
|
def sections = sectionGroupsToInsert[id]
|
168 |
|
sections = sections.sort() { a, b -> a[0] <=> b[0] ?: a[1] <=> b[1] }
|
169 |
158 |
|
170 |
|
// Open input file
|
171 |
|
def slurper = new groovy.util.XmlParser(false, true, true);
|
172 |
|
slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false) // allow DTD declaration
|
173 |
|
slurper.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "all"); // allow to read DTD from local file
|
174 |
|
def trs = slurper.parse(trsFile.toURI().toString())
|
175 |
|
def trsEpisodes = trs.Episode // 1
|
176 |
|
if (trsEpisodes.size() > 1) {
|
177 |
|
println "multiple Episode node in $trsFile"
|
178 |
|
continue
|
179 |
|
}
|
180 |
|
def trsEpisode = trsEpisodes[0]
|
181 |
|
def trsSections = trs.Episode.Section // 1
|
182 |
|
if (trsSections.size() > 1) {
|
183 |
|
println "multiple Section node in $trsFile"
|
184 |
|
continue
|
185 |
|
}
|
186 |
|
def trsSection = trsSections[0]
|
|
159 |
println "Inserting sections... "+sectionGroupsToInsert.size()
|
187 |
160 |
|
188 |
|
def turns = trsSection.Turn
|
189 |
|
def newSections = []
|
190 |
|
def iSection = 0;
|
191 |
|
def currentSection = null
|
192 |
|
def currentNode = null
|
193 |
|
|
194 |
|
for (def turn : turns) {
|
195 |
|
def start = Float.parseFloat(turn.@startTime)
|
196 |
|
def end = Float.parseFloat(turn.@endTime)
|
|
161 |
ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size())
|
|
162 |
for (String id : sectionGroupsToInsert.keySet()) {
|
|
163 |
cpb.tick()
|
|
164 |
File trsFile = new File(trsDirectory, id+".trs")
|
|
165 |
if (!trsFile.exists()) {
|
|
166 |
continue
|
|
167 |
}
|
|
168 |
//println "Processing $id..."
|
|
169 |
def sections = sectionGroupsToInsert[id]
|
|
170 |
sections = sections.sort() { a, b -> a[0] <=> b[0] ?: a[1] <=> b[1] }
|
197 |
171 |
|
198 |
|
def found = null;
|
199 |
|
for (int i = iSection ; i < sections.size() ; i++) {
|
200 |
|
// Compare the turn interval [start, end] with candidate section i, whose element 0 is
// its start time and element 1 its end time; the first overlapping section is kept.
if (end < sections[i][0]) { // Turn ends before the section starts: no overlap
|
201 |
|
|
202 |
|
} else if (sections[i][1] < start) { // Turn starts after the section ends: no overlap
|
203 |
|
|
204 |
|
} else {
|
205 |
|
found = sections[i]
|
206 |
|
iSection = i
|
207 |
|
break; // stop searching; iSection remembers where to resume for the next turn
|
208 |
|
}
|
|
172 |
// Open input file
|
|
173 |
def slurper = new groovy.util.XmlParser(false, true, true);
|
|
174 |
slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false) // allow DTD declaration
|
|
175 |
slurper.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "all"); // allow to read DTD from local file
|
|
176 |
def trs = slurper.parse(trsFile.toURI().toString())
|
|
177 |
def trsEpisodes = trs.Episode // 1
|
|
178 |
if (trsEpisodes.size() > 1) {
|
|
179 |
println "multiple Episode node in $trsFile"
|
|
180 |
continue
|
209 |
181 |
}
|
|
182 |
def trsEpisode = trsEpisodes[0]
|
|
183 |
def trsSections = trs.Episode.Section // 1
|
|
184 |
if (trsSections.size() > 1) {
|
|
185 |
println "multiple Section node in $trsFile"
|
|
186 |
continue
|
|
187 |
}
|
|
188 |
def trsSection = trsSections[0]
|
210 |
189 |
|
211 |
|
if (found == null) {
|
212 |
|
if (currentSection != null || currentNode == null) {
|
213 |
|
currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] )
|
214 |
|
currentSection = null;
|
|
190 |
def turns = trsSection.Turn
|
|
191 |
def newSections = []
|
|
192 |
def iSection = 0;
|
|
193 |
def currentSection = null
|
|
194 |
def currentNode = null
|
|
195 |
|
|
196 |
for (def turn : turns) {
|
|
197 |
def start = Float.parseFloat(turn.@startTime)
|
|
198 |
def end = Float.parseFloat(turn.@endTime)
|
|
199 |
|
|
200 |
def found = null;
|
|
201 |
// Look for the first section, starting at index iSection, whose time interval overlaps
// the current turn. Element 0 of a section entry is its start time and element 1 its
// end time. found stays null when no section overlaps.
for (int i = iSection ; i < sections.size() ; i++) {
|
|
202 |
if (end < sections[i][0]) { // Turn ends before the section starts: no overlap
|
|
203 |
|
|
204 |
} else if (sections[i][1] < start) { // Turn starts after the section ends: no overlap
|
|
205 |
|
|
206 |
} else {
|
|
207 |
found = sections[i]
|
|
208 |
iSection = i
|
|
209 |
break; // stop searching; iSection remembers where to resume for the next turn
|
|
210 |
}
|
215 |
211 |
}
|
216 |
|
} else {
|
217 |
|
if (found != currentSection) {
|
218 |
|
if (currentNode != null && currentNode.@synchronized == "false") {
|
219 |
|
def tmp = currentNode.Turn
|
220 |
|
currentNode.@endTime = tmp[-1].@endTime
|
|
212 |
|
|
213 |
if (found == null) {
|
|
214 |
if (currentSection != null || currentNode == null) {
|
|
215 |
currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] )
|
|
216 |
currentSection = null;
|
221 |
217 |
}
|
222 |
|
|
223 |
|
currentSection = found
|
224 |
|
currentNode = new Node(trsEpisode, "Section", currentSection[2])
|
|
218 |
} else {
|
|
219 |
if (found != currentSection) {
|
|
220 |
if (currentNode != null && currentNode.@synchronized == "false") {
|
|
221 |
def tmp = currentNode.Turn
|
|
222 |
currentNode.@endTime = tmp[-1].@endTime
|
|
223 |
}
|
|
224 |
|
|
225 |
currentSection = found
|
|
226 |
currentNode = new Node(trsEpisode, "Section", currentSection[2])
|
|
227 |
}
|
225 |
228 |
}
|
|
229 |
trsSection.remove(turn)
|
|
230 |
currentNode.append(turn)
|
226 |
231 |
}
|
227 |
|
trsSection.remove(turn)
|
228 |
|
currentNode.append(turn)
|
|
232 |
|
|
233 |
//remove the initial section
|
|
234 |
trsEpisode.remove(trsSection)
|
|
235 |
|
|
236 |
outputDirectory.mkdir()
|
|
237 |
File outfile = new File(outputDirectory, trsFile.getName())
|
|
238 |
// Write the modified tree to outfile in UTF-8: first the XML declaration and the
// DOCTYPE line, written by hand, then the node tree itself.
// NOTE(review): the header is presumably written manually because XmlNodePrinter does not emit it - confirm.
outfile.withWriter("UTF-8") { writer ->
|
|
239 |
writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
|
|
240 |
def printer = new groovy.util.XmlNodePrinter(new PrintWriter(writer))
|
|
241 |
printer.setPreserveWhitespace(true) // keep text content whitespace as it is in the tree
|
|
242 |
printer.print(trs)
|
|
243 |
}
|
229 |
244 |
}
|
|
245 |
cpb.done()
|
|
246 |
reader.close()
|
|
247 |
println "Done."
|
230 |
248 |
|
231 |
|
//remove the initial section
|
232 |
|
trsEpisode.remove(trsSection)
|
233 |
|
|
234 |
|
outputDirectory.mkdir()
|
235 |
|
File outfile = new File(outputDirectory, trsFile.getName())
|
236 |
|
// Write the modified tree to outfile in UTF-8: first the XML declaration and the
// DOCTYPE line, written by hand, then the node tree itself.
// NOTE(review): the header is presumably written manually because XmlNodePrinter does not emit it - confirm.
outfile.withWriter("UTF-8") { writer ->
|
237 |
|
writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
|
238 |
|
def printer = new groovy.util.XmlNodePrinter(new PrintWriter(writer))
|
239 |
|
printer.setPreserveWhitespace(true) // keep text content whitespace as it is in the tree
|
240 |
|
printer.print(trs)
|
241 |
|
}
|
|
249 |
} catch(Exception e) {
|
|
250 |
println "Error: "+e
|
|
251 |
Log.printStackTrace(e)
|
242 |
252 |
}
|
243 |
|
cpb.done()
|
244 |
|
reader.close()
|
245 |
|
println "Done."
|