Révision 3416
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/AddSectionsFromTableV2Macro.groovy (revision 3416) | ||
---|---|---|
1 |
package org.txm.macro.transcription |
|
2 |
|
|
3 |
import java.nio.charset.Charset |
|
4 |
|
|
5 |
import java.time.LocalTime |
|
6 |
import java.time.format.DateTimeFormatter |
|
7 |
|
|
8 |
import org.eclipse.core.internal.localstore.IsSynchronizedVisitor |
|
9 |
import org.txm.utils.* |
|
10 |
import org.txm.utils.logger.* |
|
11 |
|
|
12 |
// Macro parameters. Each @Field/@Option pair declares one value that is filled in when
// ParametersDialog.open(this) is called at the start of the script body.
// Option names are kept unchanged (including the "Threashold" spelling) since they identify the parameters.

// Table file holding the section metadata
@Field @Option(name="metadataFile", usage="Tableau des metadonnées de sections", widget="FileOpen", required=true, def="")
File metadataFile;

// Directory containing the <id>.trs files to process
@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS", widget="Folder", required=true, def="")
File trsDirectory;

// Column whose value (minus an optional ".mp4" suffix) gives the TRS file name
@Field @Option(name="joinTRSColumn", usage="Colonne de jointure de transcription", widget="String", required=true, def="Lien notice principale")
def joinTRSColumn

// Column holding the section start time code
@Field @Option(name="startTimeColumn", usage="Colonne de timing de début de section", widget="String", required=true, def="antract_debut")
def startTimeColumn = "antract_debut"

// Column holding the section end time code
@Field @Option(name="endTimeColumn", usage="Colonne de timing de fin de section", widget="String", required=true, def="antract_fin")
def endTimeColumn = "antract_fin"

// ";"-separated columns joined into the "type" attribute of a section
@Field @Option(name="typeColumns", usage="Colonnes des métadonnées de type de section", widget="String", required=true, def="Titre propre")
def typeColumns

// ";"-separated columns joined into the "topic" attribute of a section
@Field @Option(name="topicColumns", usage="Colonnes des métadonnées de topic de section", widget="String", required=true, def="Date de diffusion")
def topicColumns

// ";"-separated columns copied as section attributes
@Field @Option(name="metadataColumns", usage="Colonnes de metadonnées de section", widget="String", required=true, def="Titre propre;Date de diffusion;Identifiant de la notice;Notes du titre;Type de date;Durée;Genre;Langue VO / VE;Nature de production;Producteurs (Aff.);Thématique;Nom fichier segmenté (info);antract_video;antract_debut;antract_fin;antract_duree;antract_tc_type;antract_tc_date;Résumé;Séquences;Descripteurs (Aff. Lig.);Générique (Aff. Lig.)")
def metadataColumns

// ";"-separated group names, one per entry of metadataColumns (same order, same count)
@Field @Option(name="metadataColumnsGroups", usage="Colonnes des groupes de metadonnées de section", widget="String", required=true, def="metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;secondary;secondary;secondary;secondary;secondary;secondary;secondary;text;text;text;text")
def metadataColumnsGroups

// When true, a section start close to the previous section end is aligned on it
@Field @Option(name="fixSectionsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true")
def fixSectionsLimits

// Maximum gap (compared with section times) for which fixSectionsLimits aligns two sections
@Field @Option(name="sectionsMergeActivationThreashold", usage="marge d'erreur de corrections des limites de sections", widget="Float", required=true, def="1.0")
def sectionsMergeActivationThreashold

// When true, turns overlapping a section limit are cut (usage text fixed: it described sections, not turns)
@Field @Option(name="fixTurnsLimits", usage="Correction des limites de tours", widget="Boolean", required=true, def="true")
def fixTurnsLimits

// Tolerance used when comparing turn times with section limits
@Field @Option(name="turnsCutActivationThreashold", usage="marge d'erreur de corrections des limites de tours", widget="Float", required=true, def="0.1")
def turnsCutActivationThreashold

// Debug switch. Declared as a Boolean widget: with a String widget the value "false" is a
// non-empty String, which Groovy evaluates as true in "if (debug)".
@Field @Option(name="debug", usage="show debug messages", widget="Boolean", required=true, def="false")
def debug
|
53 |
|
|
54 |
// Script set-up: reads the parameters, validates them, opens the metadata table and
// checks that every requested column exists before any TRS file is touched.

// Abort when the parameters dialog is cancelled
if (!ParametersDialog.open(this)) return;

// Honour the user's debug choice (the previous hard-coded "debug = true" ignored it).
// The value is normalised because a String such as "false" is truthy in Groovy.
debug = "true".equalsIgnoreCase(debug?.toString())

typeColumns = typeColumns.split(";")
topicColumns = topicColumns.split(";")
metadataColumns = metadataColumns.split(";")
metadataColumnsGroups = metadataColumnsGroups.split(";")

// Each metadata column needs exactly one group
if (metadataColumns.size() != metadataColumnsGroups.size()) {
	println "ERROR in metadata declarations&groups:"
	println "COLUMNS: "+metadataColumns
	println "GROUPS : "+metadataColumnsGroups
	return
}

if (!trsDirectory.exists()) {
	println "$trsDirectory not found"
	return
}

if (!metadataFile.exists()) {
	println "$metadataFile not found"
	return
}

println "Loading data from $metadataFile..."
TableReader reader = new TableReader(metadataFile)//, "\t".charAt(0), Charset.forName("UTF-8")
reader.readHeaders()
def header = reader.getHeaders()

// From here on the reader is open: close it before every early return
if (!header.contains(joinTRSColumn)) {
	println "No TRS ID $joinTRSColumn column found"
	reader.close()
	return
}
if (!header.contains(startTimeColumn)) {
	println "No start time $startTimeColumn column found"
	reader.close()
	return
}
if (!header.contains(endTimeColumn)) {
	println "No end time $endTimeColumn column found"
	reader.close()
	return
}
// The three loops below must test the loop column itself; they previously tested
// endTimeColumn again, so a missing metadata/type/topic column was never detected.
for (def col : metadataColumns) {
	if (!header.contains(col)) {
		println "No $col column found"
		reader.close()
		return
	}
}
for (def col : typeColumns) {
	if (!header.contains(col)) {
		println "No type $col column found"
		reader.close()
		return
	}
}
for (def col : topicColumns) {
	if (!header.contains(col)) {
		println "No topic $col column found"
		reader.close()
		return
	}
}

// Results are written in an "out" sub-directory of the TRS directory
File outputDirectory = new File(trsDirectory, "out")
println "Writing result to $outputDirectory..."

// Stored in the script binding (no "def") so that script methods can read it
dateTimeFormatter = DateTimeFormatter.ISO_LOCAL_TIME
|
113 |
/**
 * Converts a time code made of an ISO local time followed by ":" and a frame count
 * (for example "00:01:02:05") into a number of seconds.
 *
 * The text after the last ":" is parsed as an integer frame count and contributes
 * frames/25 seconds; a frame count greater than 25 is ignored (counted as 0).
 * The text before the last ":" is parsed with DateTimeFormatter.ISO_LOCAL_TIME.
 *
 * @param str the time code; may be null
 * @return the time in seconds, or null when str is null or contains no ":"
 * @throws NumberFormatException when the frame part is not an integer
 * @throws java.time.format.DateTimeParseException when the time part is not an ISO local time
 */
def strTotime(def str) {
	// A missing cell is treated like a cell without time code
	if (str == null) {
		return null
	}

	int lastColon = str.lastIndexOf(":")
	if (lastColon == -1) {
		return null
	}

	// Local variables: the temporaries are no longer written into the script binding
	int bonusFrame = Integer.parseInt(str.substring(lastColon + 1))
	if (bonusFrame > 25) {
		bonusFrame = 0;
	}

	LocalTime time1 = LocalTime.parse(str.substring(0, lastColon), DateTimeFormatter.ISO_LOCAL_TIME)
	int totalSeconds = (time1.getHour()*60*60) + (time1.getMinute()*60) + time1.getSecond()

	// Groovy integer division yields a BigDecimal, so the fraction of second is kept
	def ret = totalSeconds + (bonusFrame/25)
	return ret
}
|
131 |
|
|
132 |
// Main processing: loads the timed sections of the metadata table grouped by TRS id, then,
// for each existing <id>.trs file, redistributes the Turn nodes of its single Section into
// new Section nodes built from the table (or into "unsynchronized" Section nodes), and
// writes the result in the output directory.
//
// Variables assigned without "def" (sections, trs, trsSection, turns, iTurn, turn, iSection,
// currentNode, ...) live in the script binding on purpose: cutTurn() reads and updates them.
try {
	def sectionGroupsToInsert = [:]
	println "Reading data..."
	while (reader.readRecord()) { // loading & sorting sections
		String id = reader.get(joinTRSColumn).trim()
		if (id.endsWith(".mp4")) id = id.substring(0, id.length()-4)
		if (id.length() == 0) continue;

		if (!sectionGroupsToInsert.containsKey(id)) {
			sectionGroupsToInsert[id] = []
		}
		def section = sectionGroupsToInsert[id]

		if (reader.get(startTimeColumn) != null && reader.get(startTimeColumn).length() > 0) { // ignore non timed sections

			def m = [:] // attributes of the future Section node

			// "topic" and "type" attributes: non-empty cells of the configured columns, tab-joined
			for (def todo : ["topic":topicColumns, "type":typeColumns]) {
				def data = []
				for (def col : todo.value) {
					if (reader.get(col).trim().length() > 0) {
						data << reader.get(col).trim().replace("\n", "")
					}
				}
				m[todo.key] = data.join("\t")
			}

			// one attribute per metadata column, plus the lists of attribute names and groups
			def metadataList = []
			def metadataGroupList = []
			for (int i = 0 ; i < metadataColumns.size() ; i++) {
				def col = metadataColumns[i]
				String c = AsciiUtils.buildAttributeId(col)
				m[c] = reader.get(col)
				metadataList << c
				metadataGroupList << metadataColumnsGroups[i]
			}
			m["metadata"] = metadataList.join("|")
			m["metadata_groups"] = metadataGroupList.join("|")

			m["startTime"] = strTotime(reader.get(startTimeColumn))
			m["endTime"] = strTotime(reader.get(endTimeColumn))
			m["synchronized"] = "true"

			// strTotime returns null for a time code without ":"; such a section cannot be
			// sorted nor compared with turn times, so it is skipped instead of failing later
			if (m["startTime"] == null || m["endTime"] == null) {
				println "Warning: section of $id ignored, unreadable time codes: "+reader.get(startTimeColumn)+" - "+reader.get(endTimeColumn)
				continue
			}

			// [start, end, attributes]
			section << [m["startTime"], m["endTime"], m]
		}
	}

	println "Inserting sections... "+sectionGroupsToInsert.size()

	ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size())
	for (String id : sectionGroupsToInsert.keySet()) {

		File trsFile = new File(trsDirectory, id+".trs")
		if (!trsFile.exists()) {
			cpb.tick()
			continue
		}

		if (debug) println "== $id =="
		else cpb.tick()

		//println "Processing $id..."
		sections = sectionGroupsToInsert[id]
		if (sections.isEmpty()) {
			// the code below reads sections[iSection]; without any timed section it would fail
			println "Warning: no timed section for $id, file skipped"
			continue
		}

		// Sort by start time; for equal starts the section ending last comes first, so that an
		// enclosing section precedes the sections it contains. (The previous "a[1] <=> -b[1]"
		// was not antisymmetric: with positive times it returned 1 in both directions.)
		sections = sections.sort() { a, b -> a[0] <=> b[0] ?: b[1] <=> a[1] }

		if (fixSectionsLimits) {
			if (debug) println "Fixing sections of $id"
			for (int iSection = 1 ; iSection < sections.size() ; iSection++) {
				//println sections[iSection]
				if (Math.abs(sections[iSection][0] - sections[iSection - 1][1]) < sectionsMergeActivationThreashold) {
					if (debug) println "s=$iSection start <- end : "+sections[iSection][0]+ " <- "+sections[iSection - 1][1]
					// NOTE(review): only the [start, end, attributes] tuple is updated; the
					// "startTime" entry of the attributes map keeps its original value - confirm intended
					sections[iSection][0] = sections[iSection - 1][1] // fix the start time with the previous section end time
				}
			}
		}

		// Open input file
		slurper = new groovy.util.XmlParser(false, true, true);
		slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false) // allow DTD declaration
		slurper.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "all"); // allow to read DTD from local file
		trs = slurper.parse(trsFile.toURI().toString())
		trsEpisodes = trs.Episode // 1
		if (trsEpisodes.size() > 1) {
			println "Error: multiple Episode node in $trsFile"
			continue
		}
		def trsEpisode = trsEpisodes[0]
		def trsSections = trs.Episode.Section // 1
		if (trsSections.size() > 1) {
			println "Error: multiple Section node in $trsFile"
			continue
		}

		trsSection = trsSections[0]

		turns = trsSection.Turn
		newSections = []
		iSection = 0;
		currentSection = null
		currentNode = null

		foundSection=null
		isTurnSynchronized=false
		cutCheck=false

		// loop over the turns, in order
		for (iTurn = 0 ; iTurn < turns.size() ; iTurn++) {

			turn = turns[iTurn]
			start = Float.parseFloat(turn.@startTime)
			end = Float.parseFloat(turn.@endTime)
			//println "Turn: $iTurn ($start, $end)"

			// Step 1: will the turn need to be cut, and in which section is the turn (or its first part)?
			// The three results are reset for every turn: when no remaining section is reached by the
			// search below, the flags of the previous turn must not trigger a cut of this one.
			foundSection = null;
			isTurnSynchronized = false
			cutCheck = false
			for (int i = iSection ; i < sections.size() ; i++) {
				// if section_end < turn_start OR |turn_start - section_end| < turn_threshold
				if (sections[i][1] < start || Math.abs(start - sections[i][1]) < turnsCutActivationThreashold) { // Turn is after section
					// Case 1: the section is entirely before the turn (within the tolerance)
				} else {
					// Case 2: this is the section to consider
					iSection = i
					// if section_start > turn_end OR |section_start - turn_end| < turn_threshold
					if (sections[i][0] > end || Math.abs(sections[i][0] - end) < turnsCutActivationThreashold) { // Turn is before section
						// Case 2.1: the section is entirely after the turn (within the tolerance), and so are the next ones
						foundSection = null
						isTurnSynchronized = false
						cutCheck = false
					} else {
						// if |section_start - turn_start| > turn_threshold
						if (Math.abs(start - sections[i][0]) > turnsCutActivationThreashold) { // Turn begins before section does
							// Case 2.2: the section starts significantly after the turn start (the turn start is not synchronized)
							foundSection = sections[i] // first section met, but it will be for the next turn
							isTurnSynchronized = false
							cutCheck = true
						} else {
							// Case 2.3: the turn start is in the section (no need to look at other sections:
							// if it is not the only one the turn is cut and the remainder becomes another turn)
							foundSection = sections[i] // the section that starts the turn, at least
							isTurnSynchronized = true
							cutCheck = true
						}
					}
					break; // stop searching and set iSection to accelerate next search
				}
			}

			// Step 2: put *the* turn in *the* node
			// (only one turn and one node are handled per iteration of the turn loop,
			// since the turn has been resized so that it does not span several nodes)
			if (foundSection != null) { // complete the current Section node or add a new one

				if (foundSection != currentSection || currentSection == null) {
					// close the previous unsynchronized node with the end time of its last turn
					if (currentNode != null && currentNode.@synchronized == "false") {
						def tmp = currentNode.Turn
						currentNode.@endTime = tmp[-1].@endTime
					}

					currentSection = foundSection
					currentNode = new Node(trsEpisode, "Section", currentSection[2])
				}
			} else { // complete the current unsynchronized node or add a new one
				if (currentSection != null || currentNode == null) { // create a new unsynchronized section if there is no opened synchronized section or no un-synchronized section
					// cutTurn(true) may have moved iSection one past the last section: clamp the
					// index so that the last section is used instead of reading a null entry
					def refSection = sections[Math.min(iSection, sections.size() - 1)]
					currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":refSection[0], "endTime":refSection[1], "synchronized":"false"] )
					currentSection = null;
				}
			}

			// Step 3: cut the turn when needed
			if (cutCheck && fixTurnsLimits) {
				if (isTurnSynchronized) {
					cutTurn(true) // iSection++ and test against the section **end** when cutting
				} else {
					if (foundSection != null) {
						cutTurn(false) // test against the section **start** when cutting
					}
				}
			}

			// move the turn from the original Section to the current node
			trsSection.remove(turn)
			currentNode.append(turn)
		}

		// NOTE(review): the original Section node (trsSection) is left in the Episode here;
		// confirm whether it should be removed once its turns have been moved

		outputDirectory.mkdir()
		File outfile = new File(outputDirectory, trsFile.getName())
		outfile.withWriter("UTF-8") { writer ->
			writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
			def printer = new groovy.util.XmlNodePrinter(new PrintWriter(writer))
			printer.setPreserveWhitespace(true)
			printer.print(trs)
		}
	}
	cpb.done()
	println "Done."

} catch(Exception e) {
	println "Error: "+e
	Log.printStackTrace(e)
} finally {
	// always release the metadata table, including when a file fails
	try {
		reader.close()
	} catch(Exception e) {
		Log.printStackTrace(e)
	}
}
|
330 |
|
|
331 |
/**
 * Splits the current turn at a section limit: from the first timed child ("w" or "Sync")
 * that passes the limit test, the remaining children are moved into a newly created Turn.
 *
 * Works on script-binding variables set by the main loop rather than on parameters:
 * reads turn, currentNode, trsSection and debug; updates turns, iTurn and iSection.
 *
 * @param testWithSectionEndTime when true the cut happens at the first child starting after
 *        the end time of currentNode (and iSection is incremented); when false, at the first
 *        child starting at or after the start time of currentNode
 */
def cutTurn(def testWithSectionEndTime) {
	def children = turn.children()
	Node newTurnKaNode = null; // created at the cut point; stays null while no cut has been done

	// Limits of the current Section node; attribute values may be Strings or numbers
	def startSection = currentNode.@startTime
	if (startSection instanceof String) startSection = Float.parseFloat(currentNode.@startTime)
	def endSection = currentNode.@endTime
	if (endSection instanceof String) endSection = Float.parseFloat(currentNode.@endTime)
	//println "Cut the last turn if necessary"
	for (int iChildren = 0 ; iChildren < children.size() ; iChildren++) {

		def c = children[iChildren]
		if (c instanceof String) continue; // text children carry no time and are left in place

		if (newTurnKaNode != null) { // append the remaining children to the new turn
			turn.remove(c)
			newTurnKaNode.append(c)
			if (debug) c.@moved="yes"
			iChildren-- // c was removed from the list being iterated: stay on the same index
		} else {
			if ("w".equals(c.name())) {
				def start2 = Float.parseFloat(c.@startTime)
				def end2 = Float.parseFloat(c.@endTime)

				boolean test = null
				if (testWithSectionEndTime) {
					test = start2 > endSection
				} else {
					test = start2 >= startSection
				}

				if (test) { // the threshold comparison formerly done here is no more needed
					if (debug) println "cut with a w at ($start2, $end2) for section ("+startSection+", "+endSection+")"
					// the new turn starts at the cut point and keeps the end time and speaker of the original turn
					newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker])
					new Node(newTurnKaNode, "Sync", ["time":""+start2]) // TRS
					// NOTE(review): the new turn is inserted at index iTurn (before the turn being
					// processed) and iTurn is decremented, so the main loop appears to visit the new
					// turn and then the original turn a second time - confirm this is intended
					turns.add(iTurn, newTurnKaNode)
					iTurn--
					if (debug) newTurnKaNode.@created = "yes"
					turn.@endTime = ""+start2; // the original turn now ends at the cut point
					turn.remove(c)
					newTurnKaNode.append(c)

					if (debug) c.@moved="yes"
					if (testWithSectionEndTime) iSection++
					iChildren--
				}
			} else if ("Sync".equals(c.name())) {
				def start2 = c.@time
				if (start2 instanceof String) start2 = Float.parseFloat(c.@time)
				def end2 = start2 // a Sync has a single time

				boolean test = null
				if (testWithSectionEndTime) {
					test = start2 > endSection
				} else {
					test = start2 >= startSection
				}

				if (test) { // same cut as for a "w", except that the moved Sync itself opens the new turn
					if (debug) println "cut with a Sync at ($start2, $end2) for section ("+startSection+", "+endSection+")"
					newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker])
					turns.add(iTurn, newTurnKaNode)
					iTurn--
					if (debug) newTurnKaNode.@created = "yes"
					turn.@endTime = ""+start2;
					turn.remove(c)
					if (debug) c.@moved="yes"
					if (testWithSectionEndTime) iSection++
					newTurnKaNode.append(c)
					iChildren--
				}
			} else {
				// no time to check
			}
		}
	}
}
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/AddSectionsFromTableMacroOld.groovy (revision 3416) | ||
---|---|---|
1 |
package org.txm.macro.transcription |
|
2 |
|
|
3 |
import java.nio.charset.Charset |
|
4 |
|
|
5 |
import java.time.LocalTime |
|
6 |
import java.time.format.DateTimeFormatter |
|
7 |
import org.txm.utils.* |
|
8 |
import org.txm.utils.logger.* |
|
9 |
|
|
10 |
// Macro parameters. Each @Field/@Option pair declares one value that is filled in when
// ParametersDialog.open(this) is called at the start of the script body.
// Option names are kept unchanged (including the "Threashold" spelling) since they identify the parameters.

// Table file holding the section metadata
@Field @Option(name="metadataFile", usage="Tableau des metadonnées de sections", widget="FileOpen", required=true, def="")
File metadataFile;

// Directory containing the <id>.trs files to process
@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS", widget="Folder", required=true, def="")
File trsDirectory;

// Column whose value (minus an optional ".mp4" suffix) gives the TRS file name
@Field @Option(name="joinTRSColumn", usage="Colonne de jointure de transcription", widget="String", required=true, def="Lien notice principale")
def joinTRSColumn

// Column holding the section start time code
@Field @Option(name="startTimeColumn", usage="Colonne de timing de début de section", widget="String", required=true, def="antract_debut")
def startTimeColumn = "antract_debut"

// Column holding the section end time code
@Field @Option(name="endTimeColumn", usage="Colonne de timing de fin de section", widget="String", required=true, def="antract_fin")
def endTimeColumn = "antract_fin"

// ";"-separated columns joined into the "type" attribute of a section
@Field @Option(name="typeColumns", usage="Colonnes des métadonnées de type de section", widget="String", required=true, def="Titre propre")
def typeColumns

// ";"-separated columns joined into the "topic" attribute of a section
@Field @Option(name="topicColumns", usage="Colonnes des métadonnées de topic de section", widget="String", required=true, def="Date de diffusion")
def topicColumns

// ";"-separated columns copied as section attributes
@Field @Option(name="metadataColumns", usage="Colonnes de metadonnées de section", widget="String", required=true, def="Titre propre;Date de diffusion;Identifiant de la notice;Notes du titre;Type de date;Durée;Genre;Langue VO / VE;Nature de production;Producteurs (Aff.);Thématique;Nom fichier segmenté (info);antract_video;antract_debut;antract_fin;antract_duree;antract_tc_type;antract_tc_date;Résumé;Séquences;Descripteurs (Aff. Lig.);Générique (Aff. Lig.)")
def metadataColumns

// ";"-separated group names, one per entry of metadataColumns (same order, same count)
@Field @Option(name="metadataColumnsGroups", usage="Colonnes des groupes de metadonnées de section", widget="String", required=true, def="metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;secondary;secondary;secondary;secondary;secondary;secondary;secondary;text;text;text;text")
def metadataColumnsGroups

// When true, a section start close to the previous section end is aligned on it
@Field @Option(name="fixSectionsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true")
def fixSectionsLimits

// Maximum gap (compared with section times) for which fixSectionsLimits aligns two sections
@Field @Option(name="sectionsMergeActivationThreashold", usage="marge d'erreur de corrections des limites de sections", widget="Float", required=true, def="1.0")
def sectionsMergeActivationThreashold

// When true, turns overlapping a section limit are cut (usage text fixed: it described sections, not turns)
@Field @Option(name="fixTurnsLimits", usage="Correction des limites de tours", widget="Boolean", required=true, def="true")
def fixTurnsLimits

// Tolerance used when comparing turn times with section limits
@Field @Option(name="turnsCutActivationThreashold", usage="marge d'erreur de corrections des limites de tours", widget="Float", required=true, def="0.1")
def turnsCutActivationThreashold

// Debug switch. Declared as a Boolean widget: with a String widget the value "false" is a
// non-empty String, which Groovy evaluates as true in "if (debug)".
@Field @Option(name="debug", usage="show debug messages", widget="Boolean", required=true, def="false")
def debug
|
51 |
|
|
52 |
// Script set-up: reads the parameters, validates them, opens the metadata table and
// checks that every requested column exists before any TRS file is touched.

// Abort when the parameters dialog is cancelled
if (!ParametersDialog.open(this)) return;

// Normalise the debug switch: a String value such as "false" is truthy in Groovy,
// which would turn the debug output on whatever the user selected.
debug = "true".equalsIgnoreCase(debug?.toString())

typeColumns = typeColumns.split(";")
topicColumns = topicColumns.split(";")
metadataColumns = metadataColumns.split(";")
metadataColumnsGroups = metadataColumnsGroups.split(";")

// Each metadata column needs exactly one group
if (metadataColumns.size() != metadataColumnsGroups.size()) {
	println "ERROR in metadata declarations&groups:"
	println "COLUMNS: "+metadataColumns
	println "GROUPS : "+metadataColumnsGroups
	return
}

if (!trsDirectory.exists()) {
	println "$trsDirectory not found"
	return
}

if (!metadataFile.exists()) {
	println "$metadataFile not found"
	return
}

println "Loading data from $metadataFile..."
TableReader reader = new TableReader(metadataFile)//, "\t".charAt(0), Charset.forName("UTF-8")
reader.readHeaders()
def header = reader.getHeaders()

// From here on the reader is open: close it before every early return
if (!header.contains(joinTRSColumn)) {
	println "No TRS ID $joinTRSColumn column found"
	reader.close()
	return
}
if (!header.contains(startTimeColumn)) {
	println "No start time $startTimeColumn column found"
	reader.close()
	return
}
if (!header.contains(endTimeColumn)) {
	println "No end time $endTimeColumn column found"
	reader.close()
	return
}
// The three loops below must test the loop column itself; they previously tested
// endTimeColumn again, so a missing metadata/type/topic column was never detected.
for (def col : metadataColumns) {
	if (!header.contains(col)) {
		println "No $col column found"
		reader.close()
		return
	}
}
for (def col : typeColumns) {
	if (!header.contains(col)) {
		println "No type $col column found"
		reader.close()
		return
	}
}
for (def col : topicColumns) {
	if (!header.contains(col)) {
		println "No topic $col column found"
		reader.close()
		return
	}
}

// Results are written in an "out" sub-directory of the TRS directory
File outputDirectory = new File(trsDirectory, "out")
println "Writing result to $outputDirectory..."

// Stored in the script binding (no "def") so that script methods can read it
dateTimeFormatter = DateTimeFormatter.ISO_LOCAL_TIME
|
110 |
/**
 * Converts a time code made of an ISO local time followed by ":" and a frame count
 * (for example "00:01:02:05") into a number of seconds.
 *
 * The text after the last ":" is parsed as an integer frame count and contributes
 * frames/25 seconds; a frame count greater than 25 is ignored (counted as 0).
 * The text before the last ":" is parsed with DateTimeFormatter.ISO_LOCAL_TIME.
 *
 * @param str the time code; may be null
 * @return the time in seconds, or null when str is null or contains no ":"
 * @throws NumberFormatException when the frame part is not an integer
 * @throws java.time.format.DateTimeParseException when the time part is not an ISO local time
 */
def strTotime(def str) {
	// A missing cell is treated like a cell without time code
	if (str == null) {
		return null
	}

	int lastColon = str.lastIndexOf(":")
	if (lastColon == -1) {
		return null
	}

	// Local variables: the temporaries are no longer written into the script binding
	int bonusFrame = Integer.parseInt(str.substring(lastColon + 1))
	if (bonusFrame > 25) {
		bonusFrame = 0;
	}

	LocalTime time1 = LocalTime.parse(str.substring(0, lastColon), DateTimeFormatter.ISO_LOCAL_TIME)
	int totalSeconds = (time1.getHour()*60*60) + (time1.getMinute()*60) + time1.getSecond()

	// Groovy integer division yields a BigDecimal, so the fraction of second is kept
	def ret = totalSeconds + (bonusFrame/25)
	return ret
}
|
128 |
|
|
129 |
try { |
|
130 |
def sectionGroupsToInsert = [:] |
|
131 |
println "Reading data..." |
|
132 |
while (reader.readRecord()) { // loading & sorting sections |
|
133 |
String id = reader.get(joinTRSColumn).trim() |
|
134 |
if (id.endsWith(".mp4")) id = id.substring(0, id.length()-4) |
|
135 |
if (id.length() == 0) continue; |
|
136 |
|
|
137 |
if (!sectionGroupsToInsert.containsKey(id)) { |
|
138 |
sectionGroupsToInsert[id] = [] |
|
139 |
} |
|
140 |
def section = sectionGroupsToInsert[id] |
|
141 |
|
|
142 |
if (reader.get(startTimeColumn) != null && reader.get(startTimeColumn).length() > 0) { // ignore non timed sections |
|
143 |
|
|
144 |
def m = [:] |
|
145 |
|
|
146 |
for (def todo : ["topic":topicColumns, "type":typeColumns]) { |
|
147 |
def data = [] |
|
148 |
for (def col : todo.value) { |
|
149 |
if (reader.get(col).trim().length() > 0) { |
|
150 |
data << reader.get(col).trim().replace("\n", "") |
|
151 |
} |
|
152 |
} |
|
153 |
m[todo.key] = data.join("\t") |
|
154 |
} |
|
155 |
def metadataList = [] |
|
156 |
def metadataGroupList = [] |
|
157 |
for (int i = 0 ; i < metadataColumns.size() ; i++) { |
|
158 |
def col = metadataColumns[i] |
|
159 |
String c = AsciiUtils.buildAttributeId(col) |
|
160 |
m[c] = reader.get(col) |
|
161 |
metadataList << c |
|
162 |
metadataGroupList << metadataColumnsGroups[i] |
|
163 |
} |
|
164 |
m["metadata"] = metadataList.join("|") |
|
165 |
m["metadata_groups"] = metadataGroupList.join("|") |
|
166 |
|
|
167 |
m["startTime"] = strTotime(reader.get(startTimeColumn)) |
|
168 |
m["endTime"] = strTotime(reader.get(endTimeColumn)) |
|
169 |
m["synchronized"] = "true" |
|
170 |
|
|
171 |
section << [m["startTime"], m["endTime"], m] |
|
172 |
} |
|
173 |
} |
|
174 |
|
|
175 |
println "Inserting sections... "+sectionGroupsToInsert.size() |
|
176 |
|
|
177 |
ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size()) |
|
178 |
for (String id : sectionGroupsToInsert.keySet()) { |
|
179 |
|
|
180 |
File trsFile = new File(trsDirectory, id+".trs") |
|
181 |
if (!trsFile.exists()) { |
|
182 |
cpb.tick() |
|
183 |
continue |
|
184 |
} |
|
185 |
|
|
186 |
if (debug) println "== $id ==" |
|
187 |
else cpb.tick() |
|
188 |
|
|
189 |
//println "Processing $id..." |
|
190 |
def sections = sectionGroupsToInsert[id] |
|
191 |
sections = sections.sort() { a, b -> a[0] <=> b[0] ?: a[1] <=> -b[1] } // negative second test for sections inclusion |
|
192 |
|
|
193 |
if (fixSectionsLimits) { |
|
194 |
if (debug) println "Fixing sections of $id" |
|
195 |
for (int iSection = 1 ; iSection < sections.size() ; iSection++) { |
|
196 |
//println sections[iSection] |
|
197 |
if (Math.abs(sections[iSection][0] - sections[iSection - 1][1]) < sectionsMergeActivationThreashold) { |
|
198 |
if (debug) println "s=$iSection start <- end : "+sections[iSection][0]+ " <- "+sections[iSection - 1][1] |
|
199 |
sections[iSection][0] = sections[iSection - 1][1] // fix the start time with the previous section end time |
|
200 |
} |
|
201 |
} |
|
202 |
} |
|
203 |
|
|
204 |
// Open input file |
|
205 |
def slurper = new groovy.util.XmlParser(false, true, true); |
|
206 |
slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false) // allow DTD declaration |
|
207 |
slurper.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "all"); // allow to read DTD from local file |
|
208 |
def trs = slurper.parse(trsFile.toURI().toString()) |
|
209 |
def trsEpisodes = trs.Episode // 1 |
|
210 |
if (trsEpisodes.size() > 1) { |
|
211 |
println "Error: multiple Episode node in $trsFile" |
|
212 |
continue |
|
213 |
} |
|
214 |
def trsEpisode = trsEpisodes[0] |
|
215 |
def trsSections = trs.Episode.Section // 1 |
|
216 |
if (trsSections.size() > 1) { |
|
217 |
println "Error: multiple Section node in $trsFile" |
|
218 |
continue |
|
219 |
} |
|
220 |
def trsSection = trsSections[0] |
|
221 |
|
|
222 |
def turns = trsSection.Turn |
|
223 |
def newSections = [] |
|
224 |
def iSection = 0; |
|
225 |
def currentSection = null |
|
226 |
def currentNode = null |
|
227 |
|
|
228 |
for (int iTurn = 0 ; iTurn < turns.size() ; iTurn++) { |
|
229 |
|
|
230 |
def turn = turns[iTurn] |
|
231 |
def start = Float.parseFloat(turn.@startTime) |
|
232 |
def end = Float.parseFloat(turn.@endTime) |
|
233 |
//println "Turn: $iTurn ($start, $end)" |
|
234 |
|
|
235 |
def foundSection = null; |
|
236 |
for (int i = iSection ; i < sections.size() ; i++) { |
|
237 |
if (end < sections[i][0]) { // the Turn is before the section |
|
238 |
|
|
239 |
} else if (sections[i][1] < start) { // the Turn is after the section |
|
240 |
|
|
241 |
} else { |
|
242 |
foundSection = sections[i] |
|
243 |
iSection = i |
|
244 |
break; // stop searching and set iSection to accelerate next search |
|
245 |
} |
|
246 |
} |
|
247 |
|
|
248 |
if (foundSection == null) { |
|
249 |
if (currentSection != null || currentNode == null) { |
|
250 |
currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] ) |
|
251 |
currentSection = null; |
|
252 |
} |
|
253 |
} else { |
|
254 |
if (foundSection != currentSection) { |
|
255 |
if (currentNode != null && currentNode.@synchronized == "false") { // set the un-synchronized section endTime using its last Turn endTime |
|
256 |
def tmp = currentNode.Turn |
|
257 |
currentNode.@endTime = tmp[-1].@endTime |
|
258 |
} |
|
259 |
|
|
260 |
currentSection = foundSection |
|
261 |
currentNode = new Node(trsEpisode, "Section", currentSection[2]) |
|
262 |
} |
|
263 |
} |
|
264 |
|
|
265 |
trsSection.remove(turn) |
|
266 |
currentNode.append(turn) |
|
267 |
} |
|
268 |
|
|
269 |
//remove the initial section which is empty now |
|
270 |
trsEpisode.remove(trsSection) |
|
271 |
|
|
272 |
if (fixTurnsLimits) { |
|
273 |
if (debug) println "Fixing Turn limits..." |
|
274 |
def partOfTurnToInsertInthePreviousSection = null; |
|
275 |
def partOfTurnToInsertIntheNextSection = null; |
|
276 |
sections = trs.Episode.Section |
|
277 |
for (int i = 0 ; i < sections.size() ; i++) { // browse created sections but stop before the last one (whichc can not be fixed) |
|
278 |
|
|
279 |
def section = sections[i] |
|
280 |
if (partOfTurnToInsertIntheNextSection != null) { |
|
281 |
if (debug) println "Moving part-of turn: "+partOfTurnToInsertIntheNextSection+" in section ("+section.@startTime+", "+section.@endTime+")" |
|
282 |
section.children().add(0, partOfTurnToInsertIntheNextSection) // insert the slited part of the turn in the section |
|
283 |
} |
|
284 |
|
|
285 |
def startSection = section.@startTime // Float.parseFloat(section.@startTime) |
|
286 |
def endSection = section.@endTime // Float.parseFloat(section.@endTime) |
|
287 |
if (startSection instanceof String) startSection = Float.parseFloat(section.@startTime) |
|
288 |
if (endSection instanceof String) endSection = Float.parseFloat(section.@endTime) |
|
289 |
|
|
290 |
partOfTurnToInsertInthePreviousSection = null |
|
291 |
partOfTurnToInsertIntheNextSection = null |
|
292 |
|
|
293 |
turns = section.Turn |
|
294 |
if (turns.size() == 0) continue; |
|
295 |
|
|
296 |
// Fix the first Turn |
|
297 |
def turn = turns[0] |
|
298 |
start = turn.@startTime // Float.parseFloat(section.@startTime) |
|
299 |
end = turn.@endTime // Float.parseFloat(section.@endTime) |
|
300 |
if (start instanceof String) start = Float.parseFloat(turn.@startTime) |
|
301 |
if (end instanceof String) end = Float.parseFloat(turn.@endTime) |
|
302 |
|
|
303 |
if (start < startSection) { // the start of the Turn is outside of its current section |
|
304 |
|
|
305 |
} |
|
306 |
|
|
307 |
// Fix the last Turn |
|
308 |
turn = turns[-1] |
|
309 |
start = turn.@startTime // Float.parseFloat(section.@startTime) |
|
310 |
end = turn.@endTime // Float.parseFloat(section.@endTime) |
|
311 |
if (start instanceof String) start = Float.parseFloat(turn.@startTime) |
|
312 |
if (end instanceof String) end = Float.parseFloat(turn.@endTime) |
|
313 |
if (end > endSection) { // the end of the Turn is outside of its current section |
|
314 |
|
|
315 |
def children = turn.children() |
|
316 |
Node newTurnKaNode = null;//new Node(trsEpisode, "Turn", currentSection[2]) |
|
317 |
//println "Cut the last turn if necessary" |
|
318 |
for (int iChildren = 0 ; iChildren < children.size() ; iChildren++) { |
|
319 |
|
|
320 |
def c = children[iChildren] |
|
321 |
if (c instanceof String) continue; |
|
322 |
|
|
323 |
if (newTurnKaNode != null) { |
|
324 |
turn.remove(c) |
|
325 |
newTurnKaNode.append(c) |
|
326 |
if (debug) c.@moved="yes" |
|
327 |
iChildren-- |
|
328 |
} else { |
|
329 |
if ("w".equals(c.name())) { |
|
330 |
def start2 = Float.parseFloat(c.@startTime) |
|
331 |
def end2 = Float.parseFloat(c.@endTime) |
|
332 |
|
|
333 |
if (start2 > endSection && Math.abs(start2 - endSection) > turnsCutActivationThreashold) { |
|
334 |
if (debug) println "cut with a w at [$start2, $end2] for section ("+startSection+", "+endSection+")" |
|
335 |
newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker]) |
|
336 |
if (debug) newTurnKaNode.@created = "yes" |
|
337 |
turn.@endTime = ""+start2; |
|
338 |
turn.remove(c) |
|
339 |
newTurnKaNode.append(c) |
|
340 |
if (debug) c.@moved="yes" |
|
341 |
iChildren-- |
|
342 |
} |
|
343 |
} else if ("Sync".equals(c.name())) { |
|
344 |
def start2 = Float.parseFloat(c.@time) |
|
345 |
def end2 = Float.parseFloat(c.@time) |
|
346 |
|
|
347 |
if (start2 > endSection && Math.abs(start2 - endSection) > turnsCutActivationThreashold) { |
|
348 |
if (debug) println "cut with a Sync at [$start2, $end2] for section "+endSection |
|
349 |
newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker]) |
|
350 |
if (debug) newTurnKaNode.@created = "yes" |
|
351 |
turn.@endTime = ""+start2; |
|
352 |
turn.remove(c) |
|
353 |
if (debug) c.@moved="yes" |
|
354 |
newTurnKaNode.append(c) |
|
355 |
iChildren-- |
|
356 |
} |
|
357 |
} else { |
|
358 |
// no time to check |
|
359 |
} |
|
360 |
} |
|
361 |
} |
|
362 |
|
|
363 |
} |
|
364 |
partOfTurnToInsertIntheNextSection = newTurnKaNode |
|
365 |
} |
|
366 |
} |
|
367 |
|
|
368 |
outputDirectory.mkdir() |
|
369 |
File outfile = new File(outputDirectory, trsFile.getName()) |
|
370 |
outfile.withWriter("UTF-8") { writer -> |
|
371 |
writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n') |
|
372 |
def printer = new groovy.util.XmlNodePrinter(new PrintWriter(writer)) |
|
373 |
printer.setPreserveWhitespace(true) |
|
374 |
printer.print(trs) |
|
375 |
} |
|
376 |
} |
|
377 |
cpb.done() |
|
378 |
reader.close() |
|
379 |
println "Done." |
|
380 |
|
|
381 |
} catch(Exception e) { |
|
382 |
println "Error: "+e |
|
383 |
Log.printStackTrace(e) |
|
384 |
} |
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/AddSectionsFromTableMacro.groovy (revision 3416) | ||
---|---|---|
4 | 4 |
|
5 | 5 |
import java.time.LocalTime |
6 | 6 |
import java.time.format.DateTimeFormatter |
7 |
|
|
8 |
import org.eclipse.core.internal.localstore.IsSynchronizedVisitor |
|
7 | 9 |
import org.txm.utils.* |
8 | 10 |
import org.txm.utils.logger.* |
9 | 11 |
|
... | ... | |
39 | 41 |
|
40 | 42 |
@Field @Option(name="sectionsMergeActivationThreashold", usage="marge d'erreur de corrections des limites de sections", widget="Float", required=true, def="1.0") |
41 | 43 |
def sectionsMergeActivationThreashold |
42 |
|
|
44 |
|
|
43 | 45 |
@Field @Option(name="fixTurnsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true") |
44 | 46 |
def fixTurnsLimits |
45 | 47 |
|
46 | 48 |
@Field @Option(name="turnsCutActivationThreashold", usage="marge d'erreur de corrections des limites de tours", widget="Float", required=true, def="0.1") |
47 | 49 |
def turnsCutActivationThreashold |
48 | 50 |
|
49 |
@Field @Option(name="debug", usage="show debug messages", widget="String", required=true, def="false")
|
|
51 |
@Field @Option(name="debug", usage="show debug messages", widget="Boolean", required=true, def="false")
|
|
50 | 52 |
def debug |
51 | 53 |
|
52 | 54 |
if (!ParametersDialog.open(this)) return; |
53 |
debug = true |
|
54 | 55 |
|
55 | 56 |
typeColumns = typeColumns.split(";") |
56 | 57 |
topicColumns = topicColumns.split(";") |
... | ... | |
188 | 189 |
else cpb.tick() |
189 | 190 |
|
190 | 191 |
//println "Processing $id..." |
191 |
def sections = sectionGroupsToInsert[id]
|
|
192 |
sections = sectionGroupsToInsert[id] |
|
192 | 193 |
sections = sections.sort() { a, b -> a[0] <=> b[0] ?: a[1] <=> -b[1] } // negative second test for sections inclusion |
193 | 194 |
|
194 | 195 |
if (fixSectionsLimits) { |
... | ... | |
203 | 204 |
} |
204 | 205 |
|
205 | 206 |
// Open input file |
206 |
def slurper = new groovy.util.XmlParser(false, true, true);
|
|
207 |
slurper = new groovy.util.XmlParser(false, true, true); |
|
207 | 208 |
slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false) // allow DTD declaration |
208 | 209 |
slurper.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "all"); // allow to read DTD from local file |
209 |
def trs = slurper.parse(trsFile.toURI().toString())
|
|
210 |
def trsEpisodes = trs.Episode // 1
|
|
210 |
trs = slurper.parse(trsFile.toURI().toString()) |
|
211 |
trsEpisodes = trs.Episode // 1 |
|
211 | 212 |
if (trsEpisodes.size() > 1) { |
212 | 213 |
println "Error: multiple Episode node in $trsFile" |
213 | 214 |
continue |
... | ... | |
218 | 219 |
println "Error: multiple Section node in $trsFile" |
219 | 220 |
continue |
220 | 221 |
} |
221 |
def trsSection = trsSections[0] |
|
222 | 222 |
|
223 |
def turns = trsSection.Turn |
|
224 |
def newSections = [] |
|
225 |
def iSection = 0; |
|
226 |
def currentSection = null |
|
227 |
def currentNode = null |
|
223 |
trsSection = trsSections[0] |
|
228 | 224 |
|
229 |
for (int iTurn = 0 ; iTurn < turns.size() ; iTurn++) { |
|
230 |
|
|
231 |
def turn = turns[iTurn] |
|
232 |
def start = Float.parseFloat(turn.@startTime) |
|
233 |
def end = Float.parseFloat(turn.@endTime) |
|
225 |
turns = trsSection.Turn |
|
226 |
newSections = [] |
|
227 |
iSection = 0; |
|
228 |
currentSection = null |
|
229 |
currentNode = null |
|
230 |
|
|
231 |
foundSection=null |
|
232 |
isTurnSynchronized=false |
|
233 |
cutCheck=false |
|
234 |
|
|
235 |
// boucle sur les tours dans l'ordre |
|
236 |
for (iTurn = 0 ; iTurn < turns.size() ; iTurn++) { |
|
237 |
if (debug) println "iTurn=$iTurn turn="+turns[iTurn].attributes() |
|
238 |
turn = turns[iTurn] |
|
239 |
start = Float.parseFloat(turn.@startTime) |
|
240 |
end = Float.parseFloat(turn.@endTime) |
|
234 | 241 |
//println "Turn: $iTurn ($start, $end)" |
235 | 242 |
|
236 |
def foundSection = null; |
|
243 |
// Etape 1 : y aura-t-il besoin de couper le tour, et dans quelle section est le tour (ou sa première partie) |
|
244 |
foundSection = null; |
|
237 | 245 |
for (int i = iSection ; i < sections.size() ; i++) { |
238 |
if (end < sections[i][0]) { // Turn is before section |
|
239 |
|
|
240 |
} else if (sections[i][1] < start) { // Section is before turn |
|
241 |
|
|
246 |
// if section_end < turn_start OU |turn_start - section_end| < turn_threshold |
|
247 |
if (sections[i][1] < start || Math.abs(start - sections[i][1]) < turnsCutActivationThreashold) { // Turn is after section |
|
248 |
// Cas 1 : la section est complètement avant (modulo la marge) |
|
242 | 249 |
} else { |
243 |
foundSection = sections[i]
|
|
250 |
// Cas 2 : on est arrivés à la section à considérer
|
|
244 | 251 |
iSection = i |
252 |
// if section_start > turn_end OU |section_start - turn_end| < turn_threshold |
|
253 |
if (sections[i][0] > end || Math.abs(sections[i][0] - end) < turnsCutActivationThreashold) { // Turn is before section |
|
254 |
// Cas 2.1 : la section est complètement après (modulo la marge) (et les suivantes le seront aussi) |
|
255 |
foundSection = null |
|
256 |
isTurnSynchronized = false |
|
257 |
cutCheck = false |
|
258 |
} else { |
|
259 |
// if |section_start - turn_start| > turn_threshold |
|
260 |
if (start - sections[i][0] < 0) { // Turn begins before section does |
|
261 |
// Cas 2.2 : la section commence significativement après le début du tour (le début est non synchronisé) |
|
262 |
foundSection = sections[i] // (c'est la première section rencontrée, mais elle sera pour le tour suivant) |
|
263 |
isTurnSynchronized = false |
|
264 |
cutCheck = true |
|
265 |
} else { |
|
266 |
// Cas 2.3 : le début du tour est dans la section (on n'a pas besoin de chercher d'autres sections car si ce n'est pas la seule on coupera le tour et ce sera un autre tour). |
|
267 |
foundSection = sections[i] // (c'est la section qui commence le tour, au moins) |
|
268 |
isTurnSynchronized = true |
|
269 |
cutCheck = true |
|
270 |
} |
|
271 |
} |
|
245 | 272 |
break; // stop searching and set iSection to accelerate next search |
246 | 273 |
} |
247 | 274 |
} |
248 | 275 |
|
249 |
if (foundSection == null) { |
|
250 |
if (currentSection != null || currentNode == null) { |
|
251 |
currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] ) |
|
252 |
currentSection = null; |
|
253 |
} |
|
254 |
} else { |
|
255 |
if (foundSection != currentSection) { |
|
276 |
// Etape 2 : positionne *le* tour dans *le* noeud |
|
277 |
// (on ne gère qu'un seul tour et un seul noeud à chaque itération de la boucle tour, |
|
278 |
// puisqu'on a retaillé le tour pour qu'il ne concerne pas plusieurs noeuds) |
|
279 |
if (foundSection != null) { // on complète ou on ajoute une div. |
|
280 |
|
|
281 |
if (foundSection != currentSection || currentSection == null) { |
|
256 | 282 |
if (currentNode != null && currentNode.@synchronized == "false") { |
257 | 283 |
def tmp = currentNode.Turn |
258 | 284 |
currentNode.@endTime = tmp[-1].@endTime |
... | ... | |
260 | 286 |
|
261 | 287 |
currentSection = foundSection |
262 | 288 |
currentNode = new Node(trsEpisode, "Section", currentSection[2]) |
289 |
if (debug) println " create synchronized turn at start="+foundSection[0] |
|
263 | 290 |
} |
291 |
} else { // on complète ou on ajoute un noeud (div) non synchronisé. |
|
292 |
if (currentSection != null || currentNode == null) { // create a new un-synchronized section when a synchronized section is currently open or when no section node exists yet |
|
293 |
currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] ) |
|
294 |
currentSection = null; |
|
295 |
if (debug) println " create un-synchronized turn at "+turn.@startTime |
|
296 |
} |
|
264 | 297 |
} |
265 | 298 |
|
299 |
// Etape 3 : on coupe le tour s'il y a besoin |
|
300 |
if (cutCheck && fixTurnsLimits) { |
|
301 |
if (debug) println " fixing turn in turns ("+turns.size()+") section ("+trsSection.children().size()+")" |
|
302 |
if (isTurnSynchronized) { |
|
303 |
cutTurn(true) // iSection++ et test avec le **end** de la section quand on coupe |
|
304 |
} else { |
|
305 |
if (foundSection != null) { |
|
306 |
cutTurn(false) // test avec le **start** de la iSection quand on coupe |
|
307 |
} |
|
308 |
} |
|
309 |
} |
|
310 |
|
|
311 |
if (debug) println " remove turn in turns ("+turns.size()+") section ("+trsSection.children().size()+")" |
|
312 |
turns.remove(turn) |
|
266 | 313 |
trsSection.remove(turn) |
267 | 314 |
currentNode.append(turn) |
315 |
if (debug) println " removed turn in turns ("+turns.size()+") section ("+trsSection.children().size()+")" |
|
268 | 316 |
} |
269 | 317 |
|
270 |
//remove the initial section which is empty now or not |
|
271 | 318 |
trsEpisode.remove(trsSection) |
272 | 319 |
|
273 |
if (fixTurnsLimits) { |
|
274 |
if (debug) println "Fixing Turn limits..." |
|
275 |
def turnToInsert = null; |
|
276 |
sections = trs.Episode.Section |
|
277 |
for (int i = 0 ; i < sections.size() -1 ; i++) { // browse created sections but stop before the last one (which cannot be fixed) |
|
278 |
|
|
279 |
def section = sections[i] |
|
280 |
if (turnToInsert != null) { |
|
281 |
if (debug) println "Moving part-of turn: "+turnToInsert+" in section ("+section.@startTime+", "+section.@endTime+")" |
|
282 |
section.children().add(0, turnToInsert) // insert the split part of the turn in the section |
|
283 |
} |
|
284 |
|
|
285 |
def startSection = section.@startTime // Float.parseFloat(section.@startTime) |
|
286 |
def endSection = section.@endTime // Float.parseFloat(section.@endTime) |
|
287 |
if (startSection instanceof String) startSection = Float.parseFloat(section.@startTime) |
|
288 |
if (endSection instanceof String) endSection = Float.parseFloat(section.@endTime) |
|
289 |
|
|
290 |
turnToInsert = null |
|
291 |
|
|
292 |
turns = section.Turn |
|
293 |
if (turns.size() == 0) continue; |
|
294 |
|
|
295 |
def turn = turns[-1] |
|
296 |
|
|
297 |
def children = turn.children() |
|
298 |
Node newTurnKaNode = null;//new Node(trsEpisode, "Turn", currentSection[2]) |
|
299 |
//println "Cut the last turn if necessary" |
|
300 |
for (int iChildren = 0 ; iChildren < children.size() ; iChildren++) { |
|
301 |
|
|
302 |
def c = children[iChildren] |
|
303 |
if (c instanceof String) continue; |
|
304 |
|
|
305 |
if (newTurnKaNode != null) { |
|
306 |
turn.remove(c) |
|
307 |
newTurnKaNode.append(c) |
|
308 |
if (debug) c.@moved="yes" |
|
309 |
iChildren-- |
|
310 |
} else { |
|
311 |
if ("w".equals(c.name())) { |
|
312 |
def start2 = Float.parseFloat(c.@startTime) |
|
313 |
def end2 = Float.parseFloat(c.@endTime) |
|
314 |
|
|
315 |
if (start2 > endSection && Math.abs(start2 - endSection) > turnsCutActivationThreashold) { |
|
316 |
if (debug) println "cut with a w at [$start2, $end2] for section ("+startSection+", "+endSection+")" |
|
317 |
newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker]) |
|
318 |
if (debug) newTurnKaNode.@created = "yes" |
|
319 |
turn.@endTime = ""+start2; |
|
320 |
turn.remove(c) |
|
321 |
newTurnKaNode.append(c) |
|
322 |
if (debug) c.@moved="yes" |
|
323 |
iChildren-- |
|
324 |
} |
|
325 |
} else if ("Sync".equals(c.name())) { |
|
326 |
def start2 = Float.parseFloat(c.@time) |
|
327 |
def end2 = Float.parseFloat(c.@time) |
|
328 |
|
|
329 |
if (start2 > endSection && Math.abs(start2 - endSection) > turnsCutActivationThreashold) { |
|
330 |
if (debug) println "cut with a Sync at [$start2, $end2] for section "+endSection |
|
331 |
newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker]) |
|
332 |
if (debug) newTurnKaNode.@created = "yes" |
|
333 |
turn.@endTime = ""+start2; |
|
334 |
turn.remove(c) |
|
335 |
if (debug) c.@moved="yes" |
|
336 |
newTurnKaNode.append(c) |
|
337 |
iChildren-- |
|
338 |
} |
|
339 |
} else { |
|
340 |
// no time to check |
|
341 |
} |
|
342 |
} |
|
343 |
} |
|
344 |
|
|
345 |
turnToInsert = newTurnKaNode |
|
346 |
|
|
347 |
} |
|
348 |
} |
|
349 |
|
|
350 | 320 |
outputDirectory.mkdir() |
351 | 321 |
File outfile = new File(outputDirectory, trsFile.getName()) |
352 | 322 |
outfile.withWriter("UTF-8") { writer -> |
... | ... | |
364 | 334 |
println "Error: "+e |
365 | 335 |
Log.printStackTrace(e) |
366 | 336 |
} |
337 |
|
|
338 |
def cutTurn(def testWithSectionEndTime) { |
|
339 |
|
|
340 |
def children = turn.children() |
|
341 |
Node newTurnKaNode = null;//new Node(trsEpisode, "Turn", currentSection[2]) |
|
342 |
|
|
343 |
def startSection = sections[iSection][0] // currentNode.@startTime |
|
344 |
if (startSection instanceof String) startSection = Float.parseFloat(sections[iSection][0]) //currentNode.@startTime) |
|
345 |
def endSection = sections[iSection][1] // currentNode.@endTime |
|
346 |
if (endSection instanceof String) endSection = Float.parseFloat(sections[iSection][1]) //currentNode.@endTime) |
|
347 |
//println "Cut the last turn if necessary" |
|
348 |
|
|
349 |
if (debug) println " cut turn and test with end ? $testWithSectionEndTime of iSection=$iSection at iTurn=$iTurn start=${turn.@startTime} end=${turn.@endTime} children="+turn.children().size() |
|
350 |
for (int iChildren = 0 ; iChildren < children.size() ; iChildren++) { |
|
351 |
|
|
352 |
def c = children[iChildren] |
|
353 |
if (c instanceof String) continue; // a Turn contains Sync or w tags |
|
354 |
|
|
355 |
if (newTurnKaNode != null) { // append the remaining children to the new turn |
|
356 |
turn.remove(c) |
|
357 |
newTurnKaNode.append(c) |
|
358 |
if (debug) c.@moved="yes" |
|
359 |
iChildren-- |
|
360 |
} else { |
|
361 |
if ("w".equals(c.name())) { |
|
362 |
def start2 = Float.parseFloat(c.@startTime) |
|
363 |
def end2 = Float.parseFloat(c.@endTime) |
|
364 |
|
|
365 |
boolean test = null |
|
366 |
if (testWithSectionEndTime) { |
|
367 |
test = start2 > endSection |
|
368 |
} else { |
|
369 |
test = start2 >= startSection |
|
370 |
} |
|
371 |
|
|
372 |
if (test) { // && Math.abs(start2 - endSection) > turnsCutActivationThreashold // no more needed |
|
373 |
if (debug) println " cut with a w at ($start2, $end2) for section ("+startSection+", "+endSection+")" |
|
374 |
newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker]) |
|
375 |
new Node(newTurnKaNode, "Sync", ["time":""+start2]) // TRS |
|
376 |
|
|
377 |
turns.add(iTurn, newTurnKaNode) // set as next turn to process |
|
378 |
iTurn-- |
|
379 |
if (debug) newTurnKaNode.@created = "yes" |
|
380 |
turn.@endTime = ""+start2; |
|
381 |
turn.remove(c) |
|
382 |
newTurnKaNode.append(c) |
|
383 |
|
|
384 |
if (debug) c.@moved="yes" |
|
385 |
iChildren-- |
|
386 |
} |
|
387 |
} else if ("Sync".equals(c.name())) { |
|
388 |
def start2 = c.@time |
|
389 |
if (start2 instanceof String) start2 = Float.parseFloat(c.@time) |
|
390 |
def end2 = start2 |
|
391 |
|
|
392 |
boolean test = null |
|
393 |
if (testWithSectionEndTime) { |
|
394 |
test = start2 > endSection |
|
395 |
} else { |
|
396 |
test = start2 >= startSection |
|
397 |
} |
|
398 |
|
|
399 |
if (test) { // && Math.abs(start2 - endSection) > turnsCutActivationThreashold |
|
400 |
if (debug) println " cut with a Sync at ($start2, $end2) for section ("+startSection+", "+endSection+")" |
|
401 |
newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker]) |
|
402 |
turns.add(iTurn, newTurnKaNode) |
|
403 |
iTurn-- |
|
404 |
if (debug) newTurnKaNode.@created = "yes" |
|
405 |
turn.@endTime = ""+start2; |
|
406 |
turn.remove(c) |
|
407 |
if (debug) c.@moved="yes" |
|
408 |
newTurnKaNode.append(c) |
|
409 |
iChildren-- |
|
410 |
} |
|
411 |
} else { |
|
412 |
// no time to check |
|
413 |
} |
|
414 |
} |
|
415 |
} |
|
416 |
} |
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/pager.groovy (revision 3416) | ||
---|---|---|
687 | 687 |
if (SIMPLE_TOOLTIP) { |
688 | 688 |
if (type.contains("lemma") || type.contains("pos")) { |
689 | 689 |
flaginterp=true; |
690 |
interpvalue+=", ";
|
|
690 |
interpvalue+="\n- ";
|
|
691 | 691 |
} |
692 | 692 |
} else { |
693 | 693 |
flaginterp=true; |
694 |
interpvalue+=", "+type+"="
|
|
694 |
interpvalue+="\n- "+type+"="
|
|
695 | 695 |
} |
696 | 696 |
break; |
697 | 697 |
|
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/importer.groovy (revision 3416) | ||
---|---|---|
188 | 188 |
|
189 | 189 |
if (!doTokenizeStep) { |
190 | 190 |
|
191 |
println "No tokenization do to."
|
|
191 |
println "No tokenization to do."
|
|
192 | 192 |
cpb = new ConsoleProgressBar(files.length) |
193 | 193 |
for (File f : files) { |
194 | 194 |
File outfile = new File(tokenizedDir, f.getName()); |
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZDefaultPagerStep.groovy (revision 3416) | ||
---|---|---|
106 | 106 |
* @param max the max |
107 | 107 |
* @param basename the basename |
108 | 108 |
*/ |
109 |
public XTZDefaultPagerStep(XTZPager pager, File infile, String txtname, List<String> NoSpaceBefore, |
|
110 |
List<String> NoSpaceAfter, def cssList) { |
|
109 |
public XTZDefaultPagerStep(XTZPager pager, File infile, String txtname, List<String> NoSpaceBefore, List<String> NoSpaceAfter, def cssList) { |
|
111 | 110 |
this.pager = pager; |
112 | 111 |
this.paginationElement = pager.page_element; |
113 | 112 |
this.paginate = pager.paginate |
114 | 113 |
this.cssList = cssList; |
115 |
this.basename = pager.corpusname;
|
|
114 |
this.c = pager.corpusname;
|
|
116 | 115 |
this.txtname = txtname; |
117 | 116 |
this.outdir = pager.outputDirectory; |
118 | 117 |
this.wordmax = pager.wordsPerPage; |
... | ... | |
410 | 409 |
pagedWriter.writeAttribute("class", "sync") |
411 | 410 |
if (parser.getAttributeValue(null,"time") != null) { |
412 | 411 |
pagedWriter.writeCharacters(parser.getAttributeValue(null,"time")) |
412 |
|
|
413 |
writeMediaAccess(parser.getAttributeValue(null,"time"), corpus, txtname) |
|
413 | 414 |
} |
414 | 415 |
break; |
415 | 416 |
case "p": |
... | ... | |
635 | 636 |
endOfLastWord = lastword.subSequence(l-1, l) |
636 | 637 |
} |
637 | 638 |
|
638 |
String interpvalue = anaValues.entrySet().join(", ")
|
|
639 |
String interpvalue = "- "+anaValues.entrySet().join("\n- ")+"\n- "+wordid
|
|
639 | 640 |
|
640 | 641 |
if (NoSpaceBefore.contains(wordvalue) || |
641 | 642 |
NoSpaceAfter.contains(lastword) || |
... | ... | |
704 | 705 |
} |
705 | 706 |
return true; |
706 | 707 |
} |
708 |
|
|
709 |
private void writeMediaAccess(def time) { |
|
710 |
|
|
711 |
pagedWriter.writeCharacters(" "); |
|
712 |
pagedWriter.writeStartElement("a"); |
|
713 |
pagedWriter.writeAttribute("onclick", "txmcommand('id', 'org.txm.backtomedia.commands.function.BackToMedia', 'corpus', '"+basename+"', 'text', '"+txtname+"', 'time', '"+time+"')"); |
|
714 |
pagedWriter.writeAttribute("style", "cursor: pointer;") |
|
715 |
pagedWriter.writeAttribute("class", "play-media") |
|
716 |
pagedWriter.writeCharacters("▶"); |
|
717 |
pagedWriter.writeEndElement(); // a |
|
718 |
} |
|
707 | 719 |
} |
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZImporter.groovy (revision 3416) | ||
---|---|---|
378 | 378 |
|
379 | 379 |
//if (wordTag != "w") { |
380 | 380 |
if (!doTokenizeStep) { |
381 |
println "No tokenization do to."
|
|
381 |
println "No tokenization to do."
|
|
382 | 382 |
// ConsoleProgressBar cpb = new ConsoleProgressBar(filesToProcess.size()) |
383 | 383 |
for (File f : filesToProcess) { |
384 | 384 |
File outfile = new File(module.getBinaryDirectory(),"tokenized/"+f.getName()); |
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/pager.groovy (revision 3416) | ||
---|---|---|
448 | 448 |
if (l > 0) |
449 | 449 |
endOfLastWord = lastword.subSequence(l-1, l); |
450 | 450 |
|
451 |
String interpvalue = anaValues.entrySet().join(", ") |
|
451 |
anaValues.put("id", wordid) |
|
452 |
String interpvalue = "- "+anaValues.entrySet().join("\n- ") |
|
452 | 453 |
|
453 | 454 |
if (NoSpaceBefore.contains(wordvalue) || |
454 | 455 |
NoSpaceAfter.contains(lastword) || |
Formats disponibles : Unified diff