Révision 3434
TXM/trunk/org.txm.rcp/src/main/java/org/txm/rcp/ApplicationWorkbenchWindowAdvisor.java (revision 3434) | ||
---|---|---|
193 | 193 |
for (TXMResult result : altered) { |
194 | 194 |
mess.append("\n- "+result.getName()+ " ("+result.getResultType()+") : "+result.getSimpleDetails()); |
195 | 195 |
} |
196 |
mess.append("\n\nYou're going to lost those modifications. Continue?");
|
|
197 |
return MessageDialog.openConfirm(this.getWindowConfigurer().getWindow().getShell(), "Some result are modified", mess.toString());
|
|
196 |
mess.append("\n\n"+"You're going to lost those modifications if not already exported. Continue?"); //$NON-NLS-1$
|
|
197 |
return MessageDialog.openConfirm(this.getWindowConfigurer().getWindow().getShell(), "Some result have been manually modified", mess.toString());
|
|
198 | 198 |
} |
199 | 199 |
|
200 | 200 |
AnnotationEnginesManager aem = (AnnotationEnginesManager) Toolbox.getEngineManager(EngineType.ANNOTATION); |
TXM/trunk/org.txm.concordance.rcp/src/org/txm/concordance/rcp/editors/ConcordanceEditor.java (revision 3434) | ||
---|---|---|
2318 | 2318 |
// System.out.println("REFSIZE="+s); |
2319 | 2319 |
} |
2320 | 2320 |
|
2321 |
|
|
2322 |
|
|
2321 | 2323 |
/** |
2322 | 2324 |
* @return the number of lines in the table |
2323 | 2325 |
*/ |
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/AddSectionsFromTableMacroOld.groovy (revision 3434) | ||
---|---|---|
1 |
package org.txm.macro.transcription |
|
2 |
|
|
3 |
import java.nio.charset.Charset |
|
4 |
|
|
5 |
import java.time.LocalTime |
|
6 |
import java.time.format.DateTimeFormatter |
|
7 |
import org.txm.utils.* |
|
8 |
import org.txm.utils.logger.* |
|
9 |
|
|
10 |
@Field @Option(name="metadataFile", usage="Tableau des metadonnées de sections", widget="FileOpen", required=true, def="") |
|
11 |
File metadataFile; |
|
12 |
|
|
13 |
@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS", widget="Folder", required=true, def="") |
|
14 |
File trsDirectory; |
|
15 |
|
|
16 |
@Field @Option(name="joinTRSColumn", usage="Colonne de jointure de transcription", widget="String", required=true, def="Lien notice principale") |
|
17 |
def joinTRSColumn |
|
18 |
|
|
19 |
@Field @Option(name="startTimeColumn", usage="Colonne de timing de début de section", widget="String", required=true, def="antract_debut") |
|
20 |
def startTimeColumn = "antract_debut" |
|
21 |
|
|
22 |
@Field @Option(name="endTimeColumn", usage="Colonne de timing de fin de section", widget="String", required=true, def="antract_fin") |
|
23 |
def endTimeColumn = "antract_fin" |
|
24 |
|
|
25 |
@Field @Option(name="typeColumns", usage="Colonnes des métadonnées de type de section", widget="String", required=true, def="Titre propre") |
|
26 |
def typeColumns |
|
27 |
|
|
28 |
@Field @Option(name="topicColumns", usage="Colonnes des métadonnées de topic de section", widget="String", required=true, def="Date de diffusion") |
|
29 |
def topicColumns |
|
30 |
|
|
31 |
@Field @Option(name="metadataColumns", usage="Colonnes de metadonnées de section", widget="String", required=true, def="Titre propre;Date de diffusion;Identifiant de la notice;Notes du titre;Type de date;Durée;Genre;Langue VO / VE;Nature de production;Producteurs (Aff.);Thématique;Nom fichier segmenté (info);antract_video;antract_debut;antract_fin;antract_duree;antract_tc_type;antract_tc_date;Résumé;Séquences;Descripteurs (Aff. Lig.);Générique (Aff. Lig.)") |
|
32 |
def metadataColumns |
|
33 |
|
|
34 |
@Field @Option(name="metadataColumnsGroups", usage="Colonnes des gruopes de metadonnées de section", widget="String", required=true, def="metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;secondary;secondary;secondary;secondary;secondary;secondary;secondary;text;text;text;text") |
|
35 |
def metadataColumnsGroups |
|
36 |
|
|
37 |
@Field @Option(name="fixSectionsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true") |
|
38 |
def fixSectionsLimits |
|
39 |
|
|
40 |
@Field @Option(name="sectionsMergeActivationThreashold", usage="marge d'erreur de corrections des limites de sections", widget="Float", required=true, def="1.0") |
|
41 |
def sectionsMergeActivationThreashold |
|
42 |
|
|
43 |
@Field @Option(name="fixTurnsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true") |
|
44 |
def fixTurnsLimits |
|
45 |
|
|
46 |
@Field @Option(name="turnsCutActivationThreashold", usage="marge d'erreur de corrections des limites de tours", widget="Float", required=true, def="0.1") |
|
47 |
def turnsCutActivationThreashold |
|
48 |
|
|
49 |
@Field @Option(name="debug", usage="show debug messages", widget="String", required=true, def="false") |
|
50 |
def debug |
|
51 |
|
|
52 |
if (!ParametersDialog.open(this)) return; |
|
53 |
|
|
54 |
typeColumns = typeColumns.split(";") |
|
55 |
topicColumns = topicColumns.split(";") |
|
56 |
metadataColumns = metadataColumns.split(";") |
|
57 |
metadataColumnsGroups = metadataColumnsGroups.split(";") |
|
58 |
|
|
59 |
if (metadataColumns.size() != metadataColumnsGroups.size()) { |
|
60 |
println "ERROR in metadata declarations&groups:" |
|
61 |
println "COLUMNS: "+metadataColumns |
|
62 |
println "GROUPS : "+metadataColumnsGroups |
|
63 |
return |
|
64 |
} |
|
65 |
|
|
66 |
if (!trsDirectory.exists()) { |
|
67 |
println "$trsDirectory not found" |
|
68 |
return |
|
69 |
} |
|
70 |
|
|
71 |
println "Loading data from $metadataFile..." |
|
72 |
TableReader reader = new TableReader(metadataFile)//, "\t".charAt(0), Charset.forName("UTF-8") |
|
73 |
reader.readHeaders() |
|
74 |
def header = reader.getHeaders() |
|
75 |
if (!header.contains(joinTRSColumn)) { |
|
76 |
println "No TRS ID $joinTRSColumn column found" |
|
77 |
return |
|
78 |
} |
|
79 |
if (!header.contains(startTimeColumn)) { |
|
80 |
println "No start time $startTimeColumn column found" |
|
81 |
return |
|
82 |
} |
|
83 |
if (!header.contains(endTimeColumn)) { |
|
84 |
println "No end time $endTimeColumn column found" |
|
85 |
return |
|
86 |
} |
|
87 |
// Check that every column requested through the macro parameters exists in the
// table header. The script stops at the first missing column, before any TRS
// file is read. Each loop must test its own column name `col`: testing
// `endTimeColumn` here (already validated earlier) would make the check a no-op.
for (def col : metadataColumns) {
	if (!header.contains(col)) {
		println "No $col column found"
		return
	}
}
for (def col : typeColumns) {
	if (!header.contains(col)) {
		println "No type $col column found"
		return
	}
}
for (def col : topicColumns) {
	if (!header.contains(col)) {
		println "No topic $col column found"
		return
	}
}
|
105 |
|
|
106 |
File outputDirectory = new File(trsDirectory, "out") |
|
107 |
println "Writing result to $outputDirectory..." |
|
108 |
|
|
109 |
dateTimeFormatter = DateTimeFormatter.ISO_LOCAL_TIME |
|
110 |
/**
 * Converts a timecode string into a number of seconds.
 *
 * The text after the last ':' is parsed as an integer frame count and the text
 * before it is parsed with DateTimeFormatter.ISO_LOCAL_TIME (e.g. "HH:mm:ss").
 * The result is hours*3600 + minutes*60 + seconds + frames/25.
 * NOTE(review): the division by 25 suggests 25 frames per second -- confirm.
 *
 * @param str the timecode, e.g. "00:01:02:05"
 * @return the time in seconds, or null when str contains no ':' separator
 */
def strTotime(def str) {
	int separator = str.lastIndexOf(":")
	if (separator == -1) {
		return null
	}

	// Locals are declared explicitly so nothing leaks into the script binding.
	int frames = Integer.parseInt(str.substring(separator + 1))
	if (frames > 25) { // frame counts above 25 are ignored rather than added
		frames = 0
	}

	LocalTime time = LocalTime.parse(str.substring(0, separator), DateTimeFormatter.ISO_LOCAL_TIME)
	int seconds = (time.getHour()*60*60) + (time.getMinute()*60) + time.getSecond()

	// Groovy's '/' on two integers yields a BigDecimal, so the fraction of a second is kept.
	return seconds + (frames/25)
}
|
128 |
|
|
129 |
try { |
|
130 |
def sectionGroupsToInsert = [:] |
|
131 |
println "Reading data..." |
|
132 |
while (reader.readRecord()) { // loading & sorting sections |
|
133 |
String id = reader.get(joinTRSColumn).trim() |
|
134 |
if (id.endsWith(".mp4")) id = id.substring(0, id.length()-4) |
|
135 |
if (id.length() == 0) continue; |
|
136 |
|
|
137 |
if (!sectionGroupsToInsert.containsKey(id)) { |
|
138 |
sectionGroupsToInsert[id] = [] |
|
139 |
} |
|
140 |
def section = sectionGroupsToInsert[id] |
|
141 |
|
|
142 |
if (reader.get(startTimeColumn) != null && reader.get(startTimeColumn).length() > 0) { // ignore non timed sections |
|
143 |
|
|
144 |
def m = [:] |
|
145 |
|
|
146 |
for (def todo : ["topic":topicColumns, "type":typeColumns]) { |
|
147 |
def data = [] |
|
148 |
for (def col : todo.value) { |
|
149 |
if (reader.get(col).trim().length() > 0) { |
|
150 |
data << reader.get(col).trim().replace("\n", "") |
|
151 |
} |
|
152 |
} |
|
153 |
m[todo.key] = data.join("\t") |
|
154 |
} |
|
155 |
def metadataList = [] |
|
156 |
def metadataGroupList = [] |
|
157 |
for (int i = 0 ; i < metadataColumns.size() ; i++) { |
|
158 |
def col = metadataColumns[i] |
|
159 |
String c = AsciiUtils.buildAttributeId(col) |
|
160 |
m[c] = reader.get(col) |
|
161 |
metadataList << c |
|
162 |
metadataGroupList << metadataColumnsGroups[i] |
|
163 |
} |
|
164 |
m["metadata"] = metadataList.join("|") |
|
165 |
m["metadata_groups"] = metadataGroupList.join("|") |
|
166 |
|
|
167 |
m["startTime"] = strTotime(reader.get(startTimeColumn)) |
|
168 |
m["endTime"] = strTotime(reader.get(endTimeColumn)) |
|
169 |
m["synchronized"] = "true" |
|
170 |
|
|
171 |
section << [m["startTime"], m["endTime"], m] |
|
172 |
} |
|
173 |
} |
|
174 |
|
|
175 |
println "Inserting sections... "+sectionGroupsToInsert.size() |
|
176 |
|
|
177 |
ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size()) |
|
178 |
for (String id : sectionGroupsToInsert.keySet()) { |
|
179 |
|
|
180 |
File trsFile = new File(trsDirectory, id+".trs") |
|
181 |
if (!trsFile.exists()) { |
|
182 |
cpb.tick() |
|
183 |
continue |
|
184 |
} |
|
185 |
|
|
186 |
if (debug) println "== $id ==" |
|
187 |
else cpb.tick() |
|
188 |
|
|
189 |
//println "Processing $id..." |
|
190 |
def sections = sectionGroupsToInsert[id] |
|
191 |
sections = sections.sort() { a, b -> a[0] <=> b[0] ?: a[1] <=> -b[1] } // negative second test for sections inclusion |
|
192 |
|
|
193 |
if (fixSectionsLimits) { |
|
194 |
if (debug) println "Fixing sections of $id" |
|
195 |
for (int iSection = 1 ; iSection < sections.size() ; iSection++) { |
|
196 |
//println sections[iSection] |
|
197 |
if (Math.abs(sections[iSection][0] - sections[iSection - 1][1]) < sectionsMergeActivationThreashold) { |
|
198 |
if (debug) println "s=$iSection start <- end : "+sections[iSection][0]+ " <- "+sections[iSection - 1][1] |
|
199 |
sections[iSection][0] = sections[iSection - 1][1] // fix the start time with the previous section end time |
|
200 |
} |
|
201 |
} |
|
202 |
} |
|
203 |
|
|
204 |
// Open input file |
|
205 |
def slurper = new groovy.util.XmlParser(false, true, true); |
|
206 |
slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false) // allow DTD declaration |
|
207 |
slurper.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "all"); // allow to read DTD from local file |
|
208 |
def trs = slurper.parse(trsFile.toURI().toString()) |
|
209 |
def trsEpisodes = trs.Episode // 1 |
|
210 |
if (trsEpisodes.size() > 1) { |
|
211 |
println "Error: multiple Episode node in $trsFile" |
|
212 |
continue |
|
213 |
} |
|
214 |
def trsEpisode = trsEpisodes[0] |
|
215 |
def trsSections = trs.Episode.Section // 1 |
|
216 |
if (trsSections.size() > 1) { |
|
217 |
println "Error: multiple Section node in $trsFile" |
|
218 |
continue |
|
219 |
} |
|
220 |
def trsSection = trsSections[0] |
|
221 |
|
|
222 |
def turns = trsSection.Turn |
|
223 |
def newSections = [] |
|
224 |
def iSection = 0; |
|
225 |
def currentSection = null |
|
226 |
def currentNode = null |
|
227 |
|
|
228 |
for (int iTurn = 0 ; iTurn < turns.size() ; iTurn++) { |
|
229 |
|
|
230 |
def turn = turns[iTurn] |
|
231 |
def start = Float.parseFloat(turn.@startTime) |
|
232 |
def end = Float.parseFloat(turn.@endTime) |
|
233 |
//println "Turn: $iTurn ($start, $end)" |
|
234 |
|
|
235 |
def foundSection = null; |
|
236 |
for (int i = iSection ; i < sections.size() ; i++) { |
|
237 |
if (end < sections[i][0]) { // the Turn is before the section |
|
238 |
|
|
239 |
} else if (sections[i][1] < start) { // the Turn is after the section |
|
240 |
|
|
241 |
} else { |
|
242 |
foundSection = sections[i] |
|
243 |
iSection = i |
|
244 |
break; // stop searching and set iSection to accelerate next search |
|
245 |
} |
|
246 |
} |
|
247 |
|
|
248 |
if (foundSection == null) { |
|
249 |
if (currentSection != null || currentNode == null) { |
|
250 |
currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] ) |
|
251 |
currentSection = null; |
|
252 |
} |
|
253 |
} else { |
|
254 |
if (foundSection != currentSection) { |
|
255 |
if (currentNode != null && currentNode.@synchronized == "false") { // set the un-synchronized section endTime using its last Turn endTime |
|
256 |
def tmp = currentNode.Turn |
|
257 |
currentNode.@endTime = tmp[-1].@endTime |
|
258 |
} |
|
259 |
|
|
260 |
currentSection = foundSection |
|
261 |
currentNode = new Node(trsEpisode, "Section", currentSection[2]) |
|
262 |
} |
|
263 |
} |
|
264 |
|
|
265 |
trsSection.remove(turn) |
|
266 |
currentNode.append(turn) |
|
267 |
} |
|
268 |
|
|
269 |
//remove the initial section which is empty now |
|
270 |
trsEpisode.remove(trsSection) |
|
271 |
|
|
272 |
if (fixTurnsLimits) { |
|
273 |
if (debug) println "Fixing Turn limits..." |
|
274 |
def partOfTurnToInsertInthePreviousSection = null; |
|
275 |
def partOfTurnToInsertIntheNextSection = null; |
|
276 |
sections = trs.Episode.Section |
|
277 |
for (int i = 0 ; i < sections.size() ; i++) { // browse created sections but stop before the last one (whichc can not be fixed) |
|
278 |
|
|
279 |
def section = sections[i] |
|
280 |
if (partOfTurnToInsertIntheNextSection != null) { |
|
281 |
if (debug) println "Moving part-of turn: "+partOfTurnToInsertIntheNextSection+" in section ("+section.@startTime+", "+section.@endTime+")" |
|
282 |
section.children().add(0, partOfTurnToInsertIntheNextSection) // insert the slited part of the turn in the section |
|
283 |
} |
|
284 |
|
|
285 |
def startSection = section.@startTime // Float.parseFloat(section.@startTime) |
|
286 |
def endSection = section.@endTime // Float.parseFloat(section.@endTime) |
|
287 |
if (startSection instanceof String) startSection = Float.parseFloat(section.@startTime) |
|
288 |
if (endSection instanceof String) endSection = Float.parseFloat(section.@endTime) |
|
289 |
|
|
290 |
partOfTurnToInsertInthePreviousSection = null |
|
291 |
partOfTurnToInsertIntheNextSection = null |
|
292 |
|
|
293 |
turns = section.Turn |
|
294 |
if (turns.size() == 0) continue; |
|
295 |
|
|
296 |
// Fix the first Turn |
|
297 |
def turn = turns[0] |
|
298 |
start = turn.@startTime // Float.parseFloat(section.@startTime) |
|
299 |
end = turn.@endTime // Float.parseFloat(section.@endTime) |
|
300 |
if (start instanceof String) start = Float.parseFloat(turn.@startTime) |
|
301 |
if (end instanceof String) end = Float.parseFloat(turn.@endTime) |
|
302 |
|
|
303 |
if (start < startSection) { // the start of the Turn is outside of its current section |
|
304 |
|
|
305 |
} |
|
306 |
|
|
307 |
// Fix the last Turn |
|
308 |
turn = turns[-1] |
|
309 |
start = turn.@startTime // Float.parseFloat(section.@startTime) |
|
310 |
end = turn.@endTime // Float.parseFloat(section.@endTime) |
|
311 |
if (start instanceof String) start = Float.parseFloat(turn.@startTime) |
|
312 |
if (end instanceof String) end = Float.parseFloat(turn.@endTime) |
|
313 |
if (end > endSection) { // the end of the Turn is outside of its current section |
|
314 |
|
|
315 |
def children = turn.children() |
|
316 |
Node newTurnKaNode = null;//new Node(trsEpisode, "Turn", currentSection[2]) |
|
317 |
//println "Cut the last turn if necessary" |
|
318 |
for (int iChildren = 0 ; iChildren < children.size() ; iChildren++) { |
|
319 |
|
|
320 |
def c = children[iChildren] |
|
321 |
if (c instanceof String) continue; |
|
322 |
|
|
323 |
if (newTurnKaNode != null) { |
|
324 |
turn.remove(c) |
|
325 |
newTurnKaNode.append(c) |
|
326 |
if (debug) c.@moved="yes" |
|
327 |
iChildren-- |
|
328 |
} else { |
|
329 |
if ("w".equals(c.name())) { |
|
330 |
def start2 = Float.parseFloat(c.@startTime) |
|
331 |
def end2 = Float.parseFloat(c.@endTime) |
|
332 |
|
|
333 |
if (start2 > endSection && Math.abs(start2 - endSection) > turnsCutActivationThreashold) { |
|
334 |
if (debug) println "cut with a w at [$start2, $end2] for section ("+startSection+", "+endSection+")" |
|
335 |
newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker]) |
|
336 |
if (debug) newTurnKaNode.@created = "yes" |
|
337 |
turn.@endTime = ""+start2; |
|
338 |
turn.remove(c) |
|
339 |
newTurnKaNode.append(c) |
|
340 |
if (debug) c.@moved="yes" |
|
341 |
iChildren-- |
|
342 |
} |
|
343 |
} else if ("Sync".equals(c.name())) { |
|
344 |
def start2 = Float.parseFloat(c.@time) |
|
345 |
def end2 = Float.parseFloat(c.@time) |
|
346 |
|
|
347 |
if (start2 > endSection && Math.abs(start2 - endSection) > turnsCutActivationThreashold) { |
|
348 |
if (debug) println "cut with a Sync at [$start2, $end2] for section "+endSection |
|
349 |
newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker]) |
|
350 |
if (debug) newTurnKaNode.@created = "yes" |
|
351 |
turn.@endTime = ""+start2; |
|
352 |
turn.remove(c) |
|
353 |
if (debug) c.@moved="yes" |
|
354 |
newTurnKaNode.append(c) |
|
355 |
iChildren-- |
|
356 |
} |
|
357 |
} else { |
|
358 |
// no time to check |
|
359 |
} |
|
360 |
} |
|
361 |
} |
|
362 |
|
|
363 |
} |
|
364 |
partOfTurnToInsertIntheNextSection = newTurnKaNode |
|
365 |
} |
|
366 |
} |
|
367 |
|
|
368 |
outputDirectory.mkdir() |
|
369 |
File outfile = new File(outputDirectory, trsFile.getName()) |
|
370 |
outfile.withWriter("UTF-8") { writer -> |
|
371 |
writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n') |
|
372 |
def printer = new groovy.util.XmlNodePrinter(new PrintWriter(writer)) |
|
373 |
printer.setPreserveWhitespace(true) |
|
374 |
printer.print(trs) |
|
375 |
} |
|
376 |
} |
|
377 |
cpb.done() |
|
378 |
reader.close() |
|
379 |
println "Done." |
|
380 |
|
|
381 |
} catch(Exception e) { |
|
382 |
println "Error: "+e |
|
383 |
Log.printStackTrace(e) |
|
384 |
} |
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/AddSectionsFromTableMacro.groovy (revision 3434) | ||
---|---|---|
1 |
// Copyright © 2022 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author bpincemin mdecorde |
|
4 |
|
|
1 | 5 |
package org.txm.macro.transcription |
2 | 6 |
|
3 | 7 |
import java.nio.charset.Charset |
... | ... | |
42 | 46 |
@Field @Option(name="sectionsMergeActivationThreashold", usage="marge d'erreur de corrections des limites de sections", widget="Float", required=true, def="1.0") |
43 | 47 |
def sectionsMergeActivationThreashold |
44 | 48 |
|
45 |
@Field @Option(name="fixTurnsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true")
|
|
49 |
@Field @Option(name="fixTurnsLimits", usage="Découpage des tours à cheval sur plusieurs sujets", widget="Boolean", required=true, def="true") // description màj
|
|
46 | 50 |
def fixTurnsLimits |
47 | 51 |
|
48 | 52 |
@Field @Option(name="turnsCutActivationThreashold", usage="marge d'erreur de corrections des limites de tours", widget="Float", required=true, def="0.1") |
... | ... | |
129 | 133 |
} |
130 | 134 |
|
131 | 135 |
try { |
136 |
debugMore=false |
|
132 | 137 |
def sectionGroupsToInsert = [:] |
133 | 138 |
println "Reading data..." |
139 |
//BP boucle de préparation qui constitue les listes de sections, avec sectionGroupsToInsert[identifiant-émission][section][start,end,contenu] |
|
140 |
//BP l'indice de section commence à zéro dans chaque émission |
|
134 | 141 |
while (reader.readRecord()) { // loading & sorting sections |
135 | 142 |
String id = reader.get(joinTRSColumn).trim() |
136 | 143 |
if (id.endsWith(".mp4")) id = id.substring(0, id.length()-4) |
137 | 144 |
if (id.length() == 0) continue; |
138 | 145 |
|
139 |
if (!sectionGroupsToInsert.containsKey(id)) { |
|
146 |
if (!sectionGroupsToInsert.containsKey(id)) { //BP initialisation du contenu pour chaque émission
|
|
140 | 147 |
sectionGroupsToInsert[id] = [] |
141 | 148 |
} |
142 |
def section = sectionGroupsToInsert[id]
|
|
149 |
// def section = sectionGroupsToInsert[id] // plus homogène si sections ; utilisée une seule fois (30 lignes après)
|
|
143 | 150 |
|
144 | 151 |
if (reader.get(startTimeColumn) != null && reader.get(startTimeColumn).length() > 0) { // ignore non timed sections |
145 | 152 |
|
... | ... | |
170 | 177 |
m["endTime"] = strTotime(reader.get(endTimeColumn)) |
171 | 178 |
m["synchronized"] = "true" |
172 | 179 |
|
173 |
section << [m["startTime"], m["endTime"], m] |
|
180 |
// section << [m["startTime"], m["endTime"], m] |
|
181 |
sectionGroupsToInsert[id] << [m["startTime"], m["endTime"], m] |
|
174 | 182 |
} |
175 | 183 |
} |
176 | 184 |
|
177 | 185 |
println "Inserting sections... "+sectionGroupsToInsert.size() |
178 | 186 |
|
179 | 187 |
ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size()) |
188 |
//BP boucle générale sur les émissions, avec id=identifiant de l'émission |
|
180 | 189 |
for (String id : sectionGroupsToInsert.keySet()) { |
181 | 190 |
|
191 |
//BP on crée le fichier .trs |
|
182 | 192 |
File trsFile = new File(trsDirectory, id+".trs") |
183 | 193 |
if (!trsFile.exists()) { |
184 | 194 |
cpb.tick() |
... | ... | |
189 | 199 |
else cpb.tick() |
190 | 200 |
|
191 | 201 |
//println "Processing $id..." |
202 |
|
|
203 |
//BP liste des sections de l'émission : sections[indice][start,end,contenu] |
|
192 | 204 |
sections = sectionGroupsToInsert[id] |
193 |
sections = sections.sort() { a, b -> a[0] <=> b[0] ?: a[1] <=> -b[1] } // negative second test for sections inclusion |
|
194 | 205 |
|
206 |
//BP on résoud les faibles chevauchements |
|
195 | 207 |
if (fixSectionsLimits) { |
196 |
if (debug) println "Fixing sections of $id" |
|
208 |
if (debug && debugMore) println "Fixing sections of $id" |
|
209 |
//BP on unifie les start entre eux |
|
210 |
sections = sections.sort() { a, b -> a[0] <=> b[0] } |
|
197 | 211 |
for (int iSection = 1 ; iSection < sections.size() ; iSection++) { |
198 | 212 |
//println sections[iSection] |
199 |
if (Math.abs(sections[iSection][0] - sections[iSection - 1][1]) < sectionsMergeActivationThreashold) {
|
|
200 |
if (debug) println "s=$iSection start <- end : "+sections[iSection][0]+ " <- "+sections[iSection - 1][1]
|
|
201 |
sections[iSection][0] = sections[iSection - 1][1] // fix the start time with the previous section end time
|
|
213 |
if ((sections[iSection][0] - sections[iSection - 1][0] < sectionsMergeActivationThreashold) && (sections[iSection][0] != sections[iSection - 1][0])) {
|
|
214 |
if (debug && debugMore) println "s=$iSection start <- start : "+sections[iSection][0]+ " <- "+sections[iSection - 1][0]
|
|
215 |
sections[iSection][0] = sections[iSection - 1][0] // fix the start time with a close preceeding start time
|
|
202 | 216 |
} |
203 | 217 |
} |
218 |
//BP on unifie les end entre eux |
|
219 |
sections = sections.sort() { a, b -> a[1] <=> b[1] } |
|
220 |
for (int iSection = 1 ; iSection < sections.size() ; iSection++) { |
|
221 |
//println sections[iSection] |
|
222 |
if ((sections[iSection][1] - sections[iSection - 1][1] < sectionsMergeActivationThreashold) && (sections[iSection][1] != sections[iSection - 1][1])) { |
|
223 |
if (debug && debugMore) println "s=$iSection end <- end : "+sections[iSection][1]+ " <- "+sections[iSection - 1][1] |
|
224 |
sections[iSection][1] = sections[iSection - 1][1] // fix the end time with a close preceeding end time |
|
225 |
} |
|
226 |
} |
|
227 |
//BP on unifie les start avec les end |
|
228 |
// sections = sections.sort() { a, b -> a[0] <=> b[0] } |
|
229 |
for (int iSection = 1 ; iSection < sections.size() ; iSection++) { // pour chaque start... |
|
230 |
//println sections[iSection] |
|
231 |
int j = 1 |
|
232 |
boolean continuer = true |
|
233 |
while (continuer && (j < (iSection + 1))) { // ...on regarde les end avant son propre end |
|
234 |
if ((Math.abs(sections[iSection][0] - sections[iSection - j][1]) < sectionsMergeActivationThreashold) && (sections[iSection][0] != sections[iSection - j][1])) { // on a trouvé des valeurs à unifier |
|
235 |
if (sections[iSection][0] < sections[iSection - j][1]) { // on cherche lequel est avant pour unifier vers sa valeur |
|
236 |
if (debug && debugMore) println "s=$iSection end <- start : "+sections[iSection - j][1]+ " <- "+sections[iSection][0] |
|
237 |
sections[iSection - j][1] = sections[iSection][0] |
|
238 |
} else { |
|
239 |
if (debug && debugMore) println "s=$iSection start <- end : "+sections[iSection][0]+ " <- "+sections[iSection - j][1] |
|
240 |
sections[iSection][0] = sections[iSection - j][1] |
|
241 |
} |
|
242 |
continuer = false |
|
243 |
} else { |
|
244 |
if (sections[iSection][0] < sections[iSection - j][1]) { // l'écart ne peut plus que grandir |
|
245 |
continuer = false |
|
246 |
} else { // le end est au-dessus, le prochain sera plus bas, il peut se rapprocher |
|
247 |
j++ |
|
248 |
} |
|
249 |
} |
|
250 |
} |
|
251 |
} |
|
204 | 252 |
} |
253 |
|
|
254 |
//BP les sections sont triées |
|
255 |
sections = sections.sort() { a, b -> a[0] <=> b[0] ?: a[1] <=> -b[1] } // negative second test for sections inclusion |
|
205 | 256 |
|
206 | 257 |
// Open input file |
207 | 258 |
slurper = new groovy.util.XmlParser(false, true, true); |
... | ... | |
227 | 278 |
currentSection = null |
228 | 279 |
currentNode = null |
229 | 280 |
|
230 |
foundSection=null
|
|
231 |
isTurnSynchronized=false
|
|
232 |
cutCheck=false
|
|
281 |
def startSection = 0;
|
|
282 |
def endSection = 0;
|
|
283 |
def endPreviousSection = 0;
|
|
233 | 284 |
|
285 |
// foundSection=null |
|
286 |
// isTurnSynchronized=false |
|
287 |
// cutCheck=false |
|
288 |
// => ces initialisations ont été déplacées dans la boucle des tours (la 1ère y était déjà) |
|
289 |
|
|
234 | 290 |
// boucle sur les tours dans l'ordre |
291 |
// for (iTurn = 0 ; iTurn < turns.size() ; iTurn++) { |
|
235 | 292 |
for (iTurn = 0 ; iTurn < turns.size() ; iTurn++) { |
236 |
if (debug) println "iTurn=$iTurn turn="+turns[iTurn].attributes()+" iSection=$iSection ("+sections[iSection][0]+", "+sections[iSection][1]+")" |
|
293 |
if (debug && debugMore) println "iTurn=$iTurn turn="+turns[iTurn].attributes()+" iSection=$iSection ("+sections[iSection][0]+", "+sections[iSection][1]+")" |
|
294 |
//if (debug && iTurn == 199) println "WARNING: boucle probable sur iTurn dans cette transcription (ou bien plus de 200 tours dans la transcription)" |
|
237 | 295 |
turn = turns[iTurn] |
238 |
start = Float.parseFloat(turn.@startTime) |
|
296 |
start = Float.parseFloat(turn.@startTime) // Float sera utile pour les tests ?
|
|
239 | 297 |
end = Float.parseFloat(turn.@endTime) |
240 | 298 |
//println "Turn: $iTurn ($start, $end)" |
241 |
|
|
242 |
// Etape 1 : y aura-t-il besoin de couper le tour, et dans quelle section est le tour (ou sa première partie) |
|
299 |
|
|
300 |
// Etape 1 : dans quelle section est le (début du) tour et doit-il être coupé |
|
301 |
|
|
243 | 302 |
foundSection = null; |
303 |
isTurnSynchronized=false |
|
304 |
cutCheck=false |
|
305 |
|
|
244 | 306 |
for (int i = iSection ; i < sections.size() ; i++) { |
245 |
// if section_end < turn_start OU |turn_start - section_end| < turn_threshold
|
|
246 |
if (sections[i][1] < start || Math.abs(start - sections[i][1]) < turnsCutActivationThreashold) { // Turn is after section
|
|
247 |
// Cas 1 : la section est complètement avant (modulo la marge)
|
|
307 |
// if section_end < turn_start OU (|turn_start - section_end| < turn_threshold ET (turn_end - section_end) >= turn_threshold)
|
|
308 |
if ((sections[i][1] < start) || ((Math.abs(start - sections[i][1]) < turnsCutActivationThreashold) && (end - sections[i][1] >= turnsCutActivationThreashold))) { // Section is before Turn
|
|
309 |
// Cas 1 : la section est complètement avant (modulo la marge) |
|
248 | 310 |
} else { |
249 |
// Cas 2 : on est arrivés à la section à considérer
|
|
311 |
// Cas 2 : on est arrivés à la section à considérer |
|
250 | 312 |
iSection = i |
251 |
// if section_start > turn_end OU |section_start - turn_end| < turn_threshold |
|
252 |
if (sections[i][0] > end || Math.abs(sections[i][0] - end) < turnsCutActivationThreashold) { // Turn is before section |
|
253 |
// Cas 2.1 : la section est complètement après (modulo la marge) (et les suivantes le seront aussi) |
|
254 |
foundSection = null |
|
313 |
// if turn_end < section_start OU (|section_start - turn_end| < turn_threshold ET (section_start - turn_start) >= turn_threshold) |
|
314 |
if ((end < sections[i][0]) || ((Math.abs(sections[i][0] - end) < turnsCutActivationThreashold) && (sections[i][0] - start >= turnsCutActivationThreashold))) { // Section is after Turn |
|
315 |
// Cas 2.1 : la section est complètement après (modulo la marge) (et les suivantes le seront aussi) |
|
316 |
if (debug && debugMore) println "Etape 1, Cas 2.1" |
|
317 |
foundSection = sections[i] // (c'est la prochaine section, qui mettra fin à la section non-synchronisée) |
|
255 | 318 |
isTurnSynchronized = false |
256 | 319 |
cutCheck = false |
257 | 320 |
} else { |
258 |
// if |section_start - turn_start| > turn_threshold |
|
259 |
if (start - sections[i][0] < 0) { // Turn begins before section does |
|
260 |
// Cas 2.2 : la section commence significativement après le début du tour (le début est non synchronisé) |
|
261 |
foundSection = sections[i] // (c'est la première section rencontrée, mais elle sera pour le tour suivant) |
|
321 |
// if (section_start - turn_start) >= turn_threshold |
|
322 |
if (sections[i][0] - start >= turnsCutActivationThreashold) { // Turn begins before section does |
|
323 |
// Cas 2.2 : la section commence significativement après le début du tour (le début est non synchronisé) |
|
324 |
if (debug && debugMore) println "Etape 1, Cas 2.2" |
|
325 |
foundSection = sections[i] // (c'est la première section rencontrée, mais elle sera pour le tour suivant, elle va servir à savoir où couper) |
|
262 | 326 |
isTurnSynchronized = false |
263 | 327 |
cutCheck = true |
264 | 328 |
} else { |
265 |
// Cas 2.3 : le début du tour est dans la section (on n'a pas besoin de chercher d'autres sections car si ce n'est pas la seule on coupera le tour et ce sera un autre tour). |
|
266 |
foundSection = sections[i] // (c'est la section qui commence le tour, au moins) |
|
267 |
isTurnSynchronized = true |
|
268 |
cutCheck = true |
|
329 |
// if turn_end < section_end OU |section_end - turn_end| < turn_threshold |
|
330 |
if (end < sections[i][1] || Math.abs(sections[i][1] - end) < turnsCutActivationThreashold) { |
|
331 |
// Cas 2.3 : le début du tour est dans la section et la fin aussi. |
|
332 |
if (debug && debugMore) println "Etape 1, Cas 2.3" |
|
333 |
foundSection = sections[i] // (c'est la section qui contient le tour) |
|
334 |
isTurnSynchronized = true |
|
335 |
cutCheck = false |
|
336 |
} else { |
|
337 |
// Cas 2.4 : le début du tour est dans la section mais il dépasse. |
|
338 |
if (debug && debugMore) println "Etape 1, Cas 2.4" |
|
339 |
foundSection = sections[i] // (c'est la section qui contient le début du tour) |
|
340 |
isTurnSynchronized = true |
|
341 |
cutCheck = true |
|
342 |
} |
|
269 | 343 |
} |
270 | 344 |
} |
271 | 345 |
break; // stop searching and set iSection to accelerate next search |
272 | 346 |
} |
273 | 347 |
} |
348 |
|
|
274 | 349 |
|
275 |
// Etape 2 : positionne *le* tour dans *le* noeud |
|
350 |
// Etape 2 : on met à jour currentSection et currentNode, |
|
351 |
// ainsi que des variables startSection, endSection, endPreviousSection |
|
276 | 352 |
// (on ne gère qu'un seul tour et un seul noeud à chaque itération de la boucle tour, |
277 |
// puisqu'on a retaillé le tour pour qu'il ne concerne pas plusieurs noeuds) |
|
278 |
if (foundSection != null) { // on complète ou on ajoute une div. |
|
279 |
if (debug) println "found iSection=$iSection ("+sections[iSection][0]+", "+sections[iSection][1]+")" |
|
280 |
if (foundSection != currentSection || currentSection == null) { |
|
281 |
if (currentNode != null && currentNode.@synchronized == "false") { |
|
282 |
def tmp = currentNode.Turn |
|
283 |
currentNode.@endTime = tmp[-1].@endTime |
|
353 |
// puisqu'on retaille le tour pour qu'il ne concerne pas plusieurs noeuds) |
|
354 |
// currentSection == null au début et quand c'est une section non synchronisée |
|
355 |
// currentNode == null au début |
|
356 |
|
|
357 |
if (isTurnSynchronized) { // on sait que foundSection != null |
|
358 |
if (debug && debugMore) println "iSection=$iSection ("+sections[iSection][0]+", "+sections[iSection][1]+") will be used" |
|
359 |
if (foundSection != currentSection) { // le tour ouvre une nouvelle section (au lieu de compléter une section existante) |
|
360 |
if (currentSection != null) { |
|
361 |
endPreviousSection = endSection // faut-il préciser float ? |
|
284 | 362 |
} |
285 |
|
|
363 |
startSection = sections[iSection][0] // currentNode.@startTime |
|
364 |
if (startSection instanceof String) startSection = Float.parseFloat(sections[iSection][0]) //currentNode.@startTime) |
|
365 |
endSection = sections[iSection][1] // currentNode.@endTime |
|
366 |
if (endSection instanceof String) endSection = Float.parseFloat(sections[iSection][1]) //currentNode.@endTime) |
|
367 |
|
|
286 | 368 |
currentSection = foundSection |
287 | 369 |
currentNode = new Node(trsEpisode, "Section", currentSection[2]) |
288 |
if (debug) println " create synchronized turn at start="+foundSection[0] |
|
370 |
if (debug && debugMore) println " create synchronized turn at start="+foundSection[0]
|
|
289 | 371 |
} |
290 |
} else { // on complète ou on ajoute un noeud (div) non synchronisé. |
|
291 |
if (debug) println "not synchronized with current iSection=$iSection ("+sections[iSection][0]+", "+sections[iSection][1]+")" |
|
292 |
if (currentSection != null || currentNode == null) { // create a new unsynchronized section if there is no opened synchronized section or no un-synchronized section |
|
293 |
currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] ) |
|
294 |
currentSection = null; |
|
295 |
if (debug) println " create un-synchronized turn at "+turn.@startTime |
|
372 |
} else { |
|
373 |
if (debug && debugMore) println "not synchronized with current iSection=$iSection ("+sections[iSection][0]+", "+sections[iSection][1]+")" |
|
374 |
if (currentNode == null) { // il n'y a pas de section avant, et il faut créer un noeud puisqu'il n'y en a pas |
|
375 |
if (foundSection == null) { // il n'y a pas de section après |
|
376 |
currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":trsSection.@startTime, "endTime":trsSection.@endTime, "synchronized":"false"] ) |
|
377 |
//BP pas besoin de Float ici ? Dans le code originel on avait un ex. de "startTime":turn.@startTime |
|
378 |
} else { |
|
379 |
// if (currentSection != null) { // a priori ce cas n'est jamais réalisé : quand currentNode == null, currentSection aussi |
|
380 |
// endPreviousSection = endSection // faut-il préciser float ? |
|
381 |
// } |
|
382 |
startSection = sections[iSection][0] // currentNode.@startTime |
|
383 |
if (startSection instanceof String) startSection = Float.parseFloat(sections[iSection][0]) //currentNode.@startTime) |
|
384 |
endSection = sections[iSection][1] // currentNode.@endTime |
|
385 |
if (endSection instanceof String) endSection = Float.parseFloat(sections[iSection][1]) //currentNode.@endTime) |
|
386 |
|
|
387 |
currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":trsSection.@startTime, "endTime":startSection, "synchronized":"false"] ) |
|
388 |
} |
|
389 |
// currentSection = null; |
|
390 |
if (debug && debugMore) println " create un-synchronized turn at "+trsSection.@startTime |
|
391 |
} else { |
|
392 |
if (currentSection != null) { // il y a un noeud et il correspond à une section synchronisée, donc il faut créer un noeud non-synchronisé |
|
393 |
if (foundSection == null) { // il n'y a pas de section après |
|
394 |
currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":endSection, "endTime":trsSection.@endTime, "synchronized":"false"] ) |
|
395 |
} else { |
|
396 |
endPreviousSection = endSection // faut-il préciser float ? |
|
397 |
startSection = sections[iSection][0] // currentNode.@startTime |
|
398 |
if (startSection instanceof String) startSection = Float.parseFloat(sections[iSection][0]) //currentNode.@startTime) |
|
399 |
endSection = sections[iSection][1] // currentNode.@endTime |
|
400 |
if (endSection instanceof String) endSection = Float.parseFloat(sections[iSection][1]) //currentNode.@endTime) |
|
401 |
|
|
402 |
currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":endPreviousSection, "endTime":startSection, "synchronized":"false"] ) |
|
403 |
} |
|
404 |
currentSection = null; |
|
405 |
if (debug && debugMore) println " create un-synchronized turn at $endPreviousSection" |
|
406 |
} |
|
296 | 407 |
} |
297 | 408 |
} |
298 | 409 |
|
299 | 410 |
// Etape 3 : on coupe le tour s'il y a besoin |
300 | 411 |
if (cutCheck && fixTurnsLimits) { |
301 |
if (debug) println " fixing current turn iTurn=$iTurn ($start, $end) )in turns ("+turns.size()+") section ("+trsSection.children().size()+")" |
|
302 |
if (isTurnSynchronized) { |
|
303 |
cutTurn(true) // iSection++ et test avec le **end** de la section quand on coupe |
|
304 |
} else { |
|
305 |
if (foundSection != null) { |
|
306 |
cutTurn(false) // test avec le **start** de la iSection quand on coupe |
|
412 |
if (debug && debugMore) println " fixing current turn iTurn=$iTurn ($start, $end) )in turns ("+turns.size()+") section ("+trsSection.children().size()+")" |
|
413 |
|
|
414 |
def children = turn.children() |
|
415 |
Node newTurnKaNode = null;//new Node(trsEpisode, "Turn", currentSection[2]) |
|
416 |
|
|
417 |
//println "Cut the last turn if necessary" |
|
418 |
|
|
419 |
if (debug && debugMore) println " cut turn and test with end ? $isTurnSynchronized of iSection=$iSection ($startSection, $endSection) at iTurn=$iTurn (${turn.@startTime}, ${turn.@endTime}) children="+turn.children().size() |
|
420 |
for (int iChildren = 0 ; iChildren < children.size() ; iChildren++) { |
|
421 |
//BP boucle gérée comme un genre de pile. iChildren vaut 0 ou est incrémenté à 0 |
|
422 |
//BP (modulo des string qui vont rester dans le tour initial, à la queue leu leu pour la partie coupée : |
|
423 |
//BP je propose de déplacer la ligne pour que les chaînes suivent le reste, |
|
424 |
//BP sous réserves que remove et append peuvent s'appliquer aux enfants chaîne. |
|
425 |
|
|
426 |
def c = children[iChildren] |
|
427 |
// if (c instanceof String) continue; // a Turn contains Sync or w tags |
|
428 |
|
|
429 |
if (newTurnKaNode != null) { // append the remaining children to the new turn |
|
430 |
turn.remove(c) |
|
431 |
newTurnKaNode.append(c) |
|
432 |
if (debug && debugMore) c.@moved="yes" |
|
433 |
iChildren-- |
|
434 |
} else { |
|
435 |
if (c instanceof String) continue; // a Turn contains Sync or w tags |
|
436 |
if ("w".equals(c.name())) { |
|
437 |
def start2 = Float.parseFloat(c.@startTime) |
|
438 |
def end2 = Float.parseFloat(c.@endTime) |
|
439 |
|
|
440 |
boolean test = null |
|
441 |
if (isTurnSynchronized) { |
|
442 |
test = start2 > endSection |
|
443 |
} else { |
|
444 |
test = start2 >= startSection |
|
445 |
} |
|
446 |
|
|
447 |
if (test) { // && Math.abs(start2 - endSection) > turnsCutActivationThreashold // no more needed |
|
448 |
if (debug && debugMore) println " cut with a w at ($start2, $end2) for section ("+startSection+", "+endSection+")" |
|
449 |
newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker]) |
|
450 |
new Node(newTurnKaNode, "Sync", ["time":""+start2]) // TRS |
|
451 |
turns.add(iTurn+1, newTurnKaNode) // set as next turn to process |
|
452 |
// iTurn-- //BP doute sur pertinence -> test sans |
|
453 |
if (debug && debugMore) newTurnKaNode.@created = "yes" |
|
454 |
turn.@endTime = ""+start2; |
|
455 |
turn.remove(c) |
|
456 |
newTurnKaNode.append(c) |
|
457 |
if (debug && debugMore) c.@moved="yes" |
|
458 |
iChildren-- |
|
459 |
} |
|
460 |
} else if ("Sync".equals(c.name())) { |
|
461 |
def start2 = c.@time |
|
462 |
if (start2 instanceof String) start2 = Float.parseFloat(c.@time) |
|
463 |
def end2 = start2 |
|
464 |
|
|
465 |
boolean test = null |
|
466 |
if (isTurnSynchronized) { |
|
467 |
test = start2 > endSection |
|
468 |
} else { |
|
469 |
test = start2 >= startSection |
|
470 |
} |
|
471 |
|
|
472 |
if (test) { // && Math.abs(start2 - endSection) > turnsCutActivationThreashold |
|
473 |
if (debug && debugMore) println " cut with a Sync at ($start2, $end2) for section ("+startSection+", "+endSection+")" |
|
474 |
newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker]) |
|
475 |
turns.add(iTurn+1, newTurnKaNode) |
|
476 |
// iTurn-- //BP doute sur pertinence -> test sans |
|
477 |
if (debug && debugMore) newTurnKaNode.@created = "yes" |
|
478 |
turn.@endTime = ""+start2; |
|
479 |
turn.remove(c) |
|
480 |
newTurnKaNode.append(c) |
|
481 |
if (debug && debugMore) c.@moved="yes" |
|
482 |
iChildren-- |
|
483 |
} |
|
484 |
} else { |
|
485 |
// no time to check |
|
486 |
} |
|
307 | 487 |
} |
308 | 488 |
} |
309 |
} |
|
489 |
} // fin étape 3
|
|
310 | 490 |
|
311 |
if (debug) println " remove turn in initial section ("+trsSection.children().size()+" remaining turns before removing this one)" |
|
491 |
if (debug && debugMore) println " remove turn in initial section ("+trsSection.children().size()+" remaining turns before removing this one)"
|
|
312 | 492 |
//turns.remove(turn) |
313 | 493 |
trsSection.remove(turn) |
314 | 494 |
currentNode.append(turn) |
315 |
//if (debug) println " removed turn in turns ("+turns.size()+") section ("+trsSection.children().size()+")" |
|
495 |
//if (debug && debugMore) println " removed turn in turns ("+turns.size()+") section ("+trsSection.children().size()+")"
|
|
316 | 496 |
} |
317 | 497 |
|
318 |
trsEpisode.remove(trsSection) |
|
498 |
trsEpisode.remove(trsSection) //BP on pourrait éventuellement vérifier que trsSection est bien vide.
|
|
319 | 499 |
|
320 | 500 |
outputDirectory.mkdir() |
321 | 501 |
File outfile = new File(outputDirectory, trsFile.getName()) |
... | ... | |
335 | 515 |
Log.printStackTrace(e) |
336 | 516 |
} |
337 | 517 |
|
338 |
def cutTurn(def testWithSectionEndTime) { |
|
339 |
|
|
340 |
def children = turn.children() |
|
341 |
Node newTurnKaNode = null;//new Node(trsEpisode, "Turn", currentSection[2]) |
|
342 |
|
|
343 |
def startSection = sections[iSection][0] // currentNode.@startTime |
|
344 |
if (startSection instanceof String) startSection = Float.parseFloat(sections[iSection][0]) //currentNode.@startTime) |
|
345 |
def endSection = sections[iSection][1] // currentNode.@endTime |
|
346 |
if (endSection instanceof String) endSection = Float.parseFloat(sections[iSection][1]) //currentNode.@endTime) |
|
347 |
//println "Cut the last turn if necessary" |
|
348 |
|
|
349 |
if (debug) println " cut turn and test with end ? $testWithSectionEndTime of iSection=$iSection ($startSection, $endSection) at iTurn=$iTurn (${turn.@startTime}, ${turn.@endTime}) children="+turn.children().size() |
|
350 |
for (int iChildren = 0 ; iChildren < children.size() ; iChildren++) { |
|
351 |
|
|
352 |
def c = children[iChildren] |
|
353 |
if (c instanceof String) continue; // a Turn contains Sync or w tags |
|
354 |
|
|
355 |
if (newTurnKaNode != null) { // append the remaining children to the new turn |
|
356 |
turn.remove(c) |
|
357 |
newTurnKaNode.append(c) |
|
358 |
if (debug) c.@moved="yes" |
|
359 |
iChildren-- |
|
360 |
} else { |
|
361 |
if ("w".equals(c.name())) { |
|
362 |
def start2 = Float.parseFloat(c.@startTime) |
|
363 |
def end2 = Float.parseFloat(c.@endTime) |
|
364 |
|
|
365 |
boolean test = null |
|
366 |
if (testWithSectionEndTime) { |
|
367 |
test = start2 > endSection |
|
368 |
} else { |
|
369 |
test = start2 >= startSection |
|
370 |
} |
|
371 |
|
|
372 |
if (test) { // && Math.abs(start2 - endSection) > turnsCutActivationThreashold // no more needed |
|
373 |
if (debug) println " cut with a w at ($start2, $end2) for section ("+startSection+", "+endSection+")" |
|
374 |
newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker]) |
|
375 |
new Node(newTurnKaNode, "Sync", ["time":""+start2]) // TRS |
|
376 |
|
|
377 |
turns.add(iTurn+1, newTurnKaNode) // set as next turn to process |
|
378 |
iTurn-- |
|
379 |
if (debug) newTurnKaNode.@created = "yes" |
|
380 |
turn.@endTime = ""+start2; |
|
381 |
turn.remove(c) |
|
382 |
newTurnKaNode.append(c) |
|
383 |
|
|
384 |
if (debug) c.@moved="yes" |
|
385 |
iChildren-- |
|
386 |
} |
|
387 |
} else if ("Sync".equals(c.name())) { |
|
388 |
def start2 = c.@time |
|
389 |
if (start2 instanceof String) start2 = Float.parseFloat(c.@time) |
|
390 |
def end2 = start2 |
|
391 |
|
|
392 |
boolean test = null |
|
393 |
if (testWithSectionEndTime) { |
|
394 |
test = start2 > endSection |
|
395 |
} else { |
|
396 |
test = start2 >= startSection |
|
397 |
} |
|
398 |
|
|
399 |
if (test) { // && Math.abs(start2 - endSection) > turnsCutActivationThreashold |
|
400 |
if (debug) println " cut with a Sync at ($start2, $end2) for section ("+startSection+", "+endSection+")" |
|
401 |
newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker]) |
|
402 |
turns.add(iTurn+1, newTurnKaNode) |
|
403 |
iTurn-- |
|
404 |
if (debug) newTurnKaNode.@created = "yes" |
|
405 |
turn.@endTime = ""+start2; |
|
406 |
turn.remove(c) |
|
407 |
if (debug) c.@moved="yes" |
|
408 |
newTurnKaNode.append(c) |
|
409 |
iChildren-- |
|
410 |
} |
|
411 |
} else { |
|
412 |
// no time to check |
|
413 |
} |
|
414 |
} |
|
415 |
} |
|
416 |
} |
TXM/trunk/org.txm.cooccurrence.core/src/org/txm/cooccurrence/core/functions/Cooccurrence.java (revision 3434) | ||
---|---|---|
32 | 32 |
import java.io.FileOutputStream; |
33 | 33 |
import java.io.IOException; |
34 | 34 |
import java.io.OutputStreamWriter; |
35 |
import java.io.PrintWriter; |
|
36 | 35 |
import java.io.Writer; |
37 | 36 |
import java.util.ArrayList; |
38 | 37 |
import java.util.Collections; |
... | ... | |
58 | 57 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
59 | 58 |
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException; |
60 | 59 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
61 |
import org.txm.searchengine.cqp.corpus.CorpusManager; |
|
62 | 60 |
import org.txm.searchengine.cqp.corpus.Property; |
63 | 61 |
import org.txm.searchengine.cqp.corpus.QueryResult; |
64 | 62 |
import org.txm.searchengine.cqp.corpus.StructuralUnit; |
... | ... | |
1392 | 1390 |
continue; |
1393 | 1391 |
} |
1394 | 1392 |
|
1395 |
if (debug) System.out.println("n="+n); |
|
1396 |
if (debug) System.out.println("o="+o); |
|
1393 |
// if (debug) System.out.println("n="+n);
|
|
1394 |
// if (debug) System.out.println("o="+o);
|
|
1397 | 1395 |
int start = n.getStart(); |
1398 | 1396 |
int size = n.getEnd() - start + 1; |
1399 | 1397 |
// if (size > 0) |
... | ... | |
1412 | 1410 |
continue; |
1413 | 1411 |
} |
1414 | 1412 |
|
1415 |
if (allsignaturesstr.get(position).equals("[1308]")) {
|
|
1416 |
int a = 1+1; |
|
1417 |
} |
|
1413 |
// if (allsignaturesstr.get(position).equals("[1304]") || position == 13313) {
|
|
1414 |
// int a = 1+1;
|
|
1415 |
// }
|
|
1418 | 1416 |
|
1419 | 1417 |
int dist; |
1420 | 1418 |
if (position < m.getStart()) { |
... | ... | |
1424 | 1422 |
dist = position - m.getEnd() - 1; |
1425 | 1423 |
} |
1426 | 1424 |
else { // the n match is in the m match !? |
1427 |
System.out.println("Warning: the n match is in the m match ? " + n + " " + m);
|
|
1425 |
Log.warning("Warning: the n match is in the m match ? " + n + " " + m);
|
|
1428 | 1426 |
dist = 0; |
1429 | 1427 |
} |
1430 | 1428 |
|
1431 | 1429 |
if (debug) System.out.println(" p="+position+" sign="+allsignaturesstr.get(position)+" dist="+dist); |
1432 |
if (positionsDistances.containsKey(position) && positionsDistances.get(position) < dist) {
|
|
1430 |
if (positionsDistances.containsKey(position) && positionsDistances.get(position) > dist) {
|
|
1433 | 1431 |
positionsDistances.put(position, dist); |
1434 |
if (debug) System.out.println(" using the higher distance"); |
|
1435 |
} else { |
|
1432 |
// if (debug) System.out.println(" using the higher distance");
|
|
1433 |
} else if (!positionsDistances.containsKey(position)){
|
|
1436 | 1434 |
positionsDistances.put(position, dist); |
1437 |
if (debug) System.out.println(" add"); |
|
1435 |
// if (debug) System.out.println(" add");
|
|
1438 | 1436 |
} |
1439 | 1437 |
|
1440 | 1438 |
} |
... | ... | |
1465 | 1463 |
counts.put(signaturestr, 1); |
1466 | 1464 |
} |
1467 | 1465 |
|
1468 |
if (allsignaturesstr.get(position).equals("[1308]")) {
|
|
1469 |
System.out.println("dist= "+dist+" distances="+distances.get(signaturestr)+" counts="+counts.get(signaturestr)); |
|
1470 |
} |
|
1466 |
// if (allsignaturesstr.get(position).equals("[1304]")) {
|
|
1467 |
// System.out.println("dist= "+dist+" distances="+distances.get(signaturestr)+" counts="+counts.get(signaturestr));
|
|
1468 |
// }
|
|
1471 | 1469 |
|
1472 | 1470 |
// if ("[1599]".equals(signaturestr)) { |
1473 | 1471 |
// System.out.println("sign="+allsignaturesstr.get(position)+" p=" + position + " c=" + counts.get(signaturestr) + " d=" + dist + " total(d)=" + distances.get(signaturestr)); |
... | ... | |
1590 | 1588 |
indexfreqs.get(specifrownames[ii]), scores[ii][1], // freq |
1591 | 1589 |
((float) (distances.get(signaturestr) / counts.get(signaturestr))), // mean distance |
1592 | 1590 |
-1); |
1593 |
cline.debug = "";//" "+signaturestr;
|
|
1591 |
cline.debug = " "+signaturestr; |
|
1594 | 1592 |
// System.out.println("Line: "+specifrownames[ii]+" dists="+distances.get(signaturestr)+" counts="+counts.get(signaturestr)+" mean="+((float) (distances.get(signaturestr) / counts.get(signaturestr)))); |
1595 | 1593 |
// select the line |
1596 | 1594 |
if (cline.freq >= this.pFminFilter && cline.nbocc >= this.pFCoocFilter && cline.score >= this.pScoreMinFilter) { |
TXM/trunk/org.txm.annotation.kr.rcp/src/org/txm/annotation/kr/rcp/concordance/SimpleKRAnnotation.java (revision 3434) | ||
---|---|---|
216 | 216 |
|
217 | 217 |
annotationArea = new GLComposite(parent, SWT.NONE, KRAnnotationUIMessages.concordanceAnnotationArea); |
218 | 218 |
annotationArea.getLayout().numColumns = 12; |
219 |
annotationArea.getLayout().horizontalSpacing = 2; |
|
219 | 220 |
annotationArea.setLayoutData(new GridData(SWT.FILL, SWT.FILL, true, false)); |
220 | 221 |
|
221 | 222 |
addRemoveCombo = new Combo(annotationArea, SWT.READ_ONLY); |
... | ... | |
249 | 250 |
public void widgetDefaultSelected(SelectionEvent e) {} |
250 | 251 |
}); |
251 | 252 |
GridData gdata = new GridData(SWT.CENTER, SWT.CENTER, false, false); |
252 |
gdata.widthHint = 90;
|
|
253 |
//gdata.widthHint = 100;
|
|
253 | 254 |
addRemoveCombo.setLayoutData(gdata); |
254 | 255 |
|
255 | 256 |
withLabel = new Label(annotationArea, SWT.NONE); |
... | ... | |
338 | 339 |
affectCombo.setItems(items2); |
339 | 340 |
affectCombo.select(0); |
340 | 341 |
gdata = new GridData(SWT.CENTER, SWT.CENTER, false, false); |
341 |
gdata.widthHint = 140;
|
|
342 |
//gdata.widthHint = 200;
|
|
342 | 343 |
affectCombo.setLayoutData(gdata); |
343 | 344 |
|
344 | 345 |
affectAnnotationButton = new Button(annotationArea, SWT.PUSH); |
TXM/trunk/org.txm.annotation.kr.rcp/src/org/txm/annotation/kr/rcp/concordance/KRAnnotation.java (revision 3434) | ||
---|---|---|
240 | 240 |
|
241 | 241 |
annotationArea = new GLComposite(parent, SWT.NONE, KRAnnotationUIMessages.concordanceAnnotationArea); |
242 | 242 |
annotationArea.getLayout().numColumns = 12; |
243 |
annotationArea.getLayout().horizontalSpacing = 2; |
|
243 | 244 |
annotationArea.setLayoutData(new GridData(SWT.FILL, SWT.FILL, true, false)); |
244 | 245 |
|
245 | 246 |
addRemoveCombo = new Combo(annotationArea, SWT.READ_ONLY); |
... | ... | |
275 | 276 |
public void widgetDefaultSelected(SelectionEvent e) {} |
276 | 277 |
}); |
277 | 278 |
GridData gdata = new GridData(SWT.CENTER, SWT.CENTER, false, false); |
278 |
gdata.widthHint = 90; |
|
279 |
//gdata.widthHint = 90;
|
|
279 | 280 |
addRemoveCombo.setLayoutData(gdata); |
280 | 281 |
|
281 | 282 |
withLabel = new Label(annotationArea, SWT.NONE); |
... | ... | |
545 | 546 |
affectCombo.setItems(items2); |
546 | 547 |
affectCombo.select(0); |
547 | 548 |
gdata = new GridData(SWT.CENTER, SWT.CENTER, false, false); |
548 |
gdata.widthHint = 140; |
|
549 |
//gdata.widthHint = 140;
|
|
549 | 550 |
affectCombo.setLayoutData(gdata); |
550 | 551 |
|
551 | 552 |
affectAnnotationButton = new Button(annotationArea, SWT.PUSH); |
TXM/trunk/org.txm.annotation.kr.rcp/src/org/txm/annotation/kr/rcp/concordance/WordAnnotationToolbar.java (revision 3434) | ||
---|---|---|
852 | 852 |
} |
853 | 853 |
|
854 | 854 |
annotationColumn.setText(type.getName()); |
855 |
|
|
855 | 856 |
annotations.setViewAnnotation(type); |
856 | 857 |
annotations.setAnnotationOverlap(true); |
857 | 858 |
try { |
... | ... | |
1002 | 1003 |
|
1003 | 1004 |
// update annotation column width |
1004 | 1005 |
if (annotationArea != null) { |
1005 |
annotationColumn.pack(); |
|
1006 |
//annotationColumn.pack();
|
|
1006 | 1007 |
|
1007 | 1008 |
annotationColumn.setResizable(true); |
1008 | 1009 |
} |
Formats disponibles : Unified diff