Révision 3411

TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/AddSectionsFromTableV2Macro.groovy (revision 3411)
1
package org.txm.macro.transcription
2

  
3
import java.nio.charset.Charset
4

  
5
import java.time.LocalTime
6
import java.time.format.DateTimeFormatter
7

  
8
import org.eclipse.core.internal.localstore.IsSynchronizedVisitor
9
import org.txm.utils.*
10
import org.txm.utils.logger.*
11

  
12
@Field @Option(name="metadataFile", usage="Tableau des metadonnées de sections", widget="FileOpen", required=true, def="")
13
		File metadataFile;
14

  
15
@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS", widget="Folder", required=true, def="")
16
		File trsDirectory;
17

  
18
@Field @Option(name="joinTRSColumn", usage="Colonne de jointure de transcription", widget="String", required=true, def="Lien notice principale")
19
		def joinTRSColumn
20

  
21
@Field @Option(name="startTimeColumn", usage="Colonne de timing de début de section", widget="String", required=true, def="antract_debut")
22
		def startTimeColumn = "antract_debut"
23

  
24
@Field @Option(name="endTimeColumn", usage="Colonne de timing de fin de section", widget="String", required=true, def="antract_fin")
25
		def endTimeColumn = "antract_fin"
26

  
27
@Field @Option(name="typeColumns", usage="Colonnes des métadonnées de type de section", widget="String", required=true, def="Titre propre")
28
		def typeColumns
29

  
30
@Field @Option(name="topicColumns", usage="Colonnes des métadonnées de topic de section", widget="String", required=true, def="Date de diffusion")
31
		def topicColumns
32

  
33
@Field @Option(name="metadataColumns", usage="Colonnes de metadonnées de section", widget="String", required=true, def="Titre propre;Date de diffusion;Identifiant de la notice;Notes du titre;Type de date;Durée;Genre;Langue VO / VE;Nature de production;Producteurs (Aff.);Thématique;Nom fichier segmenté (info);antract_video;antract_debut;antract_fin;antract_duree;antract_tc_type;antract_tc_date;Résumé;Séquences;Descripteurs (Aff. Lig.);Générique (Aff. Lig.)")
34
		def metadataColumns
35

  
36
@Field @Option(name="metadataColumnsGroups", usage="Colonnes des gruopes de metadonnées de section", widget="String", required=true, def="metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;secondary;secondary;secondary;secondary;secondary;secondary;secondary;text;text;text;text")
37
		def metadataColumnsGroups
38

  
39
@Field @Option(name="fixSectionsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true")
40
		def fixSectionsLimits
41

  
42
@Field @Option(name="sectionsMergeActivationThreashold", usage="marge d'erreur de corrections des limites de sections", widget="Float", required=true, def="1.0")
43
		def sectionsMergeActivationThreashold
44

  
45
@Field @Option(name="fixTurnsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true")
46
		def fixTurnsLimits
47

  
48
@Field @Option(name="turnsCutActivationThreashold", usage="marge d'erreur de corrections des limites de tours", widget="Float", required=true, def="0.1")
49
		def turnsCutActivationThreashold
50

  
51
@Field @Option(name="debug", usage="show debug messages", widget="String", required=true, def="false")
52
		def debug
53

  
54
// Ask the user for the macro parameters; stop silently when the dialog is cancelled.
if (!ParametersDialog.open(this)) return;
// The "debug" option is declared with a String widget (and may be passed as a boolean
// by a calling macro): normalise it to a real boolean instead of forcing debug mode on.
debug = "true".equals(String.valueOf(debug).trim())
56

  
57
typeColumns = typeColumns.split(";")
58
topicColumns = topicColumns.split(";")
59
metadataColumns = metadataColumns.split(";")
60
metadataColumnsGroups = metadataColumnsGroups.split(";")
61

  
62
if (metadataColumns.size() != metadataColumnsGroups.size()) {
63
	println "ERROR in metadata declarations&groups:"
64
	println "COLUMNS: "+metadataColumns
65
	println "GROUPS : "+metadataColumnsGroups
66
	return
67
}
68

  
69
if (!trsDirectory.exists()) {
70
	println "$trsDirectory not found"
71
	return
72
}
73

  
74
println "Loading data from $metadataFile..."
75
TableReader reader = new TableReader(metadataFile)//, "\t".charAt(0), Charset.forName("UTF-8")
76
reader.readHeaders()
77
def header = reader.getHeaders()
78
if (!header.contains(joinTRSColumn)) {
79
	println "No TRS ID $joinTRSColumn column found"
80
	return
81
}
82
if (!header.contains(startTimeColumn)) {
83
	println "No start time $startTimeColumn column found"
84
	return
85
}
86
if (!header.contains(endTimeColumn)) {
87
	println "No end time $endTimeColumn column found"
88
	return
89
}
90
// Every configured metadata, type and topic column must be present in the table header.
// Each loop tests its own column name (col), so that a missing column is reported
// here instead of failing later while reading the records.
for (def col : metadataColumns) {
	if (!header.contains(col)) {
		println "No $col column found"
		return
	}
}
for (def col : typeColumns) {
	if (!header.contains(col)) {
		println "No type $col column found"
		return
	}
}
for (def col : topicColumns) {
	if (!header.contains(col)) {
		println "No topic $col column found"
		return
	}
}
108

  
109
File outputDirectory = new File(trsDirectory, "out")
110
println "Writing result to $outputDirectory..."
111

  
112
dateTimeFormatter = DateTimeFormatter.ISO_LOCAL_TIME
113
/**
 * Converts a timecode string into a number of seconds.
 *
 * The text before the last ":" is parsed as a local time with the script-level
 * dateTimeFormatter; the integer after the last ":" is a frame count, converted
 * to seconds at 25 frames per second. A frame count above 25 is ignored (treated as 0).
 *
 * @param str the timecode string; may be null
 * @return the time in seconds (hours, minutes and seconds plus the frame fraction),
 *         or null when str is null or contains no ":"
 */
def strTotime(def str) {
	if (str == null) {
		return null
	}
	int separator = str.lastIndexOf(":")
	if (separator == -1) {
		return null
	}
	
	// locals only: nothing is written to the script binding
	int bonusFrame = Integer.parseInt(str.substring(separator + 1))
	if (bonusFrame > 25) {
		bonusFrame = 0
	}
	
	LocalTime time1 = LocalTime.parse(str.substring(0, separator), dateTimeFormatter)
	int totalSeconds = (time1.getHour()*60*60) + (time1.getMinute()*60) + time1.getSecond()
	
	// Groovy integer division yields a decimal value, so the frame fraction is kept
	return totalSeconds + (bonusFrame/25)
}
131

  
132
try {
133
	def sectionGroupsToInsert = [:]
134
	println "Reading data..."
135
	while (reader.readRecord()) { // loading & sorting sections
136
		String id = reader.get(joinTRSColumn).trim()
137
		if (id.endsWith(".mp4")) id = id.substring(0, id.length()-4)
138
		if (id.length() == 0) continue;
139
		
140
		if (!sectionGroupsToInsert.containsKey(id)) {
141
			sectionGroupsToInsert[id] = []
142
		}
143
		def section = sectionGroupsToInsert[id]
144
		
145
		if (reader.get(startTimeColumn) != null && reader.get(startTimeColumn).length() > 0) { // ignore non timed sections
146
			
147
			def m = [:]
148
			
149
			for (def todo : ["topic":topicColumns, "type":typeColumns]) {
150
				def data = []
151
				for (def col : todo.value) {
152
					if (reader.get(col).trim().length() > 0) {
153
						data << reader.get(col).trim().replace("\n", "")
154
					}
155
				}
156
				m[todo.key] = data.join("\t")
157
			}
158
			def metadataList = []
159
			def metadataGroupList = []
160
			for (int i = 0 ;  i < metadataColumns.size() ; i++) {
161
				def col = metadataColumns[i]
162
				String c = AsciiUtils.buildAttributeId(col)
163
				m[c] = reader.get(col)
164
				metadataList << c
165
				metadataGroupList << metadataColumnsGroups[i]
166
			}
167
			m["metadata"] = metadataList.join("|")
168
			m["metadata_groups"] = metadataGroupList.join("|")
169
			
170
			m["startTime"] = strTotime(reader.get(startTimeColumn))
171
			m["endTime"] = strTotime(reader.get(endTimeColumn))
172
			m["synchronized"] = "true"
173
			
174
			section << [m["startTime"], m["endTime"], m]
175
		}
176
	}
177
	
178
	println "Inserting sections... "+sectionGroupsToInsert.size()
179
	
180
	ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size())
181
	for (String id : sectionGroupsToInsert.keySet()) {
182
		
183
		File trsFile = new File(trsDirectory, id+".trs")
184
		if (!trsFile.exists()) {
185
			cpb.tick()
186
			continue
187
		}
188
		
189
		if (debug) println "== $id =="
190
		else cpb.tick()
191
		
192
		//println "Processing $id..."
193
		sections = sectionGroupsToInsert[id]
194
		sections = sections.sort() { a, b -> a[0] <=> b[0] ?: a[1] <=> -b[1] } // negative second test for sections inclusion
195
		
196
		if (fixSectionsLimits) {
197
			if (debug) println "Fixing sections of $id"
198
			for (int iSection = 1 ; iSection < sections.size() ; iSection++) {
199
				//println sections[iSection]
200
				if (Math.abs(sections[iSection][0] - sections[iSection - 1][1]) < sectionsMergeActivationThreashold) {
201
					if (debug) println "s=$iSection start <- end : "+sections[iSection][0]+ " <- "+sections[iSection - 1][1]
202
					sections[iSection][0] = sections[iSection - 1][1] // fix the start time with the previous section end time
203
				}
204
			}
205
		}
206
		
207
		// Open input file
208
		slurper = new groovy.util.XmlParser(false, true, true);
209
		slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false) // allow DTD declaration
210
		slurper.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "all"); // allow to read DTD from local file
211
		trs = slurper.parse(trsFile.toURI().toString())
212
		trsEpisodes = trs.Episode // 1
213
		if (trsEpisodes.size() > 1) {
214
			println "Error: multiple Episode node in $trsFile"
215
			continue
216
		}
217
		def trsEpisode = trsEpisodes[0]
218
		def trsSections =  trs.Episode.Section // 1
219
		if (trsSections.size() > 1) {
220
			println "Error: multiple Section node in $trsFile"
221
			continue
222
		}
223
		
224
		trsSection = trsSections[0]
225
		
226
		turns = trsSection.Turn
227
		newSections = []
228
		iSection = 0;
229
		currentSection = null
230
		currentNode = null
231
		
232
		foundSection=null
233
		isTurnSynchronized=false
234
		cutCheck=false
235
		
236
		// boucle sur les tours dans l'ordre
237
		for (iTurn = 0 ; iTurn < turns.size() ; iTurn++) {
238
			
239
			turn = turns[iTurn]
240
			start = Float.parseFloat(turn.@startTime)
241
			end = Float.parseFloat(turn.@endTime)
242
			//println "Turn: $iTurn ($start, $end)"
243
			
244
			// Etape 1 : y aura-t-il besoin de couper le tour, et dans quelle section est le tour (ou sa première partie)
245
			foundSection = null;
246
			for (int i = iSection ; i < sections.size() ; i++) {
247
				// if section_end < turn_start OU |turn_start - section_end| < turn_threshold
248
				if (sections[i][1] < start || Math.abs(start - sections[i][1]) < turnsCutActivationThreashold) { // Turn is before section
249
					// Cas 1 : la section est complètement avant (modulo la marge)
250
				} else {
251
					// Cas 2 : on est arrivés à la section à considérer
252
					iSection = i
253
					// if section_start > turn_end OU |section_start - turn_end| < turn_threshold
254
					if (sections[i][0] > end || Math.abs(sections[i][0] - end) < turnsCutActivationThreashold) { // Section is before section
255
						// Cas 2.1 : la section est complètement après (modulo la marge) (et les suivantes le seront aussi)
256
						foundSection=null
257
						isTurnSynchronized=false
258
						cutCheck=false
259
					} else {
260
						// if |section_start - turn_start| > turn_threshold
261
						if (Math.abs(start - sections[i][0]) > turnsCutActivationThreashold) { // Section is before section
262
							// Cas 2.2 : la section commence significativement après le début du tour (le début est non synchronisé)
263
							foundSection=sections[i] // (c'est la première section rencontrée, mais elle sera pour le tour suivant)
264
							isTurnSynchronized=false
265
							cutCheck=true
266
						} else {
267
							// Cas 2.3 : le début du tour est dans la section (on n'a pas besoin de chercher d'autres sections car si ce n'est pas la seule on coupera le tour et ce sera un autre tour).
268
							foundSection=sections[i]  // (c'est la section qui commence le tour, au moins)
269
							isTurnSynchronized=true
270
							cutCheck=true
271
						}
272
					}
273
					break; // stop searching and set iSection to accelerate next search
274
				}
275
			}
276
			
277
			// Etape 3 : positionne *le* tour dans *le* noeud (cf. ~ l.247-264 ?)
278
			// (on ne gère qu'un seul tour et un seul noeud à chaque itération de la boucle tour,
279
			// puisqu'on a retaillé le tour pour qu'il ne concerne pas plusieurs noeuds)
280
			if (foundSection != null) {  // on complète ou on ajoute une div.
281
				
282
				if (foundSection != currentSection || currentSection == null) {
283
					if (currentNode != null && currentNode.@synchronized == "false") {
284
						def tmp = currentNode.Turn
285
						currentNode.@endTime = tmp[-1].@endTime
286
					}
287
					
288
					currentSection = foundSection
289
					currentNode = new Node(trsEpisode, "Section", currentSection[2])
290
				}
291
			} else {  // on complète ou on ajoute un noeud (div) non synchronisé.
292
				
293
				currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":sections[iSection][0], "endTime":sections[iSection][1], "synchronized":"false"] )
294
				currentSection = null;
295
			}
296
			
297
			// Etape 2 : on coupe le tour s'il y a besoin (cf. ~l.267-315 ?)
298
			if (cutCheck) {
299
				if (isTurnSynchronized) {
300
					cutTurn(true) // iSection++ quand on coupe
301
				} else {
302
					if (foundSection != null) {
303
						cutTurn(false)
304
					}
305
				}
306
			}
307
			
308
	
309
			
310
			trsSection.remove(turn)
311
			currentNode.append(turn)
312
		}
313
		
314
		outputDirectory.mkdir()
315
		File outfile = new File(outputDirectory, trsFile.getName())
316
		outfile.withWriter("UTF-8") { writer ->
317
			writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
318
			def printer = new groovy.util.XmlNodePrinter(new PrintWriter(writer))
319
			printer.setPreserveWhitespace(true)
320
			printer.print(trs)
321
		}
322
	}
323
	cpb.done()
324
	reader.close()
325
	println "Done."
326
	
327
} catch(Exception e) {
328
	println "Error: "+e
329
	Log.printStackTrace(e)
330
}
331

  
332
/**
 * Splits the current script-level turn when part of it lies after the end of the current section node.
 *
 * Works on script-level variables rather than parameters: reads turn, currentNode, trsSection,
 * debug and turnsCutActivationThreashold, and modifies turns, iTurn and iSection.
 *
 * The children of turn are scanned in order. The first "w" or "Sync" child whose start time is
 * greater than the @endTime of currentNode by more than turnsCutActivationThreashold triggers the cut:
 * a new Turn node is created under trsSection (startTime = that child's time, endTime = the former
 * end time of turn, same speaker), inserted in turns at index iTurn, and iTurn is decremented.
 * The @endTime of turn is set to the cut time. The triggering child and every following
 * non-String child are moved from turn to the new Turn. String children are skipped and stay in turn.
 * When debug is set, created/moved marker attributes are added to the affected nodes.
 *
 * @param incrementISection when true, iSection is incremented at the moment the cut is made
 */
def cutTurn(def incrementISection) {
333
	def children = turn.children()
334
	Node newTurnKaNode = null;//new Node(trsEpisode, "Turn", currentSection[2])
335
	
336
	// section limits may be stored as String or as number: parse only when they are Strings
	def startSection = currentNode.@startTime
337
	if (startSection instanceof String) startSection = Float.parseFloat(currentNode.@startTime)
338
	def endSection = currentNode.@endTime
339
	if (endSection instanceof String) endSection = Float.parseFloat(currentNode.@endTime)
340
	//println "Cut the last turn if necessary"
341
	for (int iChildren = 0 ; iChildren < children.size() ; iChildren++) {
342
		
343
		def c = children[iChildren]
344
		// text children carry no timing and are never moved
		if (c instanceof String) continue;
345
		
346
		if (newTurnKaNode != null) { // append the remaining children to the new turn
347
			turn.remove(c)
348
			newTurnKaNode.append(c)
349
			if (debug) c.@moved="yes"
350
			// presumably compensates for the child just removed from the list being iterated - TODO confirm
			iChildren--
351
		} else {
352
			if ("w".equals(c.name())) {
353
				def start2 = Float.parseFloat(c.@startTime)
354
				def end2 = Float.parseFloat(c.@endTime)
355
				
356
				// cut only when the word starts after the section end by more than the threshold
				if (start2 > endSection && Math.abs(start2 - endSection) > turnsCutActivationThreashold) {
357
					if (debug) println "cut with a w at ($start2, $end2) for section ("+startSection+", "+endSection+")"
358
					newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker])
359
					turns.add(iTurn, newTurnKaNode)
360
					iTurn--
361
					if (debug) newTurnKaNode.@created = "yes"
362
					// the original turn now ends where the new turn starts
					turn.@endTime = ""+start2;
363
					turn.remove(c)
364
					newTurnKaNode.append(c)
365
					
366
					if (debug) c.@moved="yes"
367
					if (incrementISection) iSection++
368
					iChildren--
369
				}
370
			} else if ("Sync".equals(c.name())) {
371
				// a Sync has a single time, used as both start and end
				def start2 = c.@time
372
				if (start2 instanceof String) start2 = Float.parseFloat(c.@time)
373
				def end2 = start2
374
				
375
				// same cut condition as for a word
				if (start2 > endSection && Math.abs(start2 - endSection) > turnsCutActivationThreashold) {
376
					if (debug) println "cut with a Sync at ($start2, $end2) for section ("+startSection+", "+endSection+")"
377
					newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker])
378
					turns.add(iTurn, newTurnKaNode)
379
					iTurn--
380
					if (debug) newTurnKaNode.@created = "yes"
381
					turn.@endTime = ""+start2;
382
					turn.remove(c)
383
					if (debug) c.@moved="yes"
384
					if (incrementISection) iSection++
385
					newTurnKaNode.append(c)
386
					iChildren--
387
				}
388
			} else {
389
				// no time to check
390
			}
391
		}
392
	}
393
}
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/AddSectionsFromTableMacro.groovy (revision 3411)
33 33

  
34 34
@Field @Option(name="metadataColumnsGroups", usage="Colonnes des gruopes de metadonnées de section", widget="String", required=true, def="metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;secondary;secondary;secondary;secondary;secondary;secondary;secondary;text;text;text;text")
35 35
		def metadataColumnsGroups
36
		
36

  
37 37
@Field @Option(name="fixSectionsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true")
38 38
		def fixSectionsLimits
39
		
39

  
40 40
@Field @Option(name="sectionsMergeActivationThreashold", usage="marge d'erreur de corrections des limites de sections", widget="Float", required=true, def="1.0")
41 41
		def sectionsMergeActivationThreashold
42 42
		
43
@Field @Option(name="turnsMergeActivationThreashold", usage="marge d'erreur de corrections des limites de tours", widget="Float", required=true, def="0.1")
44
		def turnsMergeActivationThreashold
43
@Field @Option(name="fixTurnsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true")
44
		def fixTurnsLimits
45 45

  
46
@Field @Option(name="turnsCutActivationThreashold", usage="marge d'erreur de corrections des limites de tours", widget="Float", required=true, def="0.1")
47
		def turnsCutActivationThreashold
48

  
46 49
@Field @Option(name="debug", usage="show debug messages", widget="String", required=true, def="false")
47 50
		def debug
48 51

  
49 52
if (!ParametersDialog.open(this)) return;
50
debug = "true".equals(debug);
53
debug = true
51 54

  
52 55
typeColumns = typeColumns.split(";")
53 56
topicColumns = topicColumns.split(";")
......
127 130
try {
128 131
	def sectionGroupsToInsert = [:]
129 132
	println "Reading data..."
130
	while (reader.readRecord()) {
133
	while (reader.readRecord()) { // loading & sorting sections
131 134
		String id = reader.get(joinTRSColumn).trim()
132 135
		if (id.endsWith(".mp4")) id = id.substring(0, id.length()-4)
133 136
		if (id.length() == 0) continue;
......
175 178
	ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size())
176 179
	for (String id : sectionGroupsToInsert.keySet()) {
177 180
		
178
		
179 181
		File trsFile = new File(trsDirectory, id+".trs")
180 182
		if (!trsFile.exists()) {
181 183
			cpb.tick()
......
193 195
			if (debug) println "Fixing sections of $id"
194 196
			for (int iSection = 1 ; iSection < sections.size() ; iSection++) {
195 197
				//println sections[iSection]
196
				if (sections[iSection][0] - sections[iSection - 1][1] > sectionsMergeActivationThreashold) {
198
				if (Math.abs(sections[iSection][0] - sections[iSection - 1][1]) < sectionsMergeActivationThreashold) {
197 199
					if (debug) println "s=$iSection start <- end : "+sections[iSection][0]+ " <- "+sections[iSection - 1][1]
198 200
					sections[iSection][0] = sections[iSection - 1][1] // fix the start time with the previous section end time
199 201
				}
......
207 209
		def trs = slurper.parse(trsFile.toURI().toString())
208 210
		def trsEpisodes = trs.Episode // 1
209 211
		if (trsEpisodes.size() > 1) {
210
			println "multiple Episode node in $trsFile"
212
			println "Error: multiple Episode node in $trsFile"
211 213
			continue
212 214
		}
213 215
		def trsEpisode = trsEpisodes[0]
214 216
		def trsSections =  trs.Episode.Section // 1
215 217
		if (trsSections.size() > 1) {
216
			println "multiple Section node in $trsFile"
218
			println "Error: multiple Section node in $trsFile"
217 219
			continue
218 220
		}
219 221
		def trsSection = trsSections[0]
......
231 233
			def end = Float.parseFloat(turn.@endTime)
232 234
			//println "Turn: $iTurn ($start, $end)"
233 235
			
234
			def found = null;
236
			def foundSection = null;
235 237
			for (int i = iSection ; i < sections.size() ; i++) {
236 238
				if (end < sections[i][0]) { // Turn is before section
237 239
					
238
				} else if (sections[i][1] < start) { // Turn is before section
240
				} else if (sections[i][1] < start) { // Section is before section
239 241
					
240 242
				} else {
241
					found = sections[i]
243
					foundSection = sections[i]
242 244
					iSection = i
243 245
					break; // stop searching and set iSection to accelerate next search
244 246
				}
245 247
			}
246 248
			
247
			if (found == null) {
249
			if (foundSection == null) {
248 250
				if (currentSection != null || currentNode == null) {
249 251
					currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] )
250 252
					currentSection = null;
251 253
				}
252 254
			} else {
253
				if (found != currentSection) {
255
				if (foundSection != currentSection) {
254 256
					if (currentNode != null && currentNode.@synchronized == "false") {
255 257
						def tmp = currentNode.Turn
256 258
						currentNode.@endTime = tmp[-1].@endTime
257 259
					}
258 260
					
259
					currentSection = found
261
					currentSection = foundSection
260 262
					currentNode = new Node(trsEpisode, "Section", currentSection[2])
261 263
				}
262 264
			}
265
			
263 266
			trsSection.remove(turn)
264 267
			currentNode.append(turn)
265
			
266
			
267
			if (found) {
268
		}
269
		
270
		//remove the initial section which is empty now or not
271
		trsEpisode.remove(trsSection)
272
		
273
		if (fixTurnsLimits) {
274
			if (debug) println "Fixing Turn limits..."
275
			def turnToInsert = null;
276
			sections = trs.Episode.Section
277
			for (int i = 0 ; i < sections.size() -1 ; i++) { // browse created sections but stop before the last one (whichc can not be fixed)
278
				
279
				def section = sections[i]
280
				if (turnToInsert != null) {
281
					if (debug) println "Moving part-of turn: "+turnToInsert+" in section ("+section.@startTime+", "+section.@endTime+")"
282
					section.children().add(0, turnToInsert) // insert the slited part of the turn in the section
283
				}
284
				
285
				def startSection = section.@startTime // Float.parseFloat(section.@startTime)
286
				def endSection = section.@endTime //  Float.parseFloat(section.@endTime)
287
				if (startSection instanceof String) startSection = Float.parseFloat(section.@startTime)
288
				if (endSection instanceof String) endSection = Float.parseFloat(section.@endTime)
289
					
290
				turnToInsert = null
291
				
292
				turns = section.Turn
293
				if (turns.size() == 0) continue;
294
				
295
				def turn = turns[-1]
296
				
268 297
				def children = turn.children()
269 298
				Node newTurnKaNode = null;//new Node(trsEpisode, "Turn", currentSection[2])
270 299
				//println "Cut the last turn if necessary"
271 300
				for (int iChildren = 0 ; iChildren < children.size() ; iChildren++) {
301
					
272 302
					def c = children[iChildren]
303
					if (c instanceof String) continue;
304
					
273 305
					if (newTurnKaNode != null) {
274 306
						turn.remove(c)
275 307
						newTurnKaNode.append(c)
......
280 312
							def start2 = Float.parseFloat(c.@startTime)
281 313
							def end2 = Float.parseFloat(c.@endTime)
282 314
							
283
							if (start2 > sections[iSection][1] && Math.abs(start2 - sections[iSection][1]) > turnsMergeActivationThreashold) {
284
								if (debug) println "cut with w at [$start2, $end2] for section "+sections[iSection][1]
315
							if (start2 > endSection && Math.abs(start2 - endSection) > turnsCutActivationThreashold) {
316
								if (debug) println "cut with a w at [$start2, $end2] for section ("+startSection+", "+endSection+")"
285 317
								newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker])
286 318
								if (debug) newTurnKaNode.@created = "yes"
287 319
								turn.@endTime = ""+start2;
288 320
								turn.remove(c)
289 321
								newTurnKaNode.append(c)
290 322
								if (debug) c.@moved="yes"
291
								turns.add(iTurn+1, newTurnKaNode)
292 323
								iChildren--
293 324
							}
294 325
						} else if ("Sync".equals(c.name())) {
295 326
							def start2 = Float.parseFloat(c.@time)
296 327
							def end2 = Float.parseFloat(c.@time)
297 328
							
298
							if (start2 > sections[iSection][1] && Math.abs(start2 - sections[iSection][1]) > turnsMergeActivationThreashold) {
299
								if (debug) println "cut with Sync at [$start2, $end2] for section "+sections[iSection][1]
329
							if (start2 > endSection && Math.abs(start2 - endSection) > turnsCutActivationThreashold) {
330
								if (debug) println "cut with a Sync at [$start2, $end2] for section "+endSection
300 331
								newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker])
301 332
								if (debug) newTurnKaNode.@created = "yes"
302 333
								turn.@endTime = ""+start2;
303 334
								turn.remove(c)
304 335
								if (debug) c.@moved="yes"
305 336
								newTurnKaNode.append(c)
306
								turns.add(iTurn+1, newTurnKaNode)
307 337
								iChildren--
308 338
							}
309 339
						} else {
......
311 341
						}
312 342
					}
313 343
				}
344
				
345
				turnToInsert = newTurnKaNode
346
				
314 347
			}
315 348
		}
316 349
		
317
		//remove the initial section
318
		trsEpisode.remove(trsSection)
319
		
320 350
		outputDirectory.mkdir()
321 351
		File outfile = new File(outputDirectory, trsFile.getName())
322 352
		outfile.withWriter("UTF-8") { writer ->
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/nlp/TT2XMLInDirectoryMacro.groovy (revision 3411)
1
package org.txm.macro.nlp;
2
	
3
	import org.kohsuke.args4j.*
4
	import groovy.transform.Field
5
	import java.nio.charset.Charset
6
	import org.txm.rcpapplication.swt.widget.parameters.*
7
	import org.txm.utils.*
8
	import javax.xml.stream.*
9
	
10
	@Field @Option(name="inputDirectory", usage="TXT directory", widget="Folder", required=true, def="dir")
11
	File inputDirectory
12
	
13
	@Field @Option(name="encoding", usage="File encoding", widget="String", required=false, def="UTF-8")
14
	String encoding
15
	
16
	@Field @Option(name="debug", usage="Debug mode", widget="Boolean", required=false, def="false")
17
	Boolean debug
18
	
19
	if (!ParametersDialog.open(this)) return
20
	
21
	encoding = encoding.trim()
22
	outputDirectory = new File(inputDirectory, "xml")
23
	outputDirectory.mkdir()
24
	
25
	println "Processing: "+inputDirectory
26
	
27
	XMLOutputFactory factory = XMLOutputFactory.newInstance()
28
	
29
	def files = inputDirectory.listFiles().sort{ it.name }
30
	if (files == null || files.length == 0) {
31
		println "Error: no file to process in $inputDirectory"
32
		return false;
33
	}
34
	for (File inputfile : files.sort()) {
35
		if (inputfile.isDirectory() || inputfile.isHidden() || !inputfile.getName().endsWith(".tt")) continue // ignore
36
		println " file: "+inputfile
37
	
38
		name = inputfile.getName()
39
		idx = name.lastIndexOf(".")
40
		if (idx > 0) name = name.substring(0, idx)
41
		outputfile = new File(outputDirectory, name+".xml")
42
		
43
		Writer output = new OutputStreamWriter(new FileOutputStream(outputfile) , "UTF-8")
44
		XMLStreamWriter writer = factory.createXMLStreamWriter(output)
45
	
46
		writer.writeStartDocument("UTF-8","1.0")
47
		writer.writeCharacters("\n")
48
		writer.writeStartElement("text")
49
		writer.writeCharacters("\n")
50
	
51
		inSent = 0
52
		nLine = 0
53
		inputfile.eachLine(encoding) { line ->
54
		
55
			nLine++
56
			
57
			if (debug) {
58
				println nLine+": "+line
59
			}
60
			
61
			def sent = false
62
	        def sent_pattern = ~/^([^\t]+)\t(SENT)\t([^\t]+)$/
63
	        def sent_m = (line =~ sent_pattern)
64
	        if (sent_m) sent = true
65
	        	
66
			if (sent && inSent) {
67
				writer.writeStartElement("w")
68
				writer.writeAttribute("frpos", sent_m[0][2])
69
				writer.writeAttribute("frlemma", sent_m[0][3])
70
				writer.writeCharacters(sent_m[0][3])
71
				writer.writeEndElement() // close w		
72
				writer.writeComment("\n")
73
				writer.writeEndElement() // close s
74
				writer.writeEndElement() // close p (sent)
75
				writer.writeCharacters("\n")
76
				inSent=0
77
			} else {
78
				if (!inSent) {
79
					writer.writeStartElement("p")
80
					writer.writeStartElement("s")
81
					inSent=1
82
				}
83
				def w_pattern = ~/^([^\t]+)\t([^\t]+)\t([^\t]+)$/
84
	        	def w_m = (line =~ w_pattern)
85
	
86
	        	if (w_m.size() > 0) {
87
	        		writer.writeStartElement("w")
88
					writer.writeAttribute("frpos", w_m[0][2])
89
					writer.writeAttribute("frlemma", w_m[0][3])
90
					writer.writeCharacters(w_m[0][1])
91
					writer.writeEndElement() // close w
92
					writer.writeCharacters("\n")
93
				} else {
94
				    writer.writeStartElement("w")
95
					writer.writeAttribute("frpos", "NAM")
96
					writer.writeAttribute("frlemma", "<unknown>")
97
					writer.writeCharacters(line)
98
					writer.writeEndElement() // close w
99
					writer.writeCharacters("\n")
100
					println "** TT2XMLInDirectory: unknown [word] line pattern, "+nLine+": <"+line+">."
101
				}
102
			}
103
		}
104
	
105
		if (inSent) {
106
			writer.writeEndElement() // s
107
			writer.writeEndElement() // p
108
			writer.writeCharacters("\n")
109
		}
110
		writer.writeEndElement()
111
		writer.writeCharacters("\n")
112
		writer.close()
113
		output.close()
114
	}
115
	
116

  
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/projects/antract/PrepareAFVOIXOFFCorpusMacro.groovy (revision 3411)
43 43
	, "topicColumns": "Date de diffusion"
44 44
	, "metadataColumns": "id;Titre propre;Date de diffusion;Identifiant de la notice;Notes du titre;Type de date;Durée;Genre;Langue VO / VE;Nature de production;Producteurs (Aff.);Thématique;Nom fichier segmenté (info);antract_video;antract_debut;antract_fin;antract_duree;antract_tc_type;antract_tc_date;Résumé;Séquences;Descripteurs (Aff. Lig.);Générique (Aff. Lig.)"
45 45
	, "metadataColumnsGroups": "secondary;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;secondary;secondary;secondary;secondary;secondary;secondary;secondary;text;text;text;text"
46
	, "sectionsMergeActivationThreashold":1.0
47
	, "turnsMergeActivationThreashold":0.1
46
	, "fixSectionsLimits":true
47
	, "sectionsMergeActivationThreashold":4.0
48
	, "fixTurnsLimits":true
49
	, "turnsCutActivationThreashold":1.0
48 50
	, "debug":debug])
49 51

  
50 52
//copy the emissions file in the source directory
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/projects/antract/BuildAFMetadataMacro.groovy (revision 3411)
78 78
		"Résumé", "Séquences", "Descripteurs (Aff. Lig.)", "Générique (Aff. Lig.)");
79 79
	
80 80
	lineRules.put("Type de notice", "Notice sujet");
81
	
82
	dateColumnsSelection.add("Date de diffusion");
83
	datePattern = "dd/MM/yyyy"
84
	
81

  
85 82
	columnsToCopy.put("Identifiant de la notice", ["id"] as String[])
86 83
	
87 84
	dateColumnsSelection.add("Date de diffusion");
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/pager.groovy (revision 3411)
325 325
								pagedWriter.writeCharacters("\n")
326 326
							}
327 327
							pagedWriter.writeStartElement("table");
328
							if (enableCollapsibles && parser.getAttributeCount() > 2) {
329
								pagedWriter.writeAttribute("class", "transcription-table collapsiblecontent")
330
								pagedWriter.writeAttribute("style", "display:none;")
331
							} else {
332
								pagedWriter.writeAttribute("class", "transcription-table");
333
							}
328 334
							for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
329 335
								pagedWriter.writeStartElement("tr");
330 336
								pagedWriter.writeElement("td", parser.getAttributeLocalName(i));
331 337
								pagedWriter.writeElement("td", parser.getAttributeValue(i).toString());
332 338
								pagedWriter.writeEndElement();
333 339
							}
334
							if (enableCollapsibles && parser.getAttributeCount() > 2) {
335
								pagedWriter.writeAttribute("class", "transcription-table collapsiblecontent")
336
								pagedWriter.writeAttribute("style", "display:none;")
337
							} else {
338
								pagedWriter.writeAttribute("class", "transcription-table");
339
							}
340 340
							
341 341
							pagedWriter.writeEndElement() // table
342 342
							pagedWriter.writeEmptyElement("br")
TXM/trunk/org.txm.groovy.core/src/java/org/txm/groovy/core/GroovyScriptedImportEngine.java (revision 3411)
151 151
					// Log.info(GroovyMessages.restartingToolboxSearchengines);
152 152
					// Toolbox.getEngineManager(EngineType.SEARCH).restartEngines();
153 153
					if (project.getDoUpdate()) {
154
						Log.info(NLS.bind(TXMCoreMessages.corpusUpdateDoneInP0, ExecTimer.stop()));
154
						System.out.println(NLS.bind(TXMCoreMessages.corpusUpdateDoneInP0, ExecTimer.stop()));
155 155
					}
156 156
					else {
157
						Log.info(NLS.bind(TXMCoreMessages.corpusImportDoneInP0, ExecTimer.stop()));
157
						System.out.println(NLS.bind(TXMCoreMessages.corpusImportDoneInP0, ExecTimer.stop()));
158 158
					}
159 159
					
160 160
				}
TXM/trunk/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/KRAnnotationEngine.java (revision 3411)
428 428
	public void notify(TXMResult r, String state) {
429 429
		
430 430
		if (r instanceof MainCorpus && "clean".equals(state)) {
431
			
431 432
			MainCorpus c = (MainCorpus) r;
432 433
			
433 434
			if (c.getProject() == null || c.getProject().getDoUpdate()) {
......
441 442
			if (c.getProjectDirectory() == null || !c.getProjectDirectory().exists()) {
442 443
				return;
443 444
			}
445
			
446
			if (ams.get(c) != null && ams.get(c).tempManager != null && ams.get(c).tempManager.getEntityManager() != null ) {
447
				ams.get(c).tempManager.close(); // free files
448
			}
449
			
444 450
			File buildDirectory = new File(c.getProjectDirectory(), "temporary_annotations/" + c.getID());
445 451
			if (buildDirectory.exists()) {
446 452
				DeleteDir.deleteDirectory(buildDirectory);
447 453
			}
448 454
		}
449 455
		else if (r instanceof Project && "clean".equals(state)) {
456
			
450 457
			Project p = (Project) r;
451 458
			for (MainCorpus c : p.getChildren(MainCorpus.class)) { // if any MainCorpus is remaining, clean it
452 459
				for (String krname : KRAnnotationEngine.getKnowledgeRepositoryNames(c)) {
TXM/trunk/org.txm.core/src/java/org/txm/objects/Project.java (revision 3411)
1134 1134
	
1135 1135
	@Override
1136 1136
	public void clean() {
1137
		
1137 1138
		if (rcpProject != null) {
1138 1139
			try {
1139 1140
				// Toolbox.getEngineManager(EngineType.SEARCH).getEngine("CQP").stop(); //$NON-NLS-1$
......
1143 1144
				// Toolbox.getEngineManager(EngineType.SEARCH).getEngine("CQP").start(null); //$NON-NLS-1$
1144 1145
			}
1145 1146
			catch (Exception e) {
1146
				// TODO Auto-generated catch block
1147
				e.printStackTrace();
1147
				Log.warning("Warning: error while deleting the corpus: "+e);
1148
				Log.printStackTrace(e);
1148 1149
			}
1149 1150
		}
1150 1151
		
TXM/trunk/org.txm.rcp/src/main/java/org/txm/rcp/editors/imports/sections/TextualPlansSection.java (revision 3411)
85 85
		gdata = getTextGridData();
86 86
		milestoneElementsText.setLayoutData(gdata);
87 87
		
88
		Label label = toolkit.createLabel(sectionClient, "Projections", SWT.WRAP);
88
		Label label = toolkit.createLabel(sectionClient, "CQP Structure properties projections", SWT.WRAP);
89 89
		gdata = getLabelGridData();
90 90
		gdata.colspan = 2;
91 91
		label.setLayoutData(gdata);

Formats disponibles : Unified diff