Révision 3416

TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/AddSectionsFromTableV2Macro.groovy (revision 3416)
1
package org.txm.macro.transcription
2

  
3
import java.nio.charset.Charset
4

  
5
import java.time.LocalTime
6
import java.time.format.DateTimeFormatter
7

  
8
import org.eclipse.core.internal.localstore.IsSynchronizedVisitor
9
import org.txm.utils.*
10
import org.txm.utils.logger.*
11

  
12
@Field @Option(name="metadataFile", usage="Tableau des metadonnées de sections", widget="FileOpen", required=true, def="")
13
		File metadataFile;
14

  
15
@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS", widget="Folder", required=true, def="")
16
		File trsDirectory;
17

  
18
@Field @Option(name="joinTRSColumn", usage="Colonne de jointure de transcription", widget="String", required=true, def="Lien notice principale")
19
		def joinTRSColumn
20

  
21
@Field @Option(name="startTimeColumn", usage="Colonne de timing de début de section", widget="String", required=true, def="antract_debut")
22
		def startTimeColumn = "antract_debut"
23

  
24
@Field @Option(name="endTimeColumn", usage="Colonne de timing de fin de section", widget="String", required=true, def="antract_fin")
25
		def endTimeColumn = "antract_fin"
26

  
27
@Field @Option(name="typeColumns", usage="Colonnes des métadonnées de type de section", widget="String", required=true, def="Titre propre")
28
		def typeColumns
29

  
30
@Field @Option(name="topicColumns", usage="Colonnes des métadonnées de topic de section", widget="String", required=true, def="Date de diffusion")
31
		def topicColumns
32

  
33
@Field @Option(name="metadataColumns", usage="Colonnes de metadonnées de section", widget="String", required=true, def="Titre propre;Date de diffusion;Identifiant de la notice;Notes du titre;Type de date;Durée;Genre;Langue VO / VE;Nature de production;Producteurs (Aff.);Thématique;Nom fichier segmenté (info);antract_video;antract_debut;antract_fin;antract_duree;antract_tc_type;antract_tc_date;Résumé;Séquences;Descripteurs (Aff. Lig.);Générique (Aff. Lig.)")
34
		def metadataColumns
35

  
36
@Field @Option(name="metadataColumnsGroups", usage="Colonnes des gruopes de metadonnées de section", widget="String", required=true, def="metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;secondary;secondary;secondary;secondary;secondary;secondary;secondary;text;text;text;text")
37
		def metadataColumnsGroups
38

  
39
@Field @Option(name="fixSectionsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true")
40
		def fixSectionsLimits
41

  
42
@Field @Option(name="sectionsMergeActivationThreashold", usage="marge d'erreur de corrections des limites de sections", widget="Float", required=true, def="1.0")
43
		def sectionsMergeActivationThreashold
44

  
45
@Field @Option(name="fixTurnsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true")
46
		def fixTurnsLimits
47

  
48
@Field @Option(name="turnsCutActivationThreashold", usage="marge d'erreur de corrections des limites de tours", widget="Float", required=true, def="0.1")
49
		def turnsCutActivationThreashold
50

  
51
// Debug flag. Declared with a Boolean widget: with a String widget the value "false" is a
// non-empty string, which Groovy truth evaluates as true in every `if (debug)` test.
@Field @Option(name="debug", usage="show debug messages", widget="Boolean", required=true, def="false")
		def debug

// Stop here when the parameters dialog is not validated.
// (The former unconditional `debug = true` was removed so the user's choice is honoured.)
if (!ParametersDialog.open(this)) return;
56

  
57
typeColumns = typeColumns.split(";")
58
topicColumns = topicColumns.split(";")
59
metadataColumns = metadataColumns.split(";")
60
metadataColumnsGroups = metadataColumnsGroups.split(";")
61

  
62
if (metadataColumns.size() != metadataColumnsGroups.size()) {
63
	println "ERROR in metadata declarations&groups:"
64
	println "COLUMNS: "+metadataColumns
65
	println "GROUPS : "+metadataColumnsGroups
66
	return
67
}
68

  
69
if (!trsDirectory.exists()) {
70
	println "$trsDirectory not found"
71
	return
72
}
73

  
74
println "Loading data from $metadataFile..."
75
TableReader reader = new TableReader(metadataFile)//, "\t".charAt(0), Charset.forName("UTF-8")
76
reader.readHeaders()
77
def header = reader.getHeaders()
78
// Check that every column named in the parameters is present in the table header.
// The reader is closed before each early exit so the metadata file is not left open.
if (!header.contains(joinTRSColumn)) {
	println "No TRS ID $joinTRSColumn column found"
	reader.close()
	return
}
if (!header.contains(startTimeColumn)) {
	println "No start time $startTimeColumn column found"
	reader.close()
	return
}
if (!header.contains(endTimeColumn)) {
	println "No end time $endTimeColumn column found"
	reader.close()
	return
}
for (def col : metadataColumns) {
	// test the column itself: the previous test on endTimeColumn could never fail at this point
	if (!header.contains(col)) {
		println "No $col column found"
		reader.close()
		return
	}
}
for (def col : typeColumns) {
	if (!header.contains(col)) {
		println "No type $col column found"
		reader.close()
		return
	}
}
for (def col : topicColumns) {
	if (!header.contains(col)) {
		println "No topic $col column found"
		reader.close()
		return
	}
}
108

  
109
File outputDirectory = new File(trsDirectory, "out")
110
println "Writing result to $outputDirectory..."
111

  
112
dateTimeFormatter = DateTimeFormatter.ISO_LOCAL_TIME
113
/**
 * Converts a timecode string into a number of seconds.
 *
 * The text after the last ":" is read as an integer frame count and the text before it is
 * parsed as a local time with the script-level `dateTimeFormatter`. The frame count is
 * divided by 25 (presumably 25 frames per second -- confirm) and added to the
 * hours/minutes/seconds converted to seconds. A frame count above 25 is treated as 0.
 *
 * @param str the timecode string, may be null
 * @return the time in seconds, or null when str is null or contains no ":"
 */
def strTotime(def str) {
	if (str == null) {
		return null
	}
	int lastColon = str.lastIndexOf(":")
	if (lastColon == -1) {
		return null
	}

	// locals instead of undeclared names, so nothing leaks into the script binding
	int bonusFrame = Integer.parseInt(str.substring(lastColon + 1))
	//if (str.contains("135475")) println "ERROR $str in $infos -> $bonusFrame"
	if (bonusFrame > 25) {
		bonusFrame = 0
	}

	LocalTime time1 = LocalTime.parse(str.substring(0, lastColon), dateTimeFormatter)
	int totalSeconds = (time1.getHour()*60*60) + (time1.getMinute()*60) + time1.getSecond()

	// Groovy integer division yields a decimal value, so the frame part is not truncated
	return totalSeconds + (bonusFrame/25)
}
131

  
132
try {
133
	def sectionGroupsToInsert = [:]
134
	println "Reading data..."
135
	while (reader.readRecord()) { // loading & sorting sections
136
		String id = reader.get(joinTRSColumn).trim()
137
		if (id.endsWith(".mp4")) id = id.substring(0, id.length()-4)
138
		if (id.length() == 0) continue;
139
		
140
		if (!sectionGroupsToInsert.containsKey(id)) {
141
			sectionGroupsToInsert[id] = []
142
		}
143
		def section = sectionGroupsToInsert[id]
144
		
145
		if (reader.get(startTimeColumn) != null && reader.get(startTimeColumn).length() > 0) { // ignore non timed sections
146
			
147
			def m = [:]
148
			
149
			for (def todo : ["topic":topicColumns, "type":typeColumns]) {
150
				def data = []
151
				for (def col : todo.value) {
152
					if (reader.get(col).trim().length() > 0) {
153
						data << reader.get(col).trim().replace("\n", "")
154
					}
155
				}
156
				m[todo.key] = data.join("\t")
157
			}
158
			def metadataList = []
159
			def metadataGroupList = []
160
			for (int i = 0 ;  i < metadataColumns.size() ; i++) {
161
				def col = metadataColumns[i]
162
				String c = AsciiUtils.buildAttributeId(col)
163
				m[c] = reader.get(col)
164
				metadataList << c
165
				metadataGroupList << metadataColumnsGroups[i]
166
			}
167
			m["metadata"] = metadataList.join("|")
168
			m["metadata_groups"] = metadataGroupList.join("|")
169
			
170
			m["startTime"] = strTotime(reader.get(startTimeColumn))
171
			m["endTime"] = strTotime(reader.get(endTimeColumn))
172
			m["synchronized"] = "true"
173
			
174
			section << [m["startTime"], m["endTime"], m]
175
		}
176
	}
177
	
178
	println "Inserting sections... "+sectionGroupsToInsert.size()
179
	
180
	ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size())
181
	for (String id : sectionGroupsToInsert.keySet()) {
182
		
183
		File trsFile = new File(trsDirectory, id+".trs")
184
		if (!trsFile.exists()) {
185
			cpb.tick()
186
			continue
187
		}
188
		
189
		if (debug) println "== $id =="
190
		else cpb.tick()
191
		
192
		//println "Processing $id..."
193
		sections = sectionGroupsToInsert[id]
		// Order by start time; on equal start times the section with the later end time comes first
		// (presumably so that an enclosing section precedes the ones it contains -- confirm).
		// The former tie-break `a[1] <=> -b[1]` was not a consistent ordering: with positive end
		// times it answered "greater" whichever way the two sections were compared.
		sections = sections.sort() { a, b -> a[0] <=> b[0] ?: b[1] <=> a[1] }
195
		
196
		if (fixSectionsLimits) {
197
			if (debug) println "Fixing sections of $id"
198
			for (int iSection = 1 ; iSection < sections.size() ; iSection++) {
199
				//println sections[iSection]
200
				if (Math.abs(sections[iSection][0] - sections[iSection - 1][1]) < sectionsMergeActivationThreashold) {
201
					if (debug) println "s=$iSection start <- end : "+sections[iSection][0]+ " <- "+sections[iSection - 1][1]
202
					sections[iSection][0] = sections[iSection - 1][1] // fix the start time with the previous section end time
203
				}
204
			}
205
		}
206
		
207
		// Open input file
208
		slurper = new groovy.util.XmlParser(false, true, true);
209
		slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false) // allow DTD declaration
210
		slurper.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "all"); // allow to read DTD from local file
211
		trs = slurper.parse(trsFile.toURI().toString())
212
		trsEpisodes = trs.Episode // 1
213
		if (trsEpisodes.size() > 1) {
214
			println "Error: multiple Episode node in $trsFile"
215
			continue
216
		}
217
		def trsEpisode = trsEpisodes[0]
218
		def trsSections =  trs.Episode.Section // 1
219
		if (trsSections.size() > 1) {
220
			println "Error: multiple Section node in $trsFile"
221
			continue
222
		}
223
		
224
		trsSection = trsSections[0]
225
		
226
		turns = trsSection.Turn
227
		newSections = []
228
		iSection = 0;
229
		currentSection = null
230
		currentNode = null
231
		
232
		foundSection=null
233
		isTurnSynchronized=false
234
		cutCheck=false
235
		
236
		// boucle sur les tours dans l'ordre
237
		for (iTurn = 0 ; iTurn < turns.size() ; iTurn++) {
238
			
239
			turn = turns[iTurn]
240
			start = Float.parseFloat(turn.@startTime)
241
			end = Float.parseFloat(turn.@endTime)
242
			//println "Turn: $iTurn ($start, $end)"
243
			
244
			// Etape 1 : y aura-t-il besoin de couper le tour, et dans quelle section est le tour (ou sa première partie)
245
			foundSection = null;
246
			for (int i = iSection ; i < sections.size() ; i++) {
247
				// if section_end < turn_start OU |turn_start - section_end| < turn_threshold
248
				if (sections[i][1] < start || Math.abs(start - sections[i][1]) < turnsCutActivationThreashold) { // Turn is after section
249
					// Cas 1 : la section est complètement avant (modulo la marge)
250
				} else {
251
					// Cas 2 : on est arrivés à la section à considérer
252
					iSection = i
253
					// if section_start > turn_end OU |section_start - turn_end| < turn_threshold
254
					if (sections[i][0] > end || Math.abs(sections[i][0] - end) < turnsCutActivationThreashold) { // Turn is before section
255
						// Cas 2.1 : la section est complètement après (modulo la marge) (et les suivantes le seront aussi)
256
						foundSection = null
257
						isTurnSynchronized = false
258
						cutCheck = false
259
					} else {
260
						// if |section_start - turn_start| > turn_threshold
261
						if (Math.abs(start - sections[i][0]) > turnsCutActivationThreashold) { // Turn begins before section does
262
							// Cas 2.2 : la section commence significativement après le début du tour (le début est non synchronisé)
263
							foundSection = sections[i] // (c'est la première section rencontrée, mais elle sera pour le tour suivant)
264
							isTurnSynchronized = false
265
							cutCheck = true
266
						} else {
267
							// Cas 2.3 : le début du tour est dans la section (on n'a pas besoin de chercher d'autres sections car si ce n'est pas la seule on coupera le tour et ce sera un autre tour).
268
							foundSection = sections[i]  // (c'est la section qui commence le tour, au moins)
269
							isTurnSynchronized = true
270
							cutCheck = true
271
						}
272
					}
273
					break; // stop searching and set iSection to accelerate next search
274
				}
275
			}
276
			
277
			// Etape 2 : positionne *le* tour dans *le* noeud
278
			// (on ne gère qu'un seul tour et un seul noeud à chaque itération de la boucle tour,
279
			// puisqu'on a retaillé le tour pour qu'il ne concerne pas plusieurs noeuds)
280
			if (foundSection != null) { // on complète ou on ajoute une div.
281
				
282
				if (foundSection != currentSection || currentSection == null) {
283
					if (currentNode != null && currentNode.@synchronized == "false") {
284
						def tmp = currentNode.Turn
285
						currentNode.@endTime = tmp[-1].@endTime
286
					}
287
					
288
					currentSection = foundSection
289
					currentNode = new Node(trsEpisode, "Section", currentSection[2])
290
				}
291
			} else { // on complète ou on ajoute un noeud (div) non synchronisé.
292
				if (currentSection != null || currentNode == null) { // create a new unsynchronized section if there is no opened synchronized section or no un-synchronized section
293
				    currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":sections[iSection][0], "endTime":sections[iSection][1], "synchronized":"false"] )
294
				    currentSection = null;
295
                }
296
			}
297
			
298
			// Etape 3 : on coupe le tour s'il y a besoin
299
			if (cutCheck && fixTurnsLimits) {
300
				if (isTurnSynchronized) {
301
					cutTurn(true) // iSection++  et test avec le **end** de la section quand on coupe
302
				} else {
303
					if (foundSection != null) {
304
						cutTurn(false) // test avec le **start** de la section quand on coupe
305
					}
306
				}
307
			}
308
			
309
			trsSection.remove(turn)
310
			currentNode.append(turn)
311
		}
312
		
313
		outputDirectory.mkdir()
314
		File outfile = new File(outputDirectory, trsFile.getName())
315
		outfile.withWriter("UTF-8") { writer ->
316
			writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
317
			def printer = new groovy.util.XmlNodePrinter(new PrintWriter(writer))
318
			printer.setPreserveWhitespace(true)
319
			printer.print(trs)
320
		}
321
	}
322
	cpb.done()
323
	reader.close()
324
	println "Done."
325
	
326
} catch(Exception e) {
327
	println "Error: "+e
328
	Log.printStackTrace(e)
329
}
330

  
331
/**
 * Cuts the current turn in two at the first timed child ("w" or "Sync") that passes the
 * boundary test, and moves that child and every following non-text child to a new Turn node.
 *
 * Works on script-level variables: reads `turn`, `currentNode`, `trsSection`, `debug`;
 * updates `turns`, `iTurn` and (when testWithSectionEndTime is true) `iSection`.
 *
 * NOTE(review): the new turn is inserted into `turns` at index iTurn, i.e. before the current
 * turn, and iTurn is decremented. The caller's loop therefore visits the new turn next, after
 * which the current turn sits at the following index -- confirm it is not processed twice.
 *
 * @param testWithSectionEndTime when true the cut happens at the first timed child starting
 *        strictly after the section end; otherwise at the first one starting at or after
 *        the section start
 */
def cutTurn(def testWithSectionEndTime) {
	def kids = turn.children()
	Node tailTurn = null // created at the cut point, then receives the remaining children

	// the bounds are parsed only when they are held as strings
	def startSection = currentNode.@startTime
	if (startSection instanceof String) startSection = Float.parseFloat(currentNode.@startTime)
	def endSection = currentNode.@endTime
	if (endSection instanceof String) endSection = Float.parseFloat(currentNode.@endTime)

	int pos = 0
	while (pos < kids.size()) {
		def c = kids[pos]

		if (c instanceof String) { // text content is left where it is
			pos++
			continue
		}

		if (tailTurn != null) { // the cut is done: move the child; the list shrinks so pos stays
			turn.remove(c)
			tailTurn.append(c)
			if (debug) c.@moved="yes"
			continue
		}

		boolean isWord = "w".equals(c.name())
		if (!isWord && !"Sync".equals(c.name())) { // no time to check
			pos++
			continue
		}

		def start2
		def end2
		if (isWord) {
			start2 = Float.parseFloat(c.@startTime)
			end2 = Float.parseFloat(c.@endTime)
		} else {
			start2 = c.@time
			if (start2 instanceof String) start2 = Float.parseFloat(c.@time)
			end2 = start2
		}

		boolean test = testWithSectionEndTime ? (start2 > endSection) : (start2 >= startSection)
		if (!test) {
			pos++
			continue
		}

		if (debug) {
			if (isWord) println "cut with a w at ($start2, $end2) for section ("+startSection+", "+endSection+")"
			else println "cut with a Sync at ($start2, $end2) for section ("+startSection+", "+endSection+")"
		}
		tailTurn = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker])
		if (isWord) new Node(tailTurn, "Sync", ["time":""+start2]) // TRS: a turn cut on a word gets its own Sync
		turns.add(iTurn, tailTurn)
		iTurn--
		if (debug) tailTurn.@created = "yes"
		turn.@endTime = ""+start2;
		turn.remove(c)
		tailTurn.append(c)
		if (debug) c.@moved="yes"
		if (testWithSectionEndTime) iSection++
		// pos is not advanced: the child just removed has been replaced by its successor
	}
}
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/AddSectionsFromTableMacroOld.groovy (revision 3416)
1
package org.txm.macro.transcription
2

  
3
import java.nio.charset.Charset
4

  
5
import java.time.LocalTime
6
import java.time.format.DateTimeFormatter
7
import org.txm.utils.*
8
import org.txm.utils.logger.*
9

  
10
@Field @Option(name="metadataFile", usage="Tableau des metadonnées de sections", widget="FileOpen", required=true, def="")
11
		File metadataFile;
12

  
13
@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS", widget="Folder", required=true, def="")
14
		File trsDirectory;
15

  
16
@Field @Option(name="joinTRSColumn", usage="Colonne de jointure de transcription", widget="String", required=true, def="Lien notice principale")
17
		def joinTRSColumn
18

  
19
@Field @Option(name="startTimeColumn", usage="Colonne de timing de début de section", widget="String", required=true, def="antract_debut")
20
		def startTimeColumn = "antract_debut"
21

  
22
@Field @Option(name="endTimeColumn", usage="Colonne de timing de fin de section", widget="String", required=true, def="antract_fin")
23
		def endTimeColumn = "antract_fin"
24

  
25
@Field @Option(name="typeColumns", usage="Colonnes des métadonnées de type de section", widget="String", required=true, def="Titre propre")
26
		def typeColumns
27

  
28
@Field @Option(name="topicColumns", usage="Colonnes des métadonnées de topic de section", widget="String", required=true, def="Date de diffusion")
29
		def topicColumns
30

  
31
@Field @Option(name="metadataColumns", usage="Colonnes de metadonnées de section", widget="String", required=true, def="Titre propre;Date de diffusion;Identifiant de la notice;Notes du titre;Type de date;Durée;Genre;Langue VO / VE;Nature de production;Producteurs (Aff.);Thématique;Nom fichier segmenté (info);antract_video;antract_debut;antract_fin;antract_duree;antract_tc_type;antract_tc_date;Résumé;Séquences;Descripteurs (Aff. Lig.);Générique (Aff. Lig.)")
32
		def metadataColumns
33

  
34
@Field @Option(name="metadataColumnsGroups", usage="Colonnes des gruopes de metadonnées de section", widget="String", required=true, def="metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;secondary;secondary;secondary;secondary;secondary;secondary;secondary;text;text;text;text")
35
		def metadataColumnsGroups
36

  
37
@Field @Option(name="fixSectionsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true")
38
		def fixSectionsLimits
39

  
40
@Field @Option(name="sectionsMergeActivationThreashold", usage="marge d'erreur de corrections des limites de sections", widget="Float", required=true, def="1.0")
41
		def sectionsMergeActivationThreashold
42

  
43
@Field @Option(name="fixTurnsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true")
44
		def fixTurnsLimits
45

  
46
@Field @Option(name="turnsCutActivationThreashold", usage="marge d'erreur de corrections des limites de tours", widget="Float", required=true, def="0.1")
47
		def turnsCutActivationThreashold
48

  
49
// Debug flag. Declared with a Boolean widget: with a String widget the value "false" is a
// non-empty string, which Groovy truth evaluates as true in every `if (debug)` test.
@Field @Option(name="debug", usage="show debug messages", widget="Boolean", required=true, def="false")
		def debug
51

  
52
if (!ParametersDialog.open(this)) return;
53

  
54
typeColumns = typeColumns.split(";")
55
topicColumns = topicColumns.split(";")
56
metadataColumns = metadataColumns.split(";")
57
metadataColumnsGroups = metadataColumnsGroups.split(";")
58

  
59
if (metadataColumns.size() != metadataColumnsGroups.size()) {
60
	println "ERROR in metadata declarations&groups:"
61
	println "COLUMNS: "+metadataColumns
62
	println "GROUPS : "+metadataColumnsGroups
63
	return
64
}
65

  
66
if (!trsDirectory.exists()) {
67
	println "$trsDirectory not found"
68
	return
69
}
70

  
71
println "Loading data from $metadataFile..."
72
TableReader reader = new TableReader(metadataFile)//, "\t".charAt(0), Charset.forName("UTF-8")
73
reader.readHeaders()
74
def header = reader.getHeaders()
75
// Check that every column named in the parameters is present in the table header.
// The reader is closed before each early exit so the metadata file is not left open.
if (!header.contains(joinTRSColumn)) {
	println "No TRS ID $joinTRSColumn column found"
	reader.close()
	return
}
if (!header.contains(startTimeColumn)) {
	println "No start time $startTimeColumn column found"
	reader.close()
	return
}
if (!header.contains(endTimeColumn)) {
	println "No end time $endTimeColumn column found"
	reader.close()
	return
}
for (def col : metadataColumns) {
	// test the column itself: the previous test on endTimeColumn could never fail at this point
	if (!header.contains(col)) {
		println "No $col column found"
		reader.close()
		return
	}
}
for (def col : typeColumns) {
	if (!header.contains(col)) {
		println "No type $col column found"
		reader.close()
		return
	}
}
for (def col : topicColumns) {
	if (!header.contains(col)) {
		println "No topic $col column found"
		reader.close()
		return
	}
}
105

  
106
File outputDirectory = new File(trsDirectory, "out")
107
println "Writing result to $outputDirectory..."
108

  
109
dateTimeFormatter = DateTimeFormatter.ISO_LOCAL_TIME
110
/**
 * Converts a timecode string into a number of seconds.
 *
 * The text after the last ":" is read as an integer frame count and the text before it is
 * parsed as a local time with the script-level `dateTimeFormatter`. The frame count is
 * divided by 25 (presumably 25 frames per second -- confirm) and added to the
 * hours/minutes/seconds converted to seconds. A frame count above 25 is treated as 0.
 *
 * @param str the timecode string, may be null
 * @return the time in seconds, or null when str is null or contains no ":"
 */
def strTotime(def str) {
	if (str == null) {
		return null
	}
	int lastColon = str.lastIndexOf(":")
	if (lastColon == -1) {
		return null
	}

	// locals instead of undeclared names, so nothing leaks into the script binding
	int bonusFrame = Integer.parseInt(str.substring(lastColon + 1))
	//if (str.contains("135475")) println "ERROR $str in $infos -> $bonusFrame"
	if (bonusFrame > 25) {
		bonusFrame = 0
	}

	LocalTime time1 = LocalTime.parse(str.substring(0, lastColon), dateTimeFormatter)
	int totalSeconds = (time1.getHour()*60*60) + (time1.getMinute()*60) + time1.getSecond()

	// Groovy integer division yields a decimal value, so the frame part is not truncated
	return totalSeconds + (bonusFrame/25)
}
128

  
129
try {
130
	def sectionGroupsToInsert = [:]
131
	println "Reading data..."
132
	while (reader.readRecord()) { // loading & sorting sections
133
		String id = reader.get(joinTRSColumn).trim()
134
		if (id.endsWith(".mp4")) id = id.substring(0, id.length()-4)
135
		if (id.length() == 0) continue;
136
		
137
		if (!sectionGroupsToInsert.containsKey(id)) {
138
			sectionGroupsToInsert[id] = []
139
		}
140
		def section = sectionGroupsToInsert[id]
141
		
142
		if (reader.get(startTimeColumn) != null && reader.get(startTimeColumn).length() > 0) { // ignore non timed sections
143
			
144
			def m = [:]
145
			
146
			for (def todo : ["topic":topicColumns, "type":typeColumns]) {
147
				def data = []
148
				for (def col : todo.value) {
149
					if (reader.get(col).trim().length() > 0) {
150
						data << reader.get(col).trim().replace("\n", "")
151
					}
152
				}
153
				m[todo.key] = data.join("\t")
154
			}
155
			def metadataList = []
156
			def metadataGroupList = []
157
			for (int i = 0 ;  i < metadataColumns.size() ; i++) {
158
				def col = metadataColumns[i]
159
				String c = AsciiUtils.buildAttributeId(col)
160
				m[c] = reader.get(col)
161
				metadataList << c
162
				metadataGroupList << metadataColumnsGroups[i]
163
			}
164
			m["metadata"] = metadataList.join("|")
165
			m["metadata_groups"] = metadataGroupList.join("|")
166
			
167
			m["startTime"] = strTotime(reader.get(startTimeColumn))
168
			m["endTime"] = strTotime(reader.get(endTimeColumn))
169
			m["synchronized"] = "true"
170
			
171
			section << [m["startTime"], m["endTime"], m]
172
		}
173
	}
174
	
175
	println "Inserting sections... "+sectionGroupsToInsert.size()
176
	
177
	ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size())
178
	for (String id : sectionGroupsToInsert.keySet()) {
179
		
180
		File trsFile = new File(trsDirectory, id+".trs")
181
		if (!trsFile.exists()) {
182
			cpb.tick()
183
			continue
184
		}
185
		
186
		if (debug) println "== $id =="
187
		else cpb.tick()
188
		
189
		//println "Processing $id..."
190
		def sections = sectionGroupsToInsert[id]
		// Order by start time; on equal start times the section with the later end time comes first
		// (presumably so that an enclosing section precedes the ones it contains -- confirm).
		// The former tie-break `a[1] <=> -b[1]` was not a consistent ordering: with positive end
		// times it answered "greater" whichever way the two sections were compared.
		sections = sections.sort() { a, b -> a[0] <=> b[0] ?: b[1] <=> a[1] }
192
		
193
		if (fixSectionsLimits) {
194
			if (debug) println "Fixing sections of $id"
195
			for (int iSection = 1 ; iSection < sections.size() ; iSection++) {
196
				//println sections[iSection]
197
				if (Math.abs(sections[iSection][0] - sections[iSection - 1][1]) < sectionsMergeActivationThreashold) {
198
					if (debug) println "s=$iSection start <- end : "+sections[iSection][0]+ " <- "+sections[iSection - 1][1]
199
					sections[iSection][0] = sections[iSection - 1][1] // fix the start time with the previous section end time
200
				}
201
			}
202
		}
203
		
204
		// Open input file
205
		def slurper = new groovy.util.XmlParser(false, true, true);
206
		slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false) // allow DTD declaration
207
		slurper.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "all"); // allow to read DTD from local file
208
		def trs = slurper.parse(trsFile.toURI().toString())
209
		def trsEpisodes = trs.Episode // 1
210
		if (trsEpisodes.size() > 1) {
211
			println "Error: multiple Episode node in $trsFile"
212
			continue
213
		}
214
		def trsEpisode = trsEpisodes[0]
215
		def trsSections =  trs.Episode.Section // 1
216
		if (trsSections.size() > 1) {
217
			println "Error: multiple Section node in $trsFile"
218
			continue
219
		}
220
		def trsSection = trsSections[0]
221
		
222
		def turns = trsSection.Turn
223
		def newSections = []
224
		def iSection = 0;
225
		def currentSection = null
226
		def currentNode = null
227
		
228
		for (int iTurn = 0 ; iTurn < turns.size() ; iTurn++) {
229
			
230
			def turn = turns[iTurn]
231
			def start = Float.parseFloat(turn.@startTime)
232
			def end = Float.parseFloat(turn.@endTime)
233
			//println "Turn: $iTurn ($start, $end)"
234
			
235
			def foundSection = null;
236
			for (int i = iSection ; i < sections.size() ; i++) {
237
				if (end < sections[i][0]) { // the Turn is before the section
238
					
239
				} else if (sections[i][1] < start) { // the Turn is after the section
240
					
241
				} else {
242
					foundSection = sections[i]
243
					iSection = i
244
					break; // stop searching and set iSection to accelerate next search
245
				}
246
			}
247
			
248
			if (foundSection == null) {
249
				if (currentSection != null || currentNode == null) {
250
					currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] )
251
					currentSection = null;
252
				}
253
			} else {
254
				if (foundSection != currentSection) {
255
					if (currentNode != null && currentNode.@synchronized == "false") { // set the un-synchronized section endTime using its last Turn endTime
256
						def tmp = currentNode.Turn
257
						currentNode.@endTime = tmp[-1].@endTime
258
					}
259
					
260
					currentSection = foundSection
261
					currentNode = new Node(trsEpisode, "Section", currentSection[2])
262
				}
263
			}
264
			
265
			trsSection.remove(turn)
266
			currentNode.append(turn)
267
		}
268
		
269
		//remove the initial section which is empty now
270
		trsEpisode.remove(trsSection)
271
		
272
		if (fixTurnsLimits) {
273
			if (debug) println "Fixing Turn limits..."
274
			def partOfTurnToInsertInthePreviousSection = null;
275
			def partOfTurnToInsertIntheNextSection = null;
276
			sections = trs.Episode.Section
277
			for (int i = 0 ; i < sections.size() ; i++) { // browse created sections but stop before the last one (whichc can not be fixed)
278
				
279
				def section = sections[i]
280
				if (partOfTurnToInsertIntheNextSection != null) {
281
					if (debug) println "Moving part-of turn: "+partOfTurnToInsertIntheNextSection+" in section ("+section.@startTime+", "+section.@endTime+")"
282
					section.children().add(0, partOfTurnToInsertIntheNextSection) // insert the slited part of the turn in the section
283
				}
284
				
285
				def startSection = section.@startTime // Float.parseFloat(section.@startTime)
286
				def endSection = section.@endTime //  Float.parseFloat(section.@endTime)
287
				if (startSection instanceof String) startSection = Float.parseFloat(section.@startTime)
288
				if (endSection instanceof String) endSection = Float.parseFloat(section.@endTime)
289
				
290
				partOfTurnToInsertInthePreviousSection = null
291
				partOfTurnToInsertIntheNextSection = null
292
				
293
				turns = section.Turn
294
				if (turns.size() == 0) continue;
295
				
296
				// Fix the first Turn
297
				def turn = turns[0]
298
				start = turn.@startTime // Float.parseFloat(section.@startTime)
299
				end = turn.@endTime //  Float.parseFloat(section.@endTime)
300
				if (start instanceof String) start = Float.parseFloat(turn.@startTime)
301
				if (end instanceof String) end = Float.parseFloat(turn.@endTime)
302
				
303
				if (start < startSection) { // the start of the Turn is outside of its current section
304
					
305
				}
306
				
307
				// Fix the last Turn
308
				turn = turns[-1]
309
				start = turn.@startTime // Float.parseFloat(section.@startTime)
310
				end = turn.@endTime //  Float.parseFloat(section.@endTime)
311
				if (start instanceof String) start = Float.parseFloat(turn.@startTime)
312
				if (end instanceof String) end = Float.parseFloat(turn.@endTime)
313
				if (end > endSection) { // the end of the Turn is outside of its current section
314
					
315
					def children = turn.children()
316
					Node newTurnKaNode = null;//new Node(trsEpisode, "Turn", currentSection[2])
317
					//println "Cut the last turn if necessary"
318
					for (int iChildren = 0 ; iChildren < children.size() ; iChildren++) {
319
						
320
						def c = children[iChildren]
321
						if (c instanceof String) continue;
322
						
323
						if (newTurnKaNode != null) {
324
							turn.remove(c)
325
							newTurnKaNode.append(c)
326
							if (debug) c.@moved="yes"
327
							iChildren--
328
						} else {
329
							if ("w".equals(c.name())) {
330
								def start2 = Float.parseFloat(c.@startTime)
331
								def end2 = Float.parseFloat(c.@endTime)
332
								
333
								if (start2 > endSection && Math.abs(start2 - endSection) > turnsCutActivationThreashold) {
334
									if (debug) println "cut with a w at [$start2, $end2] for section ("+startSection+", "+endSection+")"
335
									newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker])
336
									if (debug) newTurnKaNode.@created = "yes"
337
									turn.@endTime = ""+start2;
338
									turn.remove(c)
339
									newTurnKaNode.append(c)
340
									if (debug) c.@moved="yes"
341
									iChildren--
342
								}
343
							} else if ("Sync".equals(c.name())) {
344
								def start2 = Float.parseFloat(c.@time)
345
								def end2 = Float.parseFloat(c.@time)
346
								
347
								if (start2 > endSection && Math.abs(start2 - endSection) > turnsCutActivationThreashold) {
348
									if (debug) println "cut with a Sync at [$start2, $end2] for section "+endSection
349
									newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker])
350
									if (debug) newTurnKaNode.@created = "yes"
351
									turn.@endTime = ""+start2;
352
									turn.remove(c)
353
									if (debug) c.@moved="yes"
354
									newTurnKaNode.append(c)
355
									iChildren--
356
								}
357
							} else {
358
								// no time to check
359
							}
360
						}
361
					}
362
					
363
				}
364
				partOfTurnToInsertIntheNextSection = newTurnKaNode
365
			}
366
		}
367
		
368
		outputDirectory.mkdir()
369
		File outfile = new File(outputDirectory, trsFile.getName())
370
		outfile.withWriter("UTF-8") { writer ->
371
			writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
372
			def printer = new groovy.util.XmlNodePrinter(new PrintWriter(writer))
373
			printer.setPreserveWhitespace(true)
374
			printer.print(trs)
375
		}
376
	}
377
	cpb.done()
378
	reader.close()
379
	println "Done."
380
	
381
} catch(Exception e) {
382
	println "Error: "+e
383
	Log.printStackTrace(e)
384
}
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/AddSectionsFromTableMacro.groovy (revision 3416)
4 4

  
5 5
import java.time.LocalTime
6 6
import java.time.format.DateTimeFormatter
7

  
8
import org.eclipse.core.internal.localstore.IsSynchronizedVisitor
7 9
import org.txm.utils.*
8 10
import org.txm.utils.logger.*
9 11

  
......
39 41

  
40 42
@Field @Option(name="sectionsMergeActivationThreashold", usage="marge d'erreur de corrections des limites de sections", widget="Float", required=true, def="1.0")
41 43
		def sectionsMergeActivationThreashold
42
		
44

  
43 45
@Field @Option(name="fixTurnsLimits", usage="Correction des limites de sections du tableau de metadonnees", widget="Boolean", required=true, def="true")
44 46
		def fixTurnsLimits
45 47

  
46 48
@Field @Option(name="turnsCutActivationThreashold", usage="marge d'erreur de corrections des limites de tours", widget="Float", required=true, def="0.1")
47 49
		def turnsCutActivationThreashold
48 50

  
49
@Field @Option(name="debug", usage="show debug messages", widget="String", required=true, def="false")
51
@Field @Option(name="debug", usage="show debug messages", widget="Boolean", required=true, def="false")
50 52
		def debug
51 53

  
52 54
if (!ParametersDialog.open(this)) return;
53
debug = true
54 55

  
55 56
typeColumns = typeColumns.split(";")
56 57
topicColumns = topicColumns.split(";")
......
188 189
		else cpb.tick()
189 190
		
190 191
		//println "Processing $id..."
191
		def sections = sectionGroupsToInsert[id]
192
		sections = sectionGroupsToInsert[id]
192 193
		sections = sections.sort() { a, b -> a[0] <=> b[0] ?: a[1] <=> -b[1] } // negative second test for sections inclusion
193 194
		
194 195
		if (fixSectionsLimits) {
......
203 204
		}
204 205
		
205 206
		// Open input file
206
		def slurper = new groovy.util.XmlParser(false, true, true);
207
		slurper = new groovy.util.XmlParser(false, true, true);
207 208
		slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false) // allow DTD declaration
208 209
		slurper.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "all"); // allow to read DTD from local file
209
		def trs = slurper.parse(trsFile.toURI().toString())
210
		def trsEpisodes = trs.Episode // 1
210
		trs = slurper.parse(trsFile.toURI().toString())
211
		trsEpisodes = trs.Episode // 1
211 212
		if (trsEpisodes.size() > 1) {
212 213
			println "Error: multiple Episode node in $trsFile"
213 214
			continue
......
218 219
			println "Error: multiple Section node in $trsFile"
219 220
			continue
220 221
		}
221
		def trsSection = trsSections[0]
222 222
		
223
		def turns = trsSection.Turn
224
		def newSections = []
225
		def iSection = 0;
226
		def currentSection = null
227
		def currentNode = null
223
		trsSection = trsSections[0]
228 224
		
229
		for (int iTurn = 0 ; iTurn < turns.size() ; iTurn++) {
230
			
231
			def turn = turns[iTurn]
232
			def start = Float.parseFloat(turn.@startTime)
233
			def end = Float.parseFloat(turn.@endTime)
225
		turns = trsSection.Turn
226
		newSections = []
227
		iSection = 0;
228
		currentSection = null
229
		currentNode = null
230
		
231
		foundSection=null
232
		isTurnSynchronized=false
233
		cutCheck=false
234
		
235
		// boucle sur les tours dans l'ordre
236
		for (iTurn = 0 ; iTurn < turns.size() ; iTurn++) {
237
			if (debug) println "iTurn=$iTurn turn="+turns[iTurn].attributes()
238
			turn = turns[iTurn]
239
			start = Float.parseFloat(turn.@startTime)
240
			end = Float.parseFloat(turn.@endTime)
234 241
			//println "Turn: $iTurn ($start, $end)"
235 242
			
236
			def foundSection = null;
243
			// Etape 1 : y aura-t-il besoin de couper le tour, et dans quelle section est le tour (ou sa première partie)
244
			foundSection = null;
237 245
			for (int i = iSection ; i < sections.size() ; i++) {
238
				if (end < sections[i][0]) { // Turn is before section
239
					
240
				} else if (sections[i][1] < start) { // Section is before section
241
					
246
				// if section_end < turn_start OU |turn_start - section_end| < turn_threshold
247
				if (sections[i][1] < start || Math.abs(start - sections[i][1]) < turnsCutActivationThreashold) { // Turn is after section
248
					// Cas 1 : la section est complètement avant (modulo la marge)
242 249
				} else {
243
					foundSection = sections[i]
250
					// Cas 2 : on est arrivés à la section à considérer
244 251
					iSection = i
252
					// if section_start > turn_end OU |section_start - turn_end| < turn_threshold
253
					if (sections[i][0] > end || Math.abs(sections[i][0] - end) < turnsCutActivationThreashold) { // Turn is before section
254
						// Cas 2.1 : la section est complètement après (modulo la marge) (et les suivantes le seront aussi)
255
						foundSection = null
256
						isTurnSynchronized = false
257
						cutCheck = false
258
					} else {
259
						// if |section_start - turn_start| > turn_threshold
260
						if (start - sections[i][0] < 0) { // Turn begins before section does
261
							// Cas 2.2 : la section commence significativement après le début du tour (le début est non synchronisé)
262
							foundSection = sections[i] // (c'est la première section rencontrée, mais elle sera pour le tour suivant)
263
							isTurnSynchronized = false
264
							cutCheck = true
265
						} else {
266
							// Cas 2.3 : le début du tour est dans la section (on n'a pas besoin de chercher d'autres sections car si ce n'est pas la seule on coupera le tour et ce sera un autre tour).
267
							foundSection = sections[i]  // (c'est la section qui commence le tour, au moins)
268
							isTurnSynchronized = true
269
							cutCheck = true
270
						}
271
					}
245 272
					break; // stop searching and set iSection to accelerate next search
246 273
				}
247 274
			}
248 275
			
249
			if (foundSection == null) {
250
				if (currentSection != null || currentNode == null) {
251
					currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] )
252
					currentSection = null;
253
				}
254
			} else {
255
				if (foundSection != currentSection) {
276
			// Etape 2 : positionne *le* tour dans *le* noeud
277
			// (on ne gère qu'un seul tour et un seul noeud à chaque itération de la boucle tour,
278
			// puisqu'on a retaillé le tour pour qu'il ne concerne pas plusieurs noeuds)
279
			if (foundSection != null) { // on complète ou on ajoute une div.
280
				
281
				if (foundSection != currentSection || currentSection == null) {
256 282
					if (currentNode != null && currentNode.@synchronized == "false") {
257 283
						def tmp = currentNode.Turn
258 284
						currentNode.@endTime = tmp[-1].@endTime
......
260 286
					
261 287
					currentSection = foundSection
262 288
					currentNode = new Node(trsEpisode, "Section", currentSection[2])
289
					if (debug) println " create synchronized turn at start="+foundSection[0]
263 290
				}
291
			} else { // on complète ou on ajoute un noeud (div) non synchronisé.
292
				if (currentSection != null || currentNode == null) { // create a new unsynchronized section if there is no opened synchronized section or no un-synchronized section
293
				    currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] )
294
				    currentSection = null;
295
					if (debug) println " create un-synchronized turn at "+turn.@startTime
296
                }
264 297
			}
265 298
			
299
			// Etape 3 : on coupe le tour s'il y a besoin
300
			if (cutCheck && fixTurnsLimits) {
301
				if (debug) println " fixing turn in turns ("+turns.size()+") section ("+trsSection.children().size()+")"
302
				if (isTurnSynchronized) {
303
					cutTurn(true) // iSection++  et test avec le **end** de la section quand on coupe
304
				} else {
305
					if (foundSection != null) {
306
						cutTurn(false) // test avec le **start** de la iSection quand on coupe
307
					}
308
				}
309
			}
310
			
311
			if (debug) println " remove turn in turns ("+turns.size()+") section ("+trsSection.children().size()+")"
312
			turns.remove(turn)
266 313
			trsSection.remove(turn)
267 314
			currentNode.append(turn)
315
			if (debug) println " removed turn in turns ("+turns.size()+") section ("+trsSection.children().size()+")"
268 316
		}
269 317
		
270
		//remove the initial section which is empty now or not
271 318
		trsEpisode.remove(trsSection)
272 319
		
273
		if (fixTurnsLimits) {
274
			if (debug) println "Fixing Turn limits..."
275
			def turnToInsert = null;
276
			sections = trs.Episode.Section
277
			for (int i = 0 ; i < sections.size() -1 ; i++) { // browse created sections but stop before the last one (whichc can not be fixed)
278
				
279
				def section = sections[i]
280
				if (turnToInsert != null) {
281
					if (debug) println "Moving part-of turn: "+turnToInsert+" in section ("+section.@startTime+", "+section.@endTime+")"
282
					section.children().add(0, turnToInsert) // insert the slited part of the turn in the section
283
				}
284
				
285
				def startSection = section.@startTime // Float.parseFloat(section.@startTime)
286
				def endSection = section.@endTime //  Float.parseFloat(section.@endTime)
287
				if (startSection instanceof String) startSection = Float.parseFloat(section.@startTime)
288
				if (endSection instanceof String) endSection = Float.parseFloat(section.@endTime)
289
					
290
				turnToInsert = null
291
				
292
				turns = section.Turn
293
				if (turns.size() == 0) continue;
294
				
295
				def turn = turns[-1]
296
				
297
				def children = turn.children()
298
				Node newTurnKaNode = null;//new Node(trsEpisode, "Turn", currentSection[2])
299
				//println "Cut the last turn if necessary"
300
				for (int iChildren = 0 ; iChildren < children.size() ; iChildren++) {
301
					
302
					def c = children[iChildren]
303
					if (c instanceof String) continue;
304
					
305
					if (newTurnKaNode != null) {
306
						turn.remove(c)
307
						newTurnKaNode.append(c)
308
						if (debug) c.@moved="yes"
309
						iChildren--
310
					} else {
311
						if ("w".equals(c.name())) {
312
							def start2 = Float.parseFloat(c.@startTime)
313
							def end2 = Float.parseFloat(c.@endTime)
314
							
315
							if (start2 > endSection && Math.abs(start2 - endSection) > turnsCutActivationThreashold) {
316
								if (debug) println "cut with a w at [$start2, $end2] for section ("+startSection+", "+endSection+")"
317
								newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker])
318
								if (debug) newTurnKaNode.@created = "yes"
319
								turn.@endTime = ""+start2;
320
								turn.remove(c)
321
								newTurnKaNode.append(c)
322
								if (debug) c.@moved="yes"
323
								iChildren--
324
							}
325
						} else if ("Sync".equals(c.name())) {
326
							def start2 = Float.parseFloat(c.@time)
327
							def end2 = Float.parseFloat(c.@time)
328
							
329
							if (start2 > endSection && Math.abs(start2 - endSection) > turnsCutActivationThreashold) {
330
								if (debug) println "cut with a Sync at [$start2, $end2] for section "+endSection
331
								newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker])
332
								if (debug) newTurnKaNode.@created = "yes"
333
								turn.@endTime = ""+start2;
334
								turn.remove(c)
335
								if (debug) c.@moved="yes"
336
								newTurnKaNode.append(c)
337
								iChildren--
338
							}
339
						} else {
340
							// no time to check
341
						}
342
					}
343
				}
344
				
345
				turnToInsert = newTurnKaNode
346
				
347
			}
348
		}
349
		
350 320
		outputDirectory.mkdir()
351 321
		File outfile = new File(outputDirectory, trsFile.getName())
352 322
		outfile.withWriter("UTF-8") { writer ->
......
364 334
	println "Error: "+e
365 335
	Log.printStackTrace(e)
366 336
}
337

  
338
/**
 * Splits the turn currently being processed at the first timed child
 * (a "w" or "Sync" element) that lies past the cut point, and moves that child
 * and every following element child into a newly created Turn.
 *
 * This is a script-level method: it works on script variables rather than on
 * parameters. It reads turn, turns, iTurn, sections, iSection, trsSection and
 * debug, and it modifies turn (its endTime and children), turns (the new Turn
 * is inserted at index iTurn) and iTurn (decremented once when a cut happens).
 *
 * The new Turn is created with trsSection as parent, starts at the time of the
 * cutting child, ends at the former end time of the turn and keeps its speaker.
 * When the cut is made on a "w" element, a Sync child carrying the cut time is
 * added first to the new Turn.
 *
 * Text children (String) are never moved, even when they are located after the
 * cut point: they stay in the original turn.
 * NOTE(review): confirm this is intended for transcriptions whose text is not
 * wrapped in "w" elements.
 *
 * @param testWithSectionEndTime if true, cut at the first child whose time is
 *        strictly greater than the end of sections[iSection]; otherwise cut at
 *        the first child whose time is greater than or equal to its start
 */
def cutTurn(def testWithSectionEndTime) {

	def children = turn.children()
	Node newTurnKaNode = null // stays null until a cut point is found

	// limits of the section, parsed only when they are stored as text
	def startSection = sections[iSection][0]
	if (startSection instanceof String) startSection = Float.parseFloat(startSection)
	def endSection = sections[iSection][1]
	if (endSection instanceof String) endSection = Float.parseFloat(endSection)

	if (debug) println " cut turn and test with end ? $testWithSectionEndTime of iSection=$iSection at iTurn=$iTurn start=${turn.@startTime} end=${turn.@endTime} children="+turn.children().size()

	for (int iChildren = 0 ; iChildren < children.size() ; iChildren++) {

		def c = children[iChildren]
		if (c instanceof String) continue // only the element children (Sync, w...) are tested and moved

		if (newTurnKaNode == null) { // still looking for the cut point

			boolean isWord = "w".equals(c.name())
			if (!isWord && !"Sync".equals(c.name())) continue // no time to check

			// time span of the child: a w has a start and an end, a Sync a single time
			def start2
			def end2
			if (isWord) {
				start2 = Float.parseFloat(c.@startTime)
				end2 = Float.parseFloat(c.@endTime)
			} else {
				start2 = c.@time
				if (start2 instanceof String) start2 = Float.parseFloat(start2)
				end2 = start2
			}

			boolean test = testWithSectionEndTime ? start2 > endSection : start2 >= startSection
			if (!test) continue // the child stays in the current turn

			if (debug) println " cut with a "+(isWord ? "w" : "Sync")+" at ($start2, $end2) for section ("+startSection+", "+endSection+")"
			newTurnKaNode = new Node(trsSection, "Turn", ["startTime":""+start2, "endTime":""+turn.@endTime, "speaker":turn.@speaker])
			if (isWord) new Node(newTurnKaNode, "Sync", ["time":""+start2]) // the new turn starts with a Sync at the cut time

			// insert the new turn at the current index and step back: the calling loop then
			// removes the current turn from turns and finds the new turn at its next iteration
			turns.add(iTurn, newTurnKaNode)
			iTurn--
			if (debug) newTurnKaNode.@created = "yes"
			turn.@endTime = ""+start2
		}

		// move the cutting child, then every following element child, to the new turn
		turn.remove(c)
		newTurnKaNode.append(c)
		if (debug) c.@moved="yes"
		iChildren-- // children is the live list of the turn: the next child took the place of c
	}
}
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/pager.groovy (revision 3416)
687 687
							if (SIMPLE_TOOLTIP) {
688 688
								if (type.contains("lemma") || type.contains("pos")) {
689 689
									flaginterp=true;
690
									interpvalue+=", ";
690
									interpvalue+="\n- ";
691 691
								}
692 692
							} else {
693 693
								flaginterp=true;
694
								interpvalue+=", "+type+"="
694
								interpvalue+="\n- "+type+"="
695 695
							}
696 696
							break;
697 697
						
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/importer.groovy (revision 3416)
188 188
		
189 189
		if (!doTokenizeStep) {
190 190
			
191
			println "No tokenization do to."
191
			println "No tokenization to do."
192 192
			cpb = new ConsoleProgressBar(files.length)
193 193
			for (File f : files) {
194 194
				File outfile = new File(tokenizedDir, f.getName());
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZDefaultPagerStep.groovy (revision 3416)
106 106
	 * @param max the max
107 107
	 * @param basename the basename
108 108
	 */
109
	public XTZDefaultPagerStep(XTZPager pager, File infile, String txtname, List<String> NoSpaceBefore,
110
	List<String> NoSpaceAfter, def cssList) {
109
	public XTZDefaultPagerStep(XTZPager pager, File infile, String txtname, List<String> NoSpaceBefore, List<String> NoSpaceAfter, def cssList) {
111 110
		this.pager = pager;
112 111
		this.paginationElement = pager.page_element;
113 112
		this.paginate = pager.paginate
114 113
		this.cssList = cssList;
115
		this.basename = pager.corpusname;
114
		this.c = pager.corpusname;
116 115
		this.txtname = txtname;
117 116
		this.outdir = pager.outputDirectory;
118 117
		this.wordmax = pager.wordsPerPage;
......
410 409
								pagedWriter.writeAttribute("class", "sync")
411 410
								if (parser.getAttributeValue(null,"time") != null) {
412 411
									pagedWriter.writeCharacters(parser.getAttributeValue(null,"time"))
412
									
413
									writeMediaAccess(parser.getAttributeValue(null,"time"), corpus, txtname)
413 414
								}
414 415
								break;
415 416
							case "p":
......
635 636
									endOfLastWord = lastword.subSequence(l-1, l)
636 637
								}
637 638
							
638
								String interpvalue = anaValues.entrySet().join(", ")
639
								String interpvalue = "- "+anaValues.entrySet().join("\n- ")+"\n- "+wordid
639 640
							
640 641
								if (NoSpaceBefore.contains(wordvalue) ||
641 642
										NoSpaceAfter.contains(lastword) ||
......
704 705
		}
705 706
		return true;
706 707
	}
708
	
709
	/**
	 * Writes, after a single space, an "a" element showing a play symbol whose
	 * onclick attribute calls the
	 * org.txm.backtomedia.commands.function.BackToMedia command with the given
	 * corpus, text and time.
	 *
	 * The two optional parameters make the method callable with three arguments,
	 * as the Sync handling of this class does; a call with only the time keeps
	 * using the basename and txtname fields as before.
	 *
	 * NOTE(review): the values are concatenated without escaping into a
	 * single-quoted script string; presumably they never contain a quote - confirm.
	 *
	 * @param time the media time to jump to
	 * @param corpus the corpus name written in the command, the basename field by default
	 * @param text the text name written in the command, the txtname field by default
	 */
	private void writeMediaAccess(def time, def corpus = basename, def text = txtname) {

		String command = "txmcommand('id', 'org.txm.backtomedia.commands.function.BackToMedia', 'corpus', '"+corpus+"', 'text', '"+text+"', 'time', '"+time+"')"

		pagedWriter.writeCharacters(" ");
		pagedWriter.writeStartElement("a");
		pagedWriter.writeAttribute("onclick", command);
		pagedWriter.writeAttribute("style", "cursor: pointer;")
		pagedWriter.writeAttribute("class", "play-media")
		pagedWriter.writeCharacters("▶");
		pagedWriter.writeEndElement(); // a
	}
707 719
}
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZImporter.groovy (revision 3416)
378 378
		
379 379
		//if (wordTag != "w") {
380 380
		if (!doTokenizeStep) {
381
			println "No tokenization do to."
381
			println "No tokenization to do."
382 382
			// ConsoleProgressBar cpb = new ConsoleProgressBar(filesToProcess.size())
383 383
			for (File f : filesToProcess) {
384 384
				File outfile = new File(module.getBinaryDirectory(),"tokenized/"+f.getName());
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/pager.groovy (revision 3416)
448 448
							if (l > 0)
449 449
								endOfLastWord = lastword.subSequence(l-1, l);
450 450

  
451
							String interpvalue = anaValues.entrySet().join(", ")
451
							anaValues.put("id", wordid)
452
							String interpvalue = "- "+anaValues.entrySet().join("\n- ")
452 453
							
453 454
							if (NoSpaceBefore.contains(wordvalue) ||
454 455
							NoSpaceAfter.contains(lastword) ||

Formats disponibles : Unified diff