Révision 2354

tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/pager.groovy (revision 2354)
33 33
import javax.xml.stream.*
34 34

  
35 35
import org.txm.importer.ApplyXsl2
36
import org.txm.metadatas.MetadataGroup
36 37
import org.txm.metadatas.Metadatas
37 38
import org.txm.utils.io.FileCopy;
38 39

  
......
229 230
		writer.writeAttribute("http-equiv", "Content-Type");
230 231
		writer.writeAttribute("content", "text/html");
231 232
		writer.writeAttribute("charset", "UTF-8");
232
		writer.writeEndElement();
233
		writer.writeEndElement(); // meta
233 234
		writer.writeStartElement("head");
234
		
235 235
		//<link rel="stylesheet" type="text/css" href="class.css" />
236 236
		writer.writeStartElement("link");
237 237
		writer.writeAttribute("rel", "stylesheet");
238 238
		writer.writeAttribute("type", "text/css");
239 239
		writer.writeAttribute("href", "transcriber.css");
240
		writer.writeEndElement();
241
		writer.writeEndElement();
240
		writer.writeEndElement(); // link
241
		writer.writeEndElement(); // head
242 242
		
243 243
		nbBreak++
244 244
		writer.writeStartElement("body");
......
254 254
					switch (localname) {
255 255
						case "text":
256 256
						
257
							writer.writeStartElement("h2");
257
							writer.writeStartElement("h1");
258 258
							writer.writeAttribute("class","title");
259 259
							String title = parser.getAttributeValue(null, "title");
260 260
						
......
264 264
								writer.writeCharacters("Transcription "+txmfile.getName().substring(0, txmfile.getName().length() - 4));
265 265
							}
266 266
						
267
							writer.writeEmptyElement("br");
268
							writer.writeStartElement("a");
269
							writer.writeAttribute("onclick", "txmcommand('id', 'org.txm.backtomedia.commands.function.BackToMedia', 'corpus', '"+corpusname+"', 'text', '"+txtname+"', 'time', '0.0')");
270
							writer.writeAttribute("style", "cursor: pointer;")
271
							writer.writeAttribute("class", "play-media")
272
							writer.writeCharacters(" ♪♪");
273
							writer.writeEndElement(); // a
267
							writeMediaAccess("0.0")
274 268
						
275
							writer.writeEndElement(); // h2
269
							writer.writeEndElement(); // h1
276 270
						
277 271
							String subtitle = parser.getAttributeValue(null, "subtitle");
278 272
							if (subtitle != null && subtitle.length() > 0) {
279
								writer.writeStartElement("h3");
273
								writer.writeStartElement("h2");
280 274
								writer.writeAttribute("class", "subtitle");
281 275
								writer.writeCharacters(subtitle);
282
								writer.writeEndElement(); // h3
276
								writer.writeEndElement(); // h2
283 277
							}
284 278
						
285
//							println "metadatas != null: "+(metadatas != null)
286
//							if (metadatas != null) {
287
								writer.writeStartElement("table");
288
								boolean grey = false;
289
								for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
290
									String name = parser.getAttributeName(i);
291
									String value = parser.getAttributeValue(i);
292
									
293
									if ("title" == name) {
294
										continue; // ignore "title" metadata
295
									}
296
									
297
									grey = !grey;
298
									writer.writeStartElement("tr");
299
									if (grey) {
300
										writer.writeAttribute("style","background-color:lightgrey;")
301
									}
302
									
303
									if (value != null) {
304
										writer.writeStartElement("td");
305
										writer.writeCharacters(name);
306
										writer.writeEndElement();
307
										writer.writeStartElement("td");
308
										writer.writeCharacters(value);
309
										writer.writeEndElement();
310
									}
311
									//get enqueteur to style their names
312
									if (name.startsWith("enq")) {
313
										interviewers.add(value)
314
									}
315
									writer.writeEndElement();
279
							writer.writeStartElement("table");
280
							writer.writeAttribute("class", "transcription-table");
281
							boolean grey = false;
282
							for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
283
								String name = parser.getAttributeName(i);
284
								String value = parser.getAttributeValue(i);
285
								
286
								if ("title" == name) {
287
									continue; // ignore "title" metadata
316 288
								}
317
								writer.writeEndElement();
318
//							}
289
								
290
								grey = !grey;
291
								writer.writeStartElement("tr");
292
								if (grey) {
293
									writer.writeAttribute("style","background-color:lightgrey;")
294
								}
295
								
296
								if (value != null) {
297
									writer.writeStartElement("td");
298
									writer.writeCharacters(name);
299
									writer.writeEndElement(); // td
300
									writer.writeStartElement("td");
301
									writer.writeCharacters(value);
302
									writer.writeEndElement(); // td
303
								}
304
								//get enqueteur to style their names
305
								if (name.startsWith("enq")) {
306
									interviewers.add(value)
307
								}
308
								writer.writeEndElement(); // tr
309
							}
310
							writer.writeEndElement(); // table
311
						//							}
319 312
							break;
320 313
						case "Topics":
321 314
						/*writer.writeStartElement("h2");
......
393 386
						
394 387
							String type = parser.getAttributeValue(null, "type")
395 388
							writer.writeAttribute("type", ""+type)
389
						
396 390
							String desc = parser.getAttributeValue(null, "topic")
397 391
						
398
							if (type != null || desc != null) {
399
								writer.writeStartElement("h2");
392
							if (type != null && type.length() > 0) {
393
								writer.writeStartElement("h1");
400 394
								writer.writeAttribute("class", "section-title")
401
								if (type != null || type.length() == 0) {
402
									writer.writeCharacters(type+": "+desc);
403
								} else {
404
									writer.writeCharacters(desc)
405
								}
395
								writer.writeCharacters(type);
406 396
								
407 397
								if (parser.getAttributeValue(null,"startTime") != null) {
408
									writer.writeEmptyElement("br");
409
									writer.writeStartElement("a")
410
									writer.writeAttribute("onclick", "txmcommand('id', 'org.txm.backtomedia.commands.function.BackToMedia', 'corpus', '"+corpusname+"', 'text', '"+txtname+"', 'time', '"+parser.getAttributeValue(null,"startTime")+"')");
411
									writer.writeAttribute("style", "cursor: pointer;")
412
									writer.writeAttribute("class", "play-media")
413
									writer.writeCharacters(" ♪♪")
414
									writer.writeEndElement() // a
398
									writeMediaAccess(parser.getAttributeValue(null,"startTime"))
415 399
								}
416 400
								
401
								writer.writeEndElement(); // h1
402
							}
403
						
404
							if (desc != null && desc.length() > 0) {
405
								writer.writeStartElement("h2");
406
								writer.writeAttribute("class", "section-desc")
407
								writer.writeCharacters(desc)
417 408
								writer.writeEndElement(); // h2
418 409
							}
419 410
						
420
							String metadata = parser.getAttributeValue(null, "metadata")
421
							if (metadata != null && metadata.length() > 0) { // the metadata to show
422
								writer.writeStartElement("ul")
423
								//println "metadata=$metadata"
424
								for (def m : metadata.split("<li>")) {
425
									writer.writeStartElement("li")
426
									writer.writeCharacters(m)
427
									writer.writeEndElement() // li
411
							def metadata = new LinkedHashMap<String, String>() // temp to store attributes
412
							def metadataGroups = ["metadata":[]] // default metadata group
413
							def metadataDeclared = false
414
							if (parser.getAttributeValue(null, "metadata") != null && parser.getAttributeValue(null, "metadata_groups") != null) {
415
								def l1 = parser.getAttributeValue(null, "metadata").split("\\|");
416
								def l2 = parser.getAttributeValue(null, "metadata_groups").split("\\|");
417
								for (int i = 0 ; i < l1.size() ; i++) {
418
									def m = l1[i]
419
									def g = l2[i]
420
									metadata[m] = "" // forcing order of metadata by pre-declaring
421
									if (!metadataGroups.containsKey(g)) metadataGroups[g] = []
422
									metadataGroups[g] << m // declaring a metadata type
428 423
								}
429
								writer.writeEndElement() // ul
430
							} else if (parser.getAttributeCount() > 1) { // process all attributes
431
								writer.writeStartElement("ul")
432
								for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
433
									String name = parser.getAttributeLocalName(i)
434
									if (!"type".equals(name)
435
									&& !"topic".equals(name)
436
									&& !"startTime".equals(name)
437
									&& !"endTime".equals(name)) {
438
										writer.writeStartElement("li")
439
										writer.writeCharacters(""+name+": "+parser.getAttributeValue(i))
440
										writer.writeEndElement() // li
424
								metadataDeclared = true
425
							}
426
						
427
							for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
428
								String name = parser.getAttributeLocalName(i)
429
								if (!"type".equals(name)
430
								&& !"topic".equals(name)
431
								&& !"startTime".equals(name)
432
								&& !"endTime".equals(name)) {
433
									if (metadataDeclared && !metadata.containsKey(name)) {
434
										continue; // ignoring metadata since not in declared metadata
435
									} else if (!metadataDeclared) { // declared names were already registered in their own group; adding them here would render them twice
436
										metadataGroups["metadata"] << name
441 437
									}
438
									
439
									metadata[name] = parser.getAttributeValue(i)
442 440
								}
443
								writer.writeEndElement(); // ul
444 441
							}
445 442
						
443
							writer.writeStartElement("p")
444
							writer.writeAttribute("class", "section-all-metadata");
445
							for (String groupName : metadataGroups.keySet()) {
446
								def group = metadataGroups[groupName]
447
								if (group.size() > 0) {
448
									if (groupName.equals("text")) {
449
										writer.writeStartElement("p")
450
										writer.writeAttribute("class", "section-"+groupName);
451
										for (String k : group) { // one <p> per metadata of the group: an <h4> holding the name, followed by the value
452
											writer.writeStartElement("p")
453
											writer.writeAttribute("class", ""+groupName)
454
											writer.writeStartElement("h4")
455
											writer.writeCharacters(k)
456
											writer.writeEndElement() // h4
457
											writer.writeCharacters(metadata[k])
458
											writer.writeEndElement() // p
459
										}
460
									} else {
461
										writer.writeStartElement("ul")
462
										writer.writeAttribute("class", "section-"+groupName);
463
										for (String k : group) {
464
											writer.writeStartElement("li")
465
											writer.writeAttribute("class", ""+groupName)
466
											writer.writeCharacters(""+k+": "+metadata[k])
467
											writer.writeEndElement() // li
468
										}
469
									}
470
									
471
									writer.writeEndElement(); // ul or p
472
								}
473
							}
474
							writer.writeEndElement(); // p
475
						
446 476
							break;
447 477
						case "sp":
448 478
							endBoldIfNeeded()
......
578 608
					}
579 609
					previousElem = localname;
580 610
					break;
611
				
581 612
				case XMLStreamConstants.END_ELEMENT:
582 613
					localname = parser.getLocalName();
583
					switch(localname)
584
					{
614
					switch(localname) {
585 615
						case "text":
586 616
							break;
587 617
						case "Topics":
......
722 752
					break;
723 753
			}
724 754
		}
725
		writer.writeEndElement();
755
		writer.writeEndElement(); // body
726 756
		
727 757
		writer.writeEmptyElement("pb");
728 758
		nbBreak++
729 759
		writer.writeAttribute("id", ""+nbBreak);
730 760
		
731
		writer.writeEndElement();
761
		writer.writeEndElement(); // html
732 762
		writer.close();
733 763
		output.close();
734 764
		if (parser != null) parser.close();
......
767 797
		writer.writeAttribute("class", "sync");
768 798
		writer.writeCharacters(currentTime);
769 799
		
800
		writeMediaAccess(currentTime)
801
		
802
		writer.writeEndElement(); // span
803
	}
804
	
805
	private void writeMediaAccess(def time) {
770 806
		writer.writeStartElement("a");
771
		writer.writeAttribute("onclick", "txmcommand('id', 'org.txm.backtomedia.commands.function.BackToMedia', 'corpus', '"+corpusname+"', 'text', '"+txtname+"', 'time', '"+currentTime+"')");
807
		writer.writeAttribute("onclick", "txmcommand('id', 'org.txm.backtomedia.commands.function.BackToMedia', 'corpus', '"+corpusname+"', 'text', '"+txtname+"', 'time', '"+time+"')");
772 808
		writer.writeAttribute("style", "cursor: pointer;")
773 809
		writer.writeAttribute("class", "play-media")
774
		writer.writeCharacters(" ???♪");
810
		writer.writeCharacters(" ♪♪"); // same label as the inline media links this helper replaces; the previous literal was a mis-encoded " ???"
775 811
		writer.writeEndElement(); // a
776
		
777
		writer.writeEndElement();
778 812
	}
779 813
	
780 814
	private void writeSpeaker(String spk, boolean overlapping) {
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/AddSections.groovy (revision 2354)
1 1
import java.nio.charset.Charset
2 2

  
3
import org.txm.utils.AsciiUtils
4
import org.txm.utils.ConsoleProgressBar
5
import org.txm.utils.CsvReader
6 3
import java.time.LocalTime
7 4
import java.time.format.DateTimeFormatter
5
import org.txm.utils.*
8 6

  
9
File metadataFile = new File("/home/mdecorde/TEMP/ANTRACT/AF/metadata.tsv")
7
File metadataFile = new File("/home/mdecorde/TEMP/ANTRACT/AF/sujets.xlsx")
10 8
File trsDirectory = new File("/home/mdecorde/TEMP/ANTRACT/AF/trs")
11
def idTRSColumn = "Lien notice principale"
12
def typeColumns = ["Identifiant de la notice"]
13
def topicColumns = ["Titre propre", "Notes du titre"]
9

  
10
def joinTRSColumn = "Lien notice principale"
11
def typeColumns = ["Date de diffusion"]
12
def topicColumns = ["Titre propre"]
14 13
def startTimeColumn = "antract_debut"
15 14
def endTimeColumn = "antract_fin"
16 15
//def metadataColumns = ["Identifiant de la notice", "Titre propre", "antract_debut", "antract_fin"]
17
def metadataColumns = ["Date de diffusion", "Descripteurs (Aff. Col.)", "Durée", "Générique (Aff. Col.)", "Genre", "Identifiant de la notice", "Langue VO / VE ", "Lien notice principale", "Nature de production ", "Nom fichier segmenté (info)", "Notes du titre ", "Producteurs (Aff.)", "Résumé", "Séquences", "Thématique", "Titre propre", "Type de date", "Type de notice"]
16
def metadataColumns = [
17
				"Titre propre", "Date de diffusion", "Identifiant de la notice", "Notes du titre", "Type de date", "Durée", "Genre", "Langue VO / VE", "Nature de production", "Producteurs (Aff.)", "Thématique",
18
				"Nom fichier segmenté (info)", "antract_video", "antract_debut","antract_fin","antract_duree","antract_tc_type","antract_tc_date",
19
				"Résumé", "Séquences", "Descripteurs (Aff. Lig.)", "Générique (Aff. Lig.)"]
20
def metadataColumnsGroups = [
21
	"metadata", "metadata", "metadata", "metadata", "metadata", "metadata", "metadata", "metadata", "metadata", "metadata", "metadata",
22
	"secondary", "secondary", "secondary","secondary","secondary","secondary","secondary",
23
	"text", "text", "text", "text"]
18 24

  
25
if (metadataColumns.size() != metadataColumnsGroups.size()) {
26
	println "ERROR in metadata declarations&groups:"
27
	println "COLUMNS: "+metadataColumns
28
	println "GROUPS : "+metadataColumnsGroups
29
	return
30
}
31

  
19 32
if (!trsDirectory.exists()) {
20 33
	println "$trsDirectory not found"
21 34
	return
22 35
}
23 36

  
24 37
println "Loading data from $metadataFile..."
25
CsvReader reader = new CsvReader(metadataFile.getAbsolutePath(), "\t".charAt(0), Charset.forName("UTF-8"))
38
TableReader reader = new TableReader(metadataFile)//, "\t".charAt(0), Charset.forName("UTF-8")
26 39
reader.readHeaders()
27 40
def header = reader.getHeaders()
28
if (!header.contains(idTRSColumn)) {
29
	println "No TRS ID $idTRSColumn column found"
41
if (!header.contains(joinTRSColumn)) {
42
	println "No TRS ID $joinTRSColumn column found"
30 43
	return
31 44
}
32 45
if (!header.contains(startTimeColumn)) {
......
82 95
def sectionGroupsToInsert = [:]
83 96
println "Reading data..."
84 97
while (reader.readRecord()) {
85
	String id = reader.get(idTRSColumn).trim()
98
	String id = reader.get(joinTRSColumn).trim()
86 99
	if (id.endsWith(".mp4")) id = id.substring(0, id.length()-4)
87 100
	if (id.length() == 0) continue;
88 101
		
......
91 104
	}
92 105
	def section = sectionGroupsToInsert[id]
93 106
	
94
	
95 107
	if (reader.get(startTimeColumn) != null && reader.get(startTimeColumn).length() > 0) { // ignore non timed sections
96 108
		
97 109
		def m = [:]
......
100 112
			def data = []
101 113
			for (def col : todo.value) {
102 114
				if (reader.get(col).trim().length() > 0) {
103
					data << col+": "+reader.get(col).trim().replace("\n", "")
115
					data << reader.get(col).trim().replace("\n", "")
104 116
				}
105 117
			}
106 118
			m[todo.key] = data.join("\t")
107 119
		}
108
		for (def col : metadataColumns) {
109
			m[AsciiUtils.buildAttributeId(col)] = reader.get(col)
120
		def metadataList = []
121
		def metadataGroupList = []
122
		for (int i = 0 ;  i < metadataColumns.size() ; i++) {
123
			def col = metadataColumns[i]
124
			String c = AsciiUtils.buildAttributeId(col)
125
			m[c] = reader.get(col)
126
			metadataList << c
127
			metadataGroupList << metadataColumnsGroups[i]
110 128
		}
129
		m["metadata"] = metadataList.join("|") 
130
		m["metadata_groups"] = metadataGroupList.join("|")
111 131
		
112 132
		m["startTime"] = strTotime(reader.get(startTimeColumn))
113 133
		m["endTime"] = strTotime(reader.get(endTimeColumn))
......
204 224
	}
205 225
}
206 226
cpb.done()
227
reader.close()
207 228
println "Done."
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/transcriberLoader.groovy (revision 2354)
85 85
int wordsPerPage = project.getEditionDefinition("default").getWordsPerPage()
86 86
String page_element = project.getEditionDefinition("default").getPageElement()
87 87
boolean build_edition = project.getEditionDefinition("default").getBuildEdition()
88
boolean update = project.getDoUpdate()
88 89

  
89 90
File srcDir = new File(rootDir);
90 91
File binDir = project.getProjectDirectory();
......
95 96
}
96 97

  
97 98
File txmDir = new File(binDir,"txm/$corpusname");
98
txmDir.deleteDir();
99
if (!update) txmDir.deleteDir();
99 100
txmDir.mkdirs();
100 101

  
101 102
//get metadata values from CSV
......
115 116
else {
116 117
	println "no metadata file: "+allMetadataFile
117 118
}
119

  
120
final HashMap<String, String> textordersInfo = new HashMap<String, String>(); // file name -> value of the "textorder" metadata of the text
121
for (String t : (metadatas == null ? [] : metadatas.keySet())) { // metadatas may be null when no metadata file was loaded
122
	def ti = metadatas.get(t)
123
	for (org.txm.metadatas.Entry e : ti) {
124
		if ("textorder".equals(e.getId())) {
125
			textordersInfo[t+".xml"] = e.getValue() // value of the entry, not of the list; NOTE(review): assumes Entry exposes getValue() and that the sorted txmDir files are named <text>.xml - confirm
126
		}
127
	}
128
}
118 129

  
119 130
File propertyFile = new File(srcDir, "import.properties")//default
120 131
Properties props = new Properties();
......
123 134
	FileInputStream input = new FileInputStream(propertyFile);
124 135
	props.load(input);
125 136
	input.close();
126

  
137
	
127 138
	if (props.getProperty("removeInterviewer") != null)
128 139
		removeInterviewer = Boolean.parseBoolean(props.get("removeInterviewer").toString());
129 140
	if (props.getProperty("ignoreTranscriberMetadata") != null)
......
134 145
		csvHeaderNumber = props.get("csvHeaderNumber").toString().split("\\|"); // split() takes a regex: an unescaped "|" matches the empty string and splits between every character
135 146
	//if (props.getProperty("includeComments") != null)
136 147
	//	includeComments = props.get("includeComments").toString();
137

  
148
	
138 149
	println "import properties: "
139 150
	println " removeInterviewer: "+removeInterviewer
140 151
	println " ignoreTranscriberMetadata: "+ignoreTranscriberMetadata
......
145 156

  
146 157

  
147 158

  
148
// Apply XSL
149
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
150
if (MONITOR != null) MONITOR.worked(1, "XSL")
151
if (xsl != null && xsl.trim().length() > 0) {
152
	if (ApplyXsl2.processImportSources(new File(xsl), srcDir, new File(binDir, "src")))
153
		srcDir = new File(binDir, "src");
154
	println ""
155
}
156 159

  
157
try {
158
	// select only trs files
159
	String ext = "trs";
160
	ArrayList<File> trsfiles = srcDir.listFiles(); //find all trs files
161
	if (trsfiles  == null) {
162
		println ("No files in "+srcDir.getAbsolutePath())
163
		return false;
164
	}
165
	for (int i = 0 ; i < trsfiles.size() ; i++) {
166
		File f = trsfiles.get(i);
167
		if (!f.getName().endsWith(ext) || !f.canRead() || f.isHidden()) {
168
			trsfiles.remove(i)
169
			i--;
170
		}
171
	}
172 160

  
173
	if (trsfiles.size() == 0) {
174
		println ("No transcription file (*.trs) found in "+srcDir.getAbsolutePath()+". Aborting.")
175
		return false;
176
	}
177

  
178
	if (MONITOR != null) MONITOR.worked(1, "IMPORTER")
179
	if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
180
	println "-- IMPORTER"
181
	def imp = new importer(trsfiles, binDir, txmDir, metadatas, lang) //put result in the txm folder of binDir
182
	if (!imp.run()) {
183
		println "Failed to prepare files - Aborting";
184
		return;
185
	}
186
	if (MONITOR != null) MONITOR.worked(20)
187

  
188
	println "-- Xml Validation"
189
	if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
190
	for (File infile : txmDir.listFiles()) {
191
		if (!ValidateXml.test(infile)) {
192
			println "$infile : Validation failed";
193
			infile.delete();
161
try {
162
	if (!update) {
163
		// Apply XSL
164
		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
165
		if (MONITOR != null) MONITOR.worked(1, "XSL")
166
		if (xsl != null && xsl.trim().length() > 0) {
167
			if (ApplyXsl2.processImportSources(new File(xsl), srcDir, new File(binDir, "src")))
168
				srcDir = new File(binDir, "src");
169
			println ""
170
		}
171
		
172
		// select only trs files
173
		String ext = "trs";
174
		ArrayList<File> trsfiles = srcDir.listFiles(); //find all trs files
175
		if (trsfiles  == null) {
176
			println ("No files in "+srcDir.getAbsolutePath())
177
			return false;
194 178
		}
195
	}
196

  
197
	if (MONITOR != null) MONITOR.worked(5)
198
	if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
199
	println "-- Remove interviewer: "+removeInterviewer
200
	if (removeInterviewer) 	{
201
		if (metadatas == null) {
202
			println "Can't remove interviewer without a metadata.csv file defining who are the interviewers."
203
		} else {
204
			println "Removing some speakers in "+txmDir.listFiles().length+" file(s)"
205
			for (File infile : txmDir.listFiles()) {
206
				String filename = infile.getName();
207
				int idx = filename.indexOf(".xml");
208
				if (idx > 0)
209
					filename = filename.substring(0, idx);
210

  
211
				ArrayList<Pair<String, String>> metas = metadatas.get(filename)
212
				//println "filename=$filename metas= $metas"
213
				for (Pair p : metas) {
214
					if (p.getFirst().startsWith("enq")) {
215
						new RemoveSpeaker(infile, infile, p.getFirst())
179
		for (int i = 0 ; i < trsfiles.size() ; i++) {
180
			File f = trsfiles.get(i);
181
			if (!f.getName().endsWith(ext) || !f.canRead() || f.isHidden()) {
182
				trsfiles.remove(i)
183
				i--;
184
			}
185
		}
186
		
187
		if (trsfiles.size() == 0) {
188
			println ("No transcription file (*.trs) found in "+srcDir.getAbsolutePath()+". Aborting.")
189
			return false;
190
		}
191
		
192
		if (MONITOR != null) MONITOR.worked(1, "IMPORTER")
193
		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
194
		println "-- IMPORTER"
195
		def imp = new importer(trsfiles, binDir, txmDir, metadatas, lang) //put result in the txm folder of binDir
196
		if (!imp.run()) {
197
			println "Failed to prepare files - Aborting";
198
			return;
199
		}
200
		if (MONITOR != null) MONITOR.worked(20)
201
		
202
		println "-- Xml Validation"
203
		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
204
		for (File infile : txmDir.listFiles()) {
205
			if (!ValidateXml.test(infile)) {
206
				println "$infile : Validation failed";
207
				infile.delete();
208
			}
209
		}
210
		
211
		if (MONITOR != null) MONITOR.worked(5)
212
		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
213
		println "-- Remove interviewer: "+removeInterviewer
214
		if (removeInterviewer) 	{
215
			if (metadatas == null) {
216
				println "Can't remove interviewer without a metadata.csv file defining who are the interviewers."
217
			} else {
218
				println "Removing some speakers in "+txmDir.listFiles().length+" file(s)"
219
				for (File infile : txmDir.listFiles()) {
220
					String filename = infile.getName();
221
					int idx = filename.indexOf(".xml");
222
					if (idx > 0)
223
						filename = filename.substring(0, idx);
224
					
225
					ArrayList<Pair<String, String>> metas = metadatas.get(filename)
226
					//println "filename=$filename metas= $metas"
227
					for (Pair p : metas) {
228
						if (p.getFirst().startsWith("enq")) {
229
							new RemoveSpeaker(infile, infile, p.getFirst())
230
						}
216 231
					}
217
				}
232
				}
233
			}
234
		}
235
		
236
		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
237
		if (MONITOR != null) MONITOR.worked(20, "ANNOTATE")
238
		
239
		boolean annotationSuccess = false;
240
		if (annotate) {
241
			println "-- ANNOTATE - Running NLP tools"
242
			def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
243
			if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
244
				annotationSuccess = true;
218 245
			}
219 246
		}
220
	}
221

  
247
	}
248
	trsfiles = new ArrayList<File>(Arrays.asList(txmDir.listFiles()));
249
	
250
	if (metadatas != null && metadatas.getPropertyNames().contains("textorder")) { // order the texts with the "textorder" metadata when it is defined
251
		Collections.sort(trsfiles, new Comparator<File>() {
252
					public int compare(File f1, File f2) {
253
						String o1 = textordersInfo[f1.getName()]; // the map filled from the metadata is textordersInfo; "textorder" is not defined
254
						String o2 = textordersInfo[f2.getName()];
255
						if (o1 == null && o2 == null) {
256
							return f1.compareTo(f2);
257
						} else if (o1 == null) {
258
							return 1
259
						} else if (o2 == null) {
260
							return -1
261
						} else {
262
							int c = o1.compareTo(o2);
263
							if (c == 0) return f1.compareTo(f2);
264
							else return c;
265
						}
266
					}
267
				});
268
	} else {
269
		Collections.sort(trsfiles);
270
	}
271
	
222 272
	if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
223
	if (MONITOR != null) MONITOR.worked(20, "ANNOTATE")
224

  
225
	boolean annotationSuccess = false;
226
	if (annotate) {
227
		println "-- ANNOTATE - Running NLP tools"
228
		def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
229
		if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
230
			annotationSuccess = true;
231
		}
232
	}
233

  
234
	if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
235 273
	if (MONITOR != null) MONITOR.worked(25, "COMPILING")
236 274
	println "--COMPILING - Building Search Engine indexes"
237
	trsfiles = txmDir.listFiles();
238

  
275
	
239 276
	def comp = new compiler()
240 277
	if(debug) comp.setDebug();
241 278
	comp.removeInterviewers(removeInterviewer);
242 279
	comp.setIgnoreTranscriberMetadata(ignoreTranscriberMetadata);
243
	comp.setAnnotationSucces(annotationSuccess)
244 280
	if (!comp.run(project, trsfiles, corpusname, "default", binDir)) {
245 281
		println "Failed to compile files";
246 282
		return;
247 283
	}
248

  
284
	
249 285
	if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
250

  
286
	
251 287
	File htmlDir = new File(binDir,"HTML/$corpusname");
252 288
	htmlDir.deleteDir()
253 289
	htmlDir.mkdirs();
254 290
	if (build_edition) {
255

  
291
		
256 292
		if (MONITOR != null) MONITOR.worked(20, "EDITION")
257 293
		println "-- EDITION - Building editions"
258

  
259
		List<File> filelist = txmDir.listFiles();
260
		Collections.sort(filelist);
294
		
261 295
		def second = 0
262

  
263
		println "Paginating "+filelist.size()+" texts"
264
		ConsoleProgressBar cpb = new ConsoleProgressBar(filelist.size());
265
		for (File txmFile : filelist) {
296
		
297
		println "Paginating "+trsfiles.size()+" texts"
298
		ConsoleProgressBar cpb = new ConsoleProgressBar(trsfiles.size());
299
		for (File txmFile : trsfiles) {
266 300
			cpb.tick()
267 301
			String txtname = txmFile.getName();
268 302
			int i = txtname.lastIndexOf(".");
269 303
			if(i > 0) txtname = txtname.substring(0, i);
270

  
304
			
271 305
			List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
272 306
			List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);
273

  
274
			Text t = new Text(project);
275
			t.setName(txtname);
276
			t.setSourceFile(txmFile)
277
			t.setTXMFile(txmFile)
278

  
307
			
308
			Text t = project.getText(txtname) // reuse the Text already registered in the project, if any
309
			if (t == null) {
310
				t = new Text(project); // keep the reference: the setters below were called on a null variable
311
				t.setName(txtname);
312
				t.setSourceFile(txmFile)
313
				t.setTXMFile(txmFile)
314
			}
279 315
			def ed = new pager(txmFile, htmlDir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element, metadatas);
280 316
			Edition edition = new Edition(t);
281 317
			edition.setName("default");
......
288 324
			}
289 325
		}
290 326
		cpb.done()
291

  
327
		
292 328
		//copy transcriber.css
293 329
		File cssfile = new File(Toolbox.getTxmHomePath(), "css/transcriber.css")
294 330
		File cssTXMFile = new File(Toolbox.getTxmHomePath(), "css/txm.css")
......
299 335
		}
300 336
		
301 337
		//copy media files
302
		println "Copying media files if any (mp3, wav, mp4 or avi) "+filelist.size()+" texts"
303
		cpb = new ConsoleProgressBar(filelist.size());
304
		for (File txmFile : filelist) {
338
		println "Copying media files if any (mp3, wav, mp4 or avi) "+trsfiles.size()+" texts"
339
		cpb = new ConsoleProgressBar(trsfiles.size());
340
		for (File txmFile : trsfiles) {
305 341
			cpb.tick()
306 342
			String txtname = txmFile.getName();
307 343
			int i = txtname.lastIndexOf(".");
......
310 346
			if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".wav")
311 347
			if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".mp4")
312 348
			if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".avi")
313
				
349
			
314 350
			if (mediaFile.exists()) {
315 351
				File copy = new File(binDir, "media/"+mediaFile.getName())
316 352
				copy.getParentFile().mkdirs()
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/compiler.groovy (revision 2354)
73 73
	/** The debug. */
74 74
	boolean debug = false;
75 75

  
76
	/** The annotation succes. */
77
	boolean annotationSucces = false;
78

  
79 76
	/** The removeinterviewers. */
80 77
	boolean removeinterviewers = false;
81 78

  
......
162 159

  
163 160
		println("Compiling "+xmlfiles.size()+" files")
164 161
		ConsoleProgressBar cpb = new ConsoleProgressBar(xmlfiles.size())
165
		for (File f :xmlfiles) {
166
			if (f.exists()) {
162
		for (File txmFile :xmlfiles) {
163
			if (txmFile.exists()) {
167 164
				cpb.tick()
168
				if (!process(f)) {
169
					println("Failed to compile "+f)
165
				if (!process(txmFile)) {
166
					println("Failed to compile "+txmFile)
170 167
				}
171 168
			}
172 169
		}
......
680 677
		debug = true;
681 678
	}
682 679

  
683
	/**
684
	 * Sets the annotation success.
685
	 *
686
	 * @param val the new annotation success
687
	 */
688
	public void setAnnotationSuccess(boolean val) {
689
		annotationSucces = val
690
	}
691 680
}

Formats disponibles : Unified diff