Révision 3407

TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/transcriberLoader.groovy (revision 3407)
172 172
			println "Failed to prepare files - Aborting";
173 173
			return;
174 174
		}
175
		if (MONITOR != null) MONITOR.worked(20)
175
		if (MONITOR != null) MONITOR.worked(20)
176
			
177
//		File antractXSL = new File(srcDir, "special.xsl")
178
//		if (antractXSL.exists()) {
179
//			println "Applying special XSL: $antractXSL"
180
//			if (!ApplyXsl2.processImportSources(antractXSL, txmDir, txmDir)) {
181
//				println "Error while processing XML-TXM files."
182
//				return false;
183
//			}
184
//		}
176 185

  
177 186
		println "-- Xml Validation"
178 187
		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
......
187 196
		cpb.done()
188 197

  
189 198
		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
190
		if (MONITOR != null) MONITOR.worked(20, "ANNOTATE")
199
		if (MONITOR != null) MONITOR.worked(20, "ANNOTATE")
200
			
201
			
191 202

  
192 203
		boolean annotationSuccess = false;
193 204
		if (annotate) {
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/compiler.groovy (revision 3407)
46 46
 * The Class compiler.
47 47
 */
48 48
class compiler {
49

  
49
	
50 50
	boolean ADD_TEXTID_TO_REF = true
51 51
	
52 52
	/** The input data. */
53 53
	private def inputData;
54

  
54
	
55 55
	/** The factory. */
56 56
	private def factory;
57

  
57
	
58 58
	/** The parser. */
59 59
	private XMLStreamReader parser;
60

  
60
	
61 61
	/** The output. */
62 62
	OutputStreamWriter output;
63

  
63
	
64 64
	/** The basename. */
65 65
	String corpusname;
66

  
66
	
67 67
	/** The projectname. */
68 68
	String projectname
69

  
69
	
70 70
	/** The outdir. */
71 71
	String outdir;
72

  
72
	
73 73
	/** The debug. */
74 74
	boolean debug = false;
75

  
75
	
76 76
	/** The indexInterviewer: index interviewer speech if true. */
77 77
	boolean indexInterviewer = true;
78

  
78
	
79 79
	/** The trans. */
80 80
	HashMap<String, ArrayList<Pair<String, String>>> trans;
81

  
81
	
82 82
	/** The speakers. */
83 83
	HashMap<String, ArrayList<Pair<String, String>>> speakers;
84

  
84
	
85 85
	/** The speakersname. */
86 86
	HashMap<String, String> speakersname = new HashMap<String, String>();
87

  
87
	
88 88
	/** The topics. */
89 89
	HashMap<String, ArrayList<Pair<String, String>>> topics;
90

  
91

  
90
	
91
	
92 92
	/** The interviewers regex */
93 93
	def interviewers = null
94 94
	static LinkedHashSet<String> sectionAttrs;
95 95
	static LinkedHashSet<String> spAttrs;
96 96
	static LinkedHashSet<String> uAttrs;
97

  
97
	
98 98
	/** The anatypes. */
99 99
	private static anatypes = []
100 100
	private static anavalues = [:]
101

  
101
	
102 102
	/**
103 103
	 * Removes the interviewers.
104 104
	 *
......
108 108
	// Setter for the indexInterviewer flag, which the field comment describes as
	// "index interviewer speech if true".
	public setIndexInterviewer(boolean value) {
109 109
		this.indexInterviewer = value;
110 110
	}
111

  
111
	
112 112
	File cqpFile
113
	LinkedHashMap<String, LinkedHashMap<String, String>> projectionsFromValues = new LinkedHashMap<String, LinkedHashMap<String, String>>(); // values of properties to inject
114
	LinkedHashMap<String, LinkedHashMap<String, ArrayList<ArrayList>>> projectionsToDo = new LinkedHashMap<String, LinkedHashMap<String, ArrayList<ArrayList>>>(); // list of projections to do
115
	
113 116
	/**
114 117
	 * Run.
115 118
	 *
......
125 128
		this.outdir = binDir;
126 129
		this.corpusname = corpusname;
127 130
		this.projectname = projectname;
128

  
131
		
129 132
		anatypes = ["event"] // reset
130 133
		anavalues = [:] // reset
131

  
134
		
132 135
		sectionAttrs = new LinkedHashSet<String>() // reset section attributs set
133 136
		spAttrs = new LinkedHashSet<String>() // reset section attributs set
134 137
		uAttrs = new LinkedHashSet<String>() // reset section attributs set
135

  
138
		
136 139
		CorpusBuild corpus = project.getCorpusBuild(project.getName(), MainCorpus.class);
137 140
		if (corpus != null) {
138 141
			if (project.getDoUpdate()) {
......
152 155
		new File(binDir,"cqp").mkdirs()
153 156
		new File(binDir,"data").mkdirs()
154 157
		new File(binDir,"registry").mkdirs()
155

  
158
		
156 159
		// get all anatypes
157 160
		for (File f : xmlfiles) {
158 161
			getAnaTypes(f)
159 162
		}
163
		
164
		// Building projections datas to use for each step
165
		String projectionsParameterValue = project.getTextualPlan("Projections").trim()
166
		projectionsParameterValue = projectionsParameterValue.replace("\n", "\t")
167
		def projectionsParameter = projectionsParameterValue.split("\t");
168
		if (projectionsParameterValue.length() > 0) {
169
			for (def projection : projectionsParameter) {
170
				if (!projection.contains("->")) continue;
171
				String[] fromTo = projection.split("->", 2)
172
				String from = fromTo[0].trim()
173
				String to = fromTo[1].trim()
174
				if (projection.contains("->") && from.contains("_") && to.contains("_")) {
175
					String toStructure = to.substring(0, to.indexOf("_"))
176
					String toStructureProperty = to.substring(to.indexOf("_") + 1)
177
					String fromStructure = from.substring(0, from.indexOf("_"))
178
					String fromStructureProperty = from.substring(from.indexOf("_") + 1)
179
					
180
					if (!projectionsToDo.containsKey(toStructure)) {
181
						projectionsToDo[toStructure] = new LinkedHashMap<String, ArrayList<ArrayList>>();
182
					}
183
					if (!projectionsToDo[toStructure].containsKey(fromStructure)) {
184
						projectionsToDo[toStructure][fromStructure] = new ArrayList<ArrayList>();
185
					}
186
					projectionsToDo[toStructure][fromStructure].add([toStructureProperty, fromStructureProperty])
187
					
188
					if (!projectionsFromValues.containsKey(fromStructure)) projectionsFromValues[fromStructure] = new LinkedHashMap<String, String>();
189
					projectionsFromValues[fromStructure][fromStructureProperty] = "";
190
				}
191
			}
192
		}
193
		
160 194
		//println "ANATYPES: "+anatypes
161 195
		if (!createOutput(cqpFile)) return false;
162 196
		output.write("<txmcorpus lang=\"fr\">\n")
163 197
		output.close();
164

  
198
		
165 199
		println("Compiling "+xmlfiles.size()+" files")
166 200
		ConsoleProgressBar cpb = new ConsoleProgressBar(xmlfiles.size())
167 201
		for (File txmFile :xmlfiles) {
......
176 210
		if (!createOutput(cqpFile)) return false;
177 211
		output.write("</txmcorpus>\n")
178 212
		output.close();
179

  
213
		
180 214
		//2- Import into CWB
181 215
		File registryFile = new File(binDir, "registry/"+corpusname.toLowerCase())
182 216
		File dataDir = new File(binDir, "data/$corpusname")
183

  
217
		
184 218
		new File(binDir, "registry").mkdir();
185 219
		if (!new File(binDir, "registry").exists()) {
186 220
			println "Can't create registry directory"
187 221
			return false;
188 222
		}
189

  
223
		
190 224
		if (!(CwbEncode.isExecutableAvailable() && CwbMakeAll.isExecutableAvailable())) {
191 225
			println ("Error: CWB executables not well set.")
192 226
			return false;
......
195 229
		CwbMakeAll cwbMa = new CwbMakeAll();
196 230
		cwbEn.setDebug(debug);
197 231
		cwbMa.setDebug(debug);
198

  
232
		
199 233
		String uAttr = "u:0";
200 234
		for (String attr : uAttrs) {
201 235
			uAttr += "+"+attr
......
218 252
					if (ignoreTranscriberMetadata) {
219 253
						String meta =p.getFirst();
220 254
						if (meta != "scribe" && meta != "audio_filename" &&
221
							meta != "version" && meta != "version_date")
255
								meta != "version" && meta != "version_date")
222 256
							textAttr+="+"+meta
223 257
					} else {
224 258
						textAttr+="+"+p.getFirst()
......
227 261
				break;
228 262
			}
229 263
		}
230

  
264
		
231 265
		List<String> pargs = ["spk", "ref", "id", "entitytype", "entityid"]
232 266
		for (String ana : anatypes) if (!pargs.contains(ana)) pargs.add(ana)
233

  
267
		
234 268
		String[] pAttributes = pargs
235

  
269
		
236 270
		String[] sAttributes = ["txmcorpus:0+lang", uAttr , textAttr, "event:0+id+desc+type+extent", sectionAttr, spAttr];
237

  
271
		
272
		// registering the projected structure properties
273
		//println "registering: $projectionsToDo"
274
		for (String struct : projectionsToDo.keySet()) {
275
			for (String struct2 : projectionsToDo[struct].keySet()) {
276
				for (def couple : projectionsToDo[struct][struct2]) {
277
					//sattrsListener.getStructs()[struct].add(couple[0])
278
					//println "add $struct $couple"
279
					for (int i = 0 ; i < sAttributes.size() ; i++) {
280
						if (sAttributes[i].startsWith(struct+":") && !sAttributes[i].contains(couple[0])) {
281
							sAttributes[i] = sAttributes[i] + "+"+couple[0]
282
						}
283
					}
284
				}
285
			}
286
		}
287
		
238 288
		println "pAttributes: $pAttributes"
239 289
		println "sAttributes: $sAttributes"
240 290
		//return;
......
246 296
				return false;
247 297
			}
248 298
			cwbMa.run(corpusname, registryFile.getParent());
249

  
299
			
250 300
		} catch (Exception ex) {System.out.println(ex); return false;}
251

  
301
		
252 302
		if (project.getCleanAfterBuild()) {
253 303
			new File(project.getProjectDirectory(), "cqp").deleteDir()
254 304
		}
255 305
		
256 306
		return true;
257 307
	}
258

  
308
	
259 309
	/**
260 310
	 * Creates the output.
261 311
	 *
......
273 323
			return false;
274 324
		}
275 325
	}
276

  
326
	
277 327
	/** The text_id. */
278 328
	String text_id
279

  
329
	
280 330
	/** The u opened. */
281 331
	boolean uOpened = false;
282

  
332
	
283 333
	/** The idturn. */
284 334
	int idturn = 1;
285

  
335
	
286 336
	/** The idsection. */
287 337
	int idsection = 1;
288

  
338
	
289 339
	/** The idu. */
290 340
	int idu = 1;
291

  
341
	
292 342
	/** The idevent. */
293 343
	int idevent = 1;
294

  
344
	
295 345
	/** The events. */
296 346
	List<String> events = [];
297 347
	static int vEntityId = 0;
298 348
	static int vEntityIdCount = 1;
299

  
349
	
350
	/**
	 * Writes the projected properties registered for an element as extra
	 * attributes of the start tag currently being written to the output.
	 *
	 * For every (target property, source property) couple stored in
	 * projectionsToDo for this element, the value currently held in
	 * projectionsFromValues for the source structure is written as
	 * target="value". Nothing is written when no projection targets the element.
	 *
	 * @param localname the name of the element whose start tag is being written
	 */
	protected void writeProjections(String localname) {
		if (!projectionsToDo.containsKey(localname)) return;
		
		for (String from : projectionsToDo[localname].keySet()) {
			for (def couple : projectionsToDo[localname][from]) {
				def targetProperty = couple[0]
				def sourceProperty = couple[1]
				def value = projectionsFromValues[from]?.get(sourceProperty)
				
				// The stored value comes from parser.getAttributeValue(null, attr), which is
				// null when the source element does not carry the attribute: write an empty
				// value (the same value used when projections are reset) instead of "null".
				String text = (value == null) ? "" : value.toString()
				
				// Escape double quotes as the other attribute writers of this class do,
				// otherwise a quote in the projected value ends the attribute too early.
				output.write(" "+targetProperty+"=\""+text.replace("\"", "&quot;")+"\"");
			}
		}
	}
363
	
300 364
	/**
301 365
	 * Process.
302 366
	 *
......
306 370
	private boolean process(File xmlfile) {
307 371
		text_id = xmlfile.getName();
308 372
		text_id = text_id.substring(0, text_id.length() -4);
309

  
373
		
310 374
		idturn = 1;
311 375
		idsection = 1;
312 376
		idu = 1;
313

  
377
		
314 378
		boolean flagAna;
315 379
		boolean flagForm;
316 380
		boolean flagWord;
......
324 388
		String anatype = "";
325 389
		String anaresp = "";
326 390
		String anavalue = "";
327

  
391
		
328 392
		String formatedTime;
329

  
393
		
330 394
		LinkedHashMap<String, String> anahash = new LinkedHashMap<String, String>();
331 395
		String currentType;
332

  
396
		
333 397
		URL url = xmlfile.toURI().toURL();
334 398
		inputData = url.openStream();
335 399
		factory = XMLInputFactory.newInstance();
336 400
		parser = factory.createXMLStreamReader(inputData);
337 401
		String filename = xmlfile.getName()
338 402
		String textid = filename.substring(0, filename.length() - 4);
339

  
403
		
340 404
		createOutput(cqpFile);
341 405
		String localname;
342

  
406
		
343 407
		//get all metadatas declared before Episode tag
344 408
		speakers = new HashMap<String, ArrayList<Pair<String, String>>>();
345 409
		trans = new HashMap<String, ArrayList<Pair<String, String>>>();
346 410
		topics = new HashMap<String, ArrayList<Pair<String, String>>>();
347 411
		//println "parse infos"
348 412
		parseInfos();
349

  
413
		
350 414
		//		println "Trans: $trans"
351 415
		//		println "Topics: $topics"
352 416
		//		println "Speakers: $speakers"
......
358 422
		//		}
359 423
		//		println "Trans properties: "+transproperties
360 424
		List<String> localspeakers;
361

  
362

  
425
		
426
		
363 427
		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
364 428
			//print "event: "+event +" "
365 429
			switch (event) {
366 430
				case XMLStreamConstants.START_ELEMENT:
367 431
					localname = parser.getLocalName();
432
				
433
					if (projectionsFromValues.containsKey(localname)) { // get projections values
434
						for (String attr : projectionsFromValues[localname].keySet()) {
435
							projectionsFromValues[localname][attr] = parser.getAttributeValue(null, attr);
436
						}
437
					}
438
				
368 439
				//println localname
369 440
					switch(localname) {
441
						
370 442
						case "div":
371 443
							output.write("<div");
372 444
							for (int i = 0 ; i < parser.getAttributeCount() ; i ++) {
......
374 446
								output.write(" "+name+"=\""+parser.getAttributeValue(i).replace("\"", "&quot;")+"\"");
375 447
								sectionAttrs << name
376 448
							}
449
							writeProjections(localname)
377 450
							output.write ">\n"
378 451
							break;
379 452
						case "sp":
......
383 456
								output.write(" "+name+"=\""+parser.getAttributeValue(i).replace("\"", "&quot;")+"\"");
384 457
								spAttrs << name
385 458
							}
459
							writeProjections(localname)
386 460
							output.write ">\n"
387 461
							break;
388 462
						case "u":
......
390 464
							for (int i = 0 ; i < parser.getAttributeCount() ; i ++) {
391 465
								String name = parser.getAttributeLocalName(i).replace("_","").toLowerCase()
392 466
								output.write(" "+name+"=\""+parser.getAttributeValue(i).replace("\"", "&quot;")+"\"");
393
								if (name == "time") { 
394
									formatedTime = parser.getAttributeValue(i) 
395
								} else if (name == "who") { 
396
									u_name = parser.getAttributeValue(i) 
467
								if (name == "time") {
468
									formatedTime = parser.getAttributeValue(i)
469
								} else if (name == "who") {
470
									u_name = parser.getAttributeValue(i)
397 471
								}
398
									
472
								
399 473
								uAttrs << name
400 474
							}
475
							writeProjections(localname)
401 476
							output.write ">\n"
402 477
							break;
403 478
						case "event":
404 479
							output.write("<event");
405 480
							writeAttributes();
481
							writeProjections(localname)
406 482
							output.write ">\n"
407

  
483
						
408 484
							if (parser.getAttributeValue(null, "type") == "entities") {
409 485
								if (parser.getAttributeValue(null, "extent") == "begin") {
410 486
									vEntityType = parser.getAttributeValue(null, "desc");
......
463 539
					break;
464 540
				case XMLStreamConstants.END_ELEMENT:
465 541
					localname = parser.getLocalName();
466

  
542
				
543
					if (projectionsFromValues.containsKey(localname)) { // reset values
544
						for (String attr : projectionsFromValues[localname].keySet()) {
545
							projectionsFromValues[localname][attr] = "";
546
						}
547
					}
548
				
467 549
					switch (localname) {
468 550
						case "text":
469 551
							output.write("</text>\n")
......
507 589
							String ref = (u_name+", "+formatedTime+""+isEnq)
508 590
							if (ADD_TEXTID_TO_REF) ref = textid+", "+ref
509 591
							vForm +="\t"+u_name+"\t"+ref
510

  
592
						
511 593
						// concat entity and entity ID
512 594
							vAna+= "\t"+vEntityType+"\t"+vEntityId;
513

  
595
						
514 596
						//concat ana values
515 597
							for (String type : anatypes) {
516 598
								def v = anavalues.get(type);
517 599
								if (v == null) v = "";
518

  
600
								
519 601
								if ("event" == type) {
520 602
									if (v.length() > 0)
521 603
										vAna+="\t#"+v;
522 604
									else
523 605
										vAna+="\t";
524

  
606
									
525 607
									//concat <Event> values
526 608
									if (vEvents != null && vEvents.length() > 0 && vEvents != "N/A")
527 609
										vAna += "#"+vEvents;
......
529 611
									vAna+="\t"+v;
530 612
								}
531 613
							}
532

  
614
						
533 615
							vForm = vForm.replaceAll("\n", "").replaceAll("&", "&amp;").replaceAll("<", "&lt;");
534

  
616
						
535 617
							if (interviewers != null && !indexInterviewer) { // we must remove some words
536 618
								if (!interviewers.matches(u_name)) { // keep what is now an interviewer
537 619
									output.write(vForm+"\t"+wordid+vAna+"\n");
......
539 621
							} else {
540 622
								output.write(vForm+"\t"+wordid+vAna+"\n");
541 623
							}
542

  
624
						
543 625
							vAna = "";
544 626
							vForm = "";
545 627
							break;
......
554 636
					break;
555 637
			}
556 638
		}
557

  
639
		
558 640
		parser.close();
559 641
		inputData.close();
560 642
		output.close();
561 643
		return true;
562 644
	}
563

  
645
	
564 646
	/** The u_name. */
565 647
	String u_name;
566

  
648
	
567 649
	/**
568 650
	 * Write start tag.
569 651
	 */
......
572 654
		writeAttributes();
573 655
		output.write ">\n"
574 656
	}
575

  
657
	
576 658
	/**
577 659
	 * Write attributes.
578 660
	 */
......
581 663
			output.write(" "+parser.getAttributeLocalName(i).replace("_","").toLowerCase()+"=\""+parser.getAttributeValue(i).replace("\"", "&quot;")+"\"");
582 664
		}
583 665
	}
584

  
666
	
585 667
	private void getAnaTypes(File xmlFile) {
586 668
		inputData = xmlFile.toURI().toURL().openStream();
587 669
		factory = XMLInputFactory.newInstance();
......
609 691
			}
610 692
		}
611 693
	}
612

  
694
	
613 695
	/**
614 696
	 * Write start tag.
615 697
	 *
......
621 703
		writeAttributes();
622 704
		output.write ">\n"
623 705
	}
624

  
706
	
625 707
	/**
626 708
	 * Write end tag.
627 709
	 */
628 710
	private void writeEndTag() {
		// the closing tag uses the lower-cased local name of the element the parser is on
		String closedElement = parser.getLocalName().toLowerCase()
		output.write("</" + closedElement + ">\n")
	}
631

  
713
	
632 714
	/** The ignore transcriber metadata. */
633 715
	boolean ignoreTranscriberMetadata = false;
634

  
716
	
635 717
	/**
636 718
	 * Sets the ignore transcriber metadata.
637 719
	 *
......
640 722
	public void setIgnoreTranscriberMetadata(boolean state) {
641 723
		this.ignoreTranscriberMetadata = state;
642 724
	}
643

  
725
	
644 726
	public static int MAXATTRIBUTEVALUELENGTH = 8191;
645 727
	
646 728
	/**
......
652 734
		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
653 735
			if (event == XMLStreamConstants.START_ELEMENT) {
654 736
				localname = parser.getLocalName();
737
				
738
				if (projectionsFromValues.containsKey(localname)) { // get projections values
739
					for (String attr : projectionsFromValues[localname].keySet()) {
740
						projectionsFromValues[localname][attr] = parser.getAttributeValue(null, attr);
741
					}
742
				}
743
				
655 744
				switch (localname) {
656 745
					case "text":
657 746
						output.write("<text project=\""+projectname+"\" base=\""+corpusname+"\"")
......
659 748
							
660 749
							String value = parser.getAttributeValue(i).replace("\"", "&quot;");
661 750
							if (value.length() > MAXATTRIBUTEVALUELENGTH) {
662
//								value = value.substring(0, MAXATTRIBUTEVALUELENGTH-1)
751
								//								value = value.substring(0, MAXATTRIBUTEVALUELENGTH-1)
663 752
								println "WARNING: attribute value is too long ( > $MAXATTRIBUTEVALUELENGTH). The value will be truncated to: $value"
664 753
							}
665 754
							
666 755
							if (ignoreTranscriberMetadata) {
667 756
								if (parser.getAttributeLocalName(i) != "scribe" &&
668
								parser.getAttributeLocalName(i) != "audio_filename" &&
669
								parser.getAttributeLocalName(i) != "version" &&
670
								parser.getAttributeLocalName(i) != "version_date") {
757
										parser.getAttributeLocalName(i) != "audio_filename" &&
758
										parser.getAttributeLocalName(i) != "version" &&
759
										parser.getAttributeLocalName(i) != "version_date") {
671 760
									output.write(" "+parser.getAttributeLocalName(i).replace("_","").toLowerCase()+"=\""+value+"\"");
672 761
								}
673 762
							} else {
674 763
								output.write(" "+parser.getAttributeLocalName(i).replace("_","").toLowerCase()+"=\""+value+"\"");
675 764
							}
676 765
						}
677

  
766
					
678 767
						output.write ">\n"
679

  
768
					
680 769
						ArrayList list = new ArrayList<Pair<String, String>>()
681 770
						trans.put("trans", list);
682

  
771
					
683 772
						for (int i = 0 ; i < parser.getAttributeCount() ; i ++) {
684 773
							list.add(new Pair(parser.getAttributeLocalName(i).replace("_","").toLowerCase(), parser.getAttributeValue(i)));
685 774
							if (parser.getAttributeLocalName(i).equals("interviewer-id-regex"))
......
708 797
						if (id != null) {
709 798
							ArrayList list = new ArrayList<Pair<String, String>>()
710 799
							speakers.put(id, list);
711

  
800
							
712 801
							for (int i = 0 ; i < parser.getAttributeCount() ; i ++) {
713 802
								list.add(new Pair(parser.getAttributeLocalName(i), parser.getAttributeValue(i)));
714 803
							}
......
721 810
			}
722 811
		}
723 812
	}
724

  
813
	
725 814
	/**
726 815
	 * Sets the debug.
727 816
	 */
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZCompilerStep.groovy (revision 3407)
52 52
		this.normalizeFormValues = n
53 53
	}
54 54

  
55
	public XTZCompilerStep(File xmlFile, File cqpFile, String textname, String corpusname, String projectname, def anatypes, def wtag) {
55
	public XTZCompilerStep(ImportStep importStep, File xmlFile, File cqpFile, String textname, String corpusname, String projectname, def anatypes, def wtag) {
56
		super(importStep)
57
		
56 58
		this.xmlFile = xmlFile
57 59
		this.cqpFile = cqpFile
58 60
		this.textname = textname
......
95 97
	 * @return true, if successful
96 98
	 */
97 99
	public boolean process() {
100
		
98 101
		if (!createOutput(cqpFile)) {
99 102
			return false
100 103
		}
......
122 125
		boolean foundtei = false;
123 126
		boolean foundtext = false;
124 127
		int nWords = 0;
128
	
129
		
125 130
		try {
126 131
			String localname;
127 132
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
128 133
				switch (event) {
129 134
					case XMLStreamConstants.START_ELEMENT:
130 135
						localname = parser.getLocalName().toLowerCase();
136
						
137
						if (importStep.projectionsFromValues.containsKey(localname)) {
138
							for (String attr : importStep.projectionsFromValues[localname].keySet()) {
139
								importStep.projectionsFromValues[localname][attr] = parser.getAttributeValue(null, attr);
140
							}
141
						}
142
						
131 143
						if ("tei".equals(localname)) foundtei = true;
132 144
						switch (localname) {
133 145
							case "text":
......
180 192
								if (!foundtei || !foundtext) break;
181 193

  
182 194
								output.write("<"+localname)
183

  
195
								def toWrite = new LinkedHashMap()
184 196
								for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
185 197
									String attrname = parser.getAttributeLocalName(i)
186 198

  
......
188 200
									if (normalizeAttributeValues) {
189 201
										attrvalue = attrvalue.trim()
190 202
									}
191
									output.write(" "+attrname.toLowerCase()+"=\""+attrvalue.replace("\"", "&quot;")+QUOTE)
203
									//output.write(" "+attrname.toLowerCase()+"=\""+attrvalue.replace("\"", "&quot;")+QUOTE)
204
									toWrite[attrname.toLowerCase()] = attrvalue.replace("\"", "&quot;")
192 205
								}
193 206
								if (parser.getAttributeCount() == 0) { // add the n attribute
194 207
									if (!ncounts.containsKey(localname)) ncounts.put(localname, 0)
195 208
									int ncount = ncounts.get(localname)
196 209
									ncounts.put(localname, ncount+1)
197 210
									output.write(" n=\""+ncount+QUOTE)
211
									toWrite["n"] = ncount
198 212
								}
213
								
214
								if (importStep.projectionsToDo.containsKey(localname)) {
215
									for (String from : importStep.projectionsToDo[localname].keySet()) {
216
										for (def couple : importStep.projectionsToDo[localname][from]) {
217
											def o = couple[0]
218
											def p = couple[1]
219
											def r = importStep.projectionsFromValues[from][p]
220
											//println "o=$o p=$p r=$r"
221
											toWrite[o] = r
222
										}
223
									}
224
								}
225
								
226
								//println "toWrite=$toWrite"
227
								
228
								for (String attr : toWrite.keySet()) {
229
									output.write(" "+attr+"=\""+toWrite[attr]+QUOTE)
230
								}
199 231
								output.write(">\n")
200 232
						}
201 233
						break;
202 234

  
203 235
					case XMLStreamConstants.END_ELEMENT:
204 236
						localname = parser.getLocalName().toLowerCase();
237
						
238
						if (importStep.projectionsFromValues.containsKey(localname)) { // reset projection values
239
							for (String attr : importStep.projectionsFromValues[localname].keySet()) {
240
								importStep.projectionsFromValues[localname][attr] = "";
241
							}
242
						}
243
						
205 244
						switch (localname) {
206 245
							case WTAG:
207 246
								for (String type : anatypes) {
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZFacsPagerStep.groovy (revision 3407)
37 37
	String wtag;
38 38
	boolean debug = false;
39 39

  
40
	public XTZFacsPagerStep(File xmlFile, File editionDir, File imageDirectory, String txtname, String corpusname, String tag, String attribute, String wtag, boolean debug) {
40
	public XTZFacsPagerStep(ImportStep importStep, File xmlFile, File editionDir, File imageDirectory, String txtname, String corpusname, String tag, String attribute, String wtag, boolean debug) {
41
		super(importStep)
42
		
41 43
		inputData = xmlFile.toURI().toURL().openStream()
42 44
		factory = XMLInputFactory.newInstance()
43 45
		parser = factory.createXMLStreamReader(inputData)
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZCompiler.groovy (revision 3407)
18 18
import org.txm.searchengine.cqp.corpus.*
19 19

  
20 20
class XTZCompiler extends Compiler {
21

  
21
	
22 22
	SAttributesListener sattrsListener; // store scanned structures
23

  
23
	
24 24
	String regPath;
25 25
	String corpusname;
26 26
	String wtag;
27

  
27
	
28 28
	boolean doNormalizeAttributeValues = false;
29 29
	boolean doNormalizeAnaValues = true;
30

  
30
	
31 31
	public XTZCompiler(ImportModule module) {
32 32
		super(module);
33

  
33
		
34 34
		corpusname = module.getProject().getName();
35 35
		regPath = module.getBinaryDirectory().getAbsolutePath() + "/registry/"+corpusname.toLowerCase()
36

  
36
		
37 37
		wtag = module.getProject().getTokenizerWordElement();
38

  
38
		
39 39
		doNormalizeAttributeValues = "true".equals(module.getProject().getPreferencesScope().getNode("import").get(TBXPreferences.NORMALISEATTRIBUTEVALUES, "false"))
40 40
		doNormalizeAnaValues = "true".equals(module.getProject().getPreferencesScope().getNode("import").get(TBXPreferences.NORMALISEANAVALUES, "false"))
41 41
	}
42

  
42
	
43 43
	/**
44 44
	 * the Text list
45 45
	 */
......
51 51
	def initialTypesValues;
52 52
	@Override
53 53
	public void _process() {
54

  
54
		
55 55
		if (orderedTextIDs == null) orderedTextIDs = module.getProject().getTextsID() ;
56

  
56
		
57 57
		Project project = module.getProject();
58

  
58
		
59 59
		texts = orderedTextIDs.collect() { id -> module.getProject().getText(id) }
60 60
		textsToProcess = getTextsToProcess(texts)
61

  
61
		
62 62
		// get all structures
63 63
		sattrsListener = new SAttributesListener() // will store the structure and properties declaration
64 64
		sattrsListener.W = wtag
65

  
65
		
66 66
		File registryFile = new File(regPath)
67 67
		
68 68
		initialTypesValues = new HashSet<String>()
......
87 87
			corpus.setName(project.getName());
88 88
		}
89 89
		corpus.setDescription("Built with the XTZ import module");
90

  
90
		
91 91
		if (!doScanStep()) return;
92 92
		if (!doCQPStep()) return;
93 93
		if (!doCWBEncodeStep()) return;
94 94
		if (!doCWBMakeAllStep()) return;
95

  
96
		if (module.getProject().getCleanAfterBuild() 
97
			&& !module.getProject().getDoUpdate()) { // for optimization purpose, don't clean the CQP files
95
		
96
		if (module.getProject().getCleanAfterBuild()
97
				&& !module.getProject().getDoUpdate()) { // for optimization purpose, don't clean the CQP files
98 98
			new File(module.getBinaryDirectory(), "cqp").deleteDir()
99 99
		}
100

  
100
		
101 101
		isSuccessFul = true;
102 102
	}
103

  
103
	
104 104
	/**
105 105
	 * Scan all XML-TXM files to find out structures and word properties
106 106
	 */
107 107
	public boolean doScanStep() {
108

  
108
		
109 109
		println "-- Scanning structures&properties to create for "+texts.size()+" texts..."
110

  
111

  
112

  
110
		
111
		
112
		
113 113
		//		def initialTypesValues = new HashSet<String>()
114 114
		//		initialTypesValues.addAll(sattrsListener.getAnatypes())
115

  
115
		
116 116
		// get all word properties
117 117
		ConsoleProgressBar cpb = new ConsoleProgressBar(texts.size())
118 118
		for (Text t : texts) {
......
129 129
				return false;
130 130
			}
131 131
		}
132

  
132
		
133 133
		if (initialTypesValues.size() == sattrsListener.getAnatypes().size()
134
			&& initialTypesValues.containsAll(sattrsListener.getAnatypes())) { // the word properties changed all CQP files must be recreated
134
				&& initialTypesValues.containsAll(sattrsListener.getAnatypes())) { // the word properties changed all CQP files must be recreated
135 135
			// no new property
136 136
		} else {
137 137
			if (module.isUpdatingCorpus()) {
......
140 140
			textsToProcess.clear()
141 141
			textsToProcess.addAll(texts)
142 142
		}
143

  
143
		
144 144
		println ""
145 145
		return true;
146 146
	}
147

  
147
	
148 148
	def getTextsToProcess(def texts) {
149
		
149 150
		def textsToProcess = texts.findAll() { text ->
150 151
			File xmlFile = text.getXMLTXMFile()
151 152
			String textname = text.getName()
152

  
153
			
153 154
			File cqpFile = new File(cqpDirectory, textname + ".cqp")
154 155
			cqpFiles << cqpFile // insert cqp files to concat later
155 156
			// skip step if cqpFile exists AND is more recent than the XML-TXM File
......
157 158
			if (!cqpFile.exists() || xmlFile.lastModified() >= cqpFile.lastModified()) {
158 159
				return true
159 160
			}
160

  
161
			
161 162
			if (!text.isDirty() && !mustBuild) {
162 163
				Log.finer("skipping .cqp step of $text");
163 164
				return false
164 165
			}
165

  
166
			
166 167
			return true
167 168
		}
168

  
169
		
169 170
		return textsToProcess
170 171
	}
171

  
172
	
172 173
	def cqpFiles = [] // ordered cqp files to concat before calling cwb-encode
173 174
	int cqpFilesUpdated = 0;
175
	LinkedHashMap<String, LinkedHashMap<String, String>> projectionsFromValues = new LinkedHashMap<String, LinkedHashMap<String, String>>(); // values of properties to inject
176
	LinkedHashMap<String, LinkedHashMap<String, ArrayList<ArrayList>>> projectionsToDo = new LinkedHashMap<String, LinkedHashMap<String, ArrayList<ArrayList>>>(); // list of projections to do
174 177
	public boolean doCQPStep() {
175

  
178
		
176 179
		cqpDirectory.mkdir(); // if not created
177

  
180
		
178 181
		println "-- Building CQP files ${textsToProcess.size()}/${texts.size()}..."
179

  
182
		
183
		// Building projections datas to use for each step
184
		String projectionsParameterValue = module.project.getTextualPlan("Projections").trim()
185
		projectionsParameterValue = projectionsParameterValue.replace("\n", "\t")
186
		def projectionsParameter = projectionsParameterValue.split("\t");
187
		if (projectionsParameterValue.length() > 0) {
188
			for (def projection : projectionsParameter) {
189
				if (!projection.contains("->")) continue;
190
				
191
				String[] fromTo = projection.split("->", 2)
192
				String from = fromTo[0].trim()
193
				String to = fromTo[1].trim()
194
				if (projection.contains("->") && from.contains("_") && to.contains("_")) {
195
					String toStructure = to.substring(0, to.indexOf("_"))
196
					String toStructureProperty = to.substring(to.indexOf("_") + 1)
197
					String fromStructure = from.substring(0, from.indexOf("_"))
198
					String fromStructureProperty = from.substring(from.indexOf("_") + 1)
199
					
200
					if (!projectionsToDo.containsKey(toStructure)) {
201
						projectionsToDo[toStructure] = new LinkedHashMap<String, ArrayList<ArrayList>>();
202
					}
203
					if (!projectionsToDo[toStructure].containsKey(fromStructure)) {
204
						projectionsToDo[toStructure][fromStructure] = new ArrayList<ArrayList>();
205
					}
206
					projectionsToDo[toStructure][fromStructure].add([toStructureProperty, fromStructureProperty])
207
					
208
					if (!projectionsFromValues.containsKey(fromStructure)) projectionsFromValues[fromStructure] = new LinkedHashMap<String, String>();
209
					projectionsFromValues[fromStructure][fromStructureProperty] = "";
210
				}
211
			}
212
		}
213
		// registering the new structure properties
214
		for (String struct : projectionsToDo.keySet()) {
215
			for (String struct2 : projectionsToDo[struct].keySet()) {
216
				for (def couple : projectionsToDo[struct][struct2]) {
217
					sattrsListener.getStructs()[struct].add(couple[0])
218
				}
219
			}
220
		}
221
		
180 222
		ConsoleProgressBar cpb = new ConsoleProgressBar(textsToProcess.size())
181 223
		cqpFilesUpdated = 0;
182 224
		for (Text text : textsToProcess) {
183 225
			cpb.tick();
184

  
226
			
185 227
			File xmlFile = text.getXMLTXMFile()
186 228
			String textname = text.getName()
187

  
229
			
188 230
			File cqpFile = new File(cqpDirectory, textname + ".cqp")
189

  
231
			
190 232
			cqpFilesUpdated++
191

  
192
			XTZCompilerStep step = new XTZCompilerStep(xmlFile, cqpFile, textname, corpusname, "default", sattrsListener.getAnatypes(), wtag)
233
			
234
			XTZCompilerStep step = new XTZCompilerStep(this, xmlFile, cqpFile, textname, corpusname, "default", sattrsListener.getAnatypes(), wtag)
193 235
			step.setNormalizeAnaValues(doNormalizeAnaValues)
194 236
			step.setNormalizeAttributeValues(doNormalizeAttributeValues)
195 237
			if (!step.process()) {
......
200 242
		println ""
201 243
		return true;
202 244
	}
203

  
245
	
204 246
	public boolean doCWBEncodeStep() {
205 247
		println "-- Running cwb-encode..."
206 248
		
......
208 250
		DeleteDir.deleteDirectory(outputDirectory);
209 251
		outputDirectory.mkdirs();
210 252
		dataDirectory.mkdirs();
211

  
253
		
212 254
		DeleteDir.deleteDirectory(registryDirectory);
213 255
		registryDirectory.mkdirs();
214 256
		
215 257
		CwbEncode cwbEn = new CwbEncode()
216 258
		cwbEn.setDebug(debug)
217

  
259
		
218 260
		List<String> pargs = ["id"]
219 261
		for (String ana : sattrsListener.getAnatypes()) {
220 262
			if (ana == "word") continue; // no need to be added, cwb will declared it automatically
221 263
			if (ana == "id") continue; // no need to be added, we did it already
222 264
			pargs.add(ana)
223 265
		}
224

  
266
		
225 267
		String[] pAttrs = pargs
226

  
268
		
227 269
		def structs = sattrsListener.getStructs()
228 270
		def structsProf = sattrsListener.getProfs()
229

  
271
		
230 272
		if (debug) {
231 273
			println structs
232 274
			println structsProf
233 275
		}
234

  
276
		
235 277
		List<String> sargs = new ArrayList<String>()
236 278
		def tmpTextAttrs = []
237 279
		for (String name : structs.keySet()) {
238 280
			if (name == "txmcorpus") continue;
239

  
281
			
240 282
			if (name == "text") {
241 283
				for (String value : structs.get(name)) // append the attributes
242 284
					tmpTextAttrs << value // added after
243 285
				continue;
244 286
			}
245

  
287
			
246 288
			String concat = name+":"+structsProf.get(name); // append the depth
247 289
			for (String attributeName : structs.get(name)) { // append the attributes
248 290
				concat += "+"+attributeName.toLowerCase();
249 291
			}
250

  
292
			
251 293
			if (structs.get(name).size() == 0) {
252 294
				concat += "+n";
253 295
			} else {
......
255 297
					concat += "+n"
256 298
				}
257 299
			}
258

  
300
			
259 301
			if ((name == "p" || name == "body" || name == "back" || name == "front")
260
			&& !concat.contains("+n+") && !concat.endsWith("+n")) {
302
					&& !concat.contains("+n+") && !concat.endsWith("+n")) {
261 303
				concat += "+n"
262 304
			}
263 305
			sargs.add(concat)
264 306
		}
265

  
307
		
266 308
		String textSAttributes = "text:0+id+base+project";
267 309
		for (String name : tmpTextAttrs) {
268 310
			if (!("id".equals(name) || "base".equals(name) || "project".equals(name))) {
269 311
				textSAttributes += "+"+name.toLowerCase()
270 312
			}
271 313
		}
272

  
314
		
273 315
		sargs.add(textSAttributes)
274 316
		sargs.add("txmcorpus:0+lang")
275

  
317
		
276 318
		sargs.sort()
277

  
319
		
278 320
		String[] sAttributes = sargs
279 321
		String[] pAttributes = pAttrs
280 322
		println " Word properties: "+pAttributes.join(', ')
......
286 328
				println "Fail to write the master cqp file: "+allcqpFile
287 329
				return false;
288 330
			}
289

  
331
			
290 332
			new File(regPath).delete()// ensure the registry file is deleted
291

  
333
			
292 334
			if (!cwbEn.run(outputDirectory.getAbsolutePath() + "/$corpusname",
293
			allcqpFile.getAbsolutePath(), regPath, pAttributes, sAttributes, false)) {
335
					allcqpFile.getAbsolutePath(), regPath, pAttributes, sAttributes, false)) {
294 336
				println "** cwb-encode did not ends well. Please activate a finer log level to see more details."
295 337
				return false;
296 338
			}
297

  
339
			
298 340
			allcqpFile.delete(); // clean
299 341
		} catch (Exception e) {
300 342
			println "Error while running cwb-encode: "+e
......
305 347
		println ""
306 348
		return true;
307 349
	}
308

  
350
	
309 351
	public boolean doCWBMakeAllStep() {
310 352
		println "-- Running cwb-makeall..."
311 353
		try {
312 354
			CwbMakeAll cwbMa = new CwbMakeAll();
313 355
			cwbMa.setDebug(debug);
314

  
356
			
315 357
			if (!new File(regPath).exists()) {
316 358
				println "Error: The registry file was not created: $regPath. See https://groupes.renater.fr/wiki/txm-users/public/faq"
317 359
				return false;
......
320 362
				println "** cwb-makeall did not ends well. Activate finer logs to see details."
321 363
				return false;
322 364
			}
323

  
365
			
324 366
			// remove milestones from CWB registry and data files
325 367
			FixMilestoneDeclarations fm = new FixMilestoneDeclarations(
326 368
					new File(regPath), new File(outputDirectory.getAbsolutePath(), corpusname));
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZPager.groovy (revision 3407)
236 236
			edition.setIndex(outputDirectory.getAbsolutePath());
237 237

  
238 238
			try {
239
				def ed = new XTZFacsPagerStep(txmFile, newEditionDirectory, imageDirectory, txtname, corpusname, "pb", "facs", wordTag, debug);
239
				def ed = new XTZFacsPagerStep(this, txmFile, newEditionDirectory, imageDirectory, txtname, corpusname, "pb", "facs", wordTag, debug);
240 240
				if (!ed.process()) {
241 241
					println "Fail to build 'facs' edition for text: $txmFile"
242 242
					continue;
TXM/trunk/org.txm.core/src/java/org/txm/importer/xtz/Step.java (revision 3407)
1 1
package org.txm.importer.xtz;
2 2

  
3 3
public class Step {
4
	
5
	protected ImportStep importStep;
4 6

  
5
	public Step() {
6
		// TODO Auto-generated constructor stub
7
	public Step(ImportStep importStep) {
8
		this.importStep = importStep;
7 9
	}
8 10

  
9 11
	public boolean process() {
TXM/trunk/org.txm.rcp/src/main/java/org/txm/rcp/editors/imports/sections/TextualPlansSection.java (revision 3407)
26 26
	private Text noteElementsText;
27 27
	
28 28
	private Text milestoneElementsText;
29

  
30
	private Text projectionsText;
29 31
	
30 32
	public TextualPlansSection(ImportFormEditor editor, FormToolkit toolkit2, ScrolledForm form2, Composite parent, int style) {
31 33
		super(editor, toolkit2, form2, parent, style, "textualplans");
......
82 84
		milestoneElementsText = toolkit.createText(sectionClient, "", SWT.BORDER); //$NON-NLS-1$
83 85
		gdata = getTextGridData();
84 86
		milestoneElementsText.setLayoutData(gdata);
87
		
88
		Label label = toolkit.createLabel(sectionClient, "Projections", SWT.WRAP);
89
		gdata = getLabelGridData();
90
		gdata.colspan = 2;
91
		label.setLayoutData(gdata);
92
		
93
		projectionsText = toolkit.createText(sectionClient, "", SWT.BORDER | SWT.MULTI | SWT.V_SCROLL); //$NON-NLS-1$
94
		gdata = getTextGridData();
95
		gdata.heightHint = 50;
96
		gdata.colspan = 2;
97
		
98
		projectionsText.setLayoutData(gdata);
85 99
	}
86 100
	
87 101
	@Override
......
91 105
			outsideTextElementsToEditText.setText(project.getTextualPlan("OutSideTextTagsAndKeepContent"));
92 106
			noteElementsText.setText(project.getTextualPlan("Note"));
93 107
			milestoneElementsText.setText(project.getTextualPlan("MileStones"));
108
			projectionsText.setText(project.getTextualPlan("Projections"));
94 109
		}
95 110
	}
96 111
	
......
106 121
			
107 122
			project.setTextualPlan("MileStones", milestoneElementsText.getText().trim());
108 123
			
124
			project.setTextualPlan("Projections", projectionsText.getText().trim());
125
			
109 126
			return true;
110 127
		}
111 128
		
TXM/trunk/org.txm.rcp/src/main/java/org/txm/rcp/editors/imports/ImportModuleCustomization.java (revision 3407)
168 168
		params.put(PREBUILD, true);
169 169
		params.put(QUERIES, true);
170 170
		params.put(UI, true);
171
		params.put(TEXTUALPLANS, false);
171
		params.put(TEXTUALPLANS, true);
172 172
		params.put(OPTIONS, true);
173 173
		sectionsPerImportModule.put("transcriberLoader.groovy", params); //$NON-NLS-1$
174 174
		names.put("transcriberLoader.groovy", "XML-TRS + CSV"); //$NON-NLS-1$ //$NON-NLS-2$

Formats disponibles : Unified diff