Révision 2203

tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/tmx/compiler.groovy (revision 2203)
2 2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 3
// Lyon 2, University of Franche-Comté, University of Nice
4 4
// Sophia Antipolis, University of Paris 3.
5
// 
5
//
6 6
// The TXM platform is free software: you can redistribute it
7 7
// and/or modify it under the terms of the GNU General Public
8 8
// License as published by the Free Software Foundation,
9 9
// either version 2 of the License, or (at your option) any
10 10
// later version.
11
// 
11
//
12 12
// The TXM platform is distributed in the hope that it will be
13 13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 15
// PURPOSE. See the GNU General Public License for more
16 16
// details.
17
// 
17
//
18 18
// You should have received a copy of the GNU General
19 19
// Public License along with the TXM platform. If not, see
20 20
// http://www.gnu.org/licenses.
......
283 283
								flagAna = true;
284 284
								anaType = parser.getAttributeValue(null, "type")
285 285
								if (anaType.length() > 0) anaType = anaType.substring(1);
286
								//println "anatype $anaType"
286
							//println "anatype $anaType"
287 287
								anahash.put(anaType, "");
288 288
								if (firstWord) {
289 289
									anaTypes << anaType;
......
310 310
							case "w":
311 311
								firstWord = false;
312 312
								output.write( vForm.replaceAll("&", "&amp;").replaceAll("<", "&lt;")
313
									 +"\t"+wordid+"\t"+tuRef);
313
										+"\t"+wordid+"\t"+tuRef);
314 314
								for(String type : anaTypes) {
315 315
									output.write("\t"+anahash.get(type));
316 316
								}
......
342 342
			}
343 343
			output.close();
344 344
			if (parser != null) parser.close();
345
		if (inputData != null) inputData.close();
345
			if (inputData != null) inputData.close();
346 346
		}
347 347
		catch (Exception ex) {
348 348
			System.out.println("Exception while parsing " + inputData);
......
410 410
	{
411 411
		this.langGroups = groups;
412 412
	}
413
	
413

  
414 414
	public static HashMap<Integer, ArrayList<String>> corpusIDS;
415 415
	public void setCorpusIDS(HashMap<Integer, ArrayList<String>> corpusIDS)
416 416
	{
417 417
		this.corpusIDS = corpusIDS;
418 418
	}
419
	
419

  
420 420
	/**
421 421
	 * Run.
422 422
	 *
......
430 430
		sattrsListener = null; // reset SAttribute Listener for each new import
431 431
		String rootDir = binDir.getAbsolutePath();
432 432
		seg_id=1;
433
		
433

  
434 434
		if (!(CwbEncode.isExecutableAvailable() && CwbMakeAll.isExecutableAvailable())) {
435 435
			println ("Error: CWB executables not well set.")
436 436
			return false;
437 437
		}
438
		
438

  
439 439
		new File(binDir,"cqp").mkdirs()
440 440
		new File(binDir,"data").mkdirs()
441 441
		new File(binDir,"registry").mkdirs()
......
458 458
			}
459 459
		}
460 460
		println "Using corpus ID: $corpusIDS"
461
		
461

  
462 462
		String cqpName;
463 463
		for (int group : langGroups.keySet()) {
464 464
			String lang = langs.get(langGroups.get(group)[0]);
......
479 479
		for (int group : langGroups.keySet()) {
480 480
			//String lang = langs.get(langGroups.get(group)[0]);
481 481
			cqpName = basename+"_"+corpusIDS.get(group);
482
			
483
		CorpusBuild corpus = project.getCorpusBuild(project.getName(), MainCorpus.class);
484
		if (corpus != null) {
485
			if (project.getDoUpdate()) {
486
				corpus.clean(); // remove old files
482

  
483
			CorpusBuild corpus = project.getCorpusBuild(cqpName, MainCorpus.class);
484
			if (corpus != null) {
485
				if (project.getDoUpdate()) {
486
					corpus.clean(); // remove old files
487
				} else {
488
					corpus.delete(); // remove old files and TXMResult children
489
				}
487 490
			} else {
488
				corpus.delete(); // remove old files and TXMResult children
491
				corpus = new MainCorpus(project);
492
				corpus.setID(cqpName);
493
				corpus.setName(cqpName);
489 494
			}
490
		} else {
491
			corpus = new MainCorpus(project);
492
			corpus.setID(project.getName());
493
			corpus.setName(project.getName());
494
		}
495 495
			corpus.setDescription("Built with the TMX import module");
496
			
496

  
497 497
			File cqpFile = new File(binDir,"cqp/"+cqpName+".cqp");
498
			
498

  
499 499
			def filenames = langGroups.get(group);
500 500
			filenames.sort()
501 501
			//println("Process group no $group of files "+filenames)
......
524 524
			}
525 525
		}
526 526
		println ""
527
		
527

  
528 528
		for (int group : langGroups.keySet()) {
529 529
			//String lang = langs.get(langGroups.get(group)[0]);
530 530
			createOutput(rootDir+"/cqp", basename+"_"+corpusIDS.get(group)+".cqp");
......
543 543
		def cqpFiles = [:]
544 544
		for (File cqpfile : new File(rootDir, "cqp").listFiles()) {
545 545
			String corpusname = cqpfile.getName();
546
			
546

  
547 547
			corpusname = corpusname.substring(0, corpusname.length()-4);
548 548
			def corpusAnaTypes = anaTypesPerCqp[corpusname];
549
			
549

  
550 550
			CwbEncode cwbEn = new CwbEncode();
551 551
			cwbEn.setDebug(debug);
552 552
			CwbMakeAll cwbMa = new CwbMakeAll();
......
614 614
					if (!corpusName.equals(targetName)) {
615 615
						try {
616 616
							PatchCwbRegistry.patchAlignment(new File(registryDirectory, corpusName.getName()), targetName.getName());
617
							
617

  
618 618
							File cqpFile1 = new File(cqpFiles[corpusName.getName()]);
619 619
							File cqpFile2 = new File(cqpFiles[targetName.getName()]);
620 620
							File alignOutFile = new File(outDir, "align.out");
621
							
621

  
622 622
							BuildAlignOut bao = new BuildAlignOut(cqpFile1, cqpFile2);
623 623
							if (!bao.process(alignOutFile, "seg", "id")) {
624 624
								println "Error while creating alignement file of $corpusName. Aborting."
625 625
								return false;
626 626
							}
627
							
627

  
628 628
							CwbAlign tt = new CwbAlign();
629 629
							// ./cwb-align-encode -D -r ~/TXM/corpora/tmxtest/registry/ -v out.align
630 630
							tt.setD();

Formats disponibles : Unified diff