Révision 803

tmp/org.txm.groovy.core/META-INF/MANIFEST.MF (revision 803)
68 68
 org.txm.importer.bfm,
69 69
 org.txm.importer.bvh,
70 70
 org.txm.importer.corptef,
71
 org.txm.importer.cqp,
71 72
 org.txm.importer.discours,
72 73
 org.txm.importer.doc,
73 74
 org.txm.importer.factiva,
......
83 84
 org.txm.importer.tmx,
84 85
 org.txm.importer.transcriber,
85 86
 org.txm.importer.txt,
86
 org.txm.importer.wtc,
87 87
 org.txm.importer.xml,
88 88
 org.txm.importer.xmltxm,
89 89
 org.txm.importer.xmltxmpara,
tmp/org.txm.groovy.core/src/java/org/txm/groovy/core/InstallGroovyFiles.java (revision 803)
40 40
		publishedImports.put(IMPORTERPACKAGE+"transcriber","transcriberLoader.groovy");
41 41
		publishedImports.put(IMPORTERPACKAGE+"tmx","tmxLoader.groovy");
42 42
		publishedImports.put(IMPORTERPACKAGE+"doc","docLoader.groovy");
43
		publishedImports.put(IMPORTERPACKAGE+"wtc","wtcLoader.groovy");
43
		publishedImports.put(IMPORTERPACKAGE+"cqp","cqpLoader.groovy");
44 44
		
45 45
		String bundle_id = "org.txm.groovy.core";
46 46
		for (String p : publishedImports.keySet()) {
tmp/org.txm.groovy.core/src/groovy/org/txm/macroproto/RunImportMacro.groovy (revision 803)
39 39
scripts["CNRCSV"] = "discoursLoader.groovy"
40 40
scripts["HYPERBASE"] = "hyperbaseLoader.groovy"
41 41
scripts["ALCESTE"] = "alcesteLoader.groovy"
42
scripts["CWB"] = "wtcLoader.groovy"
42
scripts["CWB"] = "cqpLoader.groovy"
43 43
scripts["DOC"] = "docLoader.groovy"
44 44

  
45 45
def scriptName = scripts.get(importName)
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/testTT.groovy (revision 803)
78 78

  
79 79
//Import to CWB
80 80
BuildXmlRGAQCJ.process( anafiles,  rootDir)
81
println("build wtc RGAQCJ");
81
println("build cqp RGAQCJ");
82 82
println("encode + makeall");
83 83

  
84 84
//create TT TRAIN files
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/tmx/compiler.groovy (revision 803)
159 159
	private boolean createOutput(String dirPathName, String fileName){
160 160
		try {
161 161
			File f = new File(dirPathName, fileName)
162
			//We don't want to concatenate the wtc files
162
			//We don't want to concatenate the cqp files
163 163
			output = new OutputStreamWriter(new FileOutputStream(f, f.exists()) , "UTF-8");
164 164
			return true;
165 165
		} catch (Exception e) {
......
183 183
	}
184 184

  
185 185
	/**
186
	 * Transfom file wtc.
186
	 * Transfom file cqp.
187 187
	 *
188 188
	 * @param dirPathName the dir path name
189 189
	 * @param fileName the file name
......
439 439
			return false;
440 440
		}
441 441

  
442
		new File(binDir,"/wtc/").deleteDir();
443
		new File(binDir,"/wtc/").mkdir();
442
		new File(binDir,"/cqp/").deleteDir();
443
		new File(binDir,"/cqp/").mkdir();
444 444
		new File(binDir,"/data/").deleteDir();
445 445
		new File(binDir,"/data/").mkdir();
446 446
		new File(binDir,"registry/").mkdir();
......
464 464
		}
465 465
		println "Using corpus ID: $corpusIDS"
466 466
		
467
		String wtcName;
467
		String cqpName;
468 468
		for (int group : langGroups.keySet()) {
469 469
			String lang = langs.get(langGroups.get(group)[0]);
470 470
			segs_id.put(group, 0);
471
			wtcName = basename+"_"+corpusIDS.get(group);
472
			createOutput(rootDir+"/wtc", "${wtcName}.wtc");
473
			output.write("<txmcorpus id=\"${wtcName}\" lang=\"$lang\">\n")
471
			cqpName = basename+"_"+corpusIDS.get(group);
472
			createOutput(rootDir+"/cqp", "${cqpName}.cqp");
473
			output.write("<txmcorpus id=\"${cqpName}\" lang=\"$lang\">\n")
474 474
			output.close();
475 475

  
476 476
			//create txmDirs
477
			new File(rootDir, "txm/"+wtcName.toUpperCase()).mkdir();
477
			new File(rootDir, "txm/"+cqpName.toUpperCase()).mkdir();
478 478
		}
479 479

  
480 480
		//1- Transform into WTC file and build a corpus per file
......
483 483
		def builder = null;
484 484
		for (int group : langGroups.keySet()) {
485 485
			//String lang = langs.get(langGroups.get(group)[0]);
486
			wtcName = basename+"_"+corpusIDS.get(group);
486
			cqpName = basename+"_"+corpusIDS.get(group);
487 487
			def filenames = langGroups.get(group);
488 488
			filenames.sort()
489 489
			//println("Process group no $group of files "+filenames)
......
496 496
				}
497 497

  
498 498
				filename = filename.substring(0, filename.length()-4);
499
				String corpusname = (wtcName).toLowerCase();
499
				String corpusname = (cqpName).toLowerCase();
500 500
				counttext++;
501 501

  
502 502
				String txtname = f.getName();
503 503
				txtname = txtname.substring(0, txtname.lastIndexOf("_"));
504 504
				seg_id = segs_id.get(group);
505 505
				builder = new compiler(f.toURI().toURL(), txtname, basename, "default", tuprops);
506
				builder.transfomFileWtc(rootDir+"/wtc",wtcName+".wtc");
507
				anaTypesPerWtc[wtcName] = builder.getAnaTypes();
506
				builder.transfomFileWtc(rootDir+"/cqp",cqpName+".cqp");
507
				anaTypesPerWtc[cqpName] = builder.getAnaTypes();
508 508
				segs_id.put(group, seg_id);
509 509

  
510 510
				// move xml-txm file
511
				File txmCorpusDir = new File(rootDir, "txm/"+wtcName.toUpperCase())
511
				File txmCorpusDir = new File(rootDir, "txm/"+cqpName.toUpperCase())
512 512
				f.renameTo(new File(txmCorpusDir, f.getName()));
513 513
			}
514 514
		}
......
516 516
		
517 517
		for (int group : langGroups.keySet()) {
518 518
			//String lang = langs.get(langGroups.get(group)[0]);
519
			createOutput(rootDir+"/wtc", basename+"_"+corpusIDS.get(group)+".wtc");
519
			createOutput(rootDir+"/cqp", basename+"_"+corpusIDS.get(group)+".cqp");
520 520
			output.write("</txmcorpus>")
521 521
			output.close();
522 522
		}
......
529 529
		def outDir = rootDir;
530 530
		def outDirTxm = rootDir;
531 531

  
532
		def wtcFiles = [:]
533
		for (File wtcfile : new File(rootDir, "wtc").listFiles()) {
534
			String corpusname = wtcfile.getName();
532
		def cqpFiles = [:]
533
		for (File cqpfile : new File(rootDir, "cqp").listFiles()) {
534
			String corpusname = cqpfile.getName();
535 535
			
536 536
			corpusname = corpusname.substring(0, corpusname.length()-4);
537 537
			def corpusAnaTypes = anaTypesPerWtc[corpusname];
......
583 583
			println "S-attributes: "+sargs
584 584

  
585 585
			try {
586
				wtcFiles[corpusname.toLowerCase()] = outDir + "/wtc/"+wtcfile.getName();
586
				cqpFiles[corpusname.toLowerCase()] = outDir + "/cqp/"+cqpfile.getName();
587 587
				String regPath = outDirTxm + "/registry/"+corpusname.toLowerCase()
588
				cwbEn.run(outDirTxm + "/data/"+corpusname.toUpperCase()+"/", outDir + "/wtc/"+wtcfile.getName(), regPath, pAttributes, sAttributes);
588
				cwbEn.run(outDirTxm + "/data/"+corpusname.toUpperCase()+"/", outDir + "/cqp/"+cqpfile.getName(), regPath, pAttributes, sAttributes);
589 589
				if (!new File(regPath).exists()) {
590 590
					println "Error: The registry file was not created: $regPath. See https://groupes.renater.fr/wiki/txm-users/public/faq"
591 591
					return false;
......
604 604
						try {
605 605
							PatchCwbRegistry.patchAlignment(new File(registryDirectory, corpusName.getName()), targetName.getName());
606 606
							
607
							File wtcFile1 = new File(wtcFiles[corpusName.getName()]);
608
							File wtcFile2 = new File(wtcFiles[targetName.getName()]);
607
							File cqpFile1 = new File(cqpFiles[corpusName.getName()]);
608
							File cqpFile2 = new File(cqpFiles[targetName.getName()]);
609 609
							File alignOutFile = new File(outDir, "align.out");
610 610
							
611
							BuildAlignOut bao = new BuildAlignOut(wtcFile1, wtcFile2);
611
							BuildAlignOut bao = new BuildAlignOut(cqpFile1, cqpFile2);
612 612
							if (!bao.process(alignOutFile, "seg", "id")) {
613 613
								println "Error while creating alignement file of $corpusName. Aborting."
614 614
								return false;
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/bvh/compiler.groovy (revision 803)
162 162
	}
163 163
	
164 164
	/**
165
	 * Transfom file wtc.
165
	 * Transfom file cqp.
166 166
	 *
167 167
	 * @param dirPathName the dir path name
168 168
	 * @param fileName the file name
......
545 545
			println ("binary directory does not exists: "+rootDir)
546 546
			return false;
547 547
		}
548
		new File(rootDir+"/wtc/","bvh.wtc").delete();//cleaning&preparing
549
		new File(rootDir,"/wtc/").deleteDir();
550
		new File(rootDir,"/wtc/").mkdir();
548
		new File(rootDir+"/cqp/","bvh.cqp").delete();//cleaning&preparing
549
		new File(rootDir,"/cqp/").deleteDir();
550
		new File(rootDir,"/cqp/").mkdir();
551 551
		new File(rootDir,"/data/").deleteDir();
552 552
		new File(rootDir,"/data/").mkdir();
553 553
		new File(rootDir,"registry/").mkdir();
......
570 570
				String txtname = f.getName().substring(0,f.getName().length()-4);
571 571
				def builder = new compiler(f.toURL(),txtname, "bvh", "default");
572 572
				builder.setLang lang
573
				builder.transfomFileWtc(rootDir+"/wtc","bvh.wtc");
573
				builder.transfomFileWtc(rootDir+"/cqp","bvh.cqp");
574 574
			}
575 575
		}
576 576
		
......
586 586
		String[] sAttributes = ["txmcorpus:0+lang", "text:2+id+type+base+project","div:2+id+type","p:0+id","q:0+id","said:0+id","foreign:0+id","lg:0+id","l:0+id","sp:0+id","speaker:0+id","stage:0+id"];
587 587
		try {
588 588
			String regPath = outDirTxm + "/registry/"+"bvh"
589
			cwbEn.run(outDirTxm + "/data", outDir + "/wtc/"+"bvh.wtc", regPath, pAttributes, sAttributes);
589
			cwbEn.run(outDirTxm + "/data", outDir + "/cqp/"+"bvh.cqp", regPath, pAttributes, sAttributes);
590 590
			if (!new File(regPath).exists()) {
591 591
				println "Error: The registry file was not created: $regPath. See https://groupes.renater.fr/wiki/txm-users/public/faq"
592 592
				return false;
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/discours/compiler.groovy (revision 803)
180 180
			println ("Error: CWB executables not well set.")
181 181
			return false;
182 182
		}
183
		File wtcFile = new File(binDir,"wtc/${corpusname}.wtc");
184
		new File(binDir, "wtc.").deleteDir();
185
		new File(binDir, "wtc").mkdirs();
183
		File cqpFile = new File(binDir,"cqp/${corpusname}.cqp");
184
		new File(binDir, "cqp.").deleteDir();
185
		new File(binDir, "cqp").mkdirs();
186 186
		new File(binDir, "data/$corpusname").deleteDir();
187 187
		new File(binDir, "data/$corpusname").mkdir();
188 188
		new File(binDir, "registry").deleteDir();
189 189
		new File(binDir, "registry").mkdir();
190 190

  
191 191
		//start corpus
192
		if (createOutput(wtcFile)) {
192
		if (createOutput(cqpFile)) {
193 193
			output.write("<txmcorpus lang=\""+lang+"\">\n");
194 194
			output.close();
195 195
		}
......
198 198
		//1- Transform into WTC file
199 199

  
200 200
		Collections.sort(files);
201
		XMLTXM2WTC wtcbuilder = null;
201
		XMLTXM2WTC cqpbuilder = null;
202 202
		for (File f : files) {
203 203
			if (second) { print(", ") }
204 204
			if (second > 0 && (second % 5) == 0) println ""
205 205
			print(f.getName().replaceFirst("\\.xml", ""));
206 206
			second++
207 207

  
208
			wtcbuilder = new XMLTXM2WTC(f.toURI().toURL());
208
			cqpbuilder = new XMLTXM2WTC(f.toURI().toURL());
209 209
			String txtname = f.getName().substring(0,f.getName().length()-4);
210
			wtcbuilder.setTextInfo(txtname, corpusname, "project");
210
			cqpbuilder.setTextInfo(txtname, corpusname, "project");
211 211

  
212
			wtcbuilder.setBalisesToKeep(["text","p","s"]);
213
			wtcbuilder.setSendToPAttributes(["s":["id"], "p":["id"]]);
214
			wtcbuilder.setLang(lang);
215
			if (!wtcbuilder.transformFile(wtcFile)) {
212
			cqpbuilder.setBalisesToKeep(["text","p","s"]);
213
			cqpbuilder.setSendToPAttributes(["s":["id"], "p":["id"]]);
214
			cqpbuilder.setLang(lang);
215
			if (!cqpbuilder.transformFile(cqpFile)) {
216 216
				println("Failed to compile "+f)
217 217
			}
218 218

  
219 219
		}
220 220

  
221 221
		//end corpus
222
		if (createOutput(wtcFile)) {
222
		if (createOutput(cqpFile)) {
223 223
			output.write("</txmcorpus>\n");
224 224
			output.close();
225 225
		}
226 226

  
227
		if (wtcbuilder == null) {
227
		if (cqpbuilder == null) {
228 228
			println "there was no files to process: "+files
229 229
			return false;
230 230
		}
......
235 235
		CwbEncode cwbEn = new CwbEncode();
236 236
		CwbMakeAll cwbMa = new CwbMakeAll();
237 237

  
238
		List<String> pAttributesList = wtcbuilder.getpAttributs();
239
		List<String> sAttributesList = wtcbuilder.getsAttributs();
238
		List<String> pAttributesList = cqpbuilder.getpAttributs();
239
		List<String> sAttributesList = cqpbuilder.getsAttributs();
240 240
		println "pAttrs : "+pAttributesList
241 241
		println "sAttrs : "+sAttributesList
242 242
		String[] pAttributes = pAttributesList.toArray(new String[pAttributesList.size()])
......
248 248
			String regPath = outDir + "registry/"+corpusname.toLowerCase()
249 249
			cwbEn.run(
250 250
					outDir + "data/$corpusname",
251
					outDir + "/wtc/"+corpusname+".wtc",
251
					outDir + "/cqp/"+corpusname+".cqp",
252 252
					regPath,
253 253
					pAttributes,
254 254
					sAttributes);
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/RGAQCJ/compiler.groovy (revision 803)
161 161
	}
162 162
	
163 163
	/**
164
	 * Transfom file wtc.
164
	 * Transfom file cqp.
165 165
	 *
166 166
	 * @param dirPathName the dir path name
167 167
	 * @param fileName the file name
......
516 516
			println ("binary directory does not exists: "+rootDir)
517 517
			return false;
518 518
		}
519
		new File(rootDir+"/wtc/","bfm3tt.wtc").delete();//cleaning&preparing
520
		new File(rootDir,"/wtc/").deleteDir();
521
		new File(rootDir,"/wtc/").mkdir();
519
		new File(rootDir+"/cqp/","bfm3tt.cqp").delete();//cleaning&preparing
520
		new File(rootDir,"/cqp/").deleteDir();
521
		new File(rootDir,"/cqp/").mkdir();
522 522
		new File(rootDir,"/data/").deleteDir();
523 523
		new File(rootDir,"/data/").mkdir();
524 524
		new File(rootDir,"registry/").mkdir();
......
539 539
				println("process file "+f)
540 540
				def builder = new compiler(f.toURL(),f.getName(),"bfm3tt","default");
541 541
				builder.setLang(lang);
542
				builder.transfomFileWtc(rootDir+"/wtc","bfm3tt.wtc");
542
				builder.transfomFileWtc(rootDir+"/cqp","bfm3tt.cqp");
543 543
			}
544 544
		}
545 545
		
......
554 554
		 String[] sAttributes = ["txmcorpus:0+lang","text:0+id+titre+auteur+datecompo+ssiecle+domaine+genre+forme+dialecte+base+project","front:0+n","body:0+n","ab:0+n+rend","div:0+id+type","q:1+n","p:0+n","back:0+n"];
555 555
		 try
556 556
		 {
557
		 cwbEn.run(outDirTxm + "/data", outDir + "/wtc/"+"bfm3tt.wtc", outDirTxm + "/registry/"+"bfm3tt",pAttributes, sAttributes);
557
		 cwbEn.run(outDirTxm + "/data", outDir + "/cqp/"+"bfm3tt.cqp", outDirTxm + "/registry/"+"bfm3tt",pAttributes, sAttributes);
558 558
		 cwbMa.run("BFM3TT", outDirTxm + "/registry");
559 559
		 } catch (Exception ex) {System.out.println(ex); return false;}
560 560
		 
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/RGAQCJ/importRGAQCJ.groovy (revision 803)
169 169
	} 
170 170
	
171 171
	/**
172
	 * Transfom file wtc.
172
	 * Transfom file cqp.
173 173
	 *
174 174
	 * @param dirPathName the dir path name
175 175
	 * @param fileName the file name
......
345 345
	 */
346 346
	public static void process(List<String> files, String rootDir) 
347 347
	{
348
		new File(rootDir+"wtc/").mkdir();
348
		new File(rootDir+"cqp/").mkdir();
349 349
		new File(rootDir+"registry/").mkdir();
350 350
		//String[] files = ["roland-ana.xml","qgraal_cm-ana.xml","artu-ana.xml","qjm-ana.xml","commyn1-ana.xml","jehpar-ana.xml"];
351 351
		//String rootDir = "~/xml/rgaqcj/";
......
353 353
		println(types);
354 354
		
355 355
		//1- Transform into WTC file
356
		File f = new File(rootDir+"wtc/","RGAQCJ.wtc");
356
		File f = new File(rootDir+"cqp/","RGAQCJ.cqp");
357 357
		f.delete();
358 358
		
359 359
		def output = new OutputStreamWriter(new FileOutputStream(f) , "UTF-8");
......
367 367
			println("process file "+files[i])
368 368
			String file = files[i]; 
369 369
			def builder = new BuildXmlRGAQCJ(new File(rootDir+"/anainline/",file).toURL(),""+initiales.charAt(i),types);
370
			builder.transfomFileWtc(rootDir+"wtc","RGAQCJ.wtc");
370
			builder.transfomFileWtc(rootDir+"cqp","RGAQCJ.cqp");
371 371
		}
372 372
		
373 373
		output = new OutputStreamWriter(new FileOutputStream(f,true) , "UTF-8");
......
391 391
		{
392 392
			if (System.getProperty("os.name").contains("Windows"))
393 393
			{
394
				cwbEn.run(outDirTxm + "data/"+"RGAQCJ", outDir +"wtc/"+"RGAQCJ"+".wtc", outDirTxm + "registry/"+"rgaqcj",pAttributes, sAttributes);
394
				cwbEn.run(outDirTxm + "data/"+"RGAQCJ", outDir +"cqp/"+"RGAQCJ"+".cqp", outDirTxm + "registry/"+"rgaqcj",pAttributes, sAttributes);
395 395
				cwbMa.run("RGAQCJ", outDirTxm + "registry");
396 396
			}
397 397
			else
398 398
			{
399
				cwbEn.run(outDirTxm + "data/"+"RGAQCJ", outDir + "/wtc/"+"RGAQCJ.wtc", outDirTxm + "registry/"+"rgaqcj",pAttributes, sAttributes);
399
				cwbEn.run(outDirTxm + "data/"+"RGAQCJ", outDir + "/cqp/"+"RGAQCJ.cqp", outDirTxm + "registry/"+"rgaqcj",pAttributes, sAttributes);
400 400
				cwbMa.run("RGAQCJ", outDirTxm + "registry");
401 401

  
402 402
			}
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/ImportXmlTag.groovy (revision 803)
179 179
	public static void main(String[] args) {
180 180

  
181 181
		String rootDir = "~/xml/rgaqcj/";
182
		new File(rootDir+"/wtc/").mkdir();
182
		new File(rootDir+"/cqp/").mkdir();
183 183
		new File(rootDir,"/registry/").mkdir();
184 184

  
185 185
		File srcfile = new File(rootDir+"/src/","roland.xml");
186
		File resultfile = new File(rootDir+"/wtc/","roland-xmltag.wtc");
186
		File resultfile = new File(rootDir+"/cqp/","roland-xmltag.cqp");
187 187
		println("importXmlTag : "+srcfile+" to : "+resultfile );
188 188

  
189 189
		def builder = new ImportXmlTag(srcfile.toURL());
190 190
		builder.process(resultfile);
191 191

  
192 192
		def inDir = rootDir;
193
		def outDir =rootDir+"wtc/";
193
		def outDir =rootDir+"cqp/";
194 194
		def outDirTxm = rootDir;
195 195
		CwbEncode cwbEn = new CwbEncode();
196 196
		CwbMakeAll cwbMa = new CwbMakeAll();
......
199 199
		String[] sAttributes = ["children:"+builder.maxprof+"+n"];
200 200

  
201 201
		try {
202
			cwbEn.run(outDirTxm + "data/"+"ROLANDXML", outDir +"roland-xmltag.wtc", outDirTxm + "registry/"+"rolandxml",pAttributes, sAttributes);
202
			cwbEn.run(outDirTxm + "data/"+"ROLANDXML", outDir +"roland-xmltag.cqp", outDirTxm + "registry/"+"rolandxml",pAttributes, sAttributes);
203 203
			cwbMa.run("ROLANDXML", outDirTxm + "registry");
204 204
		} catch (Exception ex) {System.out.println(ex);}
205 205

  
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/cqp/compiler.groovy (revision 803)
1

  
2

  
3
// Copyright © 2010-2013 ENS de Lyon.
4
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
5
// Lyon 2, University of Franche-Comté, University of Nice
6
// Sophia Antipolis, University of Paris 3.
7
//
8
// The TXM platform is free software: you can redistribute it
9
// and/or modify it under the terms of the GNU General Public
10
// License as published by the Free Software Foundation,
11
// either version 2 of the License, or (at your option) any
12
// later version.
13
//
14
// The TXM platform is distributed in the hope that it will be
15
// useful, but WITHOUT ANY WARRANTY; without even the implied
16
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17
// PURPOSE. See the GNU General Public License for more
18
// details.
19
//
20
// You should have received a copy of the GNU General
21
// Public License along with the TXM platform. If not, see
22
// http://www.gnu.org/licenses.
23
//
24
//
25
//
26
// $LastChangedDate: 2012-01-05 14:27:34 +0100 (jeu., 05 janv. 2012) $
27
// $LastChangedRevision: 2096 $
28
// $LastChangedBy: mdecorde $
29
//
30
package org.txm.importer.cqp
31

  
32
import org.txm.Toolbox;
33
import org.txm.importer.cwb.*
34
import org.txm.importer.*;
35
import org.txm.scripts.*;
36
import org.txm.scripts.teitxm.*;
37
import org.txm.utils.io.FileCopy;
38
import org.txm.utils.treetagger.TreeTagger;
39
import java.util.ArrayList;
40
import javax.xml.stream.*;
41
import java.net.URL;
42
import java.io.File;
43
import java.util.HashMap;
44
import java.util.List;
45

  
46
// TODO: Auto-generated Javadoc
47
/**
 * Compiles an existing CQP/WTC source file into CWB indexes.
 *
 * The source file is copied into the "cqp" directory of the binary corpus
 * directory, then cwb-encode and cwb-makeall are run on the copy. Positional
 * and structural attributes are read from the source registry file when one
 * is given; otherwise they are derived from the copied file itself and an
 * extra 'id' column is appended to every token line.
 */
class compiler
{
	/** When true, the CWB wrappers are run in debug mode. */
	boolean debug = false;

	/** The dir. */
	private def dir;

	/** Source CQP/WTC file and, optionally, its CWB registry file (may be null). */
	File srcWTCFile, srcRegistryFile;

	/** Positional attributes handed to cwb-encode; filled by run(). */
	public def pAttributesList = [];
	/** Structural attributes handed to cwb-encode; filled by run(). */
	public def sAttributesList = [];

	/**
	 * @param cqpFile the source CQP/WTC file to compile
	 * @param registryFile the source registry file, or null if there is none
	 */
	public compiler(File cqpFile, File registryFile) {
		this.srcWTCFile = cqpFile;
		this.srcRegistryFile = registryFile
	}

	/**
	 * Switches debug mode on.
	 */
	public setDebug()
	{
		debug = true;
	}

	/**
	 * Copies the source file into binDir/cqp and builds the CWB indexes.
	 *
	 * @param binDir the binary corpus directory; it must already exist
	 * @param corpusname the corpus name, used for the file, data and registry names
	 * @return true if cwb-encode and cwb-makeall ran and the registry file exists
	 */
	public boolean run(File binDir, String corpusname)
	{
		if (!(CwbEncode.isExecutableAvailable() && CwbMakeAll.isExecutableAvailable())) {
			println ("Error: CWB executables not well set.")
			return false;
		}
		if (!binDir.exists()) {
			println ("binary directory does not exists: "+binDir)
			return false;
		}

		if (!srcWTCFile.exists()) {
			println "Error: cannot find the WTC file $srcWTCFile"
			return false;
		}

		// reset the working directories
		File cqpFile = new File(binDir,"cqp/"+corpusname+".cqp");
		new File(binDir, "cqp").deleteDir();
		new File(binDir, "cqp").mkdir();
		new File(binDir, "data").deleteDir();
		new File(binDir, "data").mkdir();
		new File(binDir, "registry").mkdir();

		FileCopy.copy(srcWTCFile, cqpFile);

		//2- Import into CWB
		def outDir = binDir.getAbsolutePath()+"/";

		CwbEncode cwbEn = new CwbEncode();
		CwbMakeAll cwbMa = new CwbMakeAll();
		cwbEn.setDebug(debug);
		cwbMa.setDebug(debug);

		String[] pAttributes;
		String[] sAttributes;

		if (srcRegistryFile == null) {
			println "WARNING: No registry file found in source directory"
			println "We'll search for positional attributes and structural attributes in the WTC file"

			// s attributes
			BuildCwbEncodeArgsFromWTC argsgetter = new BuildCwbEncodeArgsFromWTC(); // XML stream
			argsgetter.process(cqpFile); //$NON-NLS-1$
			sAttributesList = argsgetter.getSAttributes();

			// p attributes: count the columns of the first token line and
			// append a generated 'id' column to every token line
			int nbAttr = -1;
			File tmp = File.createTempFile("txm", ".cqp", cqpFile.getParentFile());
			int wcounter = 1;
			println "Adding the 'id' property to the WTC file and getting word properties number."
			tmp.withWriter("UTF-8") { writer ->
				cqpFile.eachLine("UTF-8") { line ->
					if (!line.startsWith("<")) {
						if (nbAttr == -1) nbAttr = line.split("\t").size();
						writer.println(line+"\tw_"+(wcounter++))
					} else {
						writer.println(line)
					}
				}
			} // withWriter flushes and closes the writer

			if (nbAttr == -1) {
				// no token line at all: the attribute count below would be meaningless
				tmp.delete();
				println "Error: no word line found in the WTC file $cqpFile"
				return false;
			}

			// cwb-encode reads cqpFile below, so a failed swap must stop the import
			if (!(cqpFile.delete() && tmp.renameTo(cqpFile))) {
				println "Error: could not replace $cqpFile with the rewritten file $tmp"
				return false;
			}

			System.out.println("Found "+(nbAttr-1)+" word properties, $nbAttr with the 'id'");
			for (int i = 1; i < nbAttr ; i++) {
				pAttributesList << "p$i";
			}
			pAttributesList << "id"
		} else {
			ReadRegistryFile reader = new ReadRegistryFile(srcRegistryFile);
			pAttributesList = reader.getPAttributes();
			sAttributesList = reader.getSAttributes();
			pAttributesList.remove(0) // remove word

			if (!pAttributesList.contains("id")) {
				System.out.println("Error: The registry file does not declare the 'id' word property");
				return false;
			}
		}

		pAttributes = pAttributesList; // cast to array
		sAttributes = sAttributesList; // cast to array

		println "pAttrs : "+Arrays.toString(pAttributes)
		println "sAttrs : "+Arrays.toString(sAttributes)

		try {
			String regPath =outDir + "/registry/"+corpusname.toLowerCase()
			cwbEn.run(outDir + "/data/$corpusname",
					cqpFile.getAbsolutePath(),
					regPath, pAttributes, sAttributes);
			if (!new File(regPath).exists()) {
				println "Error: The registry file was not created: $regPath. See https://groupes.renater.fr/wiki/txm-users/public/faq"
				return false;
			}
			cwbMa.run(corpusname, outDir + "/registry");
			return true;
		} catch (Exception ex) {System.out.println(ex); return false;}
	}

	/**
	 * Command line entry point.
	 *
	 * @param args cqpFile, binDir, corpusname and, optionally, registryFile
	 */
	public static void main(String[] args)
	{
		if (args.length < 3) {
			println "Usage: compiler <cqpFile> <binDir> <corpusname> [registryFile]"
			return;
		}
		File registryFile = (args.length > 3) ? new File(args[3]) : null;
		def c = new compiler(new File(args[0]), registryFile);
		if (!c.run(new File(args[1]), args[2])) {
			println "Compilation failed"
		}
	}
}
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/cqp/cqpLoader.groovy (revision 803)
1
package org.txm.importer.cqp
2

  
3
import org.txm.importer.cwb.BuildCwbEncodeArgs;
4
import java.io.File;
5
import java.util.ArrayList;
6

  
7
import org.txm.objects.*;
8
import org.txm.scripts.teitxm.*;
9
import org.txm.*;
10
import org.txm.utils.i18n.*;
11
import org.txm.metadatas.*;
12
import org.txm.utils.io.FileCopy;
13
import org.w3c.dom.Element
14
import org.txm.utils.xml.DomUtils;
15

  
16
// Import script for an existing CQP/WTC source file: reads the import
// parameters, resets the binary corpus directory, compiles the CWB indexes
// with the cqp compiler and, when requested, builds the default HTML edition.
String userDir = System.getProperty("user.home");

def MONITOR;
boolean debug = org.txm.utils.logger.Log.isPrintingErrors();
BaseParameters params;
try {params = paramsBinding;MONITOR=monitor} catch (Exception ignored)
{	println "DEV MODE";//exception means we debug
	debug = true
	params = new BaseParameters(new File(userDir, "xml/cqp/import.xml"))
	params.load()
	if (!org.txm.Toolbox.isInitialized()) {
		Toolbox.setParam(Toolbox.INSTALL_DIR,new File("/usr/lib/TXM"));
		//Toolbox.setParam(Toolbox.INSTALL_DIR,new File("C:\\Program Files\\TXM"));//For Windows
		Toolbox.setParam(Toolbox.TREETAGGER_INSTALL_PATH,new File(userDir,"treetagger"));
		//Toolbox.setParam(Toolbox.TREETAGGER_INSTALL_PATH,new File("C:\\Program Files\\treetagger"));//for Windows
		Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH,new File(userDir,"treetagger/models"));
		Toolbox.setParam(Toolbox.METADATA_ENCODING, "UTF-8");
		Toolbox.setParam(Toolbox.METADATA_COLSEPARATOR, ",");
		Toolbox.setParam(Toolbox.METADATA_TXTSEPARATOR, "\"");
		//Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH,new File("C:\\Program Files\\treetagger\\models"));//for Windows
		Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(System.getProperty("user.home"), "TXM"));
	}
}
if (params == null) { println "no parameters. Aborting"; return; }

// NOTE(review): model, encoding, annotate, xsl and xslParams are read but not
// used further down in this script; the reads are kept as they were.
String corpusname = params.getCorpusName();
Element corpusElem = params.corpora.get(corpusname);
String basename = params.name;
String rootDir = params.rootDir;
String lang = corpusElem.getAttribute("lang");
String model = lang
String encoding = corpusElem.getAttribute("encoding");
boolean annotate = "true" == corpusElem.getAttribute("annotate");
String xsl = params.getXsltElement(corpusElem).getAttribute("xsl")
def xslParams = params.getXsltParams(corpusElem);
int wordsPerPage = params.getWordsPerPage("default")
boolean build_edition = params.getDoEdition("default")

File srcDir = new File(rootDir);
File binDir = new File(Toolbox.getTXMHOMEPATH(),"corpora/"+basename);
binDir.deleteDir();
binDir.mkdirs();
if (!binDir.exists()) {
	println "Could not create binDir "+binDir
	return;
}

// FAKE: will contains no XML-TXM files
File txmDir = new File(binDir, "txm/$corpusname");
txmDir.deleteDir();
txmDir.mkdirs();

// Will contains the edition files
File htmlDir = new File(binDir, "HTML/$corpusname");
htmlDir.deleteDir();
htmlDir.mkdirs();

// get the cqp file and the registry file
File cqpFile;
File registryFile;
println "Looking for files to process..."
// File.listFiles() returns null when srcDir is not a readable directory
File[] srcFiles = srcDir.listFiles();
if (srcFiles == null) {
	println "Could not list the source directory: "+srcDir;
	return false;
}
for (File f : srcFiles) {
	if (f.isHidden()) continue;
	if (f.isDirectory()) continue;
	if (f.getName().endsWith(".cqp")) {
		// only the first .cqp file found is used
		if (cqpFile == null) {
			cqpFile = f;
			println "Using WTC file : $cqpFile"
		}
	} else if (f.getName().endsWith(".xml")) {
		// XML files are ignored
	} else if (!f.getName().contains(".")){ // its the registry file
		if (registryFile == null) {
			registryFile = f;
			println "Using registry file : $f"
		}
	}
}

if (cqpFile == null) {
	println "No WTC file in source directory: "+srcDir;
	return false;
}

// get pAttributes and run cwb-encode + cwb-makeall
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
if (MONITOR != null) MONITOR.worked(25, "COMPILING")
println "-- COMPILING - Building Search Engine indexes"
def c = new compiler(cqpFile, registryFile)
c.setDebug(debug)
if (!c.run(binDir, corpusname)) {
	println "Import stopped"
	return;
}

if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }

// start the edition from an empty HTML directory
htmlDir.deleteDir();
htmlDir.mkdirs();
if (build_edition) {

	println "-- EDITION - Building edition"
	if (MONITOR != null) MONITOR.worked(20, "EDITION - Building edition")

	File outdir = new File(binDir,"/HTML/$corpusname/default/");
	outdir.mkdirs();

	print "."

	List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
	List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);

	File binWtcFile = new File(binDir,"cqp/"+corpusname+".cqp");
	def ed = new pager(binWtcFile, outdir, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, registryFile !=null, c.pAttributesList);
	def allIndexes = ed.getIdx();
	def allPages = ed.getPageFiles();
	for (String txtname : ed.getTextNames()) {
		def txtIndexes = allIndexes[txtname];
		def txtPages = allPages[txtname];

		Element text = params.addText(params.corpora.get(corpusname), txtname, cqpFile);
		Element edition = params.addEdition(text, "default", outdir.getAbsolutePath(), "html");

		// declare one page per page file; page numbers start at 1
		for (int i = 0 ; i < txtPages.size();) {
			String wordid = txtIndexes.get(i);
			params.addPage(edition, ""+(++i), wordid);
		}
	}
}
println ""

if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
if (MONITOR != null) MONITOR.worked(20, "FINALIZING")
File paramFile = new File(binDir, "import.xml");
DomUtils.save(params.root.getOwnerDocument(), paramFile);
readyToLoad = true;
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/cqp/package.html (revision 803)
1
<html>
2
<body>
3
<p>CWB import module.</p>
4
</body>
5
</html>
0 6

  
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/cqp/pager.groovy (revision 803)
1
package org.txm.importer.cqp;
2
//Copyright © - ANR Textométrie - http://textometrie.ens-lyon.fr
3
//
4
//This file is part of the TXM platform.
5
//
6
//The TXM platform is free software: you can redistribute it and/or modify
7
//it under the terms of the GNU General Public License as published by
8
//the Free Software Foundation, either version 3 of the License, or
9
//(at your option) any later version.
10
//
11
//The TXM platform is distributed in the hope that it will be useful,
12
//but WITHOUT ANY WARRANTY; without even the implied warranty of
13
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
//GNU General Public License for more details.
15
//
16
//You should have received a copy of the GNU General Public License
17
//along with the TXM platform.  If not, see <http://www.gnu.org/licenses/>.
18
//
19
//
20
//
21
//$LastChangedDate: 2012-06-01 17:47:31 +0200 (ven., 01 juin 2012) $
22
//$LastChangedRevision: 2185 $
23
//$LastChangedBy: mdecorde $
24
//
25

  
26
import org.txm.importer.*;
27
import org.xml.sax.Attributes;
28
import org.txm.importer.filters.*;
29
import java.io.File;
30
import java.io.IOException;
31
import java.util.ArrayList;
32
import javax.xml.parsers.SAXParserFactory;
33
import javax.xml.parsers.ParserConfigurationException;
34
import javax.xml.parsers.SAXParser;
35
import javax.xml.stream.*;
36
import java.net.URL;
37
import org.xml.sax.InputSource;
38
import org.xml.sax.helpers.DefaultHandler;
39

  
40
/**
 * Builds a simple paginated HTML edition from a CQP source file.
 *
 * The source file is read line by line: a line starting with "&lt;" is a
 * structure tag, any other line is a word whose tab-separated columns are the
 * word form followed by its properties. A new HTML page is started for every
 * {@code <text id="...">} tag, for every {@code <pb>} tag and each time the
 * number of words written on the current page exceeds {@code wordmax}.
 *
 * After construction, {@link #getPageFiles()} gives the page files of each text
 * and {@link #getIdx()} gives, in the same order, the id of the first word of
 * each page.
 *
 * @author mdecorde
 */
class pager {
	/** Word forms that are written without a leading space. */
	List<String> NoSpaceBefore;

	/** Word forms (or last characters of word forms) after which no space is written. */
	List<String> NoSpaceAfter;

	/** The wordcount, reset each time a page is created. */
	int wordcount = 0;

	/** Number of the current page inside the current text. */
	int pagecount = 0;

	/** Maximum number of words written on one page. */
	int wordmax = 0;

	/** The basename, used in the title of the pages. */
	String basename = "";

	/** Name of the text being read; "text1" until a text tag with an id is met. */
	String txtname = "";

	/** Directory receiving the HTML pages. */
	File outdir;

	/** The wordid. */
	String wordid;

	/** True while no word has been written on the current page. */
	boolean firstWord = true;

	/** The wordvalue. */
	String wordvalue;

	/** The interpvalue. */
	String interpvalue;

	/** The lastword. */
	String lastword = " ";

	/** The wordtype. */
	String wordtype;

	/** The flagform. */
	boolean flagform = false;

	/** The flaginterp. */
	boolean flaginterp = false;

	/** The url. */
	private def url;

	/** The input data. */
	private def inputData;

	/** The factory. */
	private def factory;

	/** The parser. */
	private XMLStreamReader parser;

	/** The writer. */
	OutputStreamWriter writer;

	/** Writer of the page currently being built, null before the first page. */
	OutputStreamWriter pagedWriter = null;

	/** The CQP source file to read. */
	File cqpFile;

	/** The outfile. */
	File outfile;

	/** Page files of each text: text name -> list of File, in page order. */
	def pages = [:];

	/** First word id of each page: text name -> list of String, aligned with {@link #pages}. */
	def idxstart = [:]
	String editionPage;
	ArrayList<Integer> splitTUs; // contains the tu ids used to split pages
	boolean shouldSplit = false;
	boolean useSplitTUs = false;

	/** True when the word ids are read from a column of the source file. */
	boolean hasWordId = false;

	/** Column of the word id in a word line, -1 when ids are generated. */
	int noWordIdProperty = -1;

	/** Names of the texts found in the source file, in reading order. */
	def texts = []

	/** Name of the text owning the page currently open in {@link #pagedWriter}. */
	private String pageTxtname = "";

	/**
	 * Instantiates a new pager and immediately builds the pages.
	 *
	 * @param cqpFile the CQP source file
	 * @param outdir the directory where the HTML pages are written
	 * @param NoSpaceBefore the word forms written without a leading space
	 * @param NoSpaceAfter the word forms after which no space is written
	 * @param max the maximum number of words per page
	 * @param basename the basename used in the page titles
	 * @param hasWordId true to read the word ids from the "id" property
	 * @param pAttrs the names of the word properties, in column order (the word form itself is not listed)
	 */
	pager(File cqpFile, File outdir, List<String> NoSpaceBefore,
	List<String> NoSpaceAfter, int max, String basename, boolean hasWordId, List pAttrs) {
		this.basename = basename;
		this.txtname = "text1";
		this.outdir = outdir;
		this.wordmax = max;
		this.NoSpaceBefore = NoSpaceBefore;
		this.NoSpaceAfter = NoSpaceAfter;
		this.cqpFile = cqpFile;
		this.hasWordId = hasWordId;
		if (hasWordId) {
			// +1 because column 0 of a word line is the word form
			noWordIdProperty = pAttrs.indexOf("id") + 1;
			if (noWordIdProperty == 0) {
				// no "id" property declared: the field (not the parameter) must
				// be reset so that ids are generated instead of read from column 0
				this.hasWordId = false;
				noWordIdProperty = -1;
			}
		}

		process();
	}

	/**
	 * Terminates and closes the current page, if any. A page that received no
	 * word gets a "w_0" anchor and a "w_0" index entry so that the page list
	 * and the index list of its text keep the same length.
	 */
	private void closeMultiWriter()
	{
		if (pagedWriter != null) {
			if (firstWord) { // there was no words
				// register under the text owning the page: txtname may already
				// designate the next text when this method is called
				if (this.idxstart[pageTxtname] == null) {
					this.idxstart[pageTxtname] = []
				}
				this.idxstart[pageTxtname] << "w_0"
				pagedWriter.write("<span id=\"w_0\"/>");
			}
			pagedWriter.write("</p>\n")
			pagedWriter.write("</body>");
			pagedWriter.write("</html>");
			pagedWriter.close();
		}
	}

	/**
	 * Closes the current page and creates the next page file of the current text.
	 *
	 * @return true, if successful
	 */
	private boolean createNextOutput()
	{
		wordcount = 0;
		shouldSplit = false;
		try {
			closeMultiWriter();
			File outfile = new File(outdir, txtname+"_"+(++pagecount)+".html")
			if (pages[txtname] == null) pages[txtname] = []
			pages[txtname] << outfile;
			pageTxtname = txtname;
			firstWord = true; // waiting for next word

			pagedWriter = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(outfile)) , "UTF-8");
			pagedWriter.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
			pagedWriter.write("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"   \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
			pagedWriter.write("<html>");
			pagedWriter.write("<head>");
			pagedWriter.write("<title>"+basename.toUpperCase()+" $txtname Edition - Page "+pagecount+"</title>");
			pagedWriter.write("<link rel=\"stylesheet\" type=\"text/css\" href=\"tmx.css\"/>");
			pagedWriter.write("<link rel=\"stylesheet\" type=\"text/css\" href=\"style.css\"/>");
			pagedWriter.write("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/>");
			pagedWriter.write("</head>");
			pagedWriter.write("<body>");

			return true;
		} catch (Exception e) {
			System.out.println(e.getLocalizedMessage());
			return false;
		}
	}

	/**
	 * Creates the output.
	 *
	 * @return true, if successful
	 */
	private boolean createOutput() {
		try {
			return createNextOutput();
		} catch (Exception e) {
			System.out.println(e.getLocalizedMessage());
			return false;
		}
	}

	/**
	 * Gets the page files.
	 *
	 * @return the page files of each text, indexed by text name
	 */
	public def getPageFiles() {
		return pages;
	}

	/**
	 * Gets the idx.
	 *
	 * @return the first word id of each page of each text, indexed by text name
	 */
	public def getIdx() {
		return idxstart;
	}

	/**
	 * Reads the CQP source file and writes the HTML pages.
	 */
	void process() {
		String lastword = "";
		int wcounter = 1; // counter of the generated word ids, never reset
		int txtwcounter = 1; // number of words of the current page + 1
		int cline = 0; // current line number, for error messages

		createNextOutput();

		BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(cqpFile) , "UTF-8"));
		try {
			String line;
			while ((line = reader.readLine()) != null) {
				cline++;

				if (line.startsWith("<")) { // structure
					if (line.startsWith("<lb/") || line.startsWith("<br/") || line.startsWith("</p>") ||
						line.startsWith("<lb>") || line.startsWith("<br>")) {
						pagedWriter.write("<br/>");
					}
					if (line.startsWith("<text>") || line.startsWith("<text ")) {
						// a text with an id starts a new series of pages
						int idx = line.indexOf("id=\"");
						if (idx > 0) {
							String tmp = line.substring(idx+4);
							int idx2 = tmp.indexOf("\"");
							if (idx2 > 0) {
								txtname = tmp.substring(0, idx2);
								texts << txtname;
								pagecount = 0;
								createNextOutput();
							}
						}
					} else if (line.startsWith("<s>") || line.startsWith("<s ")) {
						pagedWriter.write(line.replaceAll("<s", "<sent "));
					} else if (line.startsWith("</s>")) {
						pagedWriter.write("</sent>");
					} else if (line.startsWith("<pb/>") || line.startsWith("<pb>")) {
						pagedWriter.write(line);
						txtwcounter = 1;
						createNextOutput();
					} else {
						pagedWriter.write(line);
					}
					continue;
				}

				// word
				if (txtwcounter > wordmax) {
					txtwcounter = 1;
					createNextOutput();
				}
				def split = line.split("\t");
				if (line.length() == 0 || split.size() == 0) {
					// empty line: "".split() yields one empty string, so the
					// length must be tested too
					continue;
				}

				String wordid = null;
				if (hasWordId) {
					if (split.size() <= noWordIdProperty) { // +1 since line contains word
						println "Error: line $cline"
					} else {
						wordid = split[noWordIdProperty];
					}
				}
				if (wordid == null) {
					// ids are generated, or the id column is missing on this line
					wordid = "w_"+(wcounter++);
				}
				txtwcounter++;
				String wordvalue = split[0];

				if (firstWord) {
					firstWord = false;
					if (this.idxstart[txtname] == null) {
						this.idxstart[txtname] = []
					}
					// the id itself is stored, not a list wrapping it
					this.idxstart[txtname] << wordid;
				}

				int l = lastword.length();
				String endOfLastWord = "";
				if (l > 0) {
					endOfLastWord = lastword.substring(l-1);
				}

				// the properties are shown in the tooltip of the word
				String interpvalue = "";
				if (split.size() > 1) {
					interpvalue = split[1..split.size()-1].join(" ")
					interpvalue = interpvalue.replace("&", "&amp;").replace("<", "&lt;").replace("\"","&quot;");
				}

				if (NoSpaceBefore.contains(wordvalue) ||
				NoSpaceAfter.contains(lastword) ||
				wordvalue.startsWith("-") ||
				NoSpaceAfter.contains(endOfLastWord)) {
					pagedWriter.write("<span title=\""+interpvalue+"\" id=\""+wordid+"\">");
				} else {
					pagedWriter.write(" <span title=\""+interpvalue+"\" id=\""+wordid+"\">");
				}

				pagedWriter.write(wordvalue.replace("&", "&amp;").replace("\"","&quot;").replace("<", "&lt;")+"</span>\n");
				lastword = wordvalue;
			}

			closeMultiWriter();
		} finally {
			reader.close();
		}
	}

	/**
	 * Gets the text names.
	 *
	 * @return the names of the texts found in the source file, in reading order
	 */
	def getTextNames() {
		return texts;
	}
}
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/corptef/compiler.groovy (revision 803)
160 160
	}
161 161

  
162 162
	/**
163
	 * Transfom file wtc.
163
	 * Transfom file cqp.
164 164
	 *
165 165
	 * @param dirPathName the dir path name
166 166
	 * @param fileName the file name
......
515 515
			println ("binary directory does not exists: "+rootDir)
516 516
			return false;
517 517
		}
518
		new File(rootDir+"/wtc/","corptef.wtc").delete();//cleaning&preparing
519
		new File(rootDir,"/wtc/").deleteDir();
520
		new File(rootDir,"/wtc/").mkdir();
518
		new File(rootDir+"/cqp/","corptef.cqp").delete();//cleaning&preparing
519
		new File(rootDir,"/cqp/").deleteDir();
520
		new File(rootDir,"/cqp/").mkdir();
521 521
		new File(rootDir,"/data/").deleteDir();
522 522
		new File(rootDir,"/data/").mkdir();
523 523
		new File(rootDir,"registry/").mkdir();
......
540 540
				String txtname = f.getName().substring(0,f.getName().length()-4);
541 541
				def builder = new compiler(f.toURL(),txtname, "corptef", "default");
542 542
				builder.setLang lang
543
				builder.transfomFileWtc(rootDir+"/wtc","corptef.wtc");
543
				builder.transfomFileWtc(rootDir+"/cqp","corptef.cqp");
544 544
			}
545 545
		}
546 546

  
......
557 557
			String regPath = new File(outDir , "registry/corptef").getAbsolutePath();
558 558
			cwbEn.run(
559 559
					new File(outDir, "/data").getAbsolutePath(),
560
					new File(outDir, "wtc/"+"corptef.wtc").getAbsolutePath(),
560
					new File(outDir, "cqp/"+"corptef.cqp").getAbsolutePath(),
561 561
					regPath,
562 562
					pAttributes, sAttributes);
563 563
			if (!new File(regPath).exists()) {
......
568 568
					"CORPTEF",
569 569
					new File(outDir, "registry").getAbsolutePath());
570 570
		} catch (Exception ex) {System.out.println(ex); return false;}
571
		// "C:\Documents and Settings\alavrent\TXM\cwb\bin\cwb-encode" -d "C:\Documents and Settings\alavrent\TXM\corpora\corptef\data" -f "C:\Documents and Settings\alavrent\TXM\corpora\corptef\wtc\corpte.wtc" -R "C:\Documents and Settings\alavrent\TXM\corpora\corptef\registry\corptef" -c utf8 -xsB -xsB -P ttpos -P ttlemme -P id -P pb -P lb -P orig -P sic -P abbr -S text:0+id+titre+auteur+datecompo+ssiecle+domaine+genre+forme+dialecte+base+project -S front:0+n -S body:0+n -S ab:0+n+rend -S div:0+id+type -S p:0+n -S back:0+n
571
		// "C:\Documents and Settings\alavrent\TXM\cwb\bin\cwb-encode" -d "C:\Documents and Settings\alavrent\TXM\corpora\corptef\data" -f "C:\Documents and Settings\alavrent\TXM\corpora\corptef\cqp\corpte.cqp" -R "C:\Documents and Settings\alavrent\TXM\corpora\corptef\registry\corptef" -c utf8 -xsB -xsB -P ttpos -P ttlemme -P id -P pb -P lb -P orig -P sic -P abbr -S text:0+id+titre+auteur+datecompo+ssiecle+domaine+genre+forme+dialecte+base+project -S front:0+n -S body:0+n -S ab:0+n+rend -S div:0+id+type -S p:0+n -S back:0+n
572 572
		System.out.println("Done.")
573 573

  
574 574
		return true;
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/transcriber/compiler.groovy (revision 803)
131 131

  
132 132
		sectionAttrs = new HashSet<String>() // reset section attributs set
133 133

  
134
		File wtcDir = new File(binDir, "wtc")
135
		new File(binDir, "wtc").delete();
136
		wtcDir.mkdir();
137
		if (!new File(binDir, "wtc").exists()) {
138
			println "Can't create wtc directory"
134
		File cqpDir = new File(binDir, "cqp")
135
		new File(binDir, "cqp").delete();
136
		cqpDir.mkdir();
137
		if (!new File(binDir, "cqp").exists()) {
138
			println "Can't create cqp directory"
139 139
			return false;
140 140
		}
141 141

  
142
		File wtcFile = new File(wtcDir, corpusname.toLowerCase()+".wtc")
142
		File cqpFile = new File(cqpDir, corpusname.toLowerCase()+".cqp")
143 143

  
144 144
		// get all anatypes
145 145
		for (File f : xmlfiles) {
146 146
			getAnaTypes(f)
147 147
		}
148 148
		//println "ANATYPES: "+anatypes
149
		if (!createOutput(wtcFile)) return false;
149
		if (!createOutput(cqpFile)) return false;
150 150
		output.write("<txmcorpus lang=\"fr\">\n")
151 151
		output.close();
152 152

  
......
158 158
				}
159 159
		}
160 160

  
161
		if (!createOutput(wtcFile)) return false;
161
		if (!createOutput(cqpFile)) return false;
162 162
		output.write("</txmcorpus>\n")
163 163
		output.close();
164 164

  
......
216 216
		//return;
217 217
		try {
218 218
			cwbEn.run(dataDir.getAbsolutePath(),
219
					wtcFile.getAbsolutePath(),
219
					cqpFile.getAbsolutePath(),
220 220
					registryFile.getAbsolutePath(), pAttributes, sAttributes);
221 221
			if (!registryFile.exists()) {
222 222
				println "Error: The registry file was not created: $regPath. See https://groupes.renater.fr/wiki/txm-users/public/faq"
......
311 311
		String filename = xmlfile.getName()
312 312
		String textid = filename.substring(0, filename.length() - 4);
313 313

  
314
		createOutput(new File(outdir, "wtc/"+corpusname.toLowerCase()+".wtc"));
314
		createOutput(new File(outdir, "cqp/"+corpusname.toLowerCase()+".cqp"));
315 315
		String localname;
316 316

  
317 317
		//get all metadatas declared before Episode tag
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/alceste/compiler.groovy (revision 803)
47 47

  
48 48
// TODO: Auto-generated Javadoc
49 49
/**
50
 * build wtc
50
 * build cqp
51 51
 * build CWB indexes
52 52
 * create registry file.
53 53
 *
......
184 184
			println ("binary directory does not exists: "+binDir)
185 185
			return false;
186 186
		}
187
		File wtcFile = new File(binDir,"wtc/"+corpusname+".wtc");
188
		new File(binDir, "wtc").deleteDir();
189
		new File(binDir, "wtc").mkdir();
187
		File cqpFile = new File(binDir,"cqp/"+corpusname+".cqp");
188
		new File(binDir, "cqp").deleteDir();
189
		new File(binDir, "cqp").mkdir();
190 190
		new File(binDir, "data/$corpusname").deleteDir();
191 191
		new File(binDir, "data/$corpusname").mkdir();
192 192
		new File(binDir, "registry").mkdir();
......
200 200
		}
201 201
		
202 202
		//start corpus
203
		if (createOutput(wtcFile)) {
203
		if (createOutput(cqpFile)) {
204 204
			output.write("<txmcorpus lang=\""+lang+"\">\n");
205 205
			output.close();
206 206
		}
207 207
		
208 208
		Collections.sort(files);
209 209
		//1- Transform into WTC file
210
		XMLTXM2WTC wtcbuilder = null;
210
		XMLTXM2WTC cqpbuilder = null;
211 211
		println("process "+files.size()+" files")
212 212
		for (File f : files) {
213 213
			print "."
......
215 215
			if (!f.exists()) {
216 216
				println("file "+f+ " does not exists")	
217 217
			} else {			
218
				wtcbuilder = new XMLTXM2WTC(f.toURI().toURL());
218
				cqpbuilder = new XMLTXM2WTC(f.toURI().toURL());
219 219
				String txtname = f.getName().substring(0, f.getName().length()-4);
220
				wtcbuilder.setTextInfo(txtname, corpusname, "default");
220
				cqpbuilder.setTextInfo(txtname, corpusname, "default");
221 221

  
222
				wtcbuilder.setBalisesToKeep(["text", "s", "p", "lb"]);
223
				wtcbuilder.setSendToPAttributes(["lb":["n"], "p":["n"], "s":["n"]])
224
				wtcbuilder.setLang(lang);
225
				if (!wtcbuilder.transformFile(wtcFile)) {
222
				cqpbuilder.setBalisesToKeep(["text", "s", "p", "lb"]);
223
				cqpbuilder.setSendToPAttributes(["lb":["n"], "p":["n"], "s":["n"]])
224
				cqpbuilder.setLang(lang);
225
				if (!cqpbuilder.transformFile(cqpFile)) {
226 226
					println("Failed to compile "+f)
227 227
				}
228 228
			}
......
230 230
		println ""
231 231
		
232 232
		//end corpus
233
		if (createOutput(wtcFile)) {
233
		if (createOutput(cqpFile)) {
234 234
			output.write("</txmcorpus>\n");
235 235
			output.close();
236 236
		}
237 237
		
238
		if (wtcbuilder == null) {
238
		if (cqpbuilder == null) {
239 239
			println "there was no files in bin dir : "+txmDir
240 240
			return false;
241 241
		}
......
247 247
		CwbMakeAll cwbMa = new CwbMakeAll();
248 248
		cwbMa.setDebug(debug);
249 249
		
250
		List<String> pAttributesList = wtcbuilder.getpAttributs();
251
		List<String> sAttributesList = wtcbuilder.getsAttributs();
250
		List<String> pAttributesList = cqpbuilder.getpAttributs();
251
		List<String> sAttributesList = cqpbuilder.getsAttributs();
252 252
		println "pAttrs : "+pAttributesList
253 253
		println "sAttrs : "+sAttributesList
254 254
		String[] pAttributes = pAttributesList.toArray(new String[pAttributesList.size()])
......
256 256
		
257 257
		try {
258 258
			String regPath = rootDir + "/registry/"+corpusname.toLowerCase(); // CQP wants lowercase registry name
259
			cwbEn.run(rootDir + "/data/${corpusname}", rootDir + "/wtc/"+corpusname+".wtc", regPath,pAttributes, sAttributes);
259
			cwbEn.run(rootDir + "/data/${corpusname}", rootDir + "/cqp/"+corpusname+".cqp", regPath,pAttributes, sAttributes);
260 260
			if (!new File(regPath).exists()) {
261 261
				println "The registry file was not created: $regPath. See https://groupes.renater.fr/wiki/txm-users/public/faq"
262 262
				return false;
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xmltxm/compiler.groovy (revision 803)
176 176
	}
177 177

  
178 178
	/**
179
	 * Transfom file wtc.
179
	 * Transfom file cqp.
180 180
	 *
181 181
	 * @param dirPathName the dir path name
182 182
	 * @param fileName the file name
183 183
	 * @return true, if successful
184 184
	 */
185
	public boolean transfomFileWtc(File wtcFile)
185
	public boolean transfomFileWtc(File cqpFile)
186 186
	{
187
		createOutput(wtcFile);
187
		createOutput(cqpFile);
188 188
		String headvalue=""
189 189
		String vAna = "";
190 190
		String vForm = "";
......
356 356
			return false;
357 357
		}
358 358

  
359
		File wtcFile = new File(binDir,"wtc/"+corpusname.toLowerCase()+".wtc");
360
		new File(rootDir,"/wtc/").deleteDir();
361
		new File(rootDir,"/wtc/").mkdir();
359
		File cqpFile = new File(binDir,"cqp/"+corpusname.toLowerCase()+".cqp");
360
		new File(rootDir,"/cqp/").deleteDir();
361
		new File(rootDir,"/cqp/").mkdir();
362 362
		new File(rootDir,"data/"+corpusname).deleteDir();
363 363
		new File(rootDir,"data/"+corpusname).mkdir();
364 364
		new File(rootDir,"registry/").mkdir();
......
373 373
		}
374 374

  
375 375
		//0 set Lang
376
		if (createOutput(wtcFile)) {
376
		if (createOutput(cqpFile)) {
377 377
			output.write("<txmcorpus lang=\""+lang+"\">\n");
378 378
			output.close();
379 379
		}
......
388 388
				String txtname = f.getName().substring(0,f.getName().length()-4);
389 389
				builder = new compiler(f.toURI().toURL(), txtname, corpusname.toLowerCase(), "default");
390 390
				builder.setLang(lang);
391
				if(!builder.transfomFileWtc(wtcFile))
391
				if(!builder.transfomFileWtc(cqpFile))
392 392
					return false;
393 393
			}
394 394
		}
395 395

  
396 396
		//end corpus
397
		if (createOutput(wtcFile)) {
397
		if (createOutput(cqpFile)) {
398 398
			output.write("</txmcorpus>\n");
399 399
			output.close();
400 400
		}
......
448 448
			String regPath = rootDir + "/registry/"+corpusname.toLowerCase() 
449 449
			cwbEn.run(
450 450
				rootDir + "/data/$corpusname", 
451
				wtcFile.getAbsolutePath(), 
451
				cqpFile.getAbsolutePath(), 
452 452
				regPath, pAttributes, sAttributes);
453 453
			if (!new File(regPath).exists()) {
454 454
				println "Error: The registry file was not created: $regPath. See https://groupes.renater.fr/wiki/txm-users/public/faq"
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/fleurs/compiler.groovy (revision 803)
140 140
	}
141 141
		
142 142
	/**
143
	 * Transfom file wtc.
143
	 * Transfom file cqp.
144 144
	 *
145 145
	 * @param dirPathName the dir path name
146 146
	 * @param fileName the file name
......
240 240
			println ("Error: CWB executables not well set.")
241 241
			return false;
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff