Revision 479 tmp/org.txm.groovy.core/src/groovy/org/txm/importer/XMLTXM2WTC.groovy

XMLTXM2WTC.groovy (revision 479)
21 21
// 
22 22
// 
23 23
// 
24
// $LastChangedDate: 2016-03-29 09:51:35 +0200 (Tue, 29 Mar 2016) $
25
// $LastChangedRevision: 3185 $
24
// $LastChangedDate: 2017-04-11 15:30:35 +0200 (mar. 11 avril 2017) $
25
// $LastChangedRevision: 3426 $
26 26
// $LastChangedBy: mdecorde $ 
27 27
//
28 28
package org.txm.importer
......
225 225
					String localname = parser.getLocalName().toLowerCase();
226 226
				
227 227
				// we will only declare found tags in cwb registry
228
					if(balisesToKeep.contains(localname))
229
					{
230
						if(!balisesfound.containsKey(localname))
231
						{
232
							balisesfound.put(localname,[]);
228
					if(balisesToKeep.contains(localname)) {
229
						if(!balisesfound.containsKey(localname)) {
230
							balisesfound.put(localname, []);
233 231
						}
234 232
						
235 233
						List<String> attrlist = balisesfound.get(localname);
236
						for(int i= 0 ; i < parser.getAttributeCount() ;i++ )
234
						for (int i= 0 ; i < parser.getAttributeCount() ;i++ )
237 235
							if(!attrlist.contains(parser.getAttributeLocalName(i)))
238 236
								attrlist.add(parser.getAttributeLocalName(i));
239
						
240 237
					}
241
					switch (localname) 
242
					{						
238
					
239
					switch (localname) {						
243 240
						case "w": // get word id !!
244 241
							wordattributes.put("id", parser.getAttributeValue(null, "id"));
245 242
							break;
......
423 420
	 */
424 421
	public List<String> getsAttributs()
425 422
	{
423
		println balisesfound
426 424
		def sAttributs = [];
427 425
		for (String balise : this.balisesfound.keySet()) {
428 426
			List<String> sAtt = this.balisesfound.get(balise);
......
488 486
	 */
489 487
	public static void main(String[] args) {
490 488
		
491
		String rootDir = "C:/Documents and Settings/H/xml/quick/txm";
489
		String rootDir = "/home/mdecorde/TXM/corpora/CORNEILLEMOLIERETER/txm/CORNEILLEMOLIERETER";
492 490
		
493
		File srcfile = new File(rootDir,"quick.xml");
494
		File wtcfile = new File(rootDir+"/wtc","quick.wtc");
495
		new File(rootDir,"wtc").deleteDir()
496
		new File(rootDir,"wtc").mkdir()
491
		File srcfile = new File(rootDir,"CORNEILLEP_AGESILAS_1666.xml");
492
		println srcfile.exists()
493
		File wtcfile = new File(rootDir, "out/CORNEILLEP_AGESILAS_1666.wtc");
494
		new File(rootDir,"out").deleteDir()
495
		new File(rootDir,"out").mkdir()
497 496
		
498 497
		System.out.println("XMLTXM2WTC : "+srcfile+" >> "+wtcfile);
499 498
		def builder = new XMLTXM2WTC(srcfile.toURL());
500
		def balises = ["text","lb","pb"];
499
		def balises = ["text", "s"];
501 500
		builder.setBalisesToKeep(balises);
502
		builder.transformFile wtcfile;
501
		builder.transformFile(wtcfile);
503 502
		
504
		println(builder.getsAttributs());
505
		println(builder.getpAttributs());
503
		println("SATTRIBUTS: "+builder.getsAttributs());
504
		println("PATTRIBUTS: "+builder.getpAttributs());
506 505
		return;
507 506
	}
508 507
}

Also available in: Unified diff