Révision 3371

TXM/trunk/org.txm.treetagger.core/src/org/txm/importer/xmltxm/Annotate.groovy (revision 3371)
370 370
			}
371 371
			modelfile = new File(modelsDirectory, tmpModelFileName);
372 372
			if (debug)
373
				println "model file : "+modelfile;
373
				println "model file: "+modelfile;
374 374
			
375
			File
376 375
			
376
			
377 377
			if (!modelfile.exists()) {
378 378
				println "Skipping ANNOTATE: '$modelfile' TreeTagger language model file not found."
379
				if(System.getProperty("os.name").startsWith("Windows") || System.getProperty("os.name").startsWith("Mac"))
379
				if(System.getProperty("os.name").startsWith("Windows") || System.getProperty("os.name").startsWith("Mac")) {
380 380
					println "Windows&Mac users: the operating system might be hiding file extensions. Use your file explorer to check the file name."
381
				}
381 382
				return false;
382 383
			} else if (!modelfile.canRead()) {
383 384
				println "Skipping ANNOTATE: impossible to access the '$modelfile' TreeTagger language model file."
......
548 549
		
549 550
		File modelfile = new File(modelsDirectory, lang+".par");
550 551
		if (!"??".equals(lang) && !modelfile.exists()) {
551
			println "Error: No Modelfile available for lang "+modelfile+". Continue import process ";
552
			println "Error: No Modelfile available for lang "+modelfile+" (and no lang detection set). Continue import process ";
552 553
			return false;
553 554
		}
554 555
		File annotfile = new File(annotDir, f.getName()+"-STDOFF.xml");
......
563 564
		String tmpModelFileName = modelfile.getName()
564 565
		if (tmpModelFileName.startsWith("??")) {
565 566
			if (f.length() > LangDetector.MINIMALSIZE) {
567
				
566 568
				String dlang = new LangDetector(f).getLang();
567 569
				//println "$f detected lang is $dlang"
568 570
				tmpModelFileName = dlang+".par"
571
				println "Lang detection activated lang="+dlang
572
			} else {
573
				tmpModelFileName = Locale.getDefault().getCountry().toLowerCase()+".par"
574
				println "Lang detection activated but the input file is too small: using the defaut lang="+Locale.getDefault().getCountry().toLowerCase()
569 575
			}
570 576
		}
571 577
		modelfile = new File(modelsDirectory, tmpModelFileName);
TXM/trunk/org.txm.utils/src/org/txm/utils/treetagger/TreeTagger.java (revision 3371)
41 41
import java.util.Locale;
42 42

  
43 43
import org.apache.commons.lang.StringUtils;
44
import org.txm.utils.logger.Log;
44 45

  
45 46
// Tag files
46 47
/**
......
524 525
		if (debug)
525 526
			System.out.println(StringUtils.join(args.toArray(), " "));
526 527

  
528
		Log.fine(StringUtils.join(args.toArray(), " "));
529
		
527 530
		ProcessBuilder pb = new ProcessBuilder(args);
528 531
		pb.redirectErrorStream(true);
529 532
		Process process = null;
TXM/trunk/org.txm.utils/src/org/txm/utils/LangDetector.java (revision 3371)
15 15
import org.xml.sax.SAXException;
16 16

  
17 17
public class LangDetector {
18
	public static int MINIMALSIZE = 10000;
18
	public static int MINIMALSIZE = 5000;
19 19

  
20 20
	String lang = System.getProperty("user.language");
21 21
	String encoding = System.getProperty("file.encoding");

Formats disponibles : Unified diff