Revision 3051

tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/tmx/tmxLoader.groovy (revision 3051)
88 88
if (annotate) {
89 89
	println "-- ANNOTATE - Running NLP tools"
90 90
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
91
	if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
91
	def mapForTreeTagger = [:]
92
	for (def k : textLangs.keySet()) mapForTreeTagger[k] = textLangs[k].toLowerCase();
93
		println "TreeTagger models to use per text: $mapForTreeTagger"
94
	if (engine.processDirectory(txmDir, binDir, ["langs":mapForTreeTagger])) {
92 95
		annotationSuccess = true;
93 96
		if (project.getCleanAfterBuild()) {
94 97
			new File(binDir, "treetagger").deleteDir()
......
97 100
		}
98 101
	}
99 102
}
100
println "langs : "+textLangs
101
println "texts : "+langGroups
103
//println "langs : "+textLangs
104
//println "texts : "+langGroups
102 105

  
103 106
println "-- COMPILING - Building Search Engine indexes"
104 107
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
tmp/org.txm.treetagger.core/src/org/txm/importer/xmltxm/Annotate.groovy (revision 3051)
403 403
			//					println "convert "+f+" latin1 >> UTF-8"
404 404
			//				new EncodingConverter(f, "ISO-8859-1", "UTF-8")
405 405
			//			}
406
			
407
			
408
			
409
			initTTOutfileInfos(binDir, modelfile, modelfilename);
410
			
411
			File annotfile = new File(binDir, "annotations/"+outfile.getName()+"-STOFF.xml");
412
			if (!writeStandoffFile(outfile, annotfile)) {
413
				println("Failed to build standoff file of "+outfile);
414
			}
415
			if (cancelNow) return;
406 416
		}
407 417
		println("")
408 418
		
......
410 420
			return false;
411 421
		}
412 422
		
413
		//BUILD STAND-OFF FILES
414
		if (modelfile == null) {
415
			println "no model applied"
416
			return false;
417
		}
418
		
419
		initTTOutfileInfos(binDir, modelfile, modelfilename);
420
		files = new File(binDir, "treetagger").listFiles(IOUtils.HIDDENFILE_FILTER)// now contains the result files of TT
421
		println "Building stdoff files ("+files.size()+") from dir:"+new File(binDir, "treetagger")+" to "+new File(binDir, "annotations");
422
		if (files == null || files.size() == 0) {
423
			return false;
424
		}
425
		cpb = new ConsoleProgressBar(files.size())
426
		for (File ttrezfile : files) {
427
			cpb.tick()
428
			File annotfile = new File(binDir, "annotations/"+ttrezfile.getName()+"-STOFF.xml");
429
			if (!writeStandoffFile(ttrezfile, annotfile)) {
430
				println("Failed to build standoff file of "+ttrezfile);
431
			}
432
		}
433
		println("")
434
		
435
		if (cancelNow) return;
436
		
437 423
		//INJECT ANNOTATIONS
438 424
		List<File> interpfiles = new File(binDir, "annotations").listFiles(IOUtils.HIDDENFILE_FILTER);
439 425
		List<File> txmfiles = txmDir.listFiles(IOUtils.HIDDENFILE_FILTER);
tmp/org.txm.treetagger.core/src/org/txm/treetagger/core/TreeTaggerEngine.java (revision 3051)
2 2

  
3 3
import java.io.File;
4 4
import java.util.HashMap;
5
import java.util.Map;
5 6

  
6 7
import org.eclipse.core.runtime.IProgressMonitor;
7 8
import org.eclipse.osgi.util.NLS;
......
82 83
	public boolean processFile(File xmlFile, File binaryCorpusDirectory, HashMap<String, Object> parameters) {
83 84
		if (!isRunning()) return false;
84 85
		
86
		String lang = null;
87
		Object ps = parameters.get("langs");
88
		Object p = parameters.get("lang");
85 89
		
86
		Object p = parameters.get("lang");
87
		if (p == null) {
90
		if (p == null && ps == null) {
91
			Log.warning("Warning: can't annotate. No 'lang' (String) or 'langs' (Map<String, String>) parameter specified in " + parameters);
92
			return false;
93
		}
94
		
95
		if (ps != null && ps instanceof Map) { // 'langs' maps a file name to the language to use for that file
96
			Map<?, ?> map = (Map<?, ?>) ps;
97
			String text_id = xmlFile.getName(); // the lookup key is the file name as returned by File.getName()
98
			if (map.get(text_id) != null) {
99
				lang = map.get(text_id).toString().toLowerCase();
100
				if (!canAnnotateLang(lang)) {
101
					Log.warning(NLS.bind("Warning: cannot annotate text_id={0} with lang={1}, skipping this file.", text_id, lang)); // Java has no "$var" interpolation: bind the values explicitly (no apostrophe in the pattern, NLS.bind treats it as a quote character)
102
					return false;
103
				}
104
			}
105
		}
106
		
107
		if (lang == null && p == null) {
88 108
			System.out.println(NLS.bind("** Error: no 'lang' parameter given: {0}. Aborting TreeTagger annotation.", parameters));
89 109
			return false;
90 110
		}
91
		String lang = p.toString();
111
		else if (lang == null) { // fall back to the default 'lang' only when 'langs' gave no language for this file
112
			lang = p.toString(); // p is non-null here: the (lang == null && p == null) case has already returned
113
		}
114
		
92 115
		if (!canAnnotateLang(lang)) {
93 116
			return false;
94 117
		}
118
		
95 119
		boolean fixExistingValues = false; // default behavior is to replace existing values
96 120
		if (parameters.get("fix_existing_values") != null) {
97 121
			fixExistingValues = "true".equals(parameters.get("fix_existing_values"));
......
99 123
		if (FileUtils.isExtension(xmlFile, "cqp")) {
100 124
			AnnotateCQP annotate = new AnnotateCQP();
101 125
			return annotate.run(xmlFile, lang, binaryCorpusDirectory, xmlFile.getParentFile());
102
		} else {
126
		}
127
		else {
103 128
			Annotate annotate = new Annotate();
104 129
			return annotate.run(xmlFile, lang, fixExistingValues, binaryCorpusDirectory, xmlFile.getParentFile());
105 130
		}
......
134 159
	}
135 160
	
136 161
	public static boolean canAnnotateLang(String lang) {
137
		
162
		if (lang == null) {
163
			Log.warning("** Error: not lang given (null)");
164
			return false;
165
		}
138 166
		File ttInstallDirectory = new File(TreeTaggerPreferences.getInstance().getString(TreeTaggerPreferences.INSTALL_PATH)); // default models directory is set in the Toolbox
139 167
		if (!ttInstallDirectory.exists()) {
140
			System.out.println(NLS.bind("** Error: TreeTagger install directory not found at {0}", ttInstallDirectory));
168
			Log.warning(NLS.bind("** Error: TreeTagger install directory not found at {0}", ttInstallDirectory));
141 169
			return false;
142 170
		}
143 171
		File modelsDirectory = new File(TreeTaggerPreferences.getInstance().getString(TreeTaggerPreferences.MODELS_PATH)); // default models directory is set in the Toolbox
144 172
		File modelfile = new File(modelsDirectory, lang + ".par");
145 173
		if (!"??".equals(lang) && !modelfile.exists()) {
146
			System.out.println(NLS.bind("** Error: no {0} model file found for the {1} lang.", modelfile, lang));
174
			Log.warning(NLS.bind("** Error: no {0} model file found for the {1} lang.", modelfile, lang));
147 175
			return false;
148 176
		}
149 177
		return true;

Also available in: Unified diff