Révision 3055

tmp/org.txm.concordance.rcp/src/org/txm/concordance/rcp/editors/ConcordanceEditor.java (revision 3055)
1902 1902
	 * Refresh reference column title.
1903 1903
	 */
1904 1904
	public void refreshReferenceColumnTitle() {
1905
		
1906
		referenceColumn.setText(concordance.getRefViewPattern().getTitle());
1905
		if (concordance.getRefViewPattern() != null) {
1906
			referenceColumn.setText(concordance.getRefViewPattern().getTitle());
1907
		}
1908
		else {
1909
			referenceColumn.setText("References");
1910
		}
1907 1911
	}
1908 1912
	
1909 1913
	/**
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/StructuralUnitProperty.java (revision 3055)
39 39
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
40 40
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException;
41 41
import org.txm.searchengine.cqp.corpus.query.CQLQuery;
42
import org.txm.searchengine.cqp.corpus.query.MatchUtils;
42 43
import org.txm.searchengine.cqp.serverException.CqiServerError;
43 44
import org.txm.utils.logger.Log;
44 45

  
......
301 302
		
302 303
		return null;
303 304
	}
304

  
305
	
305 306
	public String cpos2Str(int position) throws UnexpectedAnswerException, IOException, CqiServerError {
306 307
		AbstractCqiClient cqiClient = CorpusManager.getCorpusManager().getCqiClient();
307
		return cqiClient.getSingleData(this, new int[] {position}).get(0);
308
		return cqiClient.getSingleData(this, new int[] { position }).get(0);
308 309
	}
309 310
	
310 311
	public String[] cpos2Str(int[] positions) throws UnexpectedAnswerException, IOException, CqiServerError {
311 312
		AbstractCqiClient cqiClient = CorpusManager.getCorpusManager().getCqiClient();
312 313
		return cqiClient.getSingleData(this, positions).toArray(new String[positions.length]);
313 314
	}
315
	
316
	/**
317
	 * Very slow
318
	 * 
319
	 * @return the number of values, computed as the size of the list returned by getValues()
320
	 * @throws IOException
321
	 * @throws CqiServerError
322
	 * @throws CqiClientException
323
	 */
324
	public int getNValues() throws IOException, CqiServerError, CqiClientException {
325
		return getValues().size();
326
	}
327
	
328
	/**
329
	 * Very slow
330
	 * 
331
	 * @return the values returned by getValues(), copied into a String array
332
	 * @throws IOException
333
	 * @throws CqiServerError
334
	 * @throws CqiClientException
335
	 */
336
	public String[] getValuesAsStrings() throws IOException, CqiServerError, CqiClientException {
337
		List<String> values = getValues();
338
		return values.toArray(new String[values.size()]);
339
	}
314 340
}
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/CQPCorpus.java (revision 3055)
1484 1484
		
1485 1485
		return null;
1486 1486
	}
1487
	
1488
	/**
1489
	 * parse a "struct_prop" property name to find the matching StructuralUnitProperty
1490
	 * 
1491
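	 * @param p the "struct_prop" property name to parse
1492
	 * @return the matching StructuralUnitProperty, or null if the lookup throws a CqiClientException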
1493
	 */
1494
	public StructuralUnitProperty getStructuralUnitProperty(String p) {
1495
		
1496
		try {
1497
			return StructuralUnitProperty.stringToStructuralUnitProperty(this, p);
1498
		}
1499
		catch (CqiClientException e) {
1500
			e.printStackTrace();
1501
			return null;
1502
		}
1503
	}
1487 1504
}
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/WordProperty.java (revision 3055)
3 3
import java.io.IOException;
4 4
import java.util.List;
5 5

  
6
import org.apache.commons.lang.math.IntRange;
7
import org.txm.objects.Match;
6 8
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException;
7 9
import org.txm.searchengine.cqp.corpus.query.CQLQuery;
10
import org.txm.searchengine.cqp.corpus.query.MatchUtils;
8 11
import org.txm.searchengine.cqp.serverException.CqiServerError;
9 12

  
10 13
/**
......
62 65
	}
63 66
	
64 67
	public String cpos2Str(int position) throws UnexpectedAnswerException, IOException, CqiServerError {
65
		return CorpusManager.getCorpusManager().getCqiClient().cpos2Str(this.getQualifiedName(), new int[] {position})[0];
68
		return CorpusManager.getCorpusManager().getCqiClient().cpos2Str(this.getQualifiedName(), new int[] { position })[0];
66 69
	}
67 70
	
68 71
	public String[] cpos2Str(int[] positions) throws UnexpectedAnswerException, IOException, CqiServerError {
......
82 85
	}
83 86
	
84 87
	public int str2Id(String value) throws UnexpectedAnswerException, IOException, CqiServerError {
85
		return CorpusManager.getCorpusManager().getCqiClient().str2Id(this.getQualifiedName(), new String[] {value})[0];
88
		return CorpusManager.getCorpusManager().getCqiClient().str2Id(this.getQualifiedName(), new String[] { value })[0];
86 89
	}
87 90
	
88 91
	public String getCQLTest(String value) {
89
		return this.getName() + "=\"" +  CQLQuery.addBackSlash(value)+ "\"";
92
		return this.getName() + "=\"" + CQLQuery.addBackSlash(value) + "\"";
90 93
	}
91 94
	
92 95
	public String getCQLTest(List<String> values) {
93 96
		StringBuilder sb = new StringBuilder();
94 97
		sb.append(this.getName() + "=\""); //$NON-NLS-1$
95
		for (int i = 0 ; i < values.size() ; i++) {
98
		for (int i = 0; i < values.size(); i++) {
96 99
			String s = values.get(i);
97 100
			s = CQLQuery.addBackSlash(s);
98
			sb.append(s); //$NON-NLS-1$
99
			if (i < values.size() -1) {
101
			sb.append(s);
102
			if (i < values.size() - 1) {
100 103
				sb.append("|"); //$NON-NLS-1$
101 104
			}
102 105
		}
103 106
		sb.append("\"");
104 107
		return sb.toString();
108
	}
109
	
110
	public String[] getValues() throws UnexpectedAnswerException, IOException, CqiServerError {
111
		int n = CorpusManager.getCorpusManager().getCqiClient().lexiconSize(this.getQualifiedName());
112
		int[] ids = MatchUtils.toPositions(0, n - 1);
113
		return CorpusManager.getCorpusManager().getCqiClient().id2Str(this.getQualifiedName(), ids);
114
	}
115
	
116
	public int getNValues() throws UnexpectedAnswerException, IOException, CqiServerError {
117
		return CorpusManager.getCorpusManager().getCqiClient().lexiconSize(this.getQualifiedName());
105 118
	};
106 119
}
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/query/MatchUtils.java (revision 3055)
70 70
	 * @return array of int position from a Match start to another Match end
71 71
	 */
72 72
	public static int[] toPositions(Match match1, Match match2) {
73
		int len = match2.end - match1.start + 1;
73
		return toPositions(match1.start, match2.end);
74
	}
75
	
76
	/**
77
	 * 
78
	 * @param from the first position, inclusive
79
	 * @param to the last position, inclusive
80
	 * @return array of the consecutive int positions from {@code from} to {@code to}; empty if {@code to < from}
81
	 */
82
	public static int[] toPositions(int from, int to) {
83
		int len = to - from + 1;
74 84
		
75 85
		if (len <= 0) {
76 86
			return new int[0];
77 87
		}
78 88
		else if (len == 1) {
79
			return new int[] { match1.start };
89
			return new int[] { from };
80 90
		}
81 91
		else if (len == 2) {
82
			return new int[] { match1.start, match2.end };
92
			return new int[] { from, to };
83 93
		}
84 94
		else {
85 95
			int[] positions = new int[len];
86 96
			int n = 0;
87
			for (int i = match1.start; i <= match2.end; i++) {
97
			for (int i = from; i <= to; i++) {
88 98
				positions[n++] = i;
89 99
			}
90 100
			return positions;
tmp/org.txm.annotation.kr.rcp/src/org/txm/annotation/kr/rcp/commands/RecodeCorpus.java (revision 3055)
36 36
import org.txm.utils.logger.Log;
37 37

  
38 38
public class RecodeCorpus extends AbstractHandler {
39

  
39
	
40 40
	public static final String ID = RecodeCorpus.class.getCanonicalName();
41

  
42
	@Option(name="conversionFile", usage="conversionFile", widget="File", required=true, def="conv.tsv")
41
	
42
	@Option(name = "conversionFile", usage = "conversionFile", widget = "File", required = true, def = "conv.tsv")
43 43
	protected File conversionFile;
44
	@Option(name="oldType", usage="oldType", widget="String", required=true, def="pos")
44
	
45
	@Option(name = "oldType", usage = "oldType", widget = "String", required = true, def = "pos")
45 46
	protected String oldType;
46
	@Option(name="newType", usage="newType", widget="String", required=true, def="pos2")
47
	
48
	@Option(name = "newType", usage = "newType", widget = "String", required = true, def = "pos2")
47 49
	protected String newType;
48
	@Option(name="gestionInconnus", usage="oldType", widget="String", required=true, def="abandon")
50
	
51
	@Option(name = "gestionInconnus", usage = "how to manage failed conversion", widget = "String", required = true, def = "abandon")
49 52
	protected String gestionInconnus;
50

  
53
	
51 54
	@Override
52 55
	public Object execute(ExecutionEvent event) throws ExecutionException {
53

  
56
		
54 57
		ISelection isel = HandlerUtil.getCurrentSelection(event);
55
		if (isel == null) return null ;
56
		if (!(isel instanceof IStructuredSelection)) return null ;
58
		if (isel == null) return null;
59
		if (!(isel instanceof IStructuredSelection)) return null;
57 60
		IStructuredSelection selection = (IStructuredSelection) isel;
58 61
		Object s = selection.getFirstElement();
59 62
		if (!(s instanceof MainCorpus)) return null;
60

  
63
		
61 64
		final MainCorpus corpus = (MainCorpus) s;
62

  
65
		
63 66
		try {
64 67
			if (ParametersDialog.open(this)) {
65

  
66
				JobHandler job = new JobHandler("Recode corpus "+corpus) {
68
				
69
				JobHandler job = new JobHandler("Recode corpus " + corpus) {
70
					
67 71
					@Override
68 72
					protected IStatus run(IProgressMonitor monitor) {
69 73
						try {
70 74
							recode(corpus, conversionFile, oldType, newType, monitor);
71
						} catch (Exception e) {
72
							System.out.println("Fail to convert properties of corpus "+corpus);
75
						}
76
						catch (Exception e) {
77
							System.out.println("Fail to convert properties of corpus " + corpus);
73 78
							e.printStackTrace();
74 79
							return Status.CANCEL_STATUS;
75 80
						}
......
79 84
				job.startJob();
80 85
				return corpus;
81 86
			}
82
		} catch (Exception e) {
87
		}
88
		catch (Exception e) {
83 89
			e.printStackTrace();
84 90
		}
85

  
91
		
86 92
		return corpus;
87 93
	}
88

  
89
	public static JobHandler recode(final MainCorpus corpus, CorpusRuledConvertion crc, String oldType, String newType, IProgressMonitor monitor) throws IOException, CqiClientException, XMLStreamException {
90

  
94
	
95
	public static JobHandler recode(final MainCorpus corpus, CorpusRuledConvertion crc, String oldType, String newType, IProgressMonitor monitor) throws IOException, CqiClientException,
96
			XMLStreamException {
97
		
91 98
		Project project = corpus.getProject();
92 99
		if (!"xtz".equals(project.getImportModuleName())) {
93
			System.out.println("Corpus "+corpus+" was imported with XTZ import module");
100
			System.out.println("Corpus " + corpus + " was not imported with the XTZ import module");
94 101
			return null;
95 102
		}
96 103
		
97 104
		Property p1 = corpus.getProperty(oldType);
98 105
		if (p1 == null) {
99
			System.out.println("Corpus "+corpus+" has not property "+oldType);
106
			System.out.println("Corpus " + corpus + " has not property " + oldType);
100 107
			return null;
101 108
		}
102

  
103
		System.out.println("Recoding "+corpus+" property "+oldType+" in "+newType);
104

  
109
		
110
		System.out.println("Recoding " + corpus + " property " + oldType + " in " + newType);
111
		
105 112
		monitor.beginTask("Recoding corpus properties...", 2);
106 113
		monitor.setTaskName("Recoding XML-TXM files");
107 114
		// apply conversion file
108 115
		if (!crc.process(corpus)) {
109
			System.out.println("Failed to edit XML-TXM of the corpus: "+corpus);
116
			System.out.println("Failed to edit XML-TXM of the corpus: " + corpus);
110 117
			return null;
111 118
		}
112 119
		monitor.worked(1);
113 120
		
114 121
		// update corpus indexes and edition
115
//		String txmhome = Toolbox.getTxmHomePath();
116

  
122
		// String txmhome = Toolbox.getTxmHomePath();
117 123
		
118
		project.setDoMultiThread(false); //too soon
124
		
125
		project.setDoMultiThread(false); // too soon
119 126
		project.setDoUpdate(true);
120

  
121
//		monitor.setTaskName("Updating corpus");
122
//		File scriptDir = new File(txmhome, "scripts/groovy/user/org/txm/scripts/importer/xtz");
123
//		File script = new File(scriptDir, "xtzLoader.groovy");
127
		
128
		// monitor.setTaskName("Updating corpus");
129
		// File scriptDir = new File(txmhome, "scripts/groovy/user/org/txm/scripts/importer/xtz");
130
		// File script = new File(scriptDir, "xtzLoader.groovy");
124 131
		System.out.println(NLS.bind(KRAnnotationUIMessages.updatingTheP0Corpus, corpus));
125 132
		JobHandler ret = ExecuteImportScript.executeScript(project);
126 133
		Display.getDefault().syncExec(new Runnable() {
134
			
127 135
			@Override
128
			public void run() {CloseEditorsUsing.corpus(corpus);}
136
			public void run() {
137
				CloseEditorsUsing.corpus(corpus);
138
			}
129 139
		});
130 140
		monitor.worked(1);
131 141
		return ret;
132 142
	}
133

  
134
	public static JobHandler recode(final MainCorpus corpus, LinkedHashMap<Pattern, String> rules, String oldType, String newType, IProgressMonitor monitor) throws IOException, CqiClientException, XMLStreamException {
135

  
143
	
144
	public static JobHandler recode(final MainCorpus corpus, LinkedHashMap<Pattern, String> rules, String oldType, String newType, IProgressMonitor monitor) throws IOException, CqiClientException,
145
			XMLStreamException {
146
		
136 147
		// apply conversion file
137 148
		CorpusRuledConvertion crc = new CorpusRuledConvertion(rules, newType, oldType);
138
		return recode(corpus, crc, oldType, newType , monitor);
149
		return recode(corpus, crc, oldType, newType, monitor);
139 150
	}
140

  
151
	
141 152
	public static JobHandler recode(final MainCorpus corpus, File conversionFile, String oldType, String newType, IProgressMonitor monitor) throws IOException, CqiClientException, XMLStreamException {
142
		System.out.println("Create CorpusRuledConvertion with "+conversionFile);
153
		System.out.println("Create CorpusRuledConvertion with " + conversionFile);
143 154
		// apply conversion file
144 155
		CorpusRuledConvertion crc = new CorpusRuledConvertion(conversionFile, oldType, newType);
145

  
156
		
146 157
		return recode(corpus, crc, oldType, newType, monitor);
147 158
	}
148
}
159
}
tmp/org.txm.ocaml.core/META-INF/MANIFEST.MF (revision 3055)
20 20
Bundle-Name: Ocaml
21 21
Bundle-Version: 2.0
22 22
Bundle-ClassPath: .,lib/ocamlrun-scripting.jar,lib/ocaml-lib-files.jar
23
Require-Bundle: org.txm.core,org.eclipse.core.runtime
23
Require-Bundle: org.txm.core;visibility:=reexport,
24
 org.eclipse.core.runtime;visibility:=reexport
24 25
Bundle-ManifestVersion: 2
25 26
Bundle-RequiredExecutionEnvironment: JavaSE-1.8
26 27
Bundle-Vendor: Textometrie.org
tmp/org.txm.groovy.core/src/java/org/txm/groovy/core/GSERunner.java (revision 3055)
22 22

  
23 23
import cern.colt.Arrays;
24 24
import groovy.lang.Binding;
25
import groovy.lang.GroovyShell;
25 26
import groovy.util.GroovyScriptEngine;
26 27
import groovy.util.ResourceException;
27 28
import groovy.util.ScriptException;
......
128 129
		return defaultGSE;
129 130
	}
130 131
	
132
	public Object evaluate(String code) {
133
		return new GroovyShell(this.getGroovyClassLoader(), new Binding()).evaluate(code);
134
	}
135
	
131 136
	public static void getDependancies(String bundle, HashSet<String> defaultPlugins) {
132 137
		// if (defaultPlugins.contains(bundle)) return;
133 138
		
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/hyperprince/hyperprinceLoader.groovy (revision 3055)
80 80
//ANNOTATE
81 81
println "-- ANNOTATE - Running NLP tools"
82 82
if (annotate) {
83
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
83
	String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
84
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
84 85
	if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
85 86
		annotationSuccess = true;
86 87
	}
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/frantext/frantextLoader.groovy (revision 3055)
156 156
boolean annotate_status = true;
157 157
if (annotate) {
158 158
	println "-- ANNOTATE - Running NLP tools - $model model"
159
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
159
	String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
160
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
160 161
	if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
161 162
		annotate_status = true;
162 163
		if (project.getCleanAfterBuild()) {
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/xmlLoader.groovy (revision 3055)
237 237
boolean annotationSuccess = false;
238 238
if (annotate) {
239 239
	println "-- ANNOTATE - Running NLP tools"
240
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
240
	String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
241
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
241 242
	if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
242 243
		annotationSuccess = true
243 244
		if (project.getCleanAfterBuild()) {
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/bfm/bfmLoader.groovy (revision 3055)
136 136
boolean annotate_status = true;
137 137
if (annotate) {
138 138
	println "-- ANNOTATE - Running NLP tools - $model model"
139
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
139
	String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
140
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
140 141
	if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
141 142
		annotate_status = true;
142 143
		
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/doc/docLoader.groovy (revision 3055)
257 257
boolean annotationSuccess = false;
258 258
if (annotate) {
259 259
	println "-- ANNOTATE - Running NLP tools"
260
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
260
	String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
261
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
261 262
	if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
262 263
		annotationSuccess = true;
263 264
		if (project.getCleanAfterBuild()) {
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/tmx/tmxLoader.groovy (revision 3055)
87 87
if (MONITOR != null) MONITOR.worked(20, "ANNOTATE - Running NLP tools");
88 88
if (annotate) {
89 89
	println "-- ANNOTATE - Running NLP tools"
90
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
90
	String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
91
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
91 92
	def mapForTreeTagger = [:]
92 93
	for (def k : textLangs.keySet()) mapForTreeTagger[k] = textLangs[k].toLowerCase();
93 94
		println "TreeTagger models to use per text: $mapForTreeTagger"
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/factiva/factivamailLoader.groovy (revision 3055)
104 104
boolean annotationSuccess = false;
105 105
if (annotate) {
106 106
	println "-- ANNOTATE - Running NLP tools"
107
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
107
	String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
108
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
108 109
	if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
109 110
		annotationSuccess = true;
110 111
	}
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/factiva/factivaLoader.groovy (revision 3055)
215 215
boolean annotationSuccess = false;
216 216
if (annotate) {
217 217
	println "-- ANNOTATE - Running NLP tools"
218
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
218
	String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
219
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
219 220
	if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
220 221
		annotationSuccess = true;
221 222
		if (project.getCleanAfterBuild()) {
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/transcriberLoader.groovy (revision 3055)
191 191
		boolean annotationSuccess = false;
192 192
		if (annotate) {
193 193
			println "-- ANNOTATE - Running NLP tools"
194
			def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
194
			String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
195
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
195 196
			if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
196 197
				annotationSuccess = true;
197 198
				
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/txt/txtLoader.groovy (revision 3055)
133 133
boolean annotationSuccess = true;
134 134
if (annotate) {
135 135
	println "-- ANNOTATE - Running NLP tools"
136
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
136
	String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
137
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
137 138
	if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
138 139
		annotationSuccess = true;
139 140
		if (project.getCleanAfterBuild()) {
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/alceste/alcesteLoader.groovy (revision 3055)
90 90

  
91 91
if (annotate) {
92 92
	println "-- ANNOTATE - Running NLP tools"
93
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
93
	String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
94
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
94 95
	if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
95 96
		annotationSuccess = true;
96 97
	}
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/alceste/importer.groovy (revision 3055)
47 47
import java.io.PrintStream;
48 48
import java.net.URL;
49 49

  
50
import org.txm.core.engines*
50 51
import org.txm.scripts.filters.CutHeader.*;
51 52
import org.txm.scripts.filters.Tokeniser.*;
52 53
import org.txm.scripts.filters.FusionHeader.*;
......
108 109
			cpb.tick()
109 110
			File resultfile = new File(tokenizeDir, f.getName());
110 111
			try {
111
				def builder = new SimpleTokenizerXml(f, resultfile, lang);
112
				builder.setStartTag("text")
113
				if (!builder.process()) {
112
				def tokenizer = new SimpleTokenizerXml(f, resultfile, lang);
113
				if (project.getAnnotate()) { // an annotation will be done, does the annotation engine needs another tokenizer ?
114
					String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
115
					def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
116
					def stringTokenizer = engine.getStringTokenizer(lang)
117
					if (stringTokenizer != null) {
118
						tokenizer.setStringTokenizer(stringTokenizer)
119
					}
120
				}
121
				tokenizer.setStartTag("text")
122
				if (!tokenizer.process()) {
114 123
					println "Failed to tokenize: "+f;
115 124
					resultfile.delete();
116 125
				}
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/hyperbase/importer.groovy (revision 3055)
51 51
import org.txm.*;
52 52
import org.txm.core.engines.*;
53 53
import org.txm.utils.*;
54
// TODO: Auto-generated Javadoc
55 54

  
56 55
/**
57 56
 * The Class importer.
......
66 65
	 * @param basename the basename
67 66
	 * @return true, if successful
68 67
	 */
69
	public static boolean run(File srcDir, File binDir, File txmDir, String encoding, String basename, String lang)
68
	public static boolean run(File srcDir, File binDir, File txmDir, String encoding, String basename, String lang, def project)
70 69
	{
71 70
		String rootDir = srcDir.getAbsolutePath()+"/"
72 71
		//cleaning
......
113 112
			File tfile = new File(binDir, "tokenized/"+filename+".xml");
114 113
			
115 114
			SimpleTokenizerXml tokenizer = new SimpleTokenizerXml(pfile, tfile, lang);
115
			if (project.getAnnotate()) { // an annotation will be done, does the annotation engine needs another tokenizer ?
116
				String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
117
				def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
118
				def stringTokenizer = engine.getStringTokenizer(lang)
119
				if (stringTokenizer != null) {
120
					tokenizer.setStringTokenizer(stringTokenizer)
121
				}
122
			}
116 123
			if (!tokenizer.process()) {
117 124
				println("Failed to tokenize file: "+pfile);
118 125
				tfile.delete();
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/hyperbase/hyperbaseLoader.groovy (revision 3055)
78 78
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
79 79
println "-- IMPORTER - Reading source files"
80 80

  
81
if (!(new importer().run(srcDir, binDir, txmDir, encoding, basename, lang))) {
81
if (!(new importer().run(srcDir, binDir, txmDir, encoding, basename, lang, project))) {
82 82
	println "import process stopped";
83 83
	return;
84 84
}
......
88 88
println "-- ANNOTATE - Running NLP tools"
89 89
boolean annotationSuccess = false;
90 90
if (annotate) {
91
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
91
	String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger")
92
	def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
92 93
	if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
93 94
		annotationSuccess = true;
94 95
		if (project.getCleanAfterBuild()) {
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZImporter.groovy (revision 3055)
26 26
import org.txm.utils.ConsoleProgressBar
27 27
import org.txm.utils.io.*
28 28
import org.txm.importer.xtz.*
29
import org.txm.core.engines.*
29 30

  
30 31
class XTZImporter extends Importer {
31 32
	
......
381 382
				File infile = f;
382 383
				File outfile = new File(module.getBinaryDirectory(),"tokenized/"+f.getName());
383 384
				SimpleTokenizerXml tokenizer = new SimpleTokenizerXml(infile, outfile, lang)
385
				if (module.getProject().getAnnotate()) { // an annotation will be done, does the annotation engine needs another tokenizer ?
386
					String engineName = module.getProject().getImportParameters().node("annotate").get("engine", "TreeTagger")
387
					def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
388
					def stringTokenizer = engine.getStringTokenizer(lang)
389
					if (stringTokenizer != null) {
390
						tokenizer.setStringTokenizer(stringTokenizer)
391
					}
392
				}
384 393
				tokenizer.setRetokenize(retokenize)
385 394
				if (outSideTextTagsRegex != null && outSideTextTagsRegex.trim().length() > 0) {
386 395
					tokenizer.setOutSideTextTags(outSideTextTagsRegex)
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/TTAnnotater.groovy (revision 3055)
28 28
		String corpusname = module.getProject().getName();
29 29
		String lang = module.getProject().getLang();
30 30

  
31
		def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
31
		String engineName = module.getProject().getImportParameters().node("annotate").get("engine", "TreeTagger")
32
		def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
32 33
		if (module.isMultiThread()) {
33 34
			def hash = [:]
34 35
			for (File txmFile : inputDirectory.listFiles()) {
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/filters/Tokeniser/SimpleTokenizerXml.groovy (revision 3055)
52 52
import javax.xml.stream.*
53 53

  
54 54
import org.txm.importer.PersonalNamespaceContext
55
import org.txm.tokenizer.StringTokenizer
56
import org.txm.tokenizer.SimpleStringTokenizer
55 57
import org.txm.tokenizer.TokenizerClasses
56 58

  
57 59
@CompileStatic
58 60
public class SimpleTokenizerXml {
59 61
	
62
	StringTokenizer stringTokenizer;
60 63
	boolean retokenize = false
61 64
	LinkedHashMap<String, String>retokenizedWordProperties = new LinkedHashMap()
62 65
	
......
67 70
	/** The intraword_tags. */
68 71
	String intraword_tags = TokenizerClasses.intraword_tags;
69 72
	
70
	/** The punct_strong. */
71
	String punct_strong = TokenizerClasses.punct_strong;
72
	
73
	/** The punct_all. */
74
	String punct_all = TokenizerClasses.punct_all;
75
	
76 73
	/** The word_chars. */
77 74
	String word_chars = TokenizerClasses.word_chars;
78 75
	
79
	String fclitics = null; // default behavior don't manage clitics
80
	String pclitics = null; // default behavior don't manage clitics
81
	
82 76
	/** The outside_text_tags_ignore_content. */
83 77
	String note_content = null;
84 78
	String outside_text_tags_keep_content = null // tag and content NOT removed but not tokenized
......
89 83
	Pattern reg_outside_text_tags;
90 84
	Pattern reg_startTag;
91 85
	
92
	Pattern reg_punct_other = Pattern.compile("\\p{P}")
93
	
94 86
	/** The DEBUG. */
95 87
	public boolean DEBUG = false;
96 88
	
97
	/** The outfile. */
89
	/** The outfile is the result file. */
98 90
	File outfile;
99 91
	
100 92
	/** The infile. */
......
119 111
	String prefix;
120 112
	String filename;
121 113
	
122
	def regElision = null;
123
	def reg3pts = null;
124
	def regPunct;
125
	def regFClitics = null ;
126
	def regPClitics = null ;
127
	String whitespaces;
128
	Pattern regWhitespaces;
129 114
	Pattern regLN;
130 115
	Pattern regCTRL;
131
	Pattern regSplitWhiteSpaces;
132 116
	
133 117
	public SimpleTokenizerXml(File infile, File outfile) {
134 118
		this(infile, outfile, "");
......
142 126
	 */
143 127
	public SimpleTokenizerXml(File infile, File outfile, String lang) {
144 128
		this.lang = lang;
145
		if (lang != null)
146
			if (lang.startsWith("en")) {
147
				fclitics = TokenizerClasses.FClitic_en
148
			} else if (lang.startsWith("fr")) {
149
				fclitics = TokenizerClasses.FClitic_fr
150
				pclitics = TokenizerClasses.PClitic_fr
151
			} else if (lang.startsWith("gl")) {
152
				fclitics = TokenizerClasses.FClitic_gl
153
			} else if (lang.startsWith("it")) {
154
				pclitics = TokenizerClasses.PClitic_it
155
			}
129
		this.stringTokenizer = new SimpleStringTokenizer(lang);
156 130
		
157 131
		word_tags = TokenizerClasses.word_tags;
158 132
		reg_word_tags = Pattern.compile(word_tags);
159 133
		
160 134
		intraword_tags = TokenizerClasses.intraword_tags;
161
		punct_strong = TokenizerClasses.punct_strong;
162
		punct_all = TokenizerClasses.punct_all;
163 135
		word_chars = TokenizerClasses.word_chars;
164 136
		
165 137
		this.outfile = outfile;
......
168 140
		int index = filename.lastIndexOf(".");
169 141
		if (index > 0) filename = filename.substring(0, index);
170 142
		
171
		String strRegElision = TokenizerClasses.regElision;
172
		if (strRegElision != null && strRegElision.length() > 0)
173
			regElision = ~/^([\p{L}-]++${strRegElision}[\p{L}-]++)(.*)$/ // the test must start with the start of string  ^
174
		reg3pts = ~/^(.*?)(\.\.\.)(.*)$/
175
		
176
		String strRegPunct = TokenizerClasses.regPunct;
177
		if (strRegPunct != null && strRegPunct.length() > 0)
178
			regPunct = ~/^(.*?)($strRegPunct)(.*)$/
179
		
180
		if (fclitics != null && fclitics.length() > 0)
181
			regFClitics = ~/(.+)($fclitics)$/ // the test must end with the end of string $
182
		
183
		if (pclitics != null && pclitics.length() > 0)
184
			regPClitics = /^($pclitics)(.*)/ // the test must start with the start of string  ^
185
		
186
		if (TokenizerClasses.whitespaces != null && TokenizerClasses.whitespaces.length() > 0)
187
			regWhitespaces = ~TokenizerClasses.whitespaces;
188
		
189
		if (TokenizerClasses.whitespaces != null && TokenizerClasses.whitespaces.length() > 0)
190
			regSplitWhiteSpaces = Pattern.compile(TokenizerClasses.whitespaces);
191
		
192 143
		regLN = Pattern.compile("/\n/");
193 144
		regCTRL = Pattern.compile("/\\p{C}/");
194 145
	}
195 146
	
196 147
	/**
148
	 * Replace the default SimpleStringTokenizer with another
149
	 * @param stringTokenizer a StringTokenizer
150
	 */
151
	public void seStringTokenizer(StringTokenizer stringTokenizer) {
152
		if (stringTokenizer == null) return;
153
		this.stringTokenizer = stringTokenizer;
154
	}
155
	
156
	/**
197 157
	 * Fill infos.
198 158
	 *
199 159
	 * @param event the event
......
545 505
		//if (DEBUG) println "-- chars: "+text+"--";
546 506
		text = regLN.matcher(text).replaceAll(WHITESPACE);
547 507
		text = regCTRL.matcher(text).replaceAll(EMPTY);						// remove ctrl characters
548
		if (regSplitWhiteSpaces != null) {
549
			for (String s : regSplitWhiteSpaces.split(text)) {		// separate with unicode white spaces
550
				//	if (DEBUG){println "process $s"}
551
				iterate(s);
552
			}
553
		} else {
554
			iterate(text);
555
		}
556
	}
557
	
558
	/**
559
	 * Iterate. a String, should be called when a word is found in a String
560
	 *
561
	 * @param s the s
562
	 * @return the java.lang. object
563
	 */
564
	protected iterate(String s) {
565
		while (s != null && s.length() > 0) {
566
			//	if (DEBUG){println "  > $s"}
567
			s = standardChecks(s);
568
		}
569
	}
570
	
571
	/**
572
	 * Standard checks.
573
	 *
574
	 * @param s the s
575
	 * @return the java.lang. object
576
	 */
577
	//	@CompileStatic(SKIP)
578
	public String standardChecks(String s) {
579
		Matcher m;
580
		if (fclitics != null && (m = s =~ regFClitics) ) {
581
			//	if (DEBUG) println "CLITIC found: $s ->"+ m
582
			iterate(m.group(1))
583
			
508

  
509
				def words = stringTokenizer.processText(text);
510
		for (def word : words) {
584 511
			wordcount++;
585 512
			writer.writeStartElement(word_element_to_create);
586 513
			writeWordAttributes();// id
587
			//writer.writeAttribute("type", "w");
588
			writer.writeCharacters(m.group(2));
514
			writer.writeCharacters(word);
589 515
			writer.writeEndElement();
590 516
			writer.writeCharacters("\n");
591
			
592
			return "";
593
		} else if (pclitics != null && (m = s =~ regPClitics) ) {
594
			if (DEBUG) println "PCLITIC found: $s ->"+ m
595
			
596
			wordcount++;
597
			writer.writeStartElement(word_element_to_create);
598
			writeWordAttributes();// id
599
			//writer.writeAttribute("type", "w");
600
			writer.writeCharacters(m.group(1));
601
			writer.writeEndElement();
602
			writer.writeCharacters("\n");
603
			
604
			iterate(m.group(2))
605
			
606
			return "";
607
		} else if (regElision != null && (m = s =~ regElision) ) {
608
			//	if (DEBUG) println "Elision found: $s ->"+ m.group(1)+" + "+m.group(2)+" + "+m.group(3)
609
			//iterate(m.group(1))
610
			
611
			//			int sep = s.indexOf("'");
612
			//			if (sep < 0)
613
			//				sep = s.indexOf("’");
614
			//			if (sep < 0)
615
			//				sep = s.indexOf("‘");
616
			
617
			wordcount++;
618
			writer.writeStartElement(word_element_to_create);
619
			writeWordAttributes();// id
620
			//writer.writeAttribute("type", "w");
621
			writer.writeCharacters(m.group(1));
622
			writer.writeEndElement();
623
			writer.writeCharacters("\n");
624
			
625
			iterate(m.group(2))
626
			
627
			return "";
628
		} else if (reg3pts != null && (m = s =~ reg3pts) )	{
629
			//	if(DEBUG){println "REG '...' found: $s -> "+m.group(1)+" + "+m.group(2)+" + "+m.group(3)}
630
			iterate(m.group(1))
631
			
632
			wordcount++;
633
			writer.writeStartElement(word_element_to_create);
634
			writeWordAttributes();// id
635
			//writer.writeAttribute("type","pon");
636
			writer.writeCharacters("...");
637
			writer.writeEndElement();
638
			writer.writeCharacters("\n");
639
			
640
			return m.group(3);
641
		} else if (regPunct != null && (m = s =~ regPunct) ) {
642
			if(DEBUG){println "PUNCT found: $s ->"+m.group(1)+" + "+m.group(2)+" + "+m.group(3)}
643
			iterate(m.group(1));
644
			
645
			wordcount++;
646
			writer.writeStartElement(word_element_to_create);
647
			writeWordAttributes();// id
648
			//writer.writeAttribute("type","pon");
649
			writer.writeCharacters(m.group(2));
650
			writer.writeEndElement();
651
			writer.writeCharacters("\n");
652
			
653
			return m.group(3);
654
		} else {
655
			//		if(DEBUG){println "Other found: "+s}
656
			wordcount++;
657
			writer.writeStartElement(word_element_to_create);
658
			writeWordAttributes();// id
659
			//			if (reg_punct_other.matcher(s).matches())
660
			//				writer.writeAttribute("type","pon");
661
			//			else
662
			//				writer.writeAttribute("type","w");
663
			writer.writeCharacters(s);
664
			writer.writeEndElement();
665
			writer.writeCharacters("\n");
666
			
667
			return "";
668 517
		}
669 518
	}
670
	
519

  
671 520
	/**
672 521
	 * Write word attributes.
673 522
	 *
......
783 632
		//		println "Done"
784 633
		
785 634
		String lang = "fr"
786
		File inFile = new File(System.getProperty("user.home"), "xml/vocapia/test.trs")
787
		File outFile = new File(System.getProperty("user.home"), "xml/vocapia/test-retokenized.trs")
635
		File inFile = new File(System.getProperty("user.home"), "SVN/TXMSVN/trunk/corpora/tokenizer/test1.xml")
636
		File outFile = new File(System.getProperty("user.home"), "SVN/TXMSVN/trunk/corpora/tokenizer/test1-tmp.xml")
788 637
		
789 638
		println "processing "+inFile
790 639
		
791 640
		SimpleTokenizerXml tokenizer = new SimpleTokenizerXml(inFile, outFile, lang)
792
		tokenizer.setRetokenize(true)
641
		tokenizer.setRetokenize(false)
642
		tokenizer.setNote("note")
643
		//tokenizer.setOutSideTextTags("outsideToEdit")
644
		tokenizer.setOutSideTextTagsAndKeepContent("outsideToEdit")
793 645
		//tokenizer.setDEBUG false
794 646
		tokenizer.process();
795 647
		
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/annotation/InjectWordPropTableMacro.groovy (revision 3055)
81 81

  
82 82
println "Injecting..."
83 83
//for (String textid : injections.keySet()) {
84
for (File currentXMLFile : inputDirectory.listFiles()) {
84
for (File currentXMLFile : inputDirectory.listFiles().sort{ it.name }) {
85 85
	if (currentXMLFile.isDirectory()) continue;
86 86
	if (currentXMLFile.isHidden()) continue;
87 87
	
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/GetXPathMacro.groovy (revision 3055)
53 53

  
54 54
if (srcDirectory!=null && srcDirectory.exists()) {
55 55

  
56
	def files = srcDirectory.listFiles()
56
	def files = srcDirectory.listFiles().sort{ it.name }
57 57
	if (files == null || files.size() == 0) {
58 58
		println "No files in $srcDirectory"
59 59
		return;
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/xml/XSL2CQPMacro.groovy (revision 3055)
42 42
	return false;
43 43
}
44 44

  
45
def xmlFiles = txmDir.listFiles();
45
def xmlFiles = txmDir.listFiles().sort{ it.name };
46 46
if (xmlFiles == null || xmlFiles.size() == 0) {
47 47
	println "Error: no file found in $txmDir"
48 48
	return false;
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/xml/ApplyXQueryMacro.groovy (revision 3055)
53 53
	return false;
54 54
}
55 55

  
56
def xmlFiles = txmDir.listFiles();
56
def xmlFiles = txmDir.listFiles().sort{ it.name };
57 57
if (xmlFiles == null || xmlFiles.size() == 0) {
58 58
	println "Error: no file found in $txmDir"
59 59
	return false;
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/ApplyXQueryMacro.groovy (revision 3055)
53 53
	return false;
54 54
}
55 55

  
56
def xmlFiles = txmDir.listFiles();
56
def xmlFiles = txmDir.listFiles().sort{ it.name };
57 57
if (xmlFiles == null || xmlFiles.size() == 0) {
58 58
	println "Error: no file found in $txmDir"
59 59
	return false;
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/file/DirectoryInfoMacro.groovy (revision 3055)
29 29
println " write? "+directory.canWrite()
30 30
println " executable? "+directory.canExecute()
31 31
println " hidden? "+directory.isHidden()
32
def files = directory.listFiles()
32
def files = directory.listFiles().sort{ it.name }
33 33
println " number of files? "+files.size()
34 34

  
35 35

  
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/FindMultiLineRegExpMacro.groovy (revision 3055)
66 66

  
67 67
println "-- looking for: $regexp"
68 68

  
69
for (File inputfile : inputDirectory.listFiles()) {
69
for (File inputfile : inputDirectory.listFiles().sort{ it.name }) {
70 70

  
71 71
	if (inputfile.isDirectory()) continue // ignore
72 72

  
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/nlp/TT2XMLInDirectoryMacro.groovy (revision 3055)
26 26
	
27 27
	XMLOutputFactory factory = XMLOutputFactory.newInstance()
28 28
	
29
	def files = inputDirectory.listFiles()
29
	def files = inputDirectory.listFiles().sort{ it.name }
30 30
	if (files == null || files.length == 0) {
31 31
		println "Error: no file to process in $inputDirectory"
32 32
		return false;
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/antract/FixINATRSMacro.groovy (revision 3055)
14 14
if (!ParametersDialog.open(this)) return;
15 15

  
16 16
println "Fixing $trsDirectory"
17
def files = trsDirectory.listFiles()
17
def files = trsDirectory.listFiles().sort{ it.name }
18 18
if (files == null) {
19 19
	println "No files found in $trsDirectory"
20 20
	return 0;
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/imports/TeiHeader2MetadataCSVMacro.groovy (revision 3055)
39 39
	return
40 40
}
41 41

  
42
List<File> files = inputDirectory.listFiles()
42
List<File> files = inputDirectory.listFiles().sort{ it.name }
43 43
if (files == null || files.size() == 0) {
44 44
	println "** TeiHeader2MetadataCSV: no files found in '$inputDirectory' directory."
45 45
	return
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/xml/CombineAttributesMacro.groovy (revision 3055)
1
// STANDARD DECLARATIONS
2
package org.txm.macro.xml
3

  
4
import org.txm.annotation.kr.core.conversion.*
5
import javax.xml.stream.XMLStreamReader
6
import java.io.File;
7
import java.io.IOException;
8
import java.util.HashSet;
9
import java.util.LinkedHashMap;
10
import java.util.regex.Pattern;
11

  
12
@Field @Option(name="xmlFile", usage="fichier XML lu", widget="FileOpen", required=true, def="in.xml") // /home/mdecorde/TEMP/ANTRACT/AF/all.xlsx
13
File xmlFile
14

  
15
@Field @Option(name="resultFile", usage="fichier XML resultat", widget="FileSave", required=true, def="out.xml") // /home/mdecorde/TEMP/ANTRACT/AF/all.xlsx
16
File resultFile
17

  
18
@Field @Option(name = "elementName", usage = "oldType", widget = "String", required = true, def = "w")
19
String elementName
20

  
21
//@Field @Option(name = "oldType", usage = "oldType", widget = "String", required = true, def = "myattr")
22
def oldType = null // not needed
23
	
24
@Field @Option(name = "newType", usage = "newType", widget = "String", required = true, def = "myattr")
25
String newType
26

  
27
@Field @Option(name = "separator", usage = "newType", widget = "Separator", required = false, def = "Code")
28
def separator
29

  
30
@Field @Option(name = "getValueCode", usage = "code of the getValue(XMLStreamReader parser, String localname, String attribute, String value) method", widget = "Text", required = true, def = "\"NEWGROOVE\" + parser.getAttributeValue(null, \"myattr\") ")
31
String getValueCode
32

  
33
@Field @Option(name = "additionalImportsCode", usage = "code of the getValue(XMLStreamReader parser, String localname, String attribute, String value) method", widget = "Text", required = true, def = "import java.io.File")
34
String additionalImportsCode
35

  
36
@Field @Option(name = "debug", usage = "show debug messages", widget = "Boolean", required = true, def = "false")
37
boolean debug
38

  
39
if (!ParametersDialog.open(this)) return;
40

  
41

  
42
// not needed
43
LinkedHashMap<Pattern, String> rules = new LinkedHashMap<>();
44

  
45
XMLTXMFileRuledConversion converter = new XMLTXMFileRuledConversion(xmlFile, rules, elementName, oldType, newType, XMLTXMFileRuledConversion.ABANDON);
46

  
47
// initialize a converter using getValueCode value to fill the getValue(parser, localname, attribute, value) method
48
converter.converter = gse.evaluate("""import org.txm.annotation.kr.core.conversion.*;
49
import javax.xml.stream.XMLStreamReader
50
$additionalImportsCode
51

  
52
return new XMLTXMFileRuledConversion.Converter() {
53
	public String getValue(XMLStreamReader parser, String localname, String attribute, String value) {
54
		$getValueCode
55
	}
56
};
57
""");
58
converter.debug = debug
59
converter.process(resultFile)
tmp/org.txm.groovy.core/META-INF/MANIFEST.MF (revision 3055)
4 4
Bundle-Version: 1.0.0.qualifier
5 5
Bundle-Name: Macro
6 6
Bundle-ClassPath: .,bin/
7
Require-Bundle: org.txm.core;bundle-version="0.8.0";visibility:=reexpo
8
 rt,org.txm.annotation.core;visibility:=reexport,org.txm.searchengine.
9
 cqp.core;visibility:=reexport,org.txm.tokenizer.core;visibility:=reex
10
 port,org.eclipse.osgi
7
Require-Bundle: org.txm.core;bundle-version="0.8.0";visibility:=reexport,
8
 org.txm.annotation.core;visibility:=reexport,
9
 org.txm.searchengine.cqp.core;visibility:=reexport,
10
 org.txm.tokenizer.core;visibility:=reexport,
11
 org.eclipse.osgi;visibility:=reexport
11 12
Bundle-ActivationPolicy: lazy
12 13
Bundle-ManifestVersion: 2
13 14
Bundle-RequiredExecutionEnvironment: JavaSE-1.8
tmp/org.txm.annotation.kr.core/META-INF/MANIFEST.MF (revision 3055)
1 1
Manifest-Version: 1.0
2 2
Automatic-Module-Name: org.txm.annotation.kr.core
3 3
Bundle-SymbolicName: org.txm.annotation.kr.core;singleton:=true
4
Require-Bundle: org.txm.searchengine.cqp.core;visibility:=reexport,jav
5
 ax.persistence;bundle-version="2.1.0";visibility:=reexport,org.eclips
6
 e.persistence.jpa;bundle-version="2.6.0";visibility:=reexport,org.ecl
7
 ipse.persistence.jpa.jpql;bundle-version="2.6.0";visibility:=reexport
8
 ,org.txm.annotation.core;visibility:=reexport,org.eclipse.persistence
9
 .asm;bundle-version="6.0.0"
4
Require-Bundle: org.txm.searchengine.cqp.core;visibility:=reexport,
5
 javax.persistence;bundle-version="2.1.0";visibility:=reexport,
6
 org.eclipse.persistence.jpa;bundle-version="2.6.0";visibility:=reexport,
7
 org.eclipse.persistence.jpa.jpql;bundle-version="2.6.0";visibility:=reexport,
8
 org.txm.annotation.core;visibility:=reexport,
9
 org.eclipse.persistence.asm;bundle-version="6.0.0";visibility:=reexport
10 10
Meta-Persistence: META-INF/persistence.xml
11 11
Bundle-ActivationPolicy: lazy
12 12
Bundle-ManifestVersion: 2
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/conversion/XMLTXMFileRuledConversion.java (revision 3055)
7 7
import java.util.regex.Pattern;
8 8

  
9 9
import javax.xml.stream.XMLStreamException;
10
import javax.xml.stream.XMLStreamReader;
10 11

  
11 12
import org.txm.importer.StaxIdentityParser;
12 13

  
13 14
public class XMLTXMFileRuledConversion extends StaxIdentityParser {
14 15
	
15
	protected LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>();
16
	public boolean debug = false;
16 17
	
18
	protected LinkedHashMap<Pattern, String> rules = new LinkedHashMap<>();
19
	
20
	protected String elementName = null;
21
	
22
	boolean fixingElement = false; // true when the current element needs to be fixed
23
	
17 24
	protected String oldType;
18 25
	
19 26
	protected String newType;
......
24 31
	
25 32
	public static final String ABANDON = "abandon";
26 33
	
27
	HashSet<String> noMatchValues = new HashSet<String>();
34
	HashSet<String> noMatchValues = new HashSet<>();
28 35
	
29
	public XMLTXMFileRuledConversion(File infile, LinkedHashMap<Pattern, String> rules, String oldType, String newType, String mode) throws IOException, XMLStreamException {
36
	/**
37
	 * 
38
	 * @param infile the file to read
39
	 * @param rules the conversion rules
40
	 * @param elementPath local name of the element whose attributes are converted; if null, the conversion is applied to the word (w) elements
41
	 * @param oldType word type or element attribute to read
42
	 * @param newType word type or element attribute to write
43
	 * @param mode XMLTXMFileRuledConversion.DELETE or XMLTXMFileRuledConversion.COPY or XMLTXMFileRuledConversion.ABANDON
44
	 * @throws IOException
45
	 * @throws XMLStreamException
46
	 */
47
	public XMLTXMFileRuledConversion(File infile, LinkedHashMap<Pattern, String> rules, String elementPath, String oldType, String newType, String mode) throws IOException, XMLStreamException {
30 48
		super(infile);
31 49
		this.rules = rules;
32 50
		this.oldType = oldType;
......
34 52
		
35 53
		this.mode = mode;
36 54
		
37
		if (!this.newType.startsWith("#")) this.newType = "#" + this.newType;
38
		if (!this.oldType.startsWith("#")) this.oldType = "#" + this.oldType;
55
		if (elementPath != null && elementPath.length() > 0) {
56
			this.elementName = elementPath;
57
		}
58
		
59
		if (elementPath == null) { // we are fixing txm:words
60
			if (!this.newType.startsWith("#")) this.newType = "#" + this.newType;
61
			if (!this.oldType.startsWith("#")) this.oldType = "#" + this.oldType;
62
		}
39 63
	}
40 64
	
41 65
	boolean inW = false, inAna = false, inForm;
42 66
	
43
	LinkedHashMap<String, String> anaValues = new LinkedHashMap<String, String>();
67
	LinkedHashMap<String, String> anaValues = new LinkedHashMap<>();
44 68
	
45
	LinkedHashMap<String, String> anaResps = new LinkedHashMap<String, String>();
69
	LinkedHashMap<String, String> anaResps = new LinkedHashMap<>();
46 70
	
47 71
	String typeName = null;
48 72
	
......
52 76
	
53 77
	private Object mode;
54 78
	
79
	public static class Converter {
80
		
81
		public Converter() {
82
			
83
		}
84
		
85
		public String getValue(XMLStreamReader parser, String localname, String attribute, String value) {
86
			return value;
87
		}
88
	}
89
	
90
	private Converter converter = null;
91
	
55 92
	@Override
93
	protected void writeAttributes() throws XMLStreamException {
94
		if (fixingElement) {
95
			String newValue = null;
96
			for (int i = 0; i < parser.getAttributeCount(); i++) {
97
				String att = parser.getAttributeLocalName(i);
98
				String value = parser.getAttributeValue(i);
99
				if (oldType != null && oldType.equals(att)) {
100
					if (converter != null) {
101
						if (debug) System.out.println("CALL CONVERTER with attribute $att");
102
						newValue = converter.getValue(parser, localname, att, value);
103
					}
104
					else {
105
						newValue = getValueIfMatch(value);
106
					}
107
				}
108
				if (!newType.equals(att)) { // don't write the newType, it will be done at the end
109
					writeAttribute(parser.getAttributePrefix(i), att, value);
110
				}
111
			}
112
			
113
			if (oldType == null && converter != null) {
114
				if (debug) System.out.println("CALL CONVERTER with no attribute set");
115
				newValue = converter.getValue(parser, localname, null, null);
116
			}
117
			
118
			if (newValue != null) {
119
				if (debug) System.out.println("WRITE NEWTYPE: " + newType + "=" + newValue);
120
				writeAttribute(null, newType, newValue);
121
			}
122
		}
123
		else {
124
			super.writeAttributes();
125
		}
126
	}
127
	
128
	@Override
56 129
	public void processStartElement() throws XMLStreamException, IOException {
57
		if (!inW) super.processStartElement(); // don't write W content
58 130
		
59
		if (localname.equals("w")) {
60
			inW = true;
61
			anaValues.clear();
62
			anaResps.clear();
131
		
132
		if (elementName != null) {
133
			if (this.localname.equals(elementName)) {
134
				fixingElement = true;
135
			}
63 136
			
64
			// initialize the new type to a empty value in case there is transformation rule
65
			anaValues.put(newType, "");
66
			anaResps.put(newType, "#txm_recode");
137
			super.processStartElement();
138
			fixingElement = false;
67 139
		}
68
		else if (localname.equals("ana")) {
69
			inAna = true;
70
			typeName = parser.getAttributeValue(null, "type");
71
			respName = parser.getAttributeValue(null, "resp");
72
			anaResps.put(typeName, respName);
73
			// if (typeName != null) typeName = typeName.substring(1); // remove #
74
			typeValue = "";
140
		else {
141
			
142
			if (!inW) super.processStartElement(); // don't write W content
143
			
144
			if (localname.equals("w")) {
145
				inW = true;
146
				anaValues.clear();
147
				anaResps.clear();
148
				
149
				// initialize the new type to a empty value in case there is transformation rule
150
				anaValues.put(newType, "");
151
				anaResps.put(newType, "#txm_recode");
152
			}
153
			else if (localname.equals("ana")) {
154
				inAna = true;
155
				typeName = parser.getAttributeValue(null, "type");
156
				respName = parser.getAttributeValue(null, "resp");
157
				anaResps.put(typeName, respName);
158
				// if (typeName != null) typeName = typeName.substring(1); // remove #
159
				typeValue = "";
160
			}
161
			else if (localname.equals("form")) {
162
				inForm = true;
163
				formValue = "";
164
			}
75 165
		}
76
		else if (localname.equals("form")) {
77
			inForm = true;
78
			formValue = "";
79
		}
80 166
	}
81 167
	
82 168
	@Override
83 169
	public void processCharacters() throws XMLStreamException {
84
		if (inW && inAna) typeValue += parser.getText();
85
		else if (inW && inForm) formValue += parser.getText();
86
		else super.processCharacters();
170
		if (elementName != null) {
171
			super.processCharacters();
172
		}
173
		else {
174
			if (inW && inAna) typeValue += parser.getText();
175
			else if (inW && inForm) formValue += parser.getText();
176
			else super.processCharacters();
177
		}
87 178
	}
88 179
	
89 180
	@Override
90 181
	public void processEndElement() throws XMLStreamException {
91
		if (localname.equals("w")) {
92
			inW = false;
182
		
183
		if (elementName != null) {
184
			super.processEndElement();
185
		}
186
		else {
93 187
			
94
			// write W content
95
			try {
96
				// get the value to test
97
				String value = null;
98
				if (oldType.equals("word")) {
99
					value = formValue;
100
				}
101
				else {
102
					value = anaValues.get(oldType);
103
				}
188
			if (localname.equals("w")) {
189
				inW = false;
104 190
				
105
				if (newType.equals("word")) { // update form property
106
					updateFormValueIfMatch(value);
107
				}
108
				else { // update another word property
109
					if (value != null) {
110
						updateAnaValuesIfMatch(value);
191
				// write W content
192
				try {
193
					// get the value to test
194
					String value = null;
195
					if (oldType.equals("word")) {
196
						value = formValue;
111 197
					}
112
				}
113
				
114
				// write the word element
115
				writer.writeStartElement("txm:form");
116
				writer.writeCharacters(formValue);
117
				writer.writeEndElement();
118
				
119
				for (String k : anaValues.keySet()) {
120
					String resp = anaResps.get(k);
121
					if (resp == null) resp = "#txm_recode";
198
					else {
199
						value = anaValues.get(oldType);
200
					}
122 201
					
123
					writer.writeStartElement("txm:ana");
124
					writer.writeAttribute("resp", resp);
125
					writer.writeAttribute("type", k);
126
					writer.writeCharacters(anaValues.get(k));
202
					if (newType.equals("word")) { // update form property
203
						updateFormValueIfMatch(value);
204
					}
205
					else { // update a ana property
206
						if (value != null) {
207
							updateAnaValuesIfMatch(value);
208
						}
209
					}
210
					
211
					// write the word element
212
					writer.writeStartElement("txm:form");
213
					writer.writeCharacters(formValue);
127 214
					writer.writeEndElement();
215
					
216
					for (String k : anaValues.keySet()) {
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff