Révision 3731

TXM/trunk/bundles/org.txm.rcp/src/main/java/org/txm/rcp/commands/function/WordPropertiesFromTable.java (revision 3731)
30 30
		
31 31
		MainCorpus corpus = (MainCorpus)first;
32 32
		
33
		File script = new File(Toolbox.getTxmHomePath(),         "/scripts/groovy/user/org/txm/macro/annotation/ImportWordPropertiesFromTableMacro.groovy"); //$NON-NLS-1$
33
		File script = new File(Toolbox.getTxmHomePath(), "/scripts/groovy/user/org/txm/macro/annotation/ImportWordPropertiesFromTableMacro.groovy"); //$NON-NLS-1$
34 34
		//File parametersFile = new File(Toolbox.getTxmHomePath(), "/scripts/groovy/user/org/txm/macro/annotation/ImportWordPropertiesFromTableMacro.properties");
35 35

  
36 36
		HashMap<String, Object> defaultParameters = new HashMap<String, Object>();
......
38 38
		defaultParameters.put("csvFile", corpus.getName()+"_annotations.tsv"); //$NON-NLS-1$ //$NON-NLS-2$
39 39
		
40 40
		ExecuteGroovyMacro.execute(script.getAbsolutePath(), part, selection, null, null, defaultParameters); //$NON-NLS-1$
41
		
42
		
41 43
		return null;
42 44
	}
43 45
	
TXM/trunk/bundles/org.txm.rcp/src/main/java/org/txm/rcp/commands/workspace/UpdateCorpus.java (revision 3731)
2 2

  
3 3
import java.io.File;
4 4
import java.io.FileFilter;
5
import java.util.Date;
5 6

  
6 7
import org.eclipse.core.commands.AbstractHandler;
7 8
import org.eclipse.core.commands.ExecutionEvent;
......
15 16
import org.eclipse.osgi.util.NLS;
16 17
import org.eclipse.swt.widgets.Display;
17 18
import org.eclipse.ui.handlers.HandlerUtil;
19
import org.txm.Toolbox;
18 20
import org.txm.core.preferences.TBXPreferences;
19 21
import org.txm.objects.Project;
20 22
import org.txm.rcp.commands.CloseEditorsUsing;
......
144 146
				try {
145 147
					if (project.compute(monitor, true)) { // TODO children should be recomputed later only when the user needs it
146 148
						
149
						project.appendToHistory("Updated");
150
						
147 151
						this.syncExec(new Runnable() {
148 152
							
149 153
							@Override
TXM/trunk/bundles/org.txm.rcp/src/main/java/org/txm/rcp/swt/widget/QueryWidget.java (revision 3731)
78 78

  
79 79
		h = null;
80 80
		
81
//		if (this.project != null) {
82
//			h = this.project.getFirstChild(QueryHistory.class);
83
//
84
//			if (h == null) {
85
//				h = new QueryHistory(project);
86
//			}
87
//		}
88
//		
89
//		try { // load history from queries.txt file
90
//			h.compute(false);
91
//		} catch (InterruptedException e) {
92
//			// TODO Auto-generated catch block
93
//			e.printStackTrace();
94
//		}
81
		if (this.project != null) {
82
			h = this.project.getFirstChild(QueryHistory.class);
83

  
84
			if (h == null) {
85
				h = new QueryHistory(project);
86
			}
87
		}
95 88
		
89
		try { // load history from queries.txt file
90
			h.compute(false);
91
		} catch (InterruptedException e) {
92
			// TODO Auto-generated catch block
93
			e.printStackTrace();
94
		}
95
		
96 96
		setHistoryItems();
97 97
	}
98 98
	
TXM/trunk/bundles/org.txm.rcp/src/main/java/org/txm/rcp/swt/widget/parameters/SeparatorField.java (revision 3731)
1 1
package org.txm.rcp.swt.widget.parameters;
2 2

  
3 3
import org.eclipse.swt.SWT;
4
import org.eclipse.swt.graphics.Font;
5
import org.eclipse.swt.graphics.FontData;
6 4
import org.eclipse.swt.layout.GridData;
7 5
import org.eclipse.swt.layout.GridLayout;
8 6
import org.eclipse.swt.widgets.Composite;
9 7
import org.eclipse.swt.widgets.Label;
10
import org.eclipse.swt.widgets.Text;
11 8
import org.kohsuke.args4j.NamedOptionDef;
12 9

  
13 10
/**
......
37 34
			l.setLayoutData(gd);
38 35
			l.setText(str);
39 36
			l.setToolTipText(getWidgetUsage());
40
			Font f = parent.getFont();
41
			l.setFont(new Font(f.getDevice(), new FontData(f.getFontData()[0].getName(), f.getFontData()[0].getHeight(), f.getFontData()[0].getStyle()|SWT.BOLD)));
37
//			Font f = parent.getFont();
38
//			l.setFont(new Font(Display.getCurrent(), new FontData(f.getFontData()[0].getName(), f.getFontData()[0].getHeight(), f.getFontData()[0].getStyle()|SWT.BOLD)));
42 39
			Label dt = new Label(this, SWT.SEPARATOR | SWT.HORIZONTAL);
43 40
			dt.setLayoutData(new GridData(SWT.FILL, SWT.END, true, false));
44 41
			dt.setToolTipText(getWidgetUsage());
TXM/trunk/bundles/org.txm.rcp/src/main/java/org/txm/rcp/ApplicationWorkbenchAdvisor.java (revision 3731)
513 513
	 */
514 514
	@Override
515 515
	public void postShutdown() {
516
		
516 517
		callPreStopScript();
517 518

  
518 519
		Toolbox.shutdown();
TXM/trunk/bundles/org.txm.tigersearch.rcp/src/org/txm/tigersearch/commands/ImportTIGERAnnotations.java (revision 3731)
232 232
		Log.info("Finalizing TIGERSearch corpus");
233 233
		if (numberOfWordsAnnotated > 0) { // copy the TIGERcorpus to import
234 234
			FileCopy.copyFiles(tigerCorpusDirectory, tigerCorpusExistingDirectory);
235
			
236
			corpus.getProject().appendToHistory("TIGER Annotations imported from "+tigerDirectory);
237
			
235 238
			Log.info("Done. " + numberOfWordsAnnotated + " words annotated.");
236 239
		}
237 240
		else {
TXM/trunk/bundles/org.txm.analec.rcp/src/org/txm/annotation/urs/commands/ImportGlozzAnnotations.java (revision 3731)
95 95
		Log.info(Messages.ImportGlozzAnnotations_9);
96 96
		URSCorpora.saveCorpus(analecCorpus);
97 97
		
98
		mainCorpus.getProject().appendToHistory("URS Annotations imported from "+aafile+", "+aamfile+" and "+acfile);
99
		
98 100
		Log.info(Messages.ImportGlozzAnnotations_10);
99 101
		return true;
100 102
	}
TXM/trunk/bundles/org.txm.analec.rcp/src/org/txm/annotation/urs/commands/SaveCorpus.java (revision 3731)
87 87
			Log.warning(Messages.SaveCorpus_5);
88 88
			return false;
89 89
		}
90
		
91
		mainCorpus.getProject().appendToHistory("URS annotations saved");
90 92

  
91 93
		mainCorpus.setIsModified(false);
92 94
		if (event != null) {
TXM/trunk/bundles/org.txm.analec.rcp/src/org/txm/annotation/urs/commands/ImportTEIAnnotations.java (revision 3731)
76 76
							Log.warning(Messages.ImportTEIAnnotations_2);
77 77
							return Status.CANCEL_STATUS;
78 78
						} else {
79
							mainCorpus.getProject().appendToHistory("URS annotations imported from "+ directory +" : "+analecCorpus.getStructure().toString());
79 80
							return Status.OK_STATUS;
80 81
						}
81 82
					} catch (Throwable e) {
......
136 137
		if (ret) {
137 138
			Log.info(TXMCoreMessages.bind(Messages.ImportTEIAnnotations_5, analecCorpus.getToutesUnites().size(), analecCorpus.getToutesRelations().size(), analecCorpus.getTousSchemas().size()));
138 139
			mainCorpus.setIsModified(true);
140
			
141
			mainCorpus.getProject().appendToHistory("URS Annotations imported from TEI files of "+annotationDirectory);
142
			
139 143
			CorporaView.refreshObject(mainCorpus);
140 144
		}
141 145
		return ret;
TXM/trunk/bundles/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ExportCorpusAsFullCoNLLU.java (revision 3731)
42 42
import org.eclipse.osgi.util.NLS;
43 43
import org.eclipse.ui.handlers.HandlerUtil;
44 44
import org.kohsuke.args4j.Option;
45
import org.txm.conllu.core.function.ImportCoNLLUAnnotations;
45 46
import org.txm.rcp.swt.widget.parameters.ParametersDialog;
46 47
import org.txm.searchengine.cqp.CQPSearchEngine;
47 48
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
......
63 64
 * @author mdecorde.
64 65
 */
65 66
public class ExportCorpusAsFullCoNLLU extends AbstractHandler {
66
	
67

  
67 68
	public static final String ID = ExportCorpusAsFullCoNLLU.class.getName();
68
	
69

  
69 70
	@Option(name = "conlluResultDirectory", usage = "conlluResultDirectory", widget = "Folder", required = true, def = "conllu-result-directory")
70 71
	File conlluResultDirectory;
71
	
72

  
72 73
	@Option(name = "propertiesPrefix", usage = "optional prefix for the properties to create", widget = "String", required = true, def = "ud-")
73 74
	String propertiesPrefix;
74
	
75

  
75 76
	@Option(name = "separator", usage = "Options", widget = "Separator", required = true, def = "comment properties")
76 77
	Boolean separator = false;
77
	
78

  
78 79
	@Option(name = "insertParagraphs", usage = "Insert paragraph marks in the CoNLLU corpus", widget = "Boolean", required = true, def = "true")
79 80
	Boolean insertParagraphs = false;
80
	
81

  
81 82
	@Option(name = "detectGap", usage = "Insert gap comment using the CQP 'gap' property", widget = "Boolean", required = true, def = "true")
82 83
	Boolean detectGap = false;
83
	
84

  
84 85
	@Option(name = "separator3", usage = "Options", widget = "Separator", required = true, def = "tokens options")
85 86
	Boolean separator3 = false;
86
	
87

  
87 88
	@Option(name = "insertNoSpaceAfter", usage = "Insert the NoSpaceAfter misc property if not initially in the CoNLLU corpus", widget = "Boolean", required = true, def = "true")
88 89
	Boolean insertNoSpaceAfter = true;
89
	
90

  
90 91
	@Option(name = "insertTokenWithoutUdAnnotations", usage = "if checked words without ud annotations are exported as well", widget = "Boolean", required = false, def = "false")
91 92
	Boolean insertTokenWithoutUdAnnotations;
92
	
93

  
93 94
	// "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" };
94 95
	@Option(name = "separator_properties", usage = "Options", widget = "Separator", required = true, def = "fixing UD properties")
95 96
	Boolean separator_properties = false;
96
	
97

  
97 98
	@Option(name = "defaultFormPropertyName", usage = "optional CQP property to fix the missing 'form' ud property", widget = "String", required = false, def = "")
98 99
	String defaultFormPropertyName;
99
	
100

  
100 101
	@Option(name = "defaultLemmaPropertyName", usage = "optional CQP property to fix the missing 'lemma' ud property", widget = "String", required = false, def = "")
101 102
	String defaultLemmaPropertyName;
102
	
103

  
103 104
	@Option(name = "defaultUposPropertyName", usage = "optional CQP property to fix the missing 'upos' ud property", widget = "String", required = false, def = "")
104 105
	String defaultUposPropertyName;
105
	
106

  
106 107
	@Option(name = "defaultXposPropertyName", usage = "optional CQP property to fix the missing 'xpos' ud property", widget = "String", required = false, def = "")
107 108
	String defaultXposPropertyName;
108
	
109

  
109 110
	@Option(name = "defaultFeatsPropertyName", usage = "optional CQP property to fix the missing 'feats' ud property", widget = "String", required = false, def = "")
110 111
	String defaultFeatsPropertyName;
111
	
112

  
112 113
	@Option(name = "defaultHeadPropertyName", usage = "optional CQP property to fix the missing 'head' ud property", widget = "String", required = false, def = "")
113 114
	String defaultHeadPropertyName;
114
	
115

  
115 116
	@Option(name = "defaultDeprelPropertyName", usage = "optional CQP property to fix the missing 'deprel' ud property", widget = "String", required = false, def = "")
116 117
	String defaultDeprelPropertyName;
117
	
118

  
118 119
	@Option(name = "defaultDepsPropertyName", usage = "optional CQP property to fix the missing 'deps' ud property", widget = "String", required = false, def = "")
119 120
	String defaultDepsPropertyName;
120
	
121

  
121 122
	@Option(name = "defaultMiscPropertyName", usage = "optional CQP property to fix the missing 'misc' ud property", widget = "String", required = false, def = "")
122 123
	String defaultMiscPropertyName;
123
	
124

  
124 125
	@Option(name = "separator2", usage = "Options", widget = "Separator", required = true, def = "sentence fix options")
125 126
	Boolean separator2 = false;
126
	
127

  
127 128
	@Option(name = "openingPunct", usage = "optional prefix for the properties to create", widget = "String", required = true, def = "[\\-–«‘“\\(]")
128 129
	String openingPunct;
129
	
130

  
130 131
	/**
131 132
	 * the UD property suffixes, will be used to create the CQP properties like propertiesPrefix + suffix
132 133
	 */
133
	public static String[] propNames = { "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" };
134
	
134
	public static String[] propNames = ImportCoNLLUAnnotations.UD_PROPERTY_NAMES;
135

  
135 136
	/*
136 137
	 * (non-Javadoc)
137 138
	 * @see org.eclipse.core.commands.AbstractHandler#execute(org.eclipse.core.commands.ExecutionEvent)
138 139
	 */
139 140
	@Override
140 141
	public Object execute(final ExecutionEvent event) throws ExecutionException {
141
		
142

  
142 143
		IStructuredSelection selection = (IStructuredSelection) HandlerUtil.getCurrentSelection(event);
143
		
144

  
144 145
		Object s = selection.getFirstElement();
145 146
		if (!(s instanceof MainCorpus)) {
146 147
			Log.warning("Selection is not a corpus. Aborting.");
147 148
			return null;
148 149
		}
149
		
150

  
150 151
		if (!ParametersDialog.open(this)) {
151 152
			return null;
152 153
		}
153
		
154

  
154 155
		conlluResultDirectory.mkdirs();
155 156
		if (conlluResultDirectory == null || !conlluResultDirectory.exists() || !conlluResultDirectory.isDirectory()) {
156 157
			Log.warning("Error: conllu result directory does not exists: " + conlluResultDirectory);
157 158
			return null;
158 159
		}
159
		
160

  
160 161
		CQPCorpus corpus = (CQPCorpus) s;
161 162
		MainCorpus mainCorpus = corpus.getMainCorpus();
162
		
163

  
163 164
		try {
164 165
			return exportAnnotationsAsCorpus(mainCorpus, conlluResultDirectory, propertiesPrefix, openingPunct, insertTokenWithoutUdAnnotations,
165 166
					defaultFormPropertyName, defaultLemmaPropertyName, defaultUposPropertyName, defaultXposPropertyName,
......
171 172
			Log.warning(e);
172 173
			Log.printStackTrace(e);
173 174
		}
174
		
175

  
175 176
		return null;
176 177
	}
177
	
178

  
178 179
	/**
179 180
	 * export the corpus in a directory of conllu files (one per text)
180 181
	 * 
......
202 203
			String defaultFeatsPropertyName, String defaultHeadPropertyName, String defaultDeprelPropertyName, String defaultDepsPropertyName,
203 204
			String defaultMiscPropertyName,
204 205
			boolean detectGap, boolean insertParagraphs, boolean insertNoSpaceAfter)
205
			throws UnexpectedAnswerException,
206
			IOException,
207
			CqiServerError,
208
			CqiClientException, InvalidCqpIdException {
209
		
206
					throws UnexpectedAnswerException,
207
					IOException,
208
					CqiServerError,
209
					CqiClientException, InvalidCqpIdException {
210

  
210 211
		if (!conlluResultDirectory.exists()) {
211 212
			conlluResultDirectory.mkdirs();
212 213
		}
213 214
		int numberOfWordsWritten = 0;
214 215
		int numberOfSentencesWritten = 0;
215 216
		int numberOfTextsWritten = 0;
216
		
217

  
217 218
		String[] textIds = mainCorpus.getCorpusTextIdsList();
218 219
		int[] start_limits = mainCorpus.getTextStartLimits();
219 220
		int[] end_limits = mainCorpus.getTextEndLimits();
220
		
221

  
221 222
		String lang = mainCorpus.getLang();
222 223
		// HashSet<String> beforeSpacesRules = new HashSet<>(LangFormater.getNoSpaceBefore(mainCorpus.getLang()));
223 224
		// HashSet<String> afterSpacesRules = new HashSet<>(LangFormater.getNoSpaceAfter(mainCorpus.getLang()));
224
		
225

  
225 226
		for (String p : propNames) {
226 227
			WordProperty wp = mainCorpus.getProperty(prefix + p);
227 228
			if (wp == null) {
228
				Log.warning("Error: cannot find the Conllu property: " + prefix + p);
229
				return 0;
229
				Log.warning("Warning: cannot find the Conllu property: " + prefix + p);
230
				//return 0;
230 231
			}
231 232
		}
232
		
233

  
233 234
		if (insertTokenWithoutUdAnnotations && (defaultFormPropertyName == null || mainCorpus.getProperty(defaultFormPropertyName) == null)) {
234 235
			Log.warning("Error: the defaultFormPropertyName parameter needs to be set if insertTokenWithoutUdAnnotations is set to true");
235 236
			return 0;
236 237
		}
237
		
238

  
238 239
		for (int iText = 0; iText < start_limits.length; iText++) {
239
			
240

  
240 241
			// Build corpus positions
241 242
			int[] positions = new int[end_limits[iText] - start_limits[iText] + 1];
242 243
			int tmp = 0;
......
244 245
				positions[tmp++] = n;
245 246
			}
246 247
			numberOfWordsWritten += positions.length;
247
			
248

  
248 249
			// Get UD properties
249 250
			WordProperty wp;
250 251
			wp = mainCorpus.getProperty(prefix + "id");
......
259 260
				}
260 261
			}
261 262
			tmpValues = null;
262
			
263
			WordProperty formWordProperty = mainCorpus.getProperty(prefix + "form");
264
			String[] formValues = CQPSearchEngine.getCqiClient().cpos2Str(formWordProperty.getQualifiedName(), positions);
265
			fixUNDEFValues(formValues);
266
			
263

  
264
			String[] emptyvalues = new String[positions.length];
265
			for (int i = 0 ; i < emptyvalues.length ; i++) {
266
				emptyvalues[i] = "_";
267
			}
268

  
269
			wp = mainCorpus.getProperty(prefix + "form");
270
			String[] formValues = null;
271
			if (wp != null) {
272
				formValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
273
				fixUNDEFValues(formValues);
274
			} else {
275
				formValues = emptyvalues;
276
			}
277

  
278

  
267 279
			wp = mainCorpus.getProperty(prefix + "lemma");
268
			String[] lemmaValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
269
			fixUNDEFValues(lemmaValues);
270
			
280
			String[] lemmaValues = null;
281
			if (wp != null) {
282
				lemmaValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
283
				fixUNDEFValues(lemmaValues);
284
			} else {
285
				lemmaValues = emptyvalues;
286
			}
287

  
271 288
			wp = mainCorpus.getProperty(prefix + "upos");
272
			String[] uposValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
273
			fixUNDEFValues(uposValues);
274
			
289
			String[] uposValues = null;
290
			if (wp != null) {
291
				uposValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
292
				fixUNDEFValues(uposValues);
293
			} else {
294
				uposValues = emptyvalues;
295
			}
296

  
275 297
			wp = mainCorpus.getProperty(prefix + "xpos");
276
			String[] xposValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
277
			fixUNDEFValues(xposValues);
278
			
298
			String[] xposValues = null;
299
			if (wp != null) {
300
				xposValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
301
				fixUNDEFValues(xposValues);
302
			} else {
303
				xposValues = emptyvalues;
304
			}
305

  
279 306
			wp = mainCorpus.getProperty(prefix + "feats");
280
			String[] featsValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
281
			fixUNDEFValues(featsValues);
282
			
307
			String[] featsValues = null;
308
			if (wp != null) {
309
				featsValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
310
				fixUNDEFValues(featsValues);
311
			} else {
312
				featsValues = emptyvalues;
313
			}
314

  
283 315
			wp = mainCorpus.getProperty(prefix + "head");
284 316
			// String[] headValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
285 317
			tmpValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
......
293 325
				}
294 326
			}
295 327
			tmpValues = null;
296
			
328

  
297 329
			wp = mainCorpus.getProperty(prefix + "deprel");
298
			String[] deprelValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
299
			fixUNDEFValues(deprelValues);
300
			
330
			String[] deprelValues = null;
331
			if (wp != null) {
332
				deprelValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
333
				fixUNDEFValues(deprelValues);
334
			} else {
335
				deprelValues = emptyvalues;
336
			}
337

  
301 338
			wp = mainCorpus.getProperty(prefix + "deps");
302
			String[] depsValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
303
			fixUNDEFValues(depsValues);
304
			
339
			String[] depsValues = null;
340
			if (wp != null) {
341
				depsValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
342
				fixUNDEFValues(depsValues);
343
			} else {
344
				depsValues = emptyvalues;
345
			}
346

  
305 347
			wp = mainCorpus.getProperty(prefix + "misc");
306
			String[] miscValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
307
			fixUNDEFValues(miscValues);
308
			
348
			String[] miscValues = null;
349
			if (wp != null) {
350
				miscValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
351
				fixUNDEFValues(miscValues);
352
			} else {
353
				miscValues = emptyvalues;
354
			}
355

  
309 356
			HashSet<Integer> paragraphsStartPositions = new HashSet<>();
310 357
			if (insertParagraphs) {
311 358
				StructuralUnit p_struct = mainCorpus.getStructuralUnit("p");
......
319 366
					}
320 367
				}
321 368
			}
322
			
369

  
323 370
			HashMap<Integer, String> sentidStartPositions = getNonUNDEFPositionsAndValues(mainCorpus, prefix+"sentid");
324 371
			HashMap<Integer, String> newdocidStartPositions = getNonUNDEFPositionsAndValues(mainCorpus, prefix+"newdocid");
325
			
372

  
326 373
			// build sentence, first pass using UD word sentence positions
327 374
			ArrayList<ArrayList<Integer>> sentences = new ArrayList<>();
328 375
			ArrayList<Integer> tmpSentence = new ArrayList<>();
......
331 378
				// + featsValues[p] + " head="
332 379
				// + headValues[p] + " deprel=" + deprelValues[p] + " deps=" + depsValues[p] + " misc=" + miscValues[p]);
333 380
				if (sentidStartPositions.containsKey(p)) { // new ud sentence
334
					
381

  
335 382
					if (tmpSentence.size() > 0) {
336 383
						sentences.add(new ArrayList<>(tmpSentence));
337 384
					}
338
					
385

  
339 386
					// System.out.println("new sentence: " + " id=" + idValues[p] + " form=" + formValues[p] + " lemma=" + lemmaValues[p] + " upos=" + uposValues[p] + " xpos=" + xposValues[p] + "
340 387
					// feats="
341 388
					// + featsValues[p] + " head="
342 389
					// + headValues[p] + " deprel=" + deprelValues[p] + " deps=" + depsValues[p] + " misc=" + miscValues[p]);
343 390
					tmpSentence.clear();
344 391
				}
345
				
392

  
346 393
				if (insertTokenWithoutUdAnnotations) {
347 394
					tmpSentence.add(p); // insert all tokens
348 395
				}
349 396
				else if (idValues[p] != 0) {
350 397
					tmpSentence.add(p); // insert all tokens
351 398
				}
352
				
399

  
353 400
			}
354 401
			positions = null; // free memory
355
			
402

  
356 403
			// fixing sentences
357 404
			for (int s = 0; s < sentences.size(); s++) {
358
				
405

  
359 406
				// fix only ud sentences limits
360 407
				ArrayList<Integer> sentence = sentences.get(s);
361
				
408

  
362 409
				if (sentidStartPositions.get(sentence.get(0)) == null) {
363 410
					continue; // this is not a UD sentence
364 411
				}
365
				
412

  
366 413
				int max = -1;
367 414
				int imax = 0;
368 415
				for (int ip = 0; ip < sentence.size(); ip++) {
......
372 419
						imax = ip;
373 420
					}
374 421
				}
375
				
422

  
376 423
				ArrayList<Integer> newSentence = new ArrayList<>();
377 424
				for (int ip = imax + 1; ip < sentence.size(); ip++) {
378 425
					newSentence.add(sentence.get(ip));
......
388 435
					sentences.add(s + 1, newSentence);
389 436
				}
390 437
			}
391
			
438

  
392 439
			if (tmpSentence.size() > 0) { // add last sentence
393 440
				sentences.add(new ArrayList<>(tmpSentence));
394 441
			}
395
			
442

  
396 443
			// fixing sentence __NULL__ ud properties
397 444
			for (int iSentence = 0; iSentence < sentences.size(); iSentence++) {
398 445
				ArrayList<Integer> sentence = sentences.get(iSentence);
399
				
446

  
400 447
				int[] sentencePositions = new int[sentence.size()];
401 448
				for (int p = 0; p < sentence.size(); p++) {
402 449
					sentencePositions[p] = sentence.get(p);
403 450
				}
404
				
451

  
405 452
				// get CQP values fixing "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps"
406 453
				String[] ids = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty("id").getQualifiedName(), sentencePositions);
407
				
454

  
408 455
				String[] words = null;
409 456
				if (defaultFormPropertyName != null && defaultFormPropertyName.length() > 0) {
410 457
					words = getDefaultValues(mainCorpus, defaultFormPropertyName, sentencePositions);
......
421 468
				if (defaultXposPropertyName != null && defaultXposPropertyName.length() > 0) {
422 469
					xposs = getDefaultValues(mainCorpus, defaultXposPropertyName, sentencePositions);
423 470
				}
424
				
471

  
425 472
				String[] feats = null;
426 473
				if (defaultFeatsPropertyName != null && defaultFeatsPropertyName.length() > 0) {
427 474
					feats = getDefaultValues(mainCorpus, defaultFeatsPropertyName, sentencePositions);
......
442 489
				if (defaultMiscPropertyName != null && defaultMiscPropertyName.length() > 0) {
443 490
					miscs = getDefaultValues(mainCorpus, defaultMiscPropertyName, sentencePositions);
444 491
				}
445
				
492

  
446 493
				// String[] feats = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(featsCorrPropertyName).getQualifiedName(), sentencePositions);
447 494
				// String[] head = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(headCorrPropertyName).getQualifiedName(), sentencePositions);
448 495
				// String[] deprel = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(deprelCorrPropertyName).getQualifiedName(), sentencePositions);
449 496
				// String[] deps = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(deprelCorrPropertyName).getQualifiedName(), sentencePositions);
450
				
497

  
451 498
				// fix ud properties using CQP values
452 499
				for (int ip = 0; ip < sentence.size(); ip++) {
453
					
500

  
454 501
					int p = sentence.get(ip);
455
					
502

  
456 503
					// new word
457 504
					if (miscValues[p].equals("_")) {
458 505
						miscValues[p] = "XmlId=" + ids[ip];
459 506
					}
460
					
507

  
461 508
					// { "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" };
462 509
					if (words != null && formValues[p].equals("_")) {
463 510
						formValues[p] = words[ip];
......
487 534
						miscValues[p] = miscs[ip];
488 535
					}
489 536
				}
490
				
537

  
491 538
				if (insertNoSpaceAfter) {
492 539
					for (int ip = 0; ip < sentence.size(); ip++) { // fix SpaceAfter. !!! this needs to be done after ud properties are fixed
493 540
						int p = sentence.get(ip);
......
503 550
						}
504 551
					}
505 552
				}
506
				
553

  
507 554
				// fixing sentence punct limits
508 555
				while (sentence.size() > 0 && iSentence > 0 && formValues[sentence.get(0)].matches("\\p{P}") && !formValues[sentence.get(0)].matches(openingPunct)) {
509 556
					// System.out.println("FIXING: first non-openingPunct position " + formValues[sentence.get(0)] + " in " + iSentence);
......
516 563
					int p2 = sentence.remove(sentence.size() - 1);
517 564
					sentences.get(iSentence + 1).add(0, p2);
518 565
				}
519
				
566

  
520 567
				if (sentence.size() == 0) { // sentence was depleted after fixing it
521 568
					sentences.remove(iSentence);
522 569
					iSentence--;
523 570
					continue;
524 571
				}
525 572
			}
526
			
573

  
527 574
			for (int s = 0; s < sentences.size(); s++) {
528
				
575

  
529 576
				// fix only ud sentences limits
530 577
				ArrayList<Integer> sentence = sentences.get(s);
531 578
				HashMap<Integer, Integer> oldToNewIds = new HashMap<>();
532 579
				for (int ip = 0; ip < sentence.size(); ip++) { // computing old to new ids
533 580
					int p = sentence.get(ip);
534
					
581

  
535 582
					if (idValues[p] != 0) { // store "old id -> new id"
536 583
						oldToNewIds.put(idValues[p], (ip + 1)); // from 1 to N
537 584
					}
538 585
				}
539
				
586

  
540 587
				// fixing head and set missing head to 0 and root
541 588
				for (int ip = 0; ip < sentence.size(); ip++) {
542 589
					int p = sentence.get(ip);
543
					
590

  
544 591
					// fixing id value
545 592
					idValues[p] = (ip + 1);  // from 1 to N
546
					
593

  
547 594
					// fixing head values
548 595
					if (oldToNewIds.containsKey(headValues[p])) {
549 596
						headValues[p] = oldToNewIds.get(headValues[p]);
......
555 602
					}
556 603
				}
557 604
			}
558
			
605

  
559 606
			// writing sentences
560 607
			File resultConlluFile = new File(conlluResultDirectory, textIds[iText] + ".conllu");
561 608
			PrintWriter writer = IOUtils.getWriter(resultConlluFile);
562
			
609

  
563 610
			int iParagraph = 1;
564
			
611

  
565 612
			for (int iSentence = 0; iSentence < sentences.size(); iSentence++) {
566 613
				ArrayList<Integer> sentence = sentences.get(iSentence);
567
				
614

  
568 615
				int[] sentencePositions = new int[sentence.size()];
569 616
				for (int p = 0; p < sentence.size(); p++) {
570 617
					sentencePositions[p] = sentence.get(p);
571 618
				}
572
				
619

  
573 620
				String[] gap = null;
574 621
				if (detectGap && mainCorpus.getProperty("gap") != null) {
575 622
					gap = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty("gap").getQualifiedName(), sentencePositions);
576 623
				}
577
				
624

  
578 625
				String[] tokens = new String[sentence.size()];
579 626
				for (int ip = 0; ip < sentence.size(); ip++) {
580 627
					tokens[ip] = formValues[sentence.get(ip)];
581 628
				}
582
				
629

  
583 630
				if (insertNoSpaceAfter) {
584 631
					writer.println("# text = " + LangFormater.format(StringUtils.join(tokens, " "), mainCorpus.getLang()));
585 632
				}
586 633
				else {
587 634
					writer.println("# text = " + StringUtils.join(tokens, " "));
588 635
				}
589
				
636

  
590 637
				if (newdocidStartPositions.containsKey(sentence.get(0))) {
591 638
					writer.println("# newdoc id = " + newdocidStartPositions.get(sentence.get(0)));
592 639
				}
593 640
				else {
594 641
					writer.println("# newdoc id = " + textIds[iText]);
595 642
				}
596
				
643

  
597 644
				boolean foundSentId = false;
598 645
				for (int ip : sentence) {
599 646
					if (!foundSentId && sentidStartPositions.containsKey(ip)) {
......
604 651
				if (!foundSentId) { // no sent_id found
605 652
					writer.println("# sent_id = " + textIds[iText] + "-" + (iSentence + 1) + ".new");
606 653
				}
607
				
654

  
608 655
				if (paragraphsStartPositions.contains(sentence.get(0))) { // paragraphsStartPositions is empty if the injectParagraph option is not set
609 656
					writer.println("# newpar id = " + iParagraph);
610 657
					iParagraph++;
611 658
				}
612
				
659

  
613 660
				for (int ip = 0; ip < sentence.size(); ip++) {
614 661
					int p = sentence.get(ip);
615
					
662

  
616 663
					// { "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" };
617 664
					writer.println(idValues[p] + "\t" + formValues[p] + "\t" + lemmaValues[p] + "\t" + uposValues[p]
618 665
							+ "\t" + xposValues[p] + "\t" + featsValues[p] + "\t" + headValues[p] + "\t" + deprelValues[p]
619
							+ "\t" + depsValues[p] + "\t" + miscValues[p]);
620
					
666
									+ "\t" + depsValues[p] + "\t" + miscValues[p]);
667

  
621 668
					if (gap != null && gap[ip].equals("next")) {
622 669
						writer.println("# gap");
623 670
					}
......
626 673
				numberOfSentencesWritten++;
627 674
			}
628 675
			writer.close();
629
			
676

  
630 677
			System.out.println(" Text done: " + resultConlluFile);
631 678
			numberOfTextsWritten++;
632 679
		}
633
		
680

  
634 681
		System.out.println("# words written: " + numberOfWordsWritten);
635 682
		System.out.println("# sentences written: " + numberOfSentencesWritten);
636 683
		System.out.println("# texts written: " + numberOfTextsWritten);
637
		
684

  
638 685
		return numberOfWordsWritten;
639 686
	}
640
	
687

  
641 688
	private static String[] getDefaultValues(MainCorpus mainCorpus, String property, int[] positions) throws UnexpectedAnswerException, IOException, CqiServerError, CqiClientException {
642
		
689

  
643 690
		String[] values = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(property).getQualifiedName(), positions);
644 691
		for (int iupos = 0; iupos < values.length; iupos++) { // recode the || CQP multiple values to ud multiple values
645 692
			if (values[iupos].length() > 2 && values[iupos].startsWith("|") && values[iupos].endsWith("|")) {
646 693
				values[iupos] = values[iupos].substring(1, values[iupos].length() - 1);
647 694
			}
648 695
		}
649
		
696

  
650 697
		return values;
651 698
	}
652
	
699

  
653 700
	private static HashMap<Integer, String> getNonUNDEFPositionsAndValues(MainCorpus mainCorpus, String property) throws UnexpectedAnswerException, IOException, CqiServerError, CqiClientException {
654
		
701

  
702

  
703

  
655 704
		HashMap<Integer, String> sentidStartPositions = new HashMap<>();
656
		int[] ids = CQPSearchEngine.getCqiClient().regex2Id(mainCorpus.getProperty(property).getQualifiedName(), "(?!__UNDEF__).+");
657
		String[] strs = CQPSearchEngine.getCqiClient().id2Str(mainCorpus.getProperty(property).getQualifiedName(), ids);
658
		for (int iId = 0; iId < ids.length; iId++) {
659
			int id = ids[iId];
660
			int[] pp = CQPSearchEngine.getCqiClient().id2Cpos(mainCorpus.getProperty(property).getQualifiedName(), id);
661
			for (int p : pp) {
662
				sentidStartPositions.put(p, strs[iId]);
705
		if (mainCorpus.getProperty(property) != null) {
706
			int[] ids = CQPSearchEngine.getCqiClient().regex2Id(mainCorpus.getProperty(property).getQualifiedName(), "(?!__UNDEF__).+");
707
			String[] strs = CQPSearchEngine.getCqiClient().id2Str(mainCorpus.getProperty(property).getQualifiedName(), ids);
708
			for (int iId = 0; iId < ids.length; iId++) {
709
				int id = ids[iId];
710
				int[] pp = CQPSearchEngine.getCqiClient().id2Cpos(mainCorpus.getProperty(property).getQualifiedName(), id);
711
				for (int p : pp) {
712
					sentidStartPositions.put(p, strs[iId]);
713
				}
663 714
			}
664 715
		}
665
		
666 716
		return sentidStartPositions;
667 717
	}
668
	
718

  
669 719
	private static void fixUNDEFValues(String[] values) {
670
		
720

  
671 721
		for (int i = 0; i < values.length; i++) {
672 722
			if (values[i].equals("__UNDEF__") || values[i].equals("") || values[i].equals("|_|")) {
673 723
				values[i] = "_";
TXM/trunk/bundles/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ToCoNLL2009.java (revision 3731)
12 12
import org.txm.searchengine.cqp.corpus.CorpusManager;
13 13
import org.txm.searchengine.cqp.corpus.Property;
14 14
import org.txm.searchengine.cqp.corpus.StructuralUnit;
15
import org.txm.searchengine.cqp.corpus.StructuralUnitProperty;
16 15
import org.txm.searchengine.cqp.corpus.WordProperty;
17 16
import org.txm.searchengine.cqp.corpus.query.CQLQuery;
18 17
import org.txm.searchengine.cqp.corpus.query.Match;
19 18
import org.txm.searchengine.cqp.serverException.CqiServerError;
20 19
import org.txm.utils.ConsoleProgressBar;
20
import org.txm.utils.logger.Log;
21 21

  
22
import jline.internal.Log;
23

  
24 22
public class ToCoNLL2009 {
25 23
	
26 24
	boolean debug = false;
TXM/trunk/bundles/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ExportCorpusAsCoNLLU.java (revision 3731)
42 42
import org.eclipse.osgi.util.NLS;
43 43
import org.eclipse.ui.handlers.HandlerUtil;
44 44
import org.kohsuke.args4j.Option;
45
import org.txm.conllu.core.function.ImportCoNLLUAnnotations;
45 46
import org.txm.rcp.swt.widget.parameters.ParametersDialog;
46 47
import org.txm.searchengine.cqp.CQPSearchEngine;
47 48
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
......
63 64
 * @author mdecorde.
64 65
 */
65 66
public class ExportCorpusAsCoNLLU extends AbstractHandler {
66
	
67

  
67 68
	public static final String ID = ExportCorpusAsCoNLLU.class.getName();
68
	
69

  
69 70
	@Option(name="outputDirectory", usage="an example file", widget="Folder", required=true, def="outputDirectory")
70 71
	File outputDirectory;
71
	
72

  
72 73
	@Option(name="encoding", usage="sentenceProperty", widget="String", required=true, def="UTF-8")
73 74
	String encoding = "UTF-8";
74 75

  
75
	 @Option(name="sentenceStructure", usage="sentenceProperty", widget="String", required=true, def="s")
76
	@Option(name="sentenceStructure", usage="sentenceProperty", widget="String", required=true, def="s")
76 77
	String sentenceStructure;
77
	
78

  
78 79
	@Option(name="posProperty", usage="if set posProperty used to fill the UPOS ud property", widget="String", required=true, def="frpos")
79 80
	String posProperty;
80
	
81

  
81 82
	@Option(name="lemmaProperty", usage="if set lemmaProperty used to fill the LEMMA ud property", widget="String", required=true, def="frlemma")
82 83
	String lemmaProperty;
83
	
84

  
84 85
	/**
85 86
	 * the UD property suffixes, will be used to create the CQP properties like propertiesPrefix + suffix
86 87
	 */
87
	public static String[] propNames = { "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" };
88
	
88
	public static String[] propNames = ImportCoNLLUAnnotations.UD_PROPERTY_NAMES;
89

  
89 90
	/*
90 91
	 * (non-Javadoc)
91 92
	 * @see org.eclipse.core.commands.AbstractHandler#execute(org.eclipse.core.commands.ExecutionEvent)
92 93
	 */
93 94
	@Override
94 95
	public Object execute(final ExecutionEvent event) throws ExecutionException {
95
		
96

  
96 97
		IStructuredSelection selection = (IStructuredSelection) HandlerUtil.getCurrentSelection(event);
97
		
98

  
98 99
		Object s = selection.getFirstElement();
99 100
		if (!(s instanceof MainCorpus)) {
100 101
			Log.warning("Selection is not a corpus. Aborting.");
101 102
			return null;
102 103
		}
103
		
104

  
104 105
		if (!ParametersDialog.open(this)) {
105 106
			return null;
106 107
		}
107
		
108

  
108 109
		outputDirectory.mkdirs();
109 110
		if (outputDirectory == null || !outputDirectory.exists() || !outputDirectory.isDirectory()) {
110 111
			Log.warning("Error: conllu result directory does not exists: " + outputDirectory);
111 112
			return null;
112 113
		}
113
		
114

  
114 115
		CQPCorpus corpus = (CQPCorpus) s;
115 116
		MainCorpus mainCorpus = corpus.getMainCorpus();
116
		
117

  
117 118
		return exportCorpus(mainCorpus, outputDirectory, sentenceStructure, lemmaProperty, posProperty, encoding);
118
		
119

  
119 120
	}
120
	
121

  
121 122
	public static boolean exportCorpus(MainCorpus mainCorpus, File outputDirectory, String sentenceStructure, String lemmaProperty, String posProperty, String encoding) {
122 123
		try {
123 124
			return new ToCoNLL2009().process(outputDirectory, mainCorpus, mainCorpus.getStructuralUnit(sentenceStructure), mainCorpus.getProperty("word"), mainCorpus.getProperty(lemmaProperty), mainCorpus.getProperty(posProperty), encoding);
124
			
125

  
125 126
		} catch (Exception e) {
126 127
			Log.warning(e);
127 128
			Log.printStackTrace(e);
......
156 157
			String defaultFeatsPropertyName, String defaultHeadPropertyName, String defaultDeprelPropertyName, String defaultDepsPropertyName,
157 158
			String defaultMiscPropertyName,
158 159
			boolean detectGap, boolean insertParagraphs, boolean insertNoSpaceAfter)
159
			throws UnexpectedAnswerException,
160
			IOException,
161
			CqiServerError,
162
			CqiClientException, InvalidCqpIdException {
163
		
160
					throws UnexpectedAnswerException,
161
					IOException,
162
					CqiServerError,
163
					CqiClientException, InvalidCqpIdException {
164

  
164 165
		if (!conlluResultDirectory.exists()) {
165 166
			conlluResultDirectory.mkdirs();
166 167
		}
167 168
		int numberOfWordsWritten = 0;
168 169
		int numberOfSentencesWritten = 0;
169 170
		int numberOfTextsWritten = 0;
170
		
171

  
171 172
		String[] textIds = mainCorpus.getCorpusTextIdsList();
172 173
		int[] start_limits = mainCorpus.getTextStartLimits();
173 174
		int[] end_limits = mainCorpus.getTextEndLimits();
174
		
175

  
175 176
		String lang = mainCorpus.getLang();
176 177
		// HashSet<String> beforeSpacesRules = new HashSet<>(LangFormater.getNoSpaceBefore(mainCorpus.getLang()));
177 178
		// HashSet<String> afterSpacesRules = new HashSet<>(LangFormater.getNoSpaceAfter(mainCorpus.getLang()));
178
		
179

  
179 180
		for (String p : propNames) {
180 181
			WordProperty wp = mainCorpus.getProperty(prefix + p);
181 182
			if (wp == null) {
182
				Log.warning("Error: cannot find the Conllu property: " + prefix + p);
183
				Log.warning("Error: cannot find the CoNLLU property: " + prefix + p);
183 184
				return 0;
184 185
			}
185 186
		}
186
		
187

  
187 188
		if (insertTokenWithoutUdAnnotations && (defaultFormPropertyName == null || mainCorpus.getProperty(defaultFormPropertyName) == null)) {
188 189
			Log.warning("Error: the defaultFormPropertyName parameter needs to be set if insertTokenWithoutUdAnnotations is set to true");
189 190
			return 0;
190 191
		}
191
		
192

  
192 193
		for (int iText = 0; iText < start_limits.length; iText++) {
193
			
194

  
194 195
			// Build corpus positions
195 196
			int[] positions = new int[end_limits[iText] - start_limits[iText] + 1];
196 197
			int tmp = 0;
......
198 199
				positions[tmp++] = n;
199 200
			}
200 201
			numberOfWordsWritten += positions.length;
201
			
202

  
202 203
			// Get UD properties
203 204
			WordProperty wp;
204 205
			wp = mainCorpus.getProperty(prefix + "id");
......
213 214
				}
214 215
			}
215 216
			tmpValues = null;
216
			
217

  
217 218
			WordProperty formWordProperty = mainCorpus.getProperty(prefix + "form");
218 219
			String[] formValues = CQPSearchEngine.getCqiClient().cpos2Str(formWordProperty.getQualifiedName(), positions);
219 220
			fixUNDEFValues(formValues);
220
			
221

  
221 222
			wp = mainCorpus.getProperty(prefix + "lemma");
222 223
			String[] lemmaValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
223 224
			fixUNDEFValues(lemmaValues);
224
			
225

  
225 226
			wp = mainCorpus.getProperty(prefix + "upos");
226 227
			String[] uposValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
227 228
			fixUNDEFValues(uposValues);
228
			
229

  
229 230
			wp = mainCorpus.getProperty(prefix + "xpos");
230 231
			String[] xposValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
231 232
			fixUNDEFValues(xposValues);
232
			
233

  
233 234
			wp = mainCorpus.getProperty(prefix + "feats");
234 235
			String[] featsValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
235 236
			fixUNDEFValues(featsValues);
236
			
237

  
237 238
			wp = mainCorpus.getProperty(prefix + "head");
238 239
			// String[] headValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
239 240
			tmpValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
......
247 248
				}
248 249
			}
249 250
			tmpValues = null;
250
			
251

  
251 252
			wp = mainCorpus.getProperty(prefix + "deprel");
252 253
			String[] deprelValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
253 254
			fixUNDEFValues(deprelValues);
254
			
255

  
255 256
			wp = mainCorpus.getProperty(prefix + "deps");
256 257
			String[] depsValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
257 258
			fixUNDEFValues(depsValues);
258
			
259

  
259 260
			wp = mainCorpus.getProperty(prefix + "misc");
260 261
			String[] miscValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions);
261 262
			fixUNDEFValues(miscValues);
262
			
263

  
263 264
			HashSet<Integer> paragraphsStartPositions = new HashSet<>();
264 265
			if (insertParagraphs) {
265 266
				StructuralUnit p_struct = mainCorpus.getStructuralUnit("p");
......
273 274
					}
274 275
				}
275 276
			}
276
			
277

  
277 278
			HashMap<Integer, String> sentidStartPositions = getNonUNDEFPositionsAndValues(mainCorpus, prefix+"sentid");
278 279
			HashMap<Integer, String> newdocidStartPositions = getNonUNDEFPositionsAndValues(mainCorpus, prefix+"newdocid");
279
			
280

  
280 281
			// build sentence, first pass using UD word sentence positions
281 282
			ArrayList<ArrayList<Integer>> sentences = new ArrayList<>();
282 283
			ArrayList<Integer> tmpSentence = new ArrayList<>();
283 284
			for (int p = 0; p < positions.length; p++) {
284
				// System.out.println("p=" + p + " id=" + idValues[p] + " form=" + formValues[p] + " lemma=" + lemmaValues[p] + " upos=" + uposValues[p] + " xpos=" + xposValues[p] + " feats="
285
				// + featsValues[p] + " head="
286
				// + headValues[p] + " deprel=" + deprelValues[p] + " deps=" + depsValues[p] + " misc=" + miscValues[p]);
287 285
				if (sentidStartPositions.containsKey(p)) { // new ud sentence
288
					
286

  
289 287
					if (tmpSentence.size() > 0) {
290 288
						sentences.add(new ArrayList<>(tmpSentence));
291 289
					}
292
					
293
					// System.out.println("new sentence: " + " id=" + idValues[p] + " form=" + formValues[p] + " lemma=" + lemmaValues[p] + " upos=" + uposValues[p] + " xpos=" + xposValues[p] + "
294
					// feats="
295
					// + featsValues[p] + " head="
296
					// + headValues[p] + " deprel=" + deprelValues[p] + " deps=" + depsValues[p] + " misc=" + miscValues[p]);
290

  
297 291
					tmpSentence.clear();
298 292
				}
299
				
293

  
300 294
				if (insertTokenWithoutUdAnnotations) {
301 295
					tmpSentence.add(p); // insert all tokens
302 296
				}
303 297
				else if (idValues[p] != 0) {
304 298
					tmpSentence.add(p); // insert all tokens
305 299
				}
306
				
300

  
307 301
			}
308 302
			positions = null; // free memory
309
			
303

  
310 304
			// fixing sentences
311 305
			for (int s = 0; s < sentences.size(); s++) {
312
				
306

  
313 307
				// fix only ud sentences limits
314 308
				ArrayList<Integer> sentence = sentences.get(s);
315
				
309

  
316 310
				if (sentidStartPositions.get(sentence.get(0)) == null) {
317 311
					continue; // this is not a UD sentence
318 312
				}
319
				
313

  
320 314
				int max = -1;
321 315
				int imax = 0;
322 316
				for (int ip = 0; ip < sentence.size(); ip++) {
......
326 320
						imax = ip;
327 321
					}
328 322
				}
329
				
323

  
330 324
				ArrayList<Integer> newSentence = new ArrayList<>();
331 325
				for (int ip = imax + 1; ip < sentence.size(); ip++) {
332 326
					newSentence.add(sentence.get(ip));
......
342 336
					sentences.add(s + 1, newSentence);
343 337
				}
344 338
			}
345
			
339

  
346 340
			if (tmpSentence.size() > 0) { // add last sentence
347 341
				sentences.add(new ArrayList<>(tmpSentence));
348 342
			}
349
			
343

  
350 344
			// fixing sentence __NULL__ ud properties
351 345
			for (int iSentence = 0; iSentence < sentences.size(); iSentence++) {
352 346
				ArrayList<Integer> sentence = sentences.get(iSentence);
353
				
347

  
354 348
				int[] sentencePositions = new int[sentence.size()];
355 349
				for (int p = 0; p < sentence.size(); p++) {
356 350
					sentencePositions[p] = sentence.get(p);
357 351
				}
358
				
352

  
359 353
				// get CQP values fixing "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps"
360 354
				String[] ids = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty("id").getQualifiedName(), sentencePositions);
361
				
355

  
362 356
				String[] words = null;
363 357
				if (defaultFormPropertyName != null && defaultFormPropertyName.length() > 0) {
364 358
					words = getDefaultValues(mainCorpus, defaultFormPropertyName, sentencePositions);
......
375 369
				if (defaultXposPropertyName != null && defaultXposPropertyName.length() > 0) {
376 370
					xposs = getDefaultValues(mainCorpus, defaultXposPropertyName, sentencePositions);
377 371
				}
378
				
372

  
379 373
				String[] feats = null;
380 374
				if (defaultFeatsPropertyName != null && defaultFeatsPropertyName.length() > 0) {
381 375
					feats = getDefaultValues(mainCorpus, defaultFeatsPropertyName, sentencePositions);
......
396 390
				if (defaultMiscPropertyName != null && defaultMiscPropertyName.length() > 0) {
397 391
					miscs = getDefaultValues(mainCorpus, defaultMiscPropertyName, sentencePositions);
398 392
				}
399
				
393

  
400 394
				// String[] feats = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(featsCorrPropertyName).getQualifiedName(), sentencePositions);
401 395
				// String[] head = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(headCorrPropertyName).getQualifiedName(), sentencePositions);
402 396
				// String[] deprel = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(deprelCorrPropertyName).getQualifiedName(), sentencePositions);
403 397
				// String[] deps = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(deprelCorrPropertyName).getQualifiedName(), sentencePositions);
404
				
398

  
405 399
				// fix ud properties using CQP values
406 400
				for (int ip = 0; ip < sentence.size(); ip++) {
407
					
401

  
408 402
					int p = sentence.get(ip);
409
					
403

  
410 404
					// new word
411 405
					if (miscValues[p].equals("_")) {
412 406
						miscValues[p] = "XmlId=" + ids[ip];
413 407
					}
414
					
408

  
415 409
					// { "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" };
416 410
					if (words != null && formValues[p].equals("_")) {
417 411
						formValues[p] = words[ip];
......
441 435
						miscValues[p] = miscs[ip];
442 436
					}
443 437
				}
444
				
438

  
445 439
				if (insertNoSpaceAfter) {
446 440
					for (int ip = 0; ip < sentence.size(); ip++) { // fix SpaceAfter. !!! this needs to be done after ud properties are fixed
447 441
						int p = sentence.get(ip);
......
457 451
						}
458 452
					}
459 453
				}
460
				
454

  
461 455
				// fixing sentence punct limits
462 456
				while (sentence.size() > 0 && iSentence > 0 && formValues[sentence.get(0)].matches("\\p{P}") && !formValues[sentence.get(0)].matches(openingPunct)) {
463 457
					// System.out.println("FIXING: first non-openingPunct position " + formValues[sentence.get(0)] + " in " + iSentence);
......
470 464
					int p2 = sentence.remove(sentence.size() - 1);
471 465
					sentences.get(iSentence + 1).add(0, p2);
472 466
				}
473
				
467

  
474 468
				if (sentence.size() == 0) { // sentence was depleted after fixing it
475 469
					sentences.remove(iSentence);
476 470
					iSentence--;
477 471
					continue;
478 472
				}
479 473
			}
480
			
474

  
481 475
			for (int s = 0; s < sentences.size(); s++) {
482
				
476

  
483 477
				// fix only ud sentences limits
484 478
				ArrayList<Integer> sentence = sentences.get(s);
485 479
				HashMap<Integer, Integer> oldToNewIds = new HashMap<>();
486 480
				for (int ip = 0; ip < sentence.size(); ip++) { // computing old to new ids
487 481
					int p = sentence.get(ip);
488
					
482

  
489 483
					if (idValues[p] != 0) { // store "old id -> new id"
490 484
						oldToNewIds.put(idValues[p], (ip + 1)); // from 1 to N
491 485
					}
492 486
				}
493
				
487

  
494 488
				// fixing head and set missing head to 0 and root
495 489
				for (int ip = 0; ip < sentence.size(); ip++) {
496 490
					int p = sentence.get(ip);
497
					
491

  
498 492
					// fixing id value
499 493
					idValues[p] = (ip + 1);  // from 1 to N
500
					
494

  
501 495
					// fixing head values
502 496
					if (oldToNewIds.containsKey(headValues[p])) {
503 497
						headValues[p] = oldToNewIds.get(headValues[p]);
......
509 503
					}
510 504
				}
511 505
			}
512
			
506

  
513 507
			// writing sentences
514 508
			File resultConlluFile = new File(conlluResultDirectory, textIds[iText] + ".conllu");
515 509
			PrintWriter writer = IOUtils.getWriter(resultConlluFile);
516
			
510

  
517 511
			int iParagraph = 1;
518
			
512

  
519 513
			for (int iSentence = 0; iSentence < sentences.size(); iSentence++) {
520 514
				ArrayList<Integer> sentence = sentences.get(iSentence);
521
				
515

  
522 516
				int[] sentencePositions = new int[sentence.size()];
523 517
				for (int p = 0; p < sentence.size(); p++) {
524 518
					sentencePositions[p] = sentence.get(p);
525 519
				}
526
				
520

  
527 521
				String[] gap = null;
528 522
				if (detectGap && mainCorpus.getProperty("gap") != null) {
529 523
					gap = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty("gap").getQualifiedName(), sentencePositions);
530 524
				}
531
				
525

  
532 526
				String[] tokens = new String[sentence.size()];
533 527
				for (int ip = 0; ip < sentence.size(); ip++) {
534 528
					tokens[ip] = formValues[sentence.get(ip)];
535 529
				}
536
				
530

  
537 531
				if (insertNoSpaceAfter) {
538 532
					writer.println("# text = " + LangFormater.format(StringUtils.join(tokens, " "), mainCorpus.getLang()));
539 533
				}
540 534
				else {
541 535
					writer.println("# text = " + StringUtils.join(tokens, " "));
542 536
				}
543
				
537

  
544 538
				if (newdocidStartPositions.containsKey(sentence.get(0))) {
545 539
					writer.println("# newdoc id = " + newdocidStartPositions.get(sentence.get(0)));
546 540
				}
547 541
				else {
548 542
					writer.println("# newdoc id = " + textIds[iText]);
549 543
				}
550
				
544

  
551 545
				boolean foundSentId = false;
552 546
				for (int ip : sentence) {
553 547
					if (!foundSentId && sentidStartPositions.containsKey(ip)) {
......
558 552
				if (!foundSentId) { // no sent_id found
559 553
					writer.println("# sent_id = " + textIds[iText] + "-" + (iSentence + 1) + ".new");
560 554
				}
561
				
555

  
562 556
				if (paragraphsStartPositions.contains(sentence.get(0))) { // paragraphsStartPositions is empty if the injectParagraph option is not set
563 557
					writer.println("# newpar id = " + iParagraph);
564 558
					iParagraph++;
565 559
				}
566
				
560

  
567 561
				for (int ip = 0; ip < sentence.size(); ip++) {
568 562
					int p = sentence.get(ip);
569
					
563

  
570 564
					// { "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" };
571 565
					writer.println(idValues[p] + "\t" + formValues[p] + "\t" + lemmaValues[p] + "\t" + uposValues[p]
572 566
							+ "\t" + xposValues[p] + "\t" + featsValues[p] + "\t" + headValues[p] + "\t" + deprelValues[p]
573
							+ "\t" + depsValues[p] + "\t" + miscValues[p]);
574
					
567
									+ "\t" + depsValues[p] + "\t" + miscValues[p]);
568

  
575 569
					if (gap != null && gap[ip].equals("next")) {
576 570
						writer.println("# gap");
577 571
					}
......
580 574
				numberOfSentencesWritten++;
581 575
			}
582 576
			writer.close();
583
			
577

  
584 578
			System.out.println(" Text done: " + resultConlluFile);
585 579
			numberOfTextsWritten++;
586 580
		}
587
		
581

  
588 582
		System.out.println("# words written: " + numberOfWordsWritten);
589 583
		System.out.println("# sentences written: " + numberOfSentencesWritten);
590 584
		System.out.println("# texts written: " + numberOfTextsWritten);
591
		
585

  
592 586
		return numberOfWordsWritten;
593 587
	}
594
	
588

  
595 589
	private static String[] getDefaultValues(MainCorpus mainCorpus, String property, int[] positions) throws UnexpectedAnswerException, IOException, CqiServerError, CqiClientException {
596
		
597
		String[] values = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(property).getQualifiedName(), positions);
598
		for (int iupos = 0; iupos < values.length; iupos++) { // recode the || CQP multiple values to ud multiple values
599
			if (values[iupos].length() > 2 && values[iupos].startsWith("|") && values[iupos].endsWith("|")) {
600
				values[iupos] = values[iupos].substring(1, values[iupos].length() - 1);
590

  
591
		if (mainCorpus.getProperty(property) != null) {
592
			String[] emptyvalues = new String[positions.length];
593
			for (int i = 0 ; i < emptyvalues.length ; i++) {
594
				emptyvalues[i] = "_";
601 595
			}
596
			return emptyvalues;
597
		} else {
598
			String[] values = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(property).getQualifiedName(), positions);
599
			for (int iupos = 0; iupos < values.length; iupos++) { // recode the || CQP multiple values to ud multiple values
600
				if (values[iupos].length() > 2 && values[iupos].startsWith("|") && values[iupos].endsWith("|")) {
601
					values[iupos] = values[iupos].substring(1, values[iupos].length() - 1);
602
				}
603
			}
604

  
605
			return values;
602 606
		}
603
		
604
		return values;
605 607
	}
606
	
608

  
607 609
	private static HashMap<Integer, String> getNonUNDEFPositionsAndValues(MainCorpus mainCorpus, String property) throws UnexpectedAnswerException, IOException, CqiServerError, CqiClientException {
608
		
610

  
609 611
		HashMap<Integer, String> sentidStartPositions = new HashMap<>();
610 612
		int[] ids = CQPSearchEngine.getCqiClient().regex2Id(mainCorpus.getProperty(property).getQualifiedName(), "(?!__UNDEF__).+");
611 613
		String[] strs = CQPSearchEngine.getCqiClient().id2Str(mainCorpus.getProperty(property).getQualifiedName(), ids);
......
616 618
				sentidStartPositions.put(p, strs[iId]);
617 619
			}
618 620
		}
619
		
621

  
620 622
		return sentidStartPositions;
621 623
	}
622
	
624

  
623 625
	private static void fixUNDEFValues(String[] values) {
624
		
626

  
625 627
		for (int i = 0; i < values.length; i++) {
626 628
			if (values[i].equals("__UNDEF__") || values[i].equals("") || values[i].equals("|_|")) {
627 629
				values[i] = "_";
TXM/trunk/bundles/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ImportCoNLLUAnnotationsFromDirectory.java (revision 3731)
189 189
			return 0;
190 190
		}
191 191
		
192
		mainCorpus.getProject().appendToHistory("CoNLL-U annotations imported from "+conlluDirectory+" : "+nTextProcessed+" texts and "+nWordsInserted+" words processed.");
193
		
192 194
		Log.info("XML-TXM source files updated. Updating indexes...");
193 195
		
194 196
		UDPreferences.getInstance().setProjectPreferenceValue(mainCorpus.getProject(), UDPreferences.UDPREFIX, propertiesPrefix);
......
226 228
			return 0;
227 229
		}
228 230
		
231
		mainCorpus.getProject().appendToHistory("CoNLL-U annotations imported from "+conlluFile+" texts and "+nWordsInserted+" words processed.");
232
		
229 233
		Log.info("XML-TXM source files updated. Updating indexes...");
230 234
		
231 235
		UDPreferences.getInstance().setProjectPreferenceValue(mainCorpus.getProject(), UDPreferences.UDPREFIX, propertiesPrefix);
TXM/trunk/bundles/org.txm.annotation.kr.rcp/src/org/txm/annotation/kr/rcp/commands/ImportTable.java (revision 3731)
130 130
					}
131 131
					monitor.worked(30);
132 132
					
133
					corpus.getProject().appendToHistory("CQP Annotations imported from "+annotationsFile);
133 134
					Log.info("Done.");
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff