Révision 2953

tmp/org.txm.tigersearch.rcp/src/org/txm/tigersearch/commands/ImportCONNLUAnnotationsFromFile.java (revision 2953)
82 82
	@Option(name = "overwrite_cqp_properties", usage = "if set the CQP properties are replaced", widget = "Boolean", required = true, def = "false")
83 83
	Boolean overwrite_cqp_properties;
84 84
	
85
	/**
	 * When set, the word ids read from the CoNLL-U file are normalized before the
	 * annotations are imported: the flag is forwarded to
	 * ImportCONNLUAnnotationsFromDirectory.importAnnotationsFromCoNNLUFile.
	 */
	@Option(name = "normalize_word_ids", usage = "if set the word ids are normalized to start with 'w_' before the annotations are imported", widget = "Boolean", required = true, def = "false")
86
	Boolean normalize_word_ids;
87
	
85 88
	/*
86 89
	 * (non-Javadoc)
87 90
	 * @see org.eclipse.core.commands.AbstractHandler#execute(org.eclipse.core.commands.ExecutionEvent)
......
120 123
				return null;
121 124
			}
122 125
			
123
			return ImportCONNLUAnnotationsFromDirectory.importAnnotationsFromCoNNLUFile(mainCorpus, connluFile, propertiesPrefix, textId);
126
			return ImportCONNLUAnnotationsFromDirectory.importAnnotationsFromCoNNLUFile(mainCorpus, connluFile, propertiesPrefix, textId, normalize_word_ids);
124 127
		}
125 128
		catch (Exception e) {
126 129
			Log.warning(e);
tmp/org.txm.tigersearch.rcp/src/org/txm/tigersearch/commands/ImportCONNLUAnnotationsFromDirectory.java (revision 2953)
52 52
import org.txm.searchengine.cqp.corpus.CQPCorpus;
53 53
import org.txm.searchengine.cqp.corpus.MainCorpus;
54 54
import org.txm.searchengine.cqp.serverException.CqiServerError;
55
import org.txm.utils.AsciiUtils;
55 56
import org.txm.utils.io.FileCopy;
56 57
import org.txm.utils.io.IOUtils;
57 58
import org.txm.utils.logger.Log;
......
81 82
	@Option(name = "overwrite_cqp_properties", usage = "if set the CQP properties are replaced", widget = "Boolean", required = true, def = "false")
82 83
	Boolean overwrite_cqp_properties;
83 84
	
85
	/**
	 * When set, the word ids read from the CoNLL-U files are normalized before the
	 * annotations are imported: the flag is forwarded to importAnnotations.
	 */
	@Option(name = "normalize_word_ids", usage = "if set the word ids are normalized to start with 'w_' before the annotations are imported", widget = "Boolean", required = true, def = "false")
86
	Boolean normalize_word_ids;
87
	
84 88
	/*
85 89
	 * (non-Javadoc)
86 90
	 * @see org.eclipse.core.commands.AbstractHandler#execute(org.eclipse.core.commands.ExecutionEvent)
......
118 122
				Log.warning(NLS.bind("Error: can't use the {0} prefix because some properties are already used: {1}", propertiesPrefix, test));
119 123
				return null;
120 124
			}
121
			return importAnnotations(mainCorpus, connluDirectory, propertiesPrefix);
125
			return importAnnotations(mainCorpus, connluDirectory, propertiesPrefix, normalize_word_ids);
122 126
		}
123 127
		catch (Exception e) {
124 128
			Log.warning(e);
......
141 145
	 * @throws IOException
142 146
	 * @throws XMLStreamException
143 147
	 */
144
	public static int importAnnotations(MainCorpus mainCorpus, File connluDirectory, String propertiesPrefix) throws IOException,
148
	public static int importAnnotations(MainCorpus mainCorpus, File connluDirectory, String propertiesPrefix, Boolean normalizeWordIds) throws IOException,
145 149
			CqiServerError, CqiClientException, XMLStreamException {
146 150
		Log.info(TXMCoreMessages.bind("Importing CONNL-u annotations of {0} in {1} using the ''{2}'' prefix...", connluDirectory, mainCorpus, propertiesPrefix));
147 151
		
......
157 161
		int nWordsInserted = 0;
158 162
		for (File coonluFile : files) {
159 163
			
160
			nWordsInserted += _importAnnotations(coonluFile, mainCorpus, propertiesPrefix, null);
164
			nWordsInserted += _importAnnotations(coonluFile, mainCorpus, propertiesPrefix, null, normalizeWordIds);
161 165
			nTextProcessed++;
162 166
		}
163 167
		
......
187 191
	 * @param corpus
188 192
	 * @param connluFile
189 193
	 * @param propertiesPrefix
194
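	 * @param normalize_word_ids if true, word ids that do not start with "w_" get their first character replaced by "w_", and every id is then passed through AsciiUtils.buildId before its properties are added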
190 195
	 * @return the number of imported annotations
191 196
	 * @throws CqiClientException
192 197
	 * @throws CqiServerError
193 198
	 * @throws IOException
194 199
	 * @throws XMLStreamException
195 200
	 */
196
	public static int importAnnotationsFromCoNNLUFile(MainCorpus mainCorpus, File connluFile, String propertiesPrefix, String textId) throws IOException,
201
	public static int importAnnotationsFromCoNNLUFile(MainCorpus mainCorpus, File connluFile, String propertiesPrefix, String textId, Boolean normalize_word_ids) throws IOException,
197 202
			CqiServerError, CqiClientException, XMLStreamException {
198 203
		Log.info(TXMCoreMessages.bind("Importing CONNL-u annotations of {0} in {1} using the ''{2}'' prefix...", connluFile, mainCorpus, propertiesPrefix));
199 204
		
200 205
		
201
		int nWordsInserted = _importAnnotations(connluFile, mainCorpus, propertiesPrefix, textId);
206
		int nWordsInserted = _importAnnotations(connluFile, mainCorpus, propertiesPrefix, textId, normalize_word_ids);
202 207
		
203 208
		if (nWordsInserted == 0) {
204 209
			Log.warning("** No annotation imported. Aborting.");
......
214 219
		return 0;
215 220
	}
216 221
	
217
	private static int _importAnnotations(File coonluFile, MainCorpus mainCorpus, String propertiesPrefix, String textId) throws IOException, XMLStreamException {
222
	private static int _importAnnotations(File coonluFile, MainCorpus mainCorpus, String propertiesPrefix, String textId, Boolean normalize_word_ids) throws IOException, XMLStreamException {
218 223
		if (textId == null || textId.length() == 0) { // no text id provided, using the connlu file name
219 224
			textId = coonluFile.getName().substring(0, coonluFile.getName().length() - 7);
220 225
		}
......
235 240
		BufferedReader reader = IOUtils.getReader(coonluFile);
236 241
		String line = reader.readLine();
237 242
		
238
		
239 243
		int nWords2 = 0;
240 244
		int nLine = 0;
241 245
		String sent_id = null;
......
315 319
				Log.warning("No 'XmlId=' found for UD line: " + line);
316 320
			}
317 321
			else {
322
				
323
				if (normalize_word_ids) {
324
					if (!id.startsWith("w_"))
325
						id = "w_" + id.substring(1);
326
					id = AsciiUtils.buildId(id);
327
				}
328
				
318 329
				processor.addProperty(id, properties);
319 330
				nWords2++;
320 331
			}

Formats disponibles : Unified diff