Révision 2924

tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/MainCorpus.java (revision 2924)
129 129
	@Override
130 130
	protected boolean __compute(TXMProgressMonitor monitor) throws Exception {
131 131
		
132
		
133
		
134 132
		if (!CQPSearchEngine.isInitialized()) {
135 133
			Log.warning("** TXM can't load MainCorpus when CQP search engine is not ready.");
136 134
			return false;
......
167 165
				PatchCwbRegistry.patch(this.registryFile, this.dataDirectory);
168 166
			}
169 167
			catch (IOException e) {
170
				Log.severe(TXMCoreMessages.bind("Error while updating the {0} registry file .", this.registryFile));
168
				Log.severe(TXMCoreMessages.bind("Error while updating the {0} registry file.", this.registryFile));
171 169
				Log.printStackTrace(e);
172 170
				return false;
173 171
			}
174 172
			
173
			Log.fine(NLS.bind("Call CQI: load_a_system_corpus with {0} and {1}", this.registryFile.getParent(), this.pID));
175 174
			CQPSearchEngine.getCqiClient().load_a_system_corpus(this.registryFile.getParent(), this.pID);
176 175
			
176
			List tmp = Arrays.asList(CQPSearchEngine.getCqiClient().listCorpora());
177
			if (tmp.contains(this.pID)) {
178
				Log.fine("Corpus registered: " + pID);
179
				Log.fine(NLS.bind("Call CQI: corpusProperties with {0}.", this.pID));
180
				try {
181
					String[] props = CQPSearchEngine.getCqiClient().corpusProperties(this.pID);
182
					Log.fine(NLS.bind("Corpus {0} loaded with properties: {1}.", pID, Arrays.asList(props)));
183
				}
184
				catch (Exception e) {
185
					Log.warning(TXMCoreMessages.bind("Error while loading the {0} corpus: ", pID, e.getMessage()));
186
					return false;
187
				}
188
			}
189
			else {
190
				Log.severe(TXMCoreMessages.bind("Error while loading the {0} corpus. Not found in {1}", this.pID, tmp));
191
				return false;
192
			}
193
			
177 194
			corpora.put(this.pID, this); // register the corpus
178 195
		}
179 196
		catch (Exception e) {
......
263 280
	public void clean() {
264 281
		super.clean();
265 282
		
266
		if (CorpusManager.getCorpusManager().getCorpora().get(this.pID) == this) { // un register the MainCorpus
283
		if (CorpusManager.getCorpusManager().getCorpora().get(this.pID) == this) { // unregister the MainCorpus
267 284
			CorpusManager.getCorpusManager().getCorpora().remove(this.pID);
268 285
		}
269 286
		
......
274 291
		}
275 292
		catch (Exception e) {
276 293
			Log.fine(e.getLocalizedMessage());
277
			//Log.printStackTrace(e);
294
			// Log.printStackTrace(e);
278 295
		}
279 296
		
280 297
		if (dataDirectory != null) {
......
381 398
	 */
382 399
	@Override
383 400
	public List<WordProperty> getProperties() throws CqiClientException {
384
		if (this.lexicalUnitsProperties != null)
401
		if (this.lexicalUnitsProperties != null) {
385 402
			return this.lexicalUnitsProperties;
403
		}
386 404
		
387 405
		String[] propertiesName;
388
		CorpusManager cm = null;
389
		AbstractCqiClient cc = null;
390 406
		try {
391
			cm = CorpusManager.getCorpusManager();
392
			cc = cm.getCqiClient();
393
			
394
			// if (cc != null)
395
			// System.out.println("cqiclient OK");
396
			
407
			propertiesName = CQPSearchEngine.getCqiClient().corpusPositionalAttributes(this.pID);
397 408
		}
398 409
		catch (Exception e) {
399 410
			throw new CqiClientException(e);
400 411
		}
401
		try {
402
			// System.out.println(this.pID); // temp : toLowerCAse
403
			propertiesName = cc.corpusPositionalAttributes(this.pID);
412
		
413
		List<WordProperty> properties = new ArrayList<>(propertiesName.length);
414
		for (int i = 0; i < propertiesName.length; i++) {
415
			properties.add(new WordProperty(propertiesName[i], this));
404 416
		}
405
		catch (Exception e) {
406
			throw new CqiClientException(e);
407
		}
408
		List<WordProperty> properties = new ArrayList<>(
409
				propertiesName.length);
410
		for (int i = 0; i < propertiesName.length; i++)
411
			properties.add(new WordProperty(propertiesName[i], this));
412 417
		this.lexicalUnitsProperties = properties;
413 418
		return properties;
414 419
	}
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/AbstractCqiClient.java (revision 2924)
56 56
 *
57 57
 * @author mdecorde
58 58
 */
59
public abstract class AbstractCqiClient implements ICqiClient{
60

  
59
public abstract class AbstractCqiClient implements ICqiClient {
60
	
61 61
	static Pattern pattern = Pattern.compile("\\p{Upper}(\\p{Upper}|\\p{Digit}|[_-])*"); //$NON-NLS-1$
62
	
62 63
	static Pattern pattern2 = Pattern.compile("\\p{Upper}(\\p{Lower}|\\p{Digit}|[_-])*"); //$NON-NLS-1$
63

  
64
	
64 65
	String lastError;
65

  
66 66
	
67
	
67 68
	/**
68 69
	 * Check whether <code>id</code> is a valid CQi for a corpus.
69 70
	 *
......
71 72
	 * @return true if <code>id</code> is in uppercase characters
72 73
	 */
73 74
	public static synchronized boolean checkCorpusId(String id) {
74
		//System.out.println("Pattern: "+pattern+" test with "+id);
75
		// System.out.println("Pattern: "+pattern+" test with "+id);
75 76
		return pattern.matcher(id).matches();
76 77
	}
77

  
78
	
78 79
	/**
79 80
	 * Check whether <code>id</code> is a valid CQi for a subcorpus.
80 81
	 *
81 82
	 * @param id the id
82 83
	 * @return true if <code>id</code> is an uppercase character followed by
83
	 * lowercase characters
84
	 *         lowercase characters
84 85
	 */
85 86
	public static synchronized boolean checkSubcorpusId(String id) {
86
		//System.out.println("Pattern: "+pattern+" test with "+id);
87
		// System.out.println("Pattern: "+pattern+" test with "+id);
87 88
		return pattern2.matcher(id).matches();
88 89
	}
89 90
	
90
	public synchronized String getLastError(){	
91
		if(lastError != null && lastError.length() > 0) {
91
	public synchronized String getLastError() {
92
		if (lastError != null && lastError.length() > 0) {
92 93
			return lastError;
93 94
		}
94 95
		return CQPSearchEngineCoreMessages.noError;
......
101 102
		
102 103
		if (!isWordProperty) {
103 104
			StructuralUnitProperty sprop = (StructuralUnitProperty) prop;
104
			QueryResult qresult = prop.getCorpus().query(new CQLQuery("<"+sprop.getFullName()+">[] expand to "+sprop.getName()), "TMP", false); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
105
			QueryResult qresult = prop.getCorpus().query(new CQLQuery("<" + sprop.getFullName() + ">[] expand to " + sprop.getName()), "TMP", false); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
105 106
			int[] strucs = this.cpos2Struc(qname, qresult.getStarts());
106 107
			return Arrays.asList(cqiClient.struc2Str(qname, strucs));
107
		} else {
108
			int[] positions = {0,1,2,3};
109
			return Arrays.asList(cqiClient.cpos2Str(qname,positions));
110 108
		}
109
		else {
110
			int[] positions = { 0, 1, 2, 3 };
111
			return Arrays.asList(cqiClient.cpos2Str(qname, positions));
112
		}
111 113
	}
112 114
	
113 115
	public List<String> getSingleData(Property prop, int[] positions) throws UnexpectedAnswerException, IOException, CqiServerError {
......
118 120
			int[] strucs = cqiClient.cpos2Struc(qname, positions);
119 121
			return Arrays.asList(cqiClient.struc2Str(qname, strucs));
120 122
			
121
		} else {
122
			return Arrays.asList(cqiClient.cpos2Str(qname,positions));
123 123
		}
124
		else {
125
			return Arrays.asList(cqiClient.cpos2Str(qname, positions));
126
		}
124 127
	}
125 128
	
126 129
	public List<List<String>> getData(Property prop, List<Integer> positions,
127 130
			List<Integer> nWords) throws CqiClientException, IOException, CqiServerError {
128 131
		
129
		//System.out.println("START prop: "+prop);
130
		//System.out.println("positions : "+positions);
131
		//System.out.println("nwords    : "+nWords);
132
		// System.out.println("START prop: "+prop);
133
		// System.out.println("positions : "+positions);
134
		// System.out.println("nwords : "+nWords);
132 135
		
133
		List<List<String>> result = new ArrayList<List<String>>();
136
		List<List<String>> result = new ArrayList<>();
134 137
		
135 138
		// get all the positions needed, possible overlap
136
		HashSet<Integer> allPositionsNeeded = new HashSet<Integer>();
139
		HashSet<Integer> allPositionsNeeded = new HashSet<>();
137 140
		for (int i = 0; i < positions.size(); i++) {
138 141
			int n = nWords.get(i);
139 142
			for (int j = 0; j < n; j++) {
......
144 147
		// conversion from List<Integer> to int[]
145 148
		int[] cpos = new int[allPositionsNeeded.size()];
146 149
		int c = 0;
147
		for (int i : allPositionsNeeded) cpos[c++] = i;
150
		for (int i : allPositionsNeeded)
151
			cpos[c++] = i;
148 152
		
149 153
		// get values for positions
150 154
		String[] values;
151 155
		boolean isWordProperty = !(prop instanceof StructuralUnitProperty);
152 156
		if (!isWordProperty) {
153
			String qname = ((StructuralUnitProperty)prop).getQualifiedName();
157
			String qname = ((StructuralUnitProperty) prop).getQualifiedName();
154 158
			int[] structs = this.cpos2Struc(qname, cpos);
155 159
			values = this.struc2Str(qname, structs);
156
		} else {
160
		}
161
		else {
157 162
			String qname = prop.getQualifiedName();
158 163
			values = this.cpos2Str(qname, cpos);
159 164
		}
160 165
		
161 166
		// sort results by position
162
		TreeMap<Integer, String> map = new TreeMap<Integer, String>();
163
		for (int i = 0 ; i < values.length ; i++) {
167
		TreeMap<Integer, String> map = new TreeMap<>();
168
		for (int i = 0; i < values.length; i++) {
164 169
			map.put(cpos[i], values[i]);
165 170
		}
166 171
		
167 172
		// fill results
168 173
		int start, end;
169 174
		SortedMap<Integer, String> smap;
170
		for (int i = 0 ; i < positions.size() ; i++) {
175
		for (int i = 0; i < positions.size(); i++) {
171 176
			start = positions.get(i);
172 177
			end = positions.get(i) + nWords.get(i);
173 178
			if (start > end) {
174
				Log.warning("Error: trying to get "+prop+" values from "+start+" to "+end);
179
				Log.warning("Error: trying to get " + prop + " values from " + start + " to " + end);
175 180
				result.add(new ArrayList<String>());
176
			} else {
181
			}
182
			else {
177 183
				smap = map.subMap(start, end);
178
				result.add(new ArrayList<String>(smap.values()));
184
				result.add(new ArrayList<>(smap.values()));
179 185
			}
180 186
		}
181

  
187
		
182 188
		return result;
183 189
	}
184

  
190
	
185 191
	/**
186 192
	 * Return lists of strings.
187 193
	 *
......
189 195
	 * @throws CqiClientException the cqi client exception
190 196
	 */
191 197
	public List<String> getData(StructuralUnitProperty property, CQPCorpus corpus) throws CqiClientException {
192
			QueryResult tmp = corpus.query(new CQLQuery("<" + property.getFullName() + ">[]"), UUID.randomUUID().toString(), false); //$NON-NLS-1$ //$NON-NLS-2$
193
			List<Match> matches = tmp.getMatches();
194
			tmp.drop();
195
			ArrayList<String> ret =  new ArrayList<String>(new HashSet<String>(Match
196
					.getValuesForProperty(property, matches)));
197
			return ret;
198
		QueryResult tmp = corpus.query(new CQLQuery("<" + property.getFullName() + ">[]"), UUID.randomUUID().toString(), false); //$NON-NLS-1$ //$NON-NLS-2$
199
		List<Match> matches = tmp.getMatches();
200
		tmp.drop();
201
		ArrayList<String> ret = new ArrayList<>(new HashSet<>(Match
202
				.getValuesForProperty(property, matches)));
203
		return ret;
198 204
	}
199 205
	
200 206
	/**
......
206 212
	 */
207 213
	public List<String> getData(StructuralUnitProperty prop, int number) throws CqiClientException {
208 214
		
209
			QueryResult tmp = prop.getCorpus()
210
					.query(
211
							new CQLQuery("<" + prop.getFullName() + ">[] expand to "+prop.getName()), UUID.randomUUID().toString(), false); //$NON-NLS-1$ //$NON-NLS-2$
212
			if (number > tmp.getNMatch()) number = tmp.getNMatch();
213
			List<Match> matches = tmp.getMatches(0, number);
214
			tmp.drop();
215
			return new ArrayList<String>(new HashSet<String>(Match
216
					.getValuesForProperty(prop, matches)));
217
			// System.out.println("Data "+property.getQualifiedName()+": "+data);
215
		QueryResult tmp = prop.getCorpus()
216
				.query(
217
						new CQLQuery("<" + prop.getFullName() + ">[] expand to " + prop.getName()), UUID.randomUUID().toString(), false); //$NON-NLS-1$ //$NON-NLS-2$
218
		if (number > tmp.getNMatch()) number = tmp.getNMatch();
219
		List<Match> matches = tmp.getMatches(0, number);
220
		tmp.drop();
221
		return new ArrayList<>(new HashSet<>(Match
222
				.getValuesForProperty(prop, matches)));
223
		// System.out.println("Data "+property.getQualifiedName()+": "+data);
218 224
		
219 225
	}
220 226
	
......
227 233
	 * @return true, if successful
228 234
	 */
229 235
	@Override
230
	public abstract boolean connect(String username, String password) throws UnexpectedAnswerException, IOException, CqiServerError ;
231

  
236
	public abstract boolean connect(String username, String password) throws UnexpectedAnswerException, IOException, CqiServerError;
237
	
232 238
	// None
233 239
	/**
234 240
	 * Disconnect.
......
237 243
	 */
238 244
	@Override
239 245
	public abstract boolean disconnect() throws UnexpectedAnswerException, CqiServerError, IOException;
240

  
246
	
241 247
	// CQI_CTRL_LAST_GENERAL_ERROR
242 248
	/**
243 249
	 * Returns the last CQi error.
......
245 251
	 * @return the last error
246 252
	 */
247 253
	@Override
248
	public abstract String getLastCqiError() throws UnexpectedAnswerException, IOException,	CqiServerError;
249

  
254
	public abstract String getLastCqiError() throws UnexpectedAnswerException, IOException, CqiServerError;
255
	
250 256
	// CQI_CTRL_LAST_CQP_ERROR
251 257
	/**
252 258
	 * return the last CQP error.
......
254 260
	 * @return the last error
255 261
	 */
256 262
	@Override
257
	public abstract String getLastCQPError() throws UnexpectedAnswerException, IOException,	CqiServerError;
258

  
263
	public abstract String getLastCQPError() throws UnexpectedAnswerException, IOException, CqiServerError;
264
	
259 265
	// None
260 266
	/**
261 267
	 * Lists the corpora available on the server.
......
264 270
	 */
265 271
	@Override
266 272
	public abstract String[] listCorpora() throws UnexpectedAnswerException, IOException, CqiServerError;
267

  
273
	
268 274
	// None
269 275
	/**
270 276
	 * Gives the corpus charset.
......
274 280
	 */
275 281
	@Override
276 282
	public abstract String corpusCharset(String corpus) throws UnexpectedAnswerException, IOException, CqiServerError;
277

  
283
	
278 284
	// None (not really implemented anyway)
279 285
	/**
280 286
	 * Gives the corpus properties.
......
284 290
	 */
285 291
	@Override
286 292
	public abstract String[] corpusProperties(String corpus) throws UnexpectedAnswerException, IOException, CqiServerError;
287

  
293
	
288 294
	// CQI_CQP_ERROR_NO_SUCH_CORPUS
289 295
	/**
290 296
	 * Gives the corpus positional attributes.
......
294 300
	 */
295 301
	@Override
296 302
	public abstract String[] corpusPositionalAttributes(String corpusID) throws UnexpectedAnswerException, IOException, CqiServerError;
297

  
303
	
298 304
	// CQI_CQP_ERROR_NO_SUCH_CORPUS
299 305
	/**
300 306
	 * Gives the corpus structural attributes.
......
307 313
	 */
308 314
	@Override
309 315
	public abstract String[] corpusStructuralAttributes(String corpus) throws UnexpectedAnswerException, IOException, CqiServerError;
310

  
316
	
311 317
	// CQI_CQP_ERROR_NO_SUCH_CORPUS, CQI_CL_ERROR_NO_SUCH_ATTRIBUTE,
312 318
	// CQI_CL_ERROR_WRONG_ATTRIBUTE_TYPE
313 319
	/**
......
321 327
	 */
322 328
	@Override
323 329
	public abstract boolean corpusStructuralAttributeHasValues(String attribute) throws UnexpectedAnswerException, IOException,
324
	CqiServerError;
325

  
330
			CqiServerError;
331
	
326 332
	// CQI_CQP_ERROR_NO_SUCH_CORPUS
327 333
	/**
328 334
	 * Gives the corpus alignment attributes.
......
335 341
	 */
336 342
	@Override
337 343
	public abstract String[] corpusAlignementAttributes(String corpus) throws UnexpectedAnswerException, IOException, CqiServerError;
338

  
344
	
339 345
	// CQI_CQP_ERROR_NO_SUCH_CORPUS
340 346
	/**
341 347
	 * Gives the corpus full name.
......
348 354
	 */
349 355
	@Override
350 356
	public abstract String corpusFullName(String corpus) throws UnexpectedAnswerException, IOException, CqiServerError;
351

  
357
	
352 358
	/**
353 359
	 * Gives the corpus info listed in the .INFO file.
354 360
	 *
......
360 366
	 */
361 367
	@Override
362 368
	public abstract String[] corpusInfo(String corpus) throws UnexpectedAnswerException, IOException, CqiServerError;
363

  
369
	
364 370
	/**
365 371
	 * Drop a corpus.
366 372
	 * 
......
369 375
	 */
370 376
	@Override
371 377
	public abstract void dropCorpus(String corpus) throws Exception;
372

  
378
	
373 379
	// CQI_CQP_ERROR_NO_SUCH_CORPUS, CQI_CL_ERROR_NO_SUCH_ATTRIBUTE,
374 380
	// CQI_CL_ERROR_WRONG_ATTRIBUTE_TYPE, CQI_CL_ERROR_CORPUS_ACCESS
375 381
	/**
......
383 389
	 */
384 390
	@Override
385 391
	public abstract int attributeSize(String attribute) throws IOException, UnexpectedAnswerException, CqiServerError;
386

  
392
	
387 393
	// CQI_CQP_ERROR_NO_SUCH_CORPUS, CQI_CL_ERROR_NO_SUCH_ATTRIBUTE,
388 394
	// CQI_CL_ERROR_WRONG_ATTRIBUTE_TYPE, CQI_CL_ERROR_CORPUS_ACCESS
389 395
	/**
......
398 404
	 */
399 405
	@Override
400 406
	public abstract int lexiconSize(String attribute) throws IOException, UnexpectedAnswerException, CqiServerError;
401

  
407
	
402 408
	/**
403 409
	 * Drop attribute.
404 410
	 *
......
409 415
	 */
410 416
	@Override
411 417
	public abstract void dropAttribute(String attribute) throws IOException, UnexpectedAnswerException, CqiServerError;
412

  
418
	
413 419
	/**
414 420
	 * Converts an array of attribute values to their ID.
415 421
	 *
......
422 428
	 */
423 429
	@Override
424 430
	public abstract int[] str2Id(String attribute, String[] strings) throws IOException, UnexpectedAnswerException, CqiServerError;
425

  
431
	
426 432
	/**
427 433
	 * Converts an array of attribute ID to their values.
428 434
	 *
......
435 441
	 */
436 442
	@Override
437 443
	public abstract String[] id2Str(String attribute, int[] ids) throws UnexpectedAnswerException, IOException, CqiServerError;
438

  
444
	
439 445
	/**
440 446
	 * Converts an array of attribute IDs to their frequency.
441 447
	 *
......
448 454
	 */
449 455
	@Override
450 456
	public abstract int[] id2Freq(String attribute, int[] ids) throws UnexpectedAnswerException, IOException, CqiServerError;
451

  
457
	
452 458
	/**
453 459
	 * Converts an array of position to their ID given an attribute.
454 460
	 *
......
461 467
	 */
462 468
	@Override
463 469
	public abstract int[] cpos2Id(String attribute, int[] cpos) throws UnexpectedAnswerException, IOException, CqiServerError;
464

  
470
	
465 471
	/**
466 472
	 * Converts an array of position to their value given an attribute.
467 473
	 *
......
474 480
	 */
475 481
	@Override
476 482
	public abstract String[] cpos2Str(String attribute, int[] cpos) throws UnexpectedAnswerException, IOException, CqiServerError;
477

  
483
	
478 484
	/**
479 485
	 * Computes for each position of an array the Id of the enclosing structural
480 486
	 * attribute.
......
488 494
	 */
489 495
	@Override
490 496
	public abstract int[] cpos2Struc(String attribute, int[] cpos) throws UnexpectedAnswerException, IOException, CqiServerError;
491

  
497
	
492 498
	/**
493 499
	 * Computes for each position of an array the position of the left boundary
494 500
	 * of the enclosing structural attribute.
......
502 508
	 */
503 509
	@Override
504 510
	public abstract int[] cpos2LBound(String attribute, int[] cpos) throws UnexpectedAnswerException, IOException, CqiServerError;
505

  
511
	
506 512
	/**
507 513
	 * Computes for each position of an array the position of the right boundary
508 514
	 * of the enclosing structural attribute.
......
516 522
	 */
517 523
	@Override
518 524
	public abstract int[] cpos2RBound(String attribute, int[] cpos) throws UnexpectedAnswerException, IOException, CqiServerError;
519

  
525
	
520 526
	/**
521 527
	 * Computes for each position of an array the Id of the enclosing alignment
522 528
	 * attribute.
......
530 536
	 */
531 537
	@Override
532 538
	public abstract int[] cpos2Alg(String attribute, int[] cpos) throws UnexpectedAnswerException, IOException, CqiServerError;
533

  
539
	
534 540
	/**
535 541
	 * Retrieves annotated string values of structure regions in <strucs>; "" if
536 542
	 * out of range.
......
544 550
	 */
545 551
	@Override
546 552
	public abstract String[] struc2Str(String attribute, int[] strucs) throws UnexpectedAnswerException, IOException, CqiServerError;
547

  
553
	
548 554
	/**
549 555
	 * Retrieves all corpus positions where the given token occurs.
550 556
	 *
......
557 563
	 */
558 564
	@Override
559 565
	public abstract int[] id2Cpos(String attribute, int id) throws UnexpectedAnswerException, IOException, CqiServerError;
560

  
566
	
561 567
	/**
562 568
	 * Retrieves all corpus positions where one of the tokens in <id_list>
563 569
	 * occurs; the returned list is sorted as a whole, not per token id.
......
571 577
	 */
572 578
	@Override
573 579
	public abstract int[] idList2Cpos(String attribute, int[] ids) throws UnexpectedAnswerException, IOException, CqiServerError;
574

  
580
	
575 581
	/**
576 582
	 * Retrieves the lexicon IDs of all tokens that match <regex>; the returned
577 583
	 * list may be empty (size 0).
......
585 591
	 */
586 592
	@Override
587 593
	public abstract int[] regex2Id(String attribute, String regex) throws UnexpectedAnswerException, IOException, CqiServerError;
588

  
594
	
589 595
	/**
590 596
	 * Retrieves the start and end corpus positions of structure region <struc>.
591 597
	 *
......
598 604
	 */
599 605
	@Override
600 606
	public abstract int[] struc2Cpos(String attribute, int struc) throws UnexpectedAnswerException, IOException, CqiServerError;
601

  
607
	
602 608
	/**
603 609
	 * Retrieves start and end corpus positions of an alignment region in the
604 610
	 * source and target corpora<struc>.
......
606 612
	 * @param attribute the attribute
607 613
	 * @param struc the struc
608 614
	 * @return an array of size 4 containing (src_start, src_end, target_start,
609
	 * target_end)
615
	 *         target_end)
610 616
	 * @throws UnexpectedAnswerException Signals that the data read on the socket is unexpected
611 617
	 * @throws IOException Signals that an I/O exception has occurred.
612 618
	 * @throws CqiServerError the cqi server error
613 619
	 */
614 620
	@Override
615 621
	public abstract int[] alg2Cpos(String attribute, int struc) throws UnexpectedAnswerException, IOException, CqiServerError;
616

  
622
	
617 623
	/**
618 624
	 * Runs a CQL query.
619 625
	 *
......
626 632
	 */
627 633
	@Override
628 634
	public abstract void cqpQuery(String motherCorpus, String subcorpus, String query) throws IOException, UnexpectedAnswerException, CqiServerError;
629

  
635
	
630 636
	/**
631 637
	 * Runs a CQP query line.
632 638
	 *
......
639 645
	 */
640 646
	@Override
641 647
	public abstract void query(String query) throws IOException, UnexpectedAnswerException, CqiServerError;
642

  
648
	
643 649
	/**
644
	 * Runs a CQP query.
650
	 * Load a CQP corpus (system) from a registry file
645 651
	 *
646 652
	 * @param regfilepath the registry location (callers pass the registry file's parent directory)
647
	 * @param subcorpus the subcorpus
648 653
	 * @param entry the ID of the corpus to load
649
	 * @return 
654
	 * @return
650 655
	 * @throws IOException Signals that an I/O exception has occurred.
651 656
	 * @throws UnexpectedAnswerException Signals that the data read on the socket is unexpected
652 657
	 * @throws CqiServerError the cqi server error
653 658
	 */
654 659
	@Override
655 660
	public abstract boolean load_a_system_corpus(String regfilepath, String entry) throws IOException, UnexpectedAnswerException, CqiServerError;
656

  
661
	
657 662
	/**
658 663
	 * Lists all the subcorpora of a corpus.
659 664
	 *
......
665 670
	 */
666 671
	@Override
667 672
	public abstract String[] listSubcorpora(String corpus) throws UnexpectedAnswerException, IOException, CqiServerError;
668

  
673
	
669 674
	/**
670 675
	 * Gives the size of a subcorpus.
671 676
	 * 
......
683 688
	 */
684 689
	@Override
685 690
	public abstract int subCorpusSize(String subcorpus) throws IOException,
686
	UnexpectedAnswerException, CqiServerError;
687

  
691
			UnexpectedAnswerException, CqiServerError;
692
	
688 693
	/**
689 694
	 * Checks whether a subcorpus has a field.
690 695
	 * 
......
704 709
	 */
705 710
	@Override
706 711
	public abstract boolean subCorpusHasField(String subcorpus, byte field) throws IOException, UnexpectedAnswerException, CqiServerError;
707

  
712
	
708 713
	/**
709 714
	 * Dumps the values of <field> for match ranges <first> .. <last> in
710 715
	 * <subcorpus>. <field> is one of the CQI_CONST_FIELD_* constants.
......
729 734
	 */
730 735
	@Override
731 736
	public abstract int[] dumpSubCorpus(String subcorpus, byte field, int first, int last) throws IOException, UnexpectedAnswerException,
732
	CqiServerError;
733

  
737
			CqiServerError;
738
	
734 739
	/**
735 740
	 * Drops a subcorpus.
736 741
	 * 
......
746 751
	 */
747 752
	@Override
748 753
	public abstract void dropSubCorpus(String subcorpus) throws IOException,
749
	UnexpectedAnswerException, CqiServerError;
750

  
754
			UnexpectedAnswerException, CqiServerError;
755
	
751 756
	/**
752 757
	 * Returns <n> (id, frequency) pairs flattened into a list of size 2*<n> NB:
753 758
	 * pairs are sorted by frequency desc.
......
775 780
	public abstract int[][] fdist1(String subcorpus, int cutoff,
776 781
			byte field, String attribute) throws IOException,
777 782
			UnexpectedAnswerException, CqiServerError;
778

  
783
	
779 784
	/**
780 785
	 * Returns <n> (id1, id2, frequency) pairs flattened into a list of size
781 786
	 * 3*<n> NB: triples are sorted by frequency desc.
......
805 810
	@Override
806 811
	public abstract int[][] fdist2(String subcorpus, int cutoff,
807 812
			byte field1, String attribute1, byte field2, String attribute2) throws IOException, UnexpectedAnswerException, CqiServerError;
808

  
813
	
809 814
	@Override
810
	public abstract boolean reconnect() ;
811
}
815
	public abstract boolean reconnect();
816
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/export/ExportTextContentMacro.groovy (revision 2924)
1
// Copyright © 2020 ENS de Lyon, CNRS, University of Franche-Comté
2
// @author mdecorde
3

  
1 4
// STANDARD DECLARATIONS
2 5
package org.txm.macro.export
3 6

  
......
11 14
import groovy.transform.Field
12 15
import org.txm.rcp.swt.widget.parameters.*
13 16

  
14
println "corpora selection: "+corpusViewSelection
15

  
16 17
if (!(corpusViewSelection instanceof CQPCorpus)) {
17 18
	println "Please select a corpus"
18
	return;
19
	return
19 20
}
20 21

  
21 22
// PARAMETERS
22 23

  
23
@Field @Option(name="exportDirectory", usage="Result directory ", widget="Folder", required=true, def="export")
24
@Field @Option(name="exportDirectory", usage="results directory", widget="Folder", required=true, def="export")
24 25
File exportDirectory
25 26

  
26
@Field @Option(name="lineSeparatorStructureName", usage="line separator structure", widget="String", required=false, def="p")
27
@Field @Option(name="lineSeparatorStructureName", usage="name of the structure to use to produce the output lines", widget="String", required=false, def="p")
27 28
def lineSeparatorStructureName
28 29

  
29 30
if (!ParametersDialog.open(this)) return
......
32 33

  
33 34
if (!exportDirectory.exists()) exportDirectory.mkdirs()
34 35

  
35
CQPCorpus corpus = corpusViewSelection
36
CQI = CQPSearchEngine.getCqiClient();
36
def corpus = corpusViewSelection
37
def CQI = CQPSearchEngine.getCqiClient()
37 38

  
38 39
def lineSeparatorStructure = corpus.getStructuralUnit(lineSeparatorStructureName)
39 40

  
40 41
if (lineSeparatorStructure == null) {
41
	println "The $lineSeparatorStructureName structure does not exists in the $corpus corpus"
42
	return;
42
	println "** The $lineSeparatorStructureName structure does not exist in the $corpus corpus"
43
	return
43 44
}
44 45

  
45 46
def breaks_pos = Arrays.asList(corpus.query(new CQLQuery("[]</"+lineSeparatorStructureName+">"),"test", false).getEnds())
46
println breaks_pos
47
println "End of structure positions = "+breaks_pos
47 48

  
48
println "Exporting $corpus text content to $exportDirectory"
49
println "Exporting $corpus text content to $exportDirectory..."
49 50

  
50 51
def wordProperty = corpus.getWordProperty()
51 52
def textidProperty = corpus.getStructuralUnit("text").getProperty("id")
52
def textStartBoundaries = corpus.getTextStartLimits();
53
def textEndBoundaries = corpus.getTextEndLimits();
53
def textStartBoundaries = corpus.getTextStartLimits()
54
def textEndBoundaries = corpus.getTextEndLimits()
54 55
int[] struct_pos = CQI.cpos2Struc(textidProperty.getQualifiedName(), textStartBoundaries)
55 56
String[] textids =  CQI.struc2Str(textidProperty.getQualifiedName(), struct_pos)
56
println ""+textStartBoundaries.size()+" texts"
57
if (textStartBoundaries.size() == 1) {
58
	println "1 text"
59
	} else {
60
	println ""+textStartBoundaries.size()+" texts"
61
}
57 62

  
58 63
for (int i = 0 ; i < textStartBoundaries.size() ; i++) {
59
	int start = textStartBoundaries[i];
64
	int start = textStartBoundaries[i]
60 65
	int end = textEndBoundaries[i]
61 66

  
62 67
	File txtFile = new File(exportDirectory, textids[i]+".txt")
63
	print ".."
68
	print "."
64 69
	def writer = txtFile.newWriter("UTF-8")
65 70
	int[] positions = new int[end - start + 1]
66
	int c = 0;
71
	int c = 0
67 72
	for (int p : start..end) {
68 73
		positions[c++] = p
69 74
	}
70 75
	int[] idx = CQI.cpos2Id(wordProperty.getQualifiedName(), positions)
71 76
	def words = CQI.id2Str(wordProperty.getQualifiedName(), idx)
77
	def tmp = []
72 78
	for (int j = 0 ; j < positions.length ; j++) {
73 79
		int p = positions[j]
74
		if (breaks_pos.contains(p)) words[j] = words[j] +"\n" 
80
		tmp << words[j]
81
		if (breaks_pos.contains(p)) {
82
			writer.println LangFormater.format(tmp.join(" "), corpus.getLang())
83
			tmp = []
84
		} 
75 85
	}
76
	writer.println LangFormater.format(StringUtils.join(words, " "),
77
				corpus.getLang());
78
	writer.close();
86
	if (tmp.size() > 0) {
87
		writer.println LangFormater.format(tmp.join(" "), corpus.getLang())
88
	} 
89
	writer.close()
79 90
}  
80 91

  
81 92
println "\nDone, result saved in "+exportDirectory.getAbsolutePath()
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZDefaultPagerStep.groovy (revision 2924)
12 12
import org.txm.importer.xtz.*
13 13

  
14 14
public class XTZDefaultPagerStep {
15

  
15
	
16 16
	List<String> NoSpaceBefore;
17

  
17
	
18 18
	/** The No space after. */
19 19
	List<String> NoSpaceAfter;
20

  
20
	
21 21
	/** The wordcount. */
22 22
	int wordcount = 0;
23

  
23
	
24 24
	/** The pagecount. */
25 25
	int pagecount = 0;
26

  
26
	
27 27
	/** The wordmax. */
28 28
	int wordmax = 0;
29

  
29
	
30 30
	/** The basename. */
31 31
	String basename = "";
32 32
	String txtname = "";
33 33
	File outdir;
34

  
34
	
35 35
	/** The wordid. */
36 36
	String wordid;
37

  
37
	
38 38
	/** The first word. */
39 39
	boolean firstWord = true;
40

  
40
	
41 41
	/** The wordvalue. */
42 42
	String wordvalue = "";
43

  
43
	
44 44
	/** The interpvalue. */
45 45
	String interpvalue = "";
46

  
46
	
47 47
	/** The lastword. */
48 48
	String lastword = " ";
49

  
49
	
50 50
	/** The wordtype. */
51 51
	String wordtype;
52

  
52
	
53 53
	/** The flagform. */
54 54
	boolean flagform = false;
55

  
55
	
56 56
	/** The flaginterp. */
57 57
	boolean flaginterp = false;
58

  
58
	
59 59
	/** The url. */
60 60
	private def url;
61

  
61
	
62 62
	/** The input data. */
63 63
	private def inputData;
64

  
64
	
65 65
	/** The factory. */
66 66
	private def factory;
67

  
67
	
68 68
	/** The parser. */
69 69
	private XMLStreamReader parser;
70

  
70
	
71 71
	/** The writer. */
72 72
	OutputStreamWriter writer;
73

  
73
	
74 74
	/** The pagedWriter. */
75 75
	StaxStackWriter pagedWriter = null;
76

  
76
	
77 77
	/** The infile. */
78 78
	File infile;
79

  
79
	
80 80
	/** The outfile. */
81 81
	File outfile;
82

  
82
	
83 83
	/** The pages. */
84 84
	//TODO enhance this to store the page name/id as well
85 85
	ArrayList<File> pages = new ArrayList<File>();
86

  
86
	
87 87
	/** The idxstart. */
88 88
	ArrayList<String> idxstart = new ArrayList<String>();
89 89
	String paginationElement;
......
92 92
	def noteElements = new HashSet<String>();
93 93
	def outOfTextElements = new HashSet<String>();
94 94
	XTZPager pager;
95

  
95
	
96 96
	/**
97 97
	 * Instantiates a new pager.
98 98
	 *
......
118 118
		this.infile = infile;
119 119
		this.wordTag= pager.wordTag;
120 120
		outdir.mkdirs()
121

  
121
		
122 122
		inputData = new BufferedInputStream(url.openStream());
123 123
		factory = XMLInputFactory.newInstance();
124 124
		parser = factory.createXMLStreamReader(inputData);
125

  
125
		
126 126
		String notesListString = pager.getImportModule().getProject().getTextualPlan("Note")
127 127
		if (notesListString != null) for (def s : notesListString.split(",")) noteElements << s;
128

  
128
		
129 129
		String elems = pager.getImportModule().getProject().getTextualPlan("OutSideTextTagsAndKeepContent")
130 130
		if (elems != null) for (def s : elems.split(",")) outOfTextElements << s;
131

  
131
		
132 132
		//process();
133 133
	}
134

  
134
	
135 135
	public String getAttributeValue(def parser, String ns, String name) {
136 136
		for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
137 137
			if (name == parser.getAttributeLocalName(i)) {
......
140 140
		}
141 141
		return "";
142 142
	}
143

  
143
	
144 144
	private def closeMultiWriter() {
145 145
		if (pagedWriter != null) {
146 146
			def tags = pagedWriter.getTagStack().clone();
147
//			println "STACK="+pagedWriter.getTagStack()
148
//			def stack = Thread.currentThread().getStackTrace();
149
//			int m = Math.min(15, stack.size()-1)
150
//			for (def s : stack[1..m]) println s
151
//			println "FILE ="+outfile
147
			//			println "STACK="+pagedWriter.getTagStack()
148
			//			def stack = Thread.currentThread().getStackTrace();
149
			//			int m = Math.min(15, stack.size()-1)
150
			//			for (def s : stack[1..m]) println s
151
			//			println "FILE ="+outfile
152 152
			if (firstWord) { // there was no words
153 153
				pagedWriter.writeCharacters("");
154 154
				this.idxstart.add("${wordTag}_0")
......
174 174
				}
175 175
				notes.clear()
176 176
			}
177

  
177
			
178 178
			pagedWriter.close();
179 179
			
180
//			println "STACK TO REWRITE: $tags"
180
			//			println "STACK TO REWRITE: $tags"
181 181
			for (int i = 0 ; i < tags.size() ; i++) {
182 182
				String tag = tags.remove(0)
183 183
				i--
184
//				println "	tag=$tag"
184
				//				println "	tag=$tag"
185 185
				if (tag == "div") {
186 186
					break; // remove elements until first "div" tag
187 187
				}
188 188
			}
189
//			println "STACK TO REWRITE2: $tags"
189
			//			println "STACK TO REWRITE2: $tags"
190 190
			
191 191
			return tags;
192 192
		} else {
193 193
			return [];
194 194
		}
195 195
	}
196

  
196
	
197 197
	/**
198 198
	 * Creates the next output.
199 199
	 *
......
207 207
			outfile = new File(outdir, txtname+"_"+(++pagecount)+".html")
208 208
			pages.add(outfile)
209 209
			firstWord = true; // waiting for next word
210

  
210
			
211 211
			pagedWriter = new StaxStackWriter(outfile, "UTF-8")
212

  
212
			
213 213
			//pagedWriter.writeStartDocument()
214 214
			pagedWriter.writeDTD("<!DOCTYPE html>")
215 215
			pagedWriter.writeCharacters("\n")
......
227 227
			pagedWriter.writeCharacters("\n")
228 228
			pagedWriter.writeStartElement("body") //<body>
229 229
			pagedWriter.writeStartElement("div", ["class": "txmeditionpage"]) //<div>
230
//			println "OPENING: $tags"
230
			//			println "OPENING: $tags"
231 231
			pagedWriter.writeStartElements(tags)
232 232
			return true;
233 233
		} catch (Exception e) {
......
236 236
			return false;
237 237
		}
238 238
	}
239

  
239
	
240 240
	/**
241 241
	 * Creates the output.
242 242
	 *
......
251 251
			return false;
252 252
		}
253 253
	}
254

  
254
	
255 255
	/**
256 256
	 * Gets the page files.
257 257
	 *
......
260 260
	public ArrayList<File> getPageFiles() {
261 261
		return pages;
262 262
	}
263

  
263
	
264 264
	/**
265 265
	 * Gets the idx.
266 266
	 *
......
269 269
	public ArrayList<String> getIdx() {
270 270
		return idxstart;
271 271
	}
272

  
272
	
273 273
	/**
274 274
	 * Go to text.
275 275
	 */
......
280 280
					return;
281 281
		}
282 282
	}
283

  
283
	
284 284
	def notes = []
285 285
	def currentOutOfTextElements = [] // stack of element with out of text to edit opened element
286 286
	def writeOutOfTextToEditText = false
......
288 288
	 * Process.
289 289
	 */
290 290
	public boolean process() {
291

  
291
		
292 292
		try {
293 293
			boolean flagNote = false;
294 294
			String noteContent = "";
295 295
			String rend = ""
296 296
			goToText();
297

  
297
			
298 298
			String localname = "";
299 299
			if (!createNextOutput()) {
300 300
				return false;
301 301
			}
302

  
302
			
303 303
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
304 304
				rend = "";
305 305
				switch (event) {
......
311 311
						} else if (currentOutOfTextElements.size() > 0) {
312 312
							currentOutOfTextElements << localname
313 313
						}
314

  
314
					
315 315
						if (localname == paginationElement) {
316 316
							createNextOutput()
317 317
							wordcount=0;
......
320 320
								pagedWriter.writeElement("p", ["class":"txmeditionpb", "align":"center"], getAttributeValue(parser, null,"n"))
321 321
							}
322 322
						}
323

  
323
					
324 324
						rend = getAttributeValue(parser, null, "rend")
325 325
						if (rend == null) rend = "";
326
						
326
					
327 327
						switch (localname) {
328 328
							case "text":
329 329
								LinkedHashMap attributes = new LinkedHashMap();
330 330
								for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
331 331
									attributes[parser.getAttributeLocalName(i)] = parser.getAttributeValue(i).toString()
332 332
								}
333

  
333
							
334 334
								pagedWriter.write("\n")
335 335
								pagedWriter.writeStartElement("p")
336 336
								pagedWriter.writeAttribute("class", rend)
337 337
								if (attributes.containsKey("id")) {
338 338
									pagedWriter.writeElement("h3", attributes["id"])
339 339
								}
340

  
340
							
341 341
								pagedWriter.writeStartElement("table")
342 342
								for (String k : attributes.keySet()) {
343 343
									if (k == "id") continue;
344 344
									if (k == "rend") continue;
345

  
345
									
346 346
									pagedWriter.writeStartElement("tr")
347 347
									pagedWriter.writeElement("td", k)
348 348
									pagedWriter.writeElement("td", attributes[k])
......
414 414
								break;
415 415
							case "sp":
416 416
								pagedWriter.writeStartElement("p", ["class":"turn"])
417
								pagedWriter.writeStartElement("span")
418
								pagedWriter.writeAttribute("class", "spk")
419
								pagedWriter.writeCharacters(parser.getAttributeValue(null,"speaker")+": ")
420
								pagedWriter.writeEndElement() // span@class=spk
417
							
418
								if (parser.getAttributeValue(null,"speaker") != null) {
419
									pagedWriter.writeStartElement("span")
420
									pagedWriter.writeAttribute("class", "spk")
421
									pagedWriter.writeCharacters(parser.getAttributeValue(null,"speaker")+": ")
422
									pagedWriter.writeEndElement() // span@class=spk
423
								}
424
							
421 425
								break;
422 426
							case "u":
423
								//pagedWriter.writeStartElement("p", ["class":"u"])
424
								pagedWriter.writeStartElement("span")
425
								pagedWriter.writeAttribute("class", "sync")
426
								pagedWriter.writeCharacters(parser.getAttributeValue(null,"time"))
427
								//pagedWriter.writeEndElement(); // span@class=spk
427
							//pagedWriter.writeStartElement("p", ["class":"u"])
428
								if (parser.getAttributeValue(null,"time") != null) {
429
									pagedWriter.writeStartElement("span")
430
									pagedWriter.writeAttribute("class", "sync")
431
									pagedWriter.writeCharacters(parser.getAttributeValue(null,"time"))
432
								}
433
							//pagedWriter.writeEndElement(); // span@class=spk
428 434
								break;
429 435
							case "div":
430 436
							case "div1":
......
440 446
								break;
441 447
							case wordTag:
442 448
								wordid = getAttributeValue(parser, null,"id");
443

  
449
							
444 450
								wordcount++;
445 451
								if (wordcount >= wordmax) {
446 452
									createNextOutput();
447 453
								}
448

  
454
							
449 455
								if (firstWord) {
450 456
									firstWord = false;
451 457
									this.idxstart.add(wordid);
452 458
								}
453

  
459
							
454 460
								break;
455 461
							case "ana":
456 462
								flaginterp=true;
......
476 482
						localname = parser.getLocalName();
477 483
						if (currentOutOfTextElements.size() > 0) currentOutOfTextElements.pop()
478 484
						writeOutOfTextToEditText = currentOutOfTextElements.size() > 0
479

  
485
					
480 486
						switch (localname) {
481 487
							case "text":
482 488
								break;
......
541 547
								if (l > 0) {
542 548
									endOfLastWord = lastword.subSequence(l-1, l)
543 549
								}
544
								
550
							
545 551
								if (interpvalue != null) {
546 552
									interpvalue = interpvalue
547 553
								}
......
554 560
									pagedWriter.writeCharacters("\n")
555 561
									pagedWriter.writeStartElement("span", ["title":interpvalue, "id":wordid])
556 562
								}
557

  
563
							
558 564
								pagedWriter.writeCharacters(wordvalue)
559 565
								pagedWriter.writeEndElement()
560
								//pagedWriter.writeComment("\n")
566
							//pagedWriter.writeComment("\n")
561 567
								lastword=wordvalue;
562 568
								break;
563 569
							default:
tmp/org.txm.core/src/java/org/txm/metadatas/Metadatas.java (revision 2924)
87 87
	File xmlfile;
88 88
	
89 89
	/** The metadatas. */
90
	ArrayList<Metadata> metadatas = new ArrayList<Metadata>();
90
	ArrayList<Metadata> metadatas = new ArrayList<>();
91 91
	
92 92
	/** The headers list. */
93
	ArrayList<String> headersList = new ArrayList<String>();
93
	ArrayList<String> headersList = new ArrayList<>();
94 94
	
95 95
	/** The isinialize. */
96 96
	boolean isInitialize = false;
......
353 353
	
354 354
	public HashMap<String, String> getTextMetadata(File f) {
355 355
		
356
		HashMap<String, String> data = new HashMap<String, String>();
356
		HashMap<String, String> data = new HashMap<>();
357 357
		String txtname = f.getName();
358 358
		int idx = txtname.lastIndexOf(".");
359 359
		if (idx > 0) txtname = txtname.substring(0, idx);
......
383 383
	 */
384 384
	public static boolean convertCsvToXml(File csvfile, File xmlFile, String encoding, String separator, String txtseparator, int nbheaderline) throws Exception {
385 385
		
386
		if (separator == null || separator.length() == 0) {
387
			separator = "\t";
388
		}
389
		if (encoding == null || encoding.length() == 0) {
390
			encoding = "UTF-8";
391
		}
392
		xmlFile.createNewFile();
393
		
394
		if (!csvfile.exists()) {
395
			System.out.println("Error: CSV file does not exists");
396
			return false;
397
		}
398
		
399
		XMLOutputFactory factory = XMLOutputFactory.newInstance();
400
		FileOutputStream output = new FileOutputStream(xmlFile);
401
		XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8");// create a new file
402
		
403
		CsvReader reader = new CsvReader(csvfile.getAbsolutePath(), separator.charAt(0), Charset.forName(encoding));
404
		if (txtseparator != null && txtseparator.length() > 0)
405
			reader.setTextQualifier(txtseparator.charAt(0));
406
		
407
		reader.readHeaders();
408
		
409
		String[] headers = reader.getHeaders();
410
		
411
		if (headers.length == 0) {
412
			System.out.println("Error: No header in the metadata file " + csvfile + " with separators: column='" + separator + "' and text='" + txtseparator + "'");
413
			writer.close();
414
			output.close();
415
			return false;
416
		}
417
		
418
		if (!headers[0].equals("id")) {
419
			System.out.println("Error: The first column name in the header line of the metadata file '$csvfile' must be 'id' and found '" + headers[0]
420
					+ "' column separator='\"+separator+\"' and text separator='\"+txtseparator+\"'");
421
			writer.close();
422
			output.close();
423
			if (!separator.equals("\t")) {
424
				System.out.println("\tTrying with separators: column='\t' and text=''...");
425
				return convertCsvToXml(csvfile, xmlFile, encoding, "\t", "", nbheaderline);
386
		try {
387
			if (separator == null || separator.length() == 0) {
388
				separator = "\t";
426 389
			}
427
		}
428
		
429
		// check for double columns
430
		HashSet<String> testhash = new HashSet<String>();
431
		HashSet<String> doubles = new HashSet<String>();
432
		for (String str : headers) {
433
			if (testhash.contains(str))
434
				doubles.add(str);
435
			testhash.add(str);
436
		}
437
		if (doubles.size() > 0) {
438
			System.out.println("Error: the metadata file '$csvfile' contains duplicated column names: " + doubles);
439
			return false;
440
		}
441
		
442
		String[] longnames = new String[headers.length];
443
		String[] types = new String[headers.length];
444
		if (nbheaderline > 1) {// get longnames
445
			reader.readRecord();
446
			for (int i = 0; i < headers.length; i++) {
447
				longnames[i] = reader.get(headers[i]);
390
			if (encoding == null || encoding.length() == 0) {
391
				encoding = "UTF-8";
448 392
			}
449
		}
450
		else {
451
			for (int i = 0; i < headers.length; i++) {
452
				longnames[i] = headers[i];
393
			xmlFile.createNewFile();
394
			
395
			if (!csvfile.exists()) {
396
				System.out.println("Error: CSV file does not exists");
397
				return false;
453 398
			}
454
		}
455
		
456
		if (nbheaderline > 2) {// got types
457
			reader.readRecord();
458
			for (int i = 0; i < headers.length; i++) {
459
				types[i] = reader.get(headers[i]);
399
			
400
			XMLOutputFactory factory = XMLOutputFactory.newInstance();
401
			FileOutputStream output = new FileOutputStream(xmlFile);
402
			XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8");// create a new file
403
			
404
			CsvReader reader = new CsvReader(csvfile.getAbsolutePath(), separator.charAt(0), Charset.forName(encoding));
405
			if (txtseparator != null && txtseparator.length() > 0)
406
				reader.setTextQualifier(txtseparator.charAt(0));
407
			
408
			reader.readHeaders();
409
			
410
			String[] headers = reader.getHeaders();
411
			
412
			if (headers.length == 0) {
413
				System.out.println("Error: No header in the metadata file " + csvfile + " with separators: column='" + separator + "' and text='" + txtseparator + "'");
414
				writer.close();
415
				output.close();
416
				return false;
460 417
			}
461
		}
462
		else {
463
			for (int i = 0; i < headers.length; i++) {
464
				types[i] = "String";
418
			
419
			if (!headers[0].equals("id")) {
420
				System.out.println("Error: The first column name in the header line of the metadata file '$csvfile' must be 'id' and found '" + headers[0]
421
						+ "' column separator='\"+separator+\"' and text separator='\"+txtseparator+\"'");
422
				writer.close();
423
				output.close();
424
				if (!separator.equals("\t")) {
425
					System.out.println("\tTrying with separators: column='\t' and text=''...");
426
					return convertCsvToXml(csvfile, xmlFile, encoding, "\t", "", nbheaderline);
427
				}
465 428
			}
466
		}
467
		
468
		writer.writeStartDocument("UTF-8", "1.0");
469
		writer.writeStartElement("enrichissement");
470
		writer.writeStartElement("metadatas");
471
		writer.writeCharacters("\n");
472
		// println "headers : "+Arrays.toString(headers)
473
		for (int i = 1; i < headers.length; i++) {
474
			if (headers[i].length() == 0) {
475
				headers[i] = "noname";
476
				System.out.println("Warning: the " + (i + 1) + "th column name is empty");
429
			
430
			// check for double columns
431
			HashSet<String> testhash = new HashSet<>();
432
			HashSet<String> doubles = new HashSet<>();
433
			for (String str : headers) {
434
				if (testhash.contains(str))
435
					doubles.add(str);
436
				testhash.add(str);
477 437
			}
478
			// if(!headers[i].equals("id"))// the first
479
			// {
480
			writer.writeStartElement("metadata");
481
			writer.writeAttribute("id", AsciiUtils.buildId(headers[i]));
482
			writer.writeAttribute("shortname", headers[i]);
483
			writer.writeAttribute("longname", longnames[i]);
484
			writer.writeAttribute("type", types[i]);
485
			writer.writeAttribute("colwidth", "100");
486
			writer.writeAttribute("selection", "true");
487
			writer.writeAttribute("partition", "true");
488
			writer.writeAttribute("display", "true");
438
			if (doubles.size() > 0) {
439
				System.out.println("Error: the metadata file '$csvfile' contains duplicated column names: " + doubles);
440
				return false;
441
			}
489 442
			
490
			writer.writeEndElement();
491
			writer.writeCharacters("\n");
492
			// }
493
		}
494
		writer.writeEndElement();// close metadatas
495
		writer.writeCharacters("\n");
496
		
497
		writer.writeStartElement("texts");
498
		writer.writeCharacters("\n");
499
		while (reader.readRecord()) {
500
			writer.writeStartElement("text");
501
			for (int i = 0; i < headers.length; i++)
502
				if (headers[i].equals("id")) {
503
					writer.writeAttribute("id", reader.get(headers[i]));
443
			String[] longnames = new String[headers.length];
444
			String[] types = new String[headers.length];
445
			if (nbheaderline > 1) {// get longnames
446
				reader.readRecord();
447
				for (int i = 0; i < headers.length; i++) {
448
					longnames[i] = reader.get(headers[i]);
504 449
				}
505
				else if (headers[i].equals("xpath")) {
506
					writer.writeAttribute("xpath", reader.get(headers[i]));
450
			}
451
			else {
452
				for (int i = 0; i < headers.length; i++) {
453
					longnames[i] = headers[i];
507 454
				}
455
			}
508 456
			
509
			for (int i = 0; i < headers.length; i++)
510
				if (!headers[i].equals("id") && !headers[i].equals("xpath")) {
511
					writer.writeEmptyElement("entry");
512
					writer.writeAttribute("id", AsciiUtils.buildId(headers[i]));
513
					String value = reader.get(headers[i]);
514
					if (value.length() == 0)
515
						writer.writeAttribute("value", "N/A");
516
					else
517
						writer.writeAttribute("value", value);
518
					
457
			if (nbheaderline > 2) {// got types
458
				reader.readRecord();
459
				for (int i = 0; i < headers.length; i++) {
460
					types[i] = reader.get(headers[i]);
519 461
				}
520
			writer.writeEndElement();
462
			}
463
			else {
464
				for (int i = 0; i < headers.length; i++) {
465
					types[i] = "String";
466
				}
467
			}
468
			
469
			writer.writeStartDocument("UTF-8", "1.0");
470
			writer.writeStartElement("enrichissement");
471
			writer.writeStartElement("metadatas");
521 472
			writer.writeCharacters("\n");
473
			// println "headers : "+Arrays.toString(headers)
474
			for (int i = 1; i < headers.length; i++) {
475
				if (headers[i].length() == 0) {
476
					headers[i] = "noname";
477
					System.out.println("Warning: the " + (i + 1) + "th column name is empty");
478
				}
479
				// if(!headers[i].equals("id"))// the first
480
				// {
481
				writer.writeStartElement("metadata");
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff