Revision 2437

tmp/org.txm.index.core/src/org/txm/index/core/functions/PartitionIndex.java (revision 2437)
131 131
	@Override
132 132
	public String getComputingStartMessage() {
133 133
		return TXMCoreMessages.bind(IndexCoreMessages.indexOfP0PropertiesP1OnP2Partition, this.pQuery, WordProperty.propertiesToString(this.pProperties).replaceAll(TXMPreferences.LIST_SEPARATOR,
134
				", "), this.getCorpus().getName()); //$NON-NLS-1$
134
				", "), this.getPartition().getSimpleName()); //$NON-NLS-1$
135 135
	}
136 136
	
137 137
	
tmp/org.txm.cooccurrence.core/src/org/txm/cooccurrence/core/functions/Cooccurrence.java (revision 2437)
78 78
 * @author mdecorde
79 79
 * 
80 80
 */
81
public class Cooccurrence extends TXMResult  {
82

  
81
public class Cooccurrence extends TXMResult {
82
	
83 83
	/** The nocooc. */
84 84
	protected static int nocooc = 1;
85
	
85 86
	/** The prefix r. */
86 87
	protected static String prefixR = "Cooccurrences"; //$NON-NLS-1$
87

  
88
	
88 89
	/** The allsignaturesstr. */
89 90
	private HashMap<Integer, String> allsignaturesstr;
90

  
91
	
91 92
	/** The anticontextquery. */
92 93
	private CQLQuery anticontextquery;
93

  
94
	
94 95
	private boolean buildLexicalTableWithCooccurrents;
95

  
96
	
96 97
	/** The conclines. */
97 98
	List<org.txm.concordance.core.functions.Line> conclines;
98

  
99
	
99 100
	/** The conc. */
100 101
	Concordance concordance;
101

  
102
	
102 103
	/** The contextquery. */
103 104
	private CQLQuery contextQuery;
104

  
105
	
105 106
	/** The count. */
106 107
	HashMap<String, Integer> count;
107

  
108
	
108 109
	// contains the number of encounter
109 110
	/** The counted. */
110
	HashMap<Integer, Integer> counted = new HashMap<Integer, Integer>();
111

  
111
	HashMap<Integer, Integer> counted = new HashMap<>();
112
	
112 113
	// contains the sum of distances
113 114
	/** The counts. */
114
	HashMap<String, Integer> counts = new HashMap<String, Integer>();
115

  
115
	HashMap<String, Integer> counts = new HashMap<>();
116
	
116 117
	/** The dist. */
117 118
	HashMap<String, Float> dist;
118

  
119
	//System.out.println("Matches: focus: "+m1.size()+" full: "+m2.size()+" anti: "+m3.size());
120
	//System.out.println("T matches : "+(System.currentTimeMillis()- time)); //$NON-NLS-1$
119
	
120
	// System.out.println("Matches: focus: "+m1.size()+" full: "+m2.size()+" anti: "+m3.size());
121
	// System.out.println("T matches : "+(System.currentTimeMillis()- time)); //$NON-NLS-1$
121 122
	/** The distances. */
122
	HashMap<String, Double> distances = new HashMap<String, Double>();
123

  
123
	HashMap<String, Double> distances = new HashMap<>();
124
	
124 125
	// contains the sum of distances
125 126
	/** The distancescounts. */
126
	HashMap<String, Integer> distancesCounts = new HashMap<String, Integer>();
127

  
127
	HashMap<String, Integer> distancesCounts = new HashMap<>();
128
	
128 129
	/** The FA. */
129 130
	int FA = -1;
130

  
131
	
131 132
	/** The freq. */
132 133
	HashMap<String, Integer> freq;
133

  
134
	
134 135
	/** The index. */
135 136
	Index index;
136

  
137
	
137 138
	/** The indexfreqs. */
138
	HashMap<String, Integer> indexfreqs = new HashMap<String, Integer>();
139

  
139
	HashMap<String, Integer> indexfreqs = new HashMap<>();
140
	
140 141
	/** The keys to string. */
141 142
	private HashMap<String, String> keysToString;
142

  
143
	
143 144
	/** The lines. */
144
	List<CLine> lines = new ArrayList<CLine>();
145

  
145
	List<CLine> lines = new ArrayList<>();
146
	
146 147
	/** The lt. */
147 148
	private LexicalTableImpl lt;
148

  
149
	
149 150
	/** The m1. */
150 151
	private List<Match> m1;
151

  
152
	//contains the list of positions already counteds
152
	
153
	// contains the list of positions already counteds
153 154
	/** The m2. */
154 155
	private List<Match> m2;
155

  
156
	
156 157
	/** The m3. */
157 158
	private List<Match> m3;
159
	
158 160
	int numberOfCooccurrents = -1;
161
	
159 162
	/** The number of keyword. */
160 163
	int numberOfKeyword = 0;
161

  
164
	
162 165
	/** The occproperties. */
163 166
	HashMap<String, List<String>> occproperties;
167
	
164 168
	/** The P. */
165 169
	int P = -1;
166

  
170
	
167 171
	/** The reference corpus to use = the R symbol that point to a matrix WordxFreqs. */
168 172
	String referenceCorpus;
169

  
173
	
170 174
	/** The scores. */
171 175
	HashMap<String, Double> scores;
172

  
176
	
173 177
	/** The symbol. */
174 178
	private String symbol;
175

  
179
	
176 180
	/** The writer. */
177 181
	private BufferedWriter writer;
178

  
179 182
	
180 183
	
181 184
	
182
	@Parameter(key=CooccurrencePreferences.QUERY_FILTER)
185
	
186
	@Parameter(key = CooccurrencePreferences.QUERY_FILTER)
183 187
	protected String pCooccurentQueryFilter = "[]"; //$NON-NLS-1$
184 188
	
185 189
	/** The mincof. */
186
	@Parameter(key=CooccurrencePreferences.MIN_COUNT)
190
	@Parameter(key = CooccurrencePreferences.MIN_COUNT)
187 191
	protected Integer pFCoocFilter;
188 192
	
189 193
	/** The minf. */
190
	@Parameter(key=TXMPreferences.F_MIN)
194
	@Parameter(key = TXMPreferences.F_MIN)
191 195
	protected Integer pFminFilter;
192 196
	
193 197
	/** The include xpivot. */
194
	@Parameter(key=CooccurrencePreferences.INCLUDE_X_PIVOT)
198
	@Parameter(key = CooccurrencePreferences.INCLUDE_X_PIVOT)
195 199
	protected Boolean pIncludeXpivot;
196 200
	
197 201
	/** The maxleft. */
198
	@Parameter(key=CooccurrencePreferences.MAX_LEFT)
202
	@Parameter(key = CooccurrencePreferences.MAX_LEFT)
199 203
	protected Integer pMaxLeftContextSize;
200 204
	
201 205
	/** The maxright. */
202
	@Parameter(key=CooccurrencePreferences.MAX_RIGHT)
206
	@Parameter(key = CooccurrencePreferences.MAX_RIGHT)
203 207
	protected Integer pMaxRightContextSize;
204 208
	
205 209
	/** The minleft. */
206
	@Parameter(key=CooccurrencePreferences.MIN_LEFT)
210
	@Parameter(key = CooccurrencePreferences.MIN_LEFT)
207 211
	protected Integer pMinLeftContextSize;
208 212
	
209 213
	/** The minright. */
210
	@Parameter(key=CooccurrencePreferences.MIN_RIGHT)
214
	@Parameter(key = CooccurrencePreferences.MIN_RIGHT)
211 215
	protected Integer pMinRightContextSize;
212 216
	
213 217
	/** The cooccurrents properties to display. */
214
	@Parameter(key=CooccurrencePreferences.UNIT_PROPERTIES)
218
	@Parameter(key = CooccurrencePreferences.UNIT_PROPERTIES)
215 219
	protected List<WordProperty> pProperties;
216 220
	
217 221
	/** The keyword query. */
218
	@Parameter(key=CooccurrencePreferences.QUERY)
222
	@Parameter(key = CooccurrencePreferences.QUERY)
219 223
	protected CQLQuery pQuery;
220 224
	
221 225
	/** The minscore. */
222
	@Parameter(key=CooccurrencePreferences.MIN_SCORE)
226
	@Parameter(key = CooccurrencePreferences.MIN_SCORE)
223 227
	protected Float pScoreMinFilter;
224 228
	
225 229
	/**
226 230
	 * The structural unit context limit.
227
	 * In null then the unit property is used. 
231
	 * In null then the unit property is used.
228 232
	 */
229
	@Parameter(key=CooccurrencePreferences.STRUCTURAL_UNIT_LIMIT, electric=false)
233
	@Parameter(key = CooccurrencePreferences.STRUCTURAL_UNIT_LIMIT, electric = false)
230 234
	protected StructuralUnit pStructuralUnitLimit;
231

  
232

  
235
	
236
	
233 237
	/**
234 238
	 * Creates a not computed cooccurrence from the specified corpus.
239
	 * 
235 240
	 * @param parent
236 241
	 */
237
	public Cooccurrence(CQPCorpus parent)	{
242
	public Cooccurrence(CQPCorpus parent) {
238 243
		super(parent);
239 244
	}
240

  
245
	
241 246
	/**
242 247
	 * Creates a not computed cooccurrence from a parameters node.
248
	 * 
243 249
	 * @param parametersNodePath
244 250
	 */
245
	public Cooccurrence(String parametersNodePath)	{
251
	public Cooccurrence(String parametersNodePath) {
246 252
		super(parametersNodePath);
247 253
	}
248

  
249

  
250

  
254
	
255
	
256
	
251 257
	@Override
252 258
	protected boolean _compute() throws CqiClientException, IOException, CqiServerError, StatException {
253
		//FIXME: debug
254
		//System.out.println("cooc: "+corpus+" "+query+" "+properties+" "+limit+" "+maxLeft+" "+minLeft+" "+minRight+" "+maxRight+" "+minFreq+" "+minCof+" "+minScore+" "+includeXpivot);
255

  
259
		// FIXME: debug
260
		// System.out.println("cooc: "+corpus+" "+query+" "+properties+" "+limit+" "+maxLeft+" "+minLeft+" "+minRight+" "+maxRight+" "+minFreq+" "+minCof+" "+minScore+" "+includeXpivot);
261
		
256 262
		this.subTask(CooccurrenceCoreMessages.info_buildingQueries);
257 263
		
258 264
		// clear data
......
272 278
		}
273 279
		catch (Exception e) {
274 280
		}
275
			
281
		
276 282
		if (!this.stepQueryLimits()) {
277 283
			return false;
278
		}	
279

  
284
		}
285
		
280 286
		this.subTask(CooccurrenceCoreMessages.info_retreivingMatches);
281 287
		if (!this.stepGetMatches()) {
282 288
			return false;
283 289
		}
284 290
		this.worked(10);
285

  
291
		
286 292
		this.subTask(CooccurrenceCoreMessages.info_buildingLineSignatures);
287 293
		if (!this.stepBuildSignatures()) {
288 294
			return false;
289 295
		}
290 296
		this.worked(10);
291

  
297
		
292 298
		this.subTask(CooccurrenceCoreMessages.info_counting);
293
		if (!this.stepCount())	{
299
		if (!this.stepCount()) {
294 300
			return false;
295 301
		}
296 302
		this.worked(10);
297

  
303
		
298 304
		this.subTask(CooccurrenceCoreMessages.info_buildingLexicalTable);
299
		if (!this.stepBuildLexicalTable())	{
305
		if (!this.stepBuildLexicalTable()) {
300 306
			return false;
301 307
		}
302 308
		this.worked(10);
303

  
309
		
304 310
		this.subTask(CooccurrenceCoreMessages.info_computingSpecificitiesScores);
305
		if (!this.stepGetScores())	{
311
		if (!this.stepGetScores()) {
306 312
			return false;
307 313
		}
308 314
		
309 315
		this.clearMemory();
310 316
		this.worked(10);
311
			
312

  
317
		
318
		
313 319
		return true;
314 320
	}
315

  
321
	
316 322
	@Override
317 323
	public boolean loadParameters() throws CqiClientException {
318 324
		pProperties = (List<WordProperty>) Property.stringToProperties(getCorpus(), this.getStringParameterValue(TXMPreferences.UNIT_PROPERTIES));
......
320 326
		pStructuralUnitLimit = this.getCorpus().getStructuralUnit(this.getStringParameterValue(CooccurrencePreferences.STRUCTURAL_UNIT_LIMIT));
321 327
		return true;
322 328
	}
323

  
329
	
324 330
	@Override
325 331
	public boolean saveParameters() {
326 332
		this.saveParameter(TXMPreferences.UNIT_PROPERTIES, Property.propertiesToString(this.pProperties));
327

  
333
		
328 334
		if (pQuery != null) {
329 335
			this.saveParameter(TXMPreferences.QUERY, pQuery.getQueryString());
330 336
		}
331

  
337
		
332 338
		if (pStructuralUnitLimit != null) {
333 339
			this.saveParameter(CooccurrencePreferences.STRUCTURAL_UNIT_LIMIT, this.pStructuralUnitLimit.getName());
334 340
		}
335

  
341
		
336 342
		return true;
337 343
	}
338 344
	
......
344 350
	 * @throws RWorkspaceException the r workspace exception
345 351
	 */
346 352
	public String asRMatrix() throws RWorkspaceException {
347
		symbol = prefixR+nocooc;
348

  
353
		symbol = prefixR + nocooc;
354
		
349 355
		String[] occ = new String[this.lines.size()];
350 356
		int[] freq = new int[this.lines.size()];
351 357
		int[] cofreq = new int[this.lines.size()];
352 358
		double[] score = new double[this.lines.size()];
353 359
		double[] dist = new double[this.lines.size()];
354

  
360
		
355 361
		int i = 0;
356 362
		for (CLine line : this.lines) {
357 363
			occ[i] = line.occ;
......
361 367
			dist[i] = line.distmoyenne;
362 368
			i++;
363 369
		}
364

  
370
		
365 371
		RWorkspace rw = RWorkspace.getRWorkspaceInstance();
366 372
		rw.addVectorToWorkspace("coococc", occ); //$NON-NLS-1$
367 373
		rw.addVectorToWorkspace("coocfreq", freq); //$NON-NLS-1$
368 374
		rw.addVectorToWorkspace("cooccofreq", cofreq); //$NON-NLS-1$
369 375
		rw.addVectorToWorkspace("coocscore", score); //$NON-NLS-1$
370 376
		rw.addVectorToWorkspace("coocmeandist", dist); //$NON-NLS-1$
371

  
372
		rw.eval(symbol+ "<- matrix(data = c(coocfreq, cooccofreq, coocscore, coocmeandist), nrow = "+this.lines.size()+", ncol = 4)"); //$NON-NLS-1$ //$NON-NLS-2$
373
		rw.eval("rownames("+symbol+" ) <- coococc"); //$NON-NLS-1$ //$NON-NLS-2$
374
		rw.eval("colnames("+symbol+" ) <- c('freq', 'cofreq', 'score', 'dist')"); //$NON-NLS-1$ //$NON-NLS-2$
375
		rw.eval(symbol+ "<- list(data="+symbol //$NON-NLS-1$
376
				//+ ", leftcontext="+this.leftContextSize
377
				//+ ", rightcontext="+this.rightContextSize
378
				//+ ", query=\""+this.query.getQueryString()+"\""
377
		
378
		rw.eval(symbol + "<- matrix(data = c(coocfreq, cooccofreq, coocscore, coocmeandist), nrow = " + this.lines.size() + ", ncol = 4)"); //$NON-NLS-1$ //$NON-NLS-2$
379
		rw.eval("rownames(" + symbol + " ) <- coococc"); //$NON-NLS-1$ //$NON-NLS-2$
380
		rw.eval("colnames(" + symbol + " ) <- c('freq', 'cofreq', 'score', 'dist')"); //$NON-NLS-1$ //$NON-NLS-2$
381
		rw.eval(symbol + "<- list(data=" + symbol //$NON-NLS-1$
382
		// + ", leftcontext="+this.leftContextSize
383
		// + ", rightcontext="+this.rightContextSize
384
		// + ", query=\""+this.query.getQueryString()+"\""
379 385
				+ ")"); //$NON-NLS-1$
380

  
386
		
381 387
		nocooc++;
382 388
		return symbol;
383 389
	}
384

  
390
	
385 391
	@Override
386 392
	public boolean canCompute() {
387

  
393
		
388 394
		if (pQuery == null || pQuery.isEmpty()) {
389 395
			Log.fine("No query set.");
390 396
			return false;
391 397
		}
392

  
398
		
393 399
		if (pProperties == null) {
394 400
			Log.fine("No properties set.");
395 401
			return false;
396 402
		}
397

  
403
		
398 404
		if (getCorpus() == null) {
399 405
			Log.fine("No corpus set.");
400 406
			return false;
401 407
		}
402

  
408
		
403 409
		if (pProperties.size() == 0) {
404 410
			Log.fine("No properties filled.");
405 411
			return false;
406 412
		}
407

  
413
		
408 414
		return true;
409 415
	}
410 416
	
......
415 421
				this.writer.flush();
416 422
				this.writer.close();
417 423
			}
418
		} catch (Exception e) {
424
		}
425
		catch (Exception e) {
419 426
			org.txm.utils.logger.Log.printStackTrace(e);
420 427
		}
421 428
	}
422

  
423
	//FIXME: useless? 
429
	
430
	// FIXME: useless?
424 431
	public void clearMemory() {
425 432
		if (distances != null) distances.clear();
426 433
		if (distancesCounts != null) distancesCounts.clear();
......
440 447
		
441 448
		lt = null;
442 449
	}
443

  
444
//	/**
445
//	 * Count occ.
446
//	 *
447
//	 * @param propsvalue the propsvalue
448
//	 * @param rightcontext the rightcontext
449
//	 */
450
//	public void countOcc(Map<Property, List<String>> propsvalue, boolean rightcontext) {
451
//		// System.out.println("countOcc (R="+rightcontext+") "+propsvalue);
452
//		Property key = pProperties.get(0);
453
//		List<String> iterationlist = propsvalue.get(key);
454
//		// pr chq mot
455
//		for (int i = 0; i < iterationlist.size(); i++) {
456
//			// build occ
457
//			String occ = ""; //$NON-NLS-1$
458
//			for (Property p : pProperties)
459
//				occ += propsvalue.get(p).get(i) + "_"; //$NON-NLS-1$
460
//			occ = occ.substring(0, occ.length() - 1);
461
//
462
//			if (occproperties.get(occ) == null) {
463
//				ArrayList<String> values = new ArrayList<String>();
464
//				for (Property p : pProperties)
465
//					values.add(propsvalue.get(p).get(i));
466
//				occproperties.put(occ, values);
467
//			}
468
//
469
//			// System.out.println("occ '"+occ+"'");
470
//			// update nbocc
471
//			if (count.get(occ) == null) {
472
//				count.put(occ, 0);
473
//			}
474
//			count.put(occ, count.get(occ) + 1);
475
//
476
//			// update dist
477
//			if (dist.get(occ) == null) {
478
//				dist.put(occ, 0.0f);
479
//			}
480
//			if (rightcontext) {
481
//				dist.put(occ, dist.get(occ) + i + 1);
482
//			} else {
483
//				dist.put(occ, dist.get(occ) + iterationlist.size() - i);
484
//			}
485
//
486
//			// update freq
487
//			if (freq.get(occ) == null) {
488
//				// System.out.println("compute freq of "+occ);
489
//				int occfreq = -1; // calcul avec l'index
490
//				for (org.txm.index.core.functions.Line l : index.getLines(0, index.getV())) {
491
//					if (l.toString().equals(occ)) {
492
//						// System.out.println("FOUND "+occ);
493
//						occfreq = l.getFrequency();
494
//						break;
495
//					}
496
//				}
497
//				freq.put(occ, occfreq);
498
//			}
499
//		}
500
//	}
501

  
450
	
451
	// /**
452
	// * Count occ.
453
	// *
454
	// * @param propsvalue the propsvalue
455
	// * @param rightcontext the rightcontext
456
	// */
457
	// public void countOcc(Map<Property, List<String>> propsvalue, boolean rightcontext) {
458
	// // System.out.println("countOcc (R="+rightcontext+") "+propsvalue);
459
	// Property key = pProperties.get(0);
460
	// List<String> iterationlist = propsvalue.get(key);
461
	// // pr chq mot
462
	// for (int i = 0; i < iterationlist.size(); i++) {
463
	// // build occ
464
	// String occ = ""; //$NON-NLS-1$
465
	// for (Property p : pProperties)
466
	// occ += propsvalue.get(p).get(i) + "_"; //$NON-NLS-1$
467
	// occ = occ.substring(0, occ.length() - 1);
468
	//
469
	// if (occproperties.get(occ) == null) {
470
	// ArrayList<String> values = new ArrayList<String>();
471
	// for (Property p : pProperties)
472
	// values.add(propsvalue.get(p).get(i));
473
	// occproperties.put(occ, values);
474
	// }
475
	//
476
	// // System.out.println("occ '"+occ+"'");
477
	// // update nbocc
478
	// if (count.get(occ) == null) {
479
	// count.put(occ, 0);
480
	// }
481
	// count.put(occ, count.get(occ) + 1);
482
	//
483
	// // update dist
484
	// if (dist.get(occ) == null) {
485
	// dist.put(occ, 0.0f);
486
	// }
487
	// if (rightcontext) {
488
	// dist.put(occ, dist.get(occ) + i + 1);
489
	// } else {
490
	// dist.put(occ, dist.get(occ) + iterationlist.size() - i);
491
	// }
492
	//
493
	// // update freq
494
	// if (freq.get(occ) == null) {
495
	// // System.out.println("compute freq of "+occ);
496
	// int occfreq = -1; // calcul avec l'index
497
	// for (org.txm.index.core.functions.Line l : index.getLines(0, index.getV())) {
498
	// if (l.toString().equals(occ)) {
499
	// // System.out.println("FOUND "+occ);
500
	// occfreq = l.getFrequency();
501
	// break;
502
	// }
503
	// }
504
	// freq.put(occ, occfreq);
505
	// }
506
	// }
507
	// }
508
	
502 509
	/**
503 510
	 * Gets the corpus.
504 511
	 *
......
507 514
	public CQPCorpus getCorpus() {
508 515
		return (CQPCorpus) this.getParent();
509 516
	}
510

  
517
	
511 518
	@Override
512 519
	public String getDetails() {
513
		Object[] params = new Object[]{this.getParent(), this.pQuery, this.pProperties, this.pStructuralUnitLimit, (this.pMinLeftContextSize - 1), (this.pMaxLeftContextSize - 1), (this.pMinRightContextSize - 1),
514
				(this.pMaxRightContextSize-1), this.pFminFilter, this.pFCoocFilter, this.pScoreMinFilter};
520
		Object[] params = new Object[] { this.getParent(), this.pQuery, this.pProperties, this.pStructuralUnitLimit, (this.pMinLeftContextSize - 1), (this.pMaxLeftContextSize - 1),
521
				(this.pMinRightContextSize - 1),
522
				(this.pMaxRightContextSize - 1), this.pFminFilter, this.pFCoocFilter, this.pScoreMinFilter };
515 523
		return NLS.bind(CooccurrenceCoreMessages.info_details, params);
516 524
	}
517

  
525
	
518 526
	/**
519 527
	 * Gets the fA.
520 528
	 *
......
523 531
	public int getFA() {
524 532
		return this.FA;
525 533
	}
526

  
534
	
527 535
	public boolean getIncludeXPivot() {
528 536
		return pIncludeXpivot;
529 537
	}
530

  
538
	
531 539
	/**
532 540
	 * Gets the lines.
533 541
	 *
......
536 544
	public List<CLine> getLines() {
537 545
		return lines;
538 546
	}
539

  
547
	
540 548
	/**
541 549
	 * Gets the max left.
542 550
	 *
......
545 553
	public int getMaxLeft() {
546 554
		return pMaxLeftContextSize;
547 555
	}
548

  
556
	
549 557
	/**
550 558
	 * Gets the max right.
551 559
	 *
......
554 562
	public int getMaxRight() {
555 563
		return pMaxRightContextSize;
556 564
	}
557

  
565
	
558 566
	/**
559 567
	 * Gets the min left.
560 568
	 *
......
563 571
	public int getMinLeft() {
564 572
		return pMinLeftContextSize;
565 573
	}
566

  
574
	
567 575
	/**
568 576
	 * Gets the min right.
569 577
	 *
......
572 580
	public int getMinRight() {
573 581
		return pMinRightContextSize;
574 582
	}
575

  
583
	
576 584
	@Override
577 585
	public String getName() {
578 586
		if (this.getParent() != null) {
......
582 590
			return this.getSimpleName();
583 591
		}
584 592
	}
585

  
593
	
586 594
	/**
587 595
	 * Gets the lines.
588 596
	 *
......
597 605
		}
598 606
		return numberOfCooccurrents;
599 607
	}
600

  
608
	
601 609
	/**
602 610
	 * Gets the lines.
603 611
	 *
......
609 617
		}
610 618
		return 0;
611 619
	}
612

  
620
	
613 621
	/**
614 622
	 * Gets the number of keyword.
615 623
	 *
......
618 626
	public int getNumberOfKeyword() {
619 627
		return numberOfKeyword;
620 628
	}
621

  
629
	
622 630
	/**
623 631
	 * Gets the p.
624 632
	 *
......
627 635
	public int getP() {
628 636
		return P;
629 637
	}
630

  
638
	
631 639
	/**
632 640
	 * Gets the properties.
633 641
	 *
......
636 644
	public List<WordProperty> getProperties() {
637 645
		return pProperties;
638 646
	}
639

  
647
	
640 648
	/**
641 649
	 * Gets the query.
642 650
	 *
......
645 653
	public CQLQuery getQuery() {
646 654
		return pQuery;
647 655
	}
648

  
656
	
649 657
	@Override
650 658
	public String getSimpleName() {
651
		if (pQuery != null && !pQuery.isEmpty())	{
659
		if (pQuery != null && !pQuery.isEmpty()) {
652 660
			StringBuffer output = new StringBuffer();
653 661
			output.append(pQuery.getQueryString());
654
			if (pMaxLeftContextSize > 0 && pMaxRightContextSize > 0)	{
655
				output.append(" (" + (pMaxLeftContextSize - 1) + ", " + (pMaxRightContextSize - 1) + ")"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ 
662
			if (pMaxLeftContextSize > 0 && pMaxRightContextSize > 0) {
663
				output.append(" (" + (pMaxLeftContextSize - 1) + ", " + (pMaxRightContextSize - 1) + ")"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
656 664
			}
657 665
			return output.toString();
658 666
		}
659
		else	{
667
		else {
660 668
			return this.getEmptyName();
661 669
		}
662 670
	}
663

  
664 671
	
672
	
665 673
	@Override
666 674
	public String getComputingStartMessage() {
667
		return TXMCoreMessages.bind(CooccurrenceCoreMessages.cooccurrentsOfP0PropertieP1InTheP2Corpus, this.pQuery, WordProperty.propertiesToString(this.pProperties).replaceAll(TXMPreferences.LIST_SEPARATOR, ", "), this.getCorpus().getName()); //$NON-NLS-1$
675
		return TXMCoreMessages.bind(CooccurrenceCoreMessages.cooccurrentsOfP0PropertieP1InTheP2Corpus, this.pQuery, WordProperty.propertiesToString(this.pProperties).replaceAll(
676
				TXMPreferences.LIST_SEPARATOR, ", "), this.getCorpus().getName()); //$NON-NLS-1$
668 677
	}
669

  
670 678
	
679
	
671 680
	@Override
672 681
	public String getComputingDoneMessage() {
673
		if(this.lines.isEmpty())	{
682
		if (this.lines.isEmpty()) {
674 683
			return TXMCoreMessages.common_noResults;
675 684
		}
676
		else	{
685
		else {
677 686
			return TXMCoreMessages.bind(CooccurrenceCoreMessages.P0CooccurentsForP1Occurrences, this.lines.size(), this.numberOfKeyword);
678 687
		}
679 688
	}
......
687 696
	public StructuralUnit getStructuralUnitLimit() {
688 697
		return this.pStructuralUnitLimit;
689 698
	}
690

  
699
	
691 700
	/**
692 701
	 * Gets the symbol.
693 702
	 *
......
696 705
	public String getSymbol() {
697 706
		return this.symbol;
698 707
	}
699

  
708
	
700 709
	/**
701 710
	 * Inits the conc infos.
702 711
	 *
......
704 713
	 * @return true, if successful
705 714
	 */
706 715
	private boolean initConcInfos(Concordance conc) {
707

  
716
		
708 717
		int from = 0;
709 718
		int to = conc.getNLines() - 1;
710 719
		return initConcInfos(conc, from, to);
711 720
	}
712

  
721
	
713 722
	/**
714 723
	 * Inits the conc infos.
715 724
	 *
......
719 728
	 * @return true, if successful
720 729
	 */
721 730
	private boolean initConcInfos(Concordance conc, int from, int to) {
722

  
731
		
723 732
		try {
724 733
			this.concordance = conc;
725 734
			conclines = conc.getLines(from, to);
726 735
			return initConcInfos(conc, conclines);
727
		} catch (Exception e) {
736
		}
737
		catch (Exception e) {
728 738
			org.txm.utils.logger.Log.printStackTrace(e);
729 739
		}
730 740
		return false;
731 741
	}
732

  
742
	
733 743
	/**
734 744
	 * Inits the conc infos.
735 745
	 *
......
738 748
	 * @return true, if successful
739 749
	 */
740 750
	private boolean initConcInfos(Concordance conc, List<Line> conclines) {
741

  
751
		
742 752
		try {
743 753
			CQPCorpus corpus = this.getCorpus();
744 754
			this.concordance = conc;
......
755 765
			pMaxRightContextSize = conc.getRightContextSize();
756 766
			this.conclines = conclines;
757 767
			return true;
758
		} catch (CqiClientException e) {
768
		}
769
		catch (CqiClientException e) {
759 770
			org.txm.utils.logger.Log.printStackTrace(e);
760 771
		}
761 772
		return false;
762 773
	}
763

  
774
	
764 775
	/**
765 776
	 * Prints the.
766 777
	 */
......
772 783
		for (CLine line : lines)
773 784
			System.out.println(line.resume("\t", "")); //$NON-NLS-1$ //$NON-NLS-2$
774 785
	}
775

  
786
	
776 787
	public void setCoocQuery(String q) {
777 788
		pCooccurentQueryFilter = q;
778 789
	}
779

  
790
	
780 791
	/**
781 792
	 * Sets the max left.
782 793
	 *
......
785 796
	public void setMaxLeft(int maxleft) {
786 797
		this.pMaxLeftContextSize = maxleft;
787 798
	}
788

  
799
	
789 800
	/**
790 801
	 * Sets the max right.
791 802
	 *
......
794 805
	public void setMaxRight(int maxright) {
795 806
		this.pMaxRightContextSize = maxright;
796 807
	}
797

  
808
	
798 809
	/**
799 810
	 * Sets the min left.
800 811
	 *
......
803 814
	public void setMinLeft(int minleft) {
804 815
		this.pMinLeftContextSize = minleft;
805 816
	}
806

  
817
	
807 818
	/**
808 819
	 * Sets the min right.
809 820
	 *
......
812 823
	public void setMinRight(int minright) {
813 824
		this.pMinRightContextSize = minright;
814 825
	}
815

  
826
	
816 827
	public void setParameters(CQLQuery query, List<WordProperty> properties, StructuralUnit limit, int maxLeft, int minLeft, int minRight,
817 828
			int maxRight, int minFreq, float minScore, int minCof, boolean includeXpivot, boolean buildLexicalTableWithCooccurrents) {
818

  
829
		
819 830
		this.pQuery = query;
820 831
		this.pProperties = properties;
821 832
		this.pStructuralUnitLimit = limit;
......
833 844
	public void setIncludeXpivot(boolean b) {
834 845
		pIncludeXpivot = b;
835 846
	}
836

  
847
	
848
	@Override
837 849
	public boolean setParameters(TXMParameters parameters) {
838 850
		try {
839 851
			CQPCorpus corpus = this.getCorpus();
840
			boolean includeXpivot = (Boolean) parameters.getBoolean(CooccurrencePreferences.INCLUDE_X_PIVOT);
852
			boolean includeXpivot = parameters.getBoolean(CooccurrencePreferences.INCLUDE_X_PIVOT);
841 853
			
842 854
			String queryString = ""; //$NON-NLS-1$
843
			if(parameters.get(TXMPreferences.QUERY) != null)	{
855
			if (parameters.get(TXMPreferences.QUERY) != null) {
844 856
				queryString = parameters.get(TXMPreferences.QUERY).toString();
845 857
			}
846 858
			CQLQuery query = new CQLQuery(queryString);
847 859
			
848 860
			StructuralUnit limit = (StructuralUnit) parameters.get(CooccurrencePreferences.STRUCTURAL_UNIT_LIMIT);
849

  
861
			
850 862
			Object propsParam = parameters.get(CooccurrencePreferences.UNIT_PROPERTIES);
851 863
			List<WordProperty> properties = null;
852 864
			if (propsParam instanceof List) {
853
				properties = (List<WordProperty>)propsParam;
865
				properties = (List<WordProperty>) propsParam;
854 866
			}
855 867
			else if (propsParam instanceof String) {
856 868
				properties = (List<WordProperty>) Property.stringToProperties(corpus, propsParam.toString());
857 869
			}
858

  
870
			
859 871
			int maxLeft = parameters.getInteger(CooccurrencePreferences.MAX_LEFT);
860 872
			int minLeft = parameters.getInteger(CooccurrencePreferences.MIN_LEFT);
861 873
			int maxRight = parameters.getInteger(CooccurrencePreferences.MAX_RIGHT);
......
864 876
			int minCof = parameters.getInteger(CooccurrencePreferences.MIN_COUNT);
865 877
			int minFreq = parameters.getInteger(TXMPreferences.F_MIN);
866 878
			boolean buildLexicalTableWithCooccurrents = parameters.getBoolean(CooccurrencePreferences.PARTIAL_LEXICAL_TABLE);
867

  
879
			
868 880
			this.setParameters(query, properties, limit, maxLeft, minLeft, minRight, maxRight, minFreq, minScore, minCof, includeXpivot, buildLexicalTableWithCooccurrents);
869

  
870
		} catch (Exception e) {
871
			System.out.println("Error while setting cooccurrence parameters: "+e.getLocalizedMessage());
881
			
882
		}
883
		catch (Exception e) {
884
			System.out.println("Error while setting cooccurrence parameters: " + e.getLocalizedMessage());
872 885
			Log.printStackTrace(e);
873 886
			return false;
874 887
		}
875 888
		return true;
876 889
	}
877

  
890
	
878 891
	public void setReferenceCorpus(String symbol) {
879 892
		referenceCorpus = symbol;
880 893
	}
881

  
894
	
882 895
	/**
883 896
	 * Sets the structural unit limit.
884 897
	 *
......
887 900
	public void setStructuralUnitLimt(StructuralUnit su) {
888 901
		pStructuralUnitLimit = su;
889 902
	}
890

  
903
	
891 904
	/**
892 905
	 * Sets the thresfold.
893 906
	 *
......
896 909
	 * @param score the score
897 910
	 */
898 911
	public void setThresfold(int freq, int count, float score) {
899

  
912
		
900 913
		pFminFilter = freq;
901 914
		pFCoocFilter = count;
902 915
		pScoreMinFilter = score;
903 916
	}
904

  
917
	
905 918
	/**
906 919
	 * Sort.
907 920
	 *
......
915 928
			Collections.sort(lines, comparator);
916 929
		}
917 930
	}
918

  
931
	
919 932
	/**
920 933
	 * Step build lexical table.
921 934
	 *
......
923 936
	 * @throws RWorkspaceException the r workspace exception
924 937
	 */
925 938
	public boolean stepBuildLexicalTable() throws RWorkspaceException {
926

  
939
		
927 940
		CQPCorpus corpus = this.getCorpus();
928
		String[] colnames = {
929
				corpus.getName() + "-" + pQuery.getQueryString(), pQuery.getQueryString() }; //$NON-NLS-1$
930
		keysToString = new HashMap<String, String>();
931

  
932
		//time = System.currentTimeMillis();
933
		for (TXMResult rez : corpus.getChildren(Index.class)) { //TODO: fix usages of index for cooc
941
		String[] colnames = { corpus.getName() + "-" + pQuery.getQueryString(), pQuery.getQueryString() }; //$NON-NLS-1$
942
		keysToString = new HashMap<>();
943
		
944
		// time = System.currentTimeMillis();
945
		for (TXMResult rez : corpus.getChildren(Index.class)) { // TODO: fix usages of index for cooc
934 946
			Index rezvoc = (Index) rez;
935

  
947
			
936 948
			if (rezvoc.getProperties().equals(pProperties)) {
937
				if (rezvoc.getQuery().equals(new CQLQuery(""+pCooccurentQueryFilter+""))) { //$NON-NLS-1$ //$NON-NLS-2$
949
				if (rezvoc.getQuery().equals(new CQLQuery(pCooccurentQueryFilter))) {
938 950
					if (rezvoc.getFilterFmax() == null) {
939 951
						index = rezvoc;
940 952
						break;
......
942 954
				}
943 955
			}
944 956
		}
945

  
957
		
946 958
		if (index == null) {
947 959
			try {
948 960
				index = new Index(corpus);
961
				index.setVisible(false);
949 962
				index.setParameters(new CQLQuery(pCooccurentQueryFilter), pProperties, null, null, null, null);
950 963
				if (!index.compute(monitor)) {
951 964
					Log.severe("Cooccurrence internal Index compute failed. Aborting.");
952 965
					return false;
953 966
				}
954
			} catch (Exception e){
955
				Log.severe("Error while computing Index for the cooccurrence: "+e.getLocalizedMessage());
967
			}
968
			catch (Exception e) {
969
				Log.severe("Error while computing Index for the cooccurrence: " + e.getLocalizedMessage());
956 970
				return false;
957 971
			}
958 972
		}
959

  
973
		
960 974
		// ALTER THE INDEX IF A REFERENCE CORPUS IS SET -> this change the base frequencies
961 975
		if (referenceCorpus != null && referenceCorpus.length() > 0) {
962
			//voc.toTxt(new File("/home/mdecorde/TEMP/before.tsv"), "UTF-8", "\t", "");
976
			// voc.toTxt(new File("/home/mdecorde/TEMP/before.tsv"), "UTF-8", "\t", "");
963 977
			try {
964 978
				index.alterFrequencies(referenceCorpus);
965
			} catch (Exception e) {
979
			}
980
			catch (Exception e) {
966 981
				// TODO Auto-generated catch block
967 982
				org.txm.utils.logger.Log.printStackTrace(e);
968 983
				return false;
969 984
			}
970 985
		}
971

  
986
		
972 987
		List<org.txm.index.core.functions.Line> vocLines = index.getAllLines();
973 988
		int[][] freqs;
974 989
		String[] rownames;
975 990
		if (buildLexicalTableWithCooccurrents) {
976 991
			freqs = new int[counts.keySet().size()][2];
977 992
			rownames = new String[counts.keySet().size()];
978
		} else { // all words
993
		}
994
		else { // all words
979 995
			freqs = new int[vocLines.size()][2];
980 996
			rownames = new String[vocLines.size()];
981 997
		}
982

  
998
		
983 999
		int i = 0;
984
		//System.out.println("T voc : "+(System.currentTimeMillis()- time)); //$NON-NLS-1$
1000
		// System.out.println("T voc : "+(System.currentTimeMillis()- time)); //$NON-NLS-1$
985 1001
		// System.out.println("nb lines voc "+voclines.size());
986
		//System.out.println("counts keys: "+counts.keySet());
1002
		// System.out.println("counts keys: "+counts.keySet());
987 1003
		for (org.txm.index.core.functions.Line l : vocLines) {
988
			//System.out.println("L sign '"+l.getSignature()+"'");
1004
			// System.out.println("L sign '"+l.getSignature()+"'");
989 1005
			if (counts.keySet().contains(l.getSignature())) {
990 1006
				keysToString.put(l.toString(), l.getSignature());
991 1007
				rownames[i] = l.toString();
992
				//System.out.println("set rowname: "+l.toString());
993
				//System.out.println("("+l.getSignature()+", "+l.toString()+") : "+l.getFrequency()+" - "+counts.get(l.getSignature()));
1008
				// System.out.println("set rowname: "+l.toString());
1009
				// System.out.println("("+l.getSignature()+", "+l.toString()+") : "+l.getFrequency()+" - "+counts.get(l.getSignature()));
994 1010
				int count = counts.get(l.getSignature());
995 1011
				int tot = l.getFrequency();
996 1012
				indexfreqs.put(l.toString(), tot);
997

  
1013
				
998 1014
				freqs[i][0] = tot - count;
999 1015
				freqs[i][1] = count;
1000 1016
				i++;
1001
			} else if (!buildLexicalTableWithCooccurrents) {
1017
			}
1018
			else if (!buildLexicalTableWithCooccurrents) {
1002 1019
				keysToString.put(l.toString(), l.getSignature());
1003 1020
				rownames[i] = l.toString();
1004
				//System.out.println("set rowname: "+l.toString());
1005
				//System.out.println("("+l.getSignature()+", "+l.toString()+") : "+l.getFrequency()+" - "+counts.get(l.getSignature()));
1006

  
1021
				// System.out.println("set rowname: "+l.toString());
1022
				// System.out.println("("+l.getSignature()+", "+l.toString()+") : "+l.getFrequency()+" - "+counts.get(l.getSignature()));
1023
				
1007 1024
				int tot = l.getFrequency();
1008 1025
				indexfreqs.put(l.toString(), tot);
1009

  
1026
				
1010 1027
				freqs[i][0] = tot;
1011 1028
				freqs[i][1] = 0;
1012 1029
				i++;
......
1014 1031
		}
1015 1032
		index.delete(); // no more needed
1016 1033
		index = null;
1017

  
1018
		//time = System.currentTimeMillis();
1034
		
1035
		// time = System.currentTimeMillis();
1019 1036
		if (freqs.length == 0) {
1020 1037
			System.out.println(CooccurrenceCoreMessages.errorColonNoCooccurrents);
1021 1038
		}
1022
		//		try {
1023
		//			PrintWriter writer = IOUtils.getWriter("/home/mdecorde/test_cooc.txt");
1024
		//			//writer.println("Build LT: ");
1025
		//			//writer.println("freqs: "+Arrays.toString(freqs));
1026
		//			//writer.println("Rows: "+);
1027
		//			//String rows = Arrays.toString();
1028
		//			int nrow = rownames.length;
1029
		//			int ncol = colnames.length;
1030
		//			for (int ii = 0 ; ii < nrow ; ii++) {
1031
		//				writer.write(rownames[ii]);
1032
		//				for (int j = 0 ; j < ncol ; j++) {
1033
		//					writer.write("\t"+freqs[ii][j]);
1034
		//				}
1035
		//				writer.write("\n");
1036
		//			}
1037
		//			writer.close();
1038
		//			//writer.println("Cols: "+Arrays.toString(colnames));
1039
		//		} catch(Exception e) {e.printStackTrace();}
1040

  
1039
		// try {
1040
		// PrintWriter writer = IOUtils.getWriter("/home/mdecorde/test_cooc.txt");
1041
		// //writer.println("Build LT: ");
1042
		// //writer.println("freqs: "+Arrays.toString(freqs));
1043
		// //writer.println("Rows: "+);
1044
		// //String rows = Arrays.toString();
1045
		// int nrow = rownames.length;
1046
		// int ncol = colnames.length;
1047
		// for (int ii = 0 ; ii < nrow ; ii++) {
1048
		// writer.write(rownames[ii]);
1049
		// for (int j = 0 ; j < ncol ; j++) {
1050
		// writer.write("\t"+freqs[ii][j]);
1051
		// }
1052
		// writer.write("\n");
1053
		// }
1054
		// writer.close();
1055
		// //writer.println("Cols: "+Arrays.toString(colnames));
1056
		// } catch(Exception e) {e.printStackTrace();}
1057
		
1041 1058
		lt = new LexicalTableImpl(freqs, rownames, colnames);
1042

  
1043

  
1044
		//		if(referenceCorpus != null && referenceCorpus.length() > 0) {
1045
		//			//lt.removeCol(0, false);
1046
		//			lt.setReference(referenceCorpus);
1047
		//			lt.exchangeColumns(1,2);
1048
		//		}
1059
		
1060
		
1061
		// if(referenceCorpus != null && referenceCorpus.length() > 0) {
1062
		// //lt.removeCol(0, false);
1063
		// lt.setReference(referenceCorpus);
1064
		// lt.exchangeColumns(1,2);
1065
		// }
1049 1066
		return true;
1050 1067
	}
1051

  
1068
	
1052 1069
	/**
1053 1070
	 * Step build signatures.
1054 1071
	 *
......
1058 1075
	 * @throws CqiServerError the cqi server error
1059 1076
	 */
1060 1077
	public boolean stepBuildSignatures() throws UnexpectedAnswerException, IOException, CqiServerError {
1061
		allsignaturesstr = new HashMap<Integer, String>();
1062
		Set<Integer> allpositions = new HashSet<Integer>(); // no duplicates
1078
		allsignaturesstr = new HashMap<>();
1079
		Set<Integer> allpositions = new HashSet<>(); // no duplicates
1063 1080
		for (Match n : m2) {
1064 1081
			for (int i = n.getStart(); i <= n.getEnd(); i++) {
1065 1082
				allpositions.add(i);
1066 1083
			}
1067 1084
		}
1068
		//System.out.println("Position set: "+allpositions.size());
1069

  
1085
		// System.out.println("Position set: "+allpositions.size());
1086
		
1070 1087
		int[] allpositionsarray = new int[allpositions.size()];
1071 1088
		int pcount = 0;
1072 1089
		for (int p : allpositions) {
1073 1090
			allpositionsarray[pcount++] = p;
1074 1091
		}
1075

  
1076
		HashMap<Property, int[]> propsId = new HashMap<Property, int[]>();
1092
		
1093
		HashMap<Property, int[]> propsId = new HashMap<>();
1077 1094
		// HashMap<Property, String[]> propsValues = new HashMap<Property, String[]>();
1078 1095
		for (Property property : pProperties) {
1079 1096
			int[] indices = CorpusManager.getCorpusManager().getCqiClient()
......
1082 1099
			// CorpusManager.getCorpusManager().getCqiClient().cpos2Str(property.getQualifiedName(),allpositionsarray);
1083 1100
			propsId.put(property, indices);
1084 1101
			// propsValues.put(property, values);
1085
			//System.out.println("all "+property+" indices: "+propsId.get(property).length);
1102
			// System.out.println("all "+property+" indices: "+propsId.get(property).length);
1086 1103
		}
1087

  
1088
		//System.out.println("T values + ids: "+(System.currentTimeMillis()- time)); //$NON-NLS-1$
1089

  
1104
		
1105
		// System.out.println("T values + ids: "+(System.currentTimeMillis()- time)); //$NON-NLS-1$
1106
		
1090 1107
		pcount = 0;
1091 1108
		for (int position : allpositionsarray) {
1092
			//String sign = ""; //$NON-NLS-1$
1109
			// String sign = ""; //$NON-NLS-1$
1093 1110
			String signstr = ""; //$NON-NLS-1$
1094 1111
			for (Property property : pProperties) {
1095 1112
				signstr += "[" + propsId.get(property)[pcount] + "]"; //$NON-NLS-1$ //$NON-NLS-2$
1096
				//signstr+="["+propsValues.get(property)[pcount]+"]"; //$NON-NLS-1$ //$NON-NLS-2$
1113
				// signstr+="["+propsValues.get(property)[pcount]+"]"; //$NON-NLS-1$ //$NON-NLS-2$
1097 1114
			}
1098 1115
			// allsignatures.put(position, sign);
1099 1116
			allsignaturesstr.put(position, signstr);
1100 1117
			pcount++;
1101

  
1118
			
1102 1119
		}
1103 1120
		return true;
1104 1121
	}
1105

  
1122
	
1106 1123
	/**
1107 1124
	 * Step count.
1108 1125
	 *
1109 1126
	 * @return true, if successful
1110 1127
	 */
1111 1128
	public boolean stepCount() {
1112
		ArrayList<Integer> keepedPosition = new ArrayList<Integer>();
1113

  
1129
		ArrayList<Integer> keepedPosition = new ArrayList<>();
1130
		
1114 1131
		int startsearchM2 = 0; // optimisation: m2 is ordered
1115 1132
		int startsearchM3 = 0; // optimisation: m3 is ordered
1116
		//time = System.currentTimeMillis();
1133
		// time = System.currentTimeMillis();
1117 1134
		for (Match m : m1) { // for each match = for each focus
1118 1135
			
1119 1136
			if (m.getTarget() >= 0) { // if target is set focus on target position
1120 1137
				m.setStart(m.getTarget());
1121 1138
				m.setEnd(m.getTarget());
1122 1139
			}
1123
			//System.out.println("for match m: "+m);
1140
			// System.out.println("for match m: "+m);
1124 1141
			Match n = null; // the match which contains the context
1125 1142
			Match o = null; // the match which does not contain the context
1126 1143
			boolean matchFound = false;
1127 1144
			for (int i = startsearchM2; i < m2.size(); i++) { // find n
1128 1145
				n = m2.get(i);
1129 1146
				if (n.getStart() <= m.getStart() && m.getEnd() <= n.getEnd()) {
1130
					startsearchM2 = i;	
1147
					startsearchM2 = i;
1131 1148
					matchFound = true;
1132 1149
					break;
1133 1150
				}
1134 1151
			}
1135 1152
			if (Thread.interrupted()) return false; // stop if interrupted by user
1136
			//System.out.println("found n: "+n);
1137

  
1153
			// System.out.println("found n: "+n);
1154
			
1138 1155
			for (int i = startsearchM3; i < m3.size(); i++) { // find next match m3 contained by m2
1139

  
1156
				
1140 1157
				o = m3.get(i);
1141 1158
				if (o.getStart() <= m.getStart() && m.getEnd() <= o.getEnd()) {
1142 1159
					startsearchM3 = i;
......
1144 1161
					break;
1145 1162
				}
1146 1163
			}
1147
			//System.out.println("found o: "+o);
1148

  
1164
			// System.out.println("found o: "+o);
1165
			
1149 1166
			if (!matchFound) {
1150 1167
				continue;
1151 1168
			}
1152

  
1169
			
1153 1170
			int start = n.getStart();
1154
			int size = n.getEnd() - start +1;
1155
			//if (size > 0)
1156
			//	size++;
1171
			int size = n.getEnd() - start + 1;
1172
			// if (size > 0)
1173
			// size++;
1157 1174
			// System.out.println("Process focus:"+m+" with maxcontext:"+n+" and anticontext:"+o);
1158 1175
			// System.out.println("NbOccs "+(size));
1159 1176
			int[] positions = new int[size];
1160 1177
			int noOcc = 0;
1161

  
1162
			//System.out.println("positions");
1178
			
1179
			// System.out.println("positions");
1163 1180
			// System.out.println("start: "+(start)+" end:"+n.getEnd());
1164
			for (int position = start; position <= n.getEnd(); position++) { 
1181
			for (int position = start; position <= n.getEnd(); position++) {
1165 1182
				// creates the list of positions, anticontext not yet removed
1166 1183
				positions[noOcc++] = position;
1167
				//System.out.print(" "+position);
1184
				// System.out.print(" "+position);
1168 1185
			}
1169

  
1186
			
1170 1187
			noOcc = 0;
1171 1188
			for (int position : positions) { // cooccurent words positions
1172 1189
				// String signature = allsignatures.get(position);
1173 1190
				String signaturestr = allsignaturesstr.get(position);
1174
				if (o.getStart() <= position && position <= o.getEnd()) { 
1191
				if (o.getStart() <= position && position <= o.getEnd()) {
1175 1192
					// ignore positions in the anticontext positions
1176 1193
					continue;
1177 1194
				}
1178

  
1195
				
1179 1196
				if (!distances.containsKey(signaturestr)) {
1180 1197
					distances.put(signaturestr, 0.0);
1181 1198
					distancesCounts.put(signaturestr, 0);
1182 1199
				}
1183

  
1200
				
1184 1201
				if (counted.containsKey(position)) {
1185 1202
					// ignore positions already counted
1186 1203
					counted.put(position, counted.get(position) + 1);
......
1193 1210
					}
1194 1211
					counts.put(signaturestr, (counts.get(signaturestr)) + 1);
1195 1212
					keepedPosition.add(position);
1196

  
1213
					
1197 1214
					double dist;
1198 1215
					if (position <= m.getStart()) {
1199 1216
						dist = m.getStart() - position;
1200
					} else if (m.getEnd() <= position) {
1217
					}
1218
					else if (m.getEnd() <= position) {
1201 1219
						dist = position - m.getEnd();
1202
					} else { // the n match is in the m match !?
1203
						System.out.println("Warning: the  n match is in the m match ? "+n+ " "+m);
1220
					}
1221
					else { // the n match is in the m match !?
1222
						System.out.println("Warning: the  n match is in the m match ? " + n + " " + m);
1204 1223
						dist = 0;
1205 1224
					}
1206

  
1225
					
1207 1226
					distances.put(signaturestr, (distances.get(signaturestr)) + dist);
1208 1227
					distancesCounts.put(signaturestr, (distancesCounts.get(signaturestr)) + 1);
1209

  
1228
					
1210 1229
				}
1211

  
1230
				
1212 1231
				noOcc++;
1213 1232
			}
1214

  
1233
			
1215 1234
			// System.out.println("nb Occ ignored: "+ignore);
1216 1235
			// System.out.println("nb Occ chevauche: "+chevauche);
1217 1236
		}
1218
		//System.out.println("T counts : "+(System.currentTimeMillis()- time)); //$NON-NLS-1$
1219

  
1237
		// System.out.println("T counts : "+(System.currentTimeMillis()- time)); //$NON-NLS-1$
1238
		
1220 1239
		allsignaturesstr = null; // no more need
1221
		//		counted = null;
1240
		// counted = null;
1222 1241
		return true;
1223 1242
	}
1224

  
1243
	
1225 1244
	/**
1226 1245
	 * Step get matches.
1227 1246
	 *
1228 1247
	 * @return true, if successful
1229
	 * @throws CqiClientException 
1248
	 * @throws CqiClientException
1230 1249
	 */
1231 1250
	public boolean stepGetMatches() throws CqiClientException {
1232 1251
		CQPCorpus corpus = this.getCorpus();
1233 1252
		QueryResult r1 = corpus.query(pQuery, "CoocFocusQuery", false); // keywords positions //$NON-NLS-1$
1234 1253
		QueryResult r2 = corpus.query(contextQuery, "CoocContextFocusQuery", false); // max context //$NON-NLS-1$
1235 1254
		QueryResult r3 = corpus.query(anticontextquery, "CoocAntiContextFocusQuery", false); // no context //$NON-NLS-1$
1236

  
1255
		
1237 1256
		m1 = r1.getMatches();
1238 1257
		numberOfKeyword = m1.size();
1239 1258
		m2 = r2.getMatches();
1240 1259
		m3 = r3.getMatches();
1241

  
1242
		//		System.out.println(query+" M1 size: "+m1.size());
1243
		//		System.out.println(contextquery+" M2 size: "+m2.size());
1244
		//		System.out.println(anticontextquery+" M3 size: "+m3.size());
1260
		
1261
		// System.out.println(query+" M1 size: "+m1.size());
1262
		// System.out.println(contextquery+" M2 size: "+m2.size());
1263
		// System.out.println(anticontextquery+" M3 size: "+m3.size());
1245 1264
		r1.drop();
1246 1265
		r2.drop();
1247 1266
		r3.drop();
1248 1267
		return true;
1249 1268
	}
1250

  
1269
	
1251 1270
	/**
1252 1271
	 * Step get scores.
1253 1272
	 *
......
1256 1275
	 * @throws StatException the stat exception
1257 1276
	 */
1258 1277
	public boolean stepGetScores() throws CqiClientException, StatException {
1259

  
1278
		
1260 1279
		SpecificitiesR specif = new SpecificitiesR(lt);
1261
		//System.out.println("Specif N part: "+specif.getNbrPart()); //$NON-NLS-1$
1262
		//System.out.println("Specif N lines number: "+specif.getSpecificitesIndex().length); //$NON-NLS-1$
1263
		//System.out.println("T specif e: "+(System.currentTimeMillis()- time)); //$NON-NLS-1$
1264
		//specif.toTxt(new File("~/Bureau/coocresults/specif Cooc")); //$NON-NLS-1$
1280
		// System.out.println("Specif N part: "+specif.getNbrPart()); //$NON-NLS-1$
1281
		// System.out.println("Specif N lines number: "+specif.getSpecificitesIndex().length); //$NON-NLS-1$
1282
		// System.out.println("T specif e: "+(System.currentTimeMillis()- time)); //$NON-NLS-1$
1283
		// specif.toTxt(new File("~/Bureau/coocresults/specif Cooc")); //$NON-NLS-1$
1265 1284
		String[] specifrownames = specif.getRowNames().asStringsArray();
1266 1285
		double[][] scores = specif.getScores();
1267
		//System.out.println("Nb specif result: "+specif.getSpecificitesIndex().length);
1268

  
1286
		// System.out.println("Nb specif result: "+specif.getSpecificitesIndex().length);
1287
		
1269 1288
		int iimax = Math.min(specifrownames.length, scores.length);
1270 1289
		for (int ii = 0; ii < iimax; ii++) { // counts.keySet())
1271 1290
			String signaturestr = keysToString.get(specifrownames[ii]);
1272

  
1273
			ArrayList<String> props = new ArrayList<String>();
1291
			
1292
			ArrayList<String> props = new ArrayList<>();
1274 1293
			if (pProperties.size() > 1) {
1275 1294
				String[] splited = specifrownames[ii].split("_", pProperties.size()); //$NON-NLS-1$
1276

  
1295
				
1277 1296
				for (int p = 0; p < pProperties.size(); p++) {
1278 1297
					props.add(splited[p]);
1279 1298
				}
......
1281 1300
			else {
1282 1301
				props.add(specifrownames[ii]);
1283 1302
			}
1284
			//			if(specifrownames[ii].equals("(") || specifrownames[ii].equals(")"))
1285
			//			{
1286
			//				System.out.println("rowname: "+specifrownames[ii]);
1287
			//				System.out.println("props: "+props);
1288
			//				System.out.println("counts: "+counts.get(signaturestr));
1289
			//				System.out.println("speciffreq: "+indexfreqs.get(specifrownames[ii]));
1290
			//				System.out.println("specif score: "+scores[ii][1]);
1291
			//				System.out.println("distance: "+distances.get(signaturestr));
1292
			//				System.out.println("distance count: "+distancescounts.get(signaturestr));
1293
			//			}
1303
			// if(specifrownames[ii].equals("(") || specifrownames[ii].equals(")"))
1304
			// {
1305
			// System.out.println("rowname: "+specifrownames[ii]);
1306
			// System.out.println("props: "+props);
1307
			// System.out.println("counts: "+counts.get(signaturestr));
1308
			// System.out.println("speciffreq: "+indexfreqs.get(specifrownames[ii]));
1309
			// System.out.println("specif score: "+scores[ii][1]);
1310
			// System.out.println("distance: "+distances.get(signaturestr));
1311
			// System.out.println("distance count: "+distancescounts.get(signaturestr));
1312
			// }
1294 1313
			if (counts.containsKey(signaturestr)) {
1295 1314
				CLine cline = new CLine(this, specifrownames[ii], props, counts
1296 1315
						.get(signaturestr), indexfreqs.get(specifrownames[ii]), scores[ii][1],
1297 1316
						((float) (distances.get(signaturestr) / distancesCounts
1298 1317
								.get(signaturestr))) - 1.0f, -1);
1299
				//System.out.println(cline);
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff