Revision 468

tmp/org.txm.cql2lsa.rcp/.classpath (revision 468)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<classpath>
3
	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
4
	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins">
5
		<accessrules>
6
			<accessrule kind="accessible" pattern="**"/>
7
		</accessrules>
8
	</classpathentry>
9
	<classpathentry kind="src" path="src"/>
10
	<classpathentry kind="output" path="bin"/>
11
</classpath>
0 12

  
tmp/org.txm.cql2lsa.rcp/META-INF/MANIFEST.MF (revision 468)
1
Manifest-Version: 1.0
2
Bundle-ManifestVersion: 2
3
Bundle-Name: CQL2LSA
4
Bundle-SymbolicName: CQL2LSA;singleton:=true
5
Bundle-Version: 1.0.0.qualifier
6
Bundle-Activator: cql2lsarcp.Activator
7
Require-Bundle: org.txm.core;bundle-version="0.7.0",
8
 org.txm.rcp;bundle-version="0.7.8",
9
 org.eclipse.ui,
10
 org.eclipse.core.runtime,
11
 org.eclipse.ui.editors;bundle-version="3.8.100",
12
 org.eclipse.core.expressions;bundle-version="3.4.500",
13
 org.txm.index.core,
14
 org.txm.lexicaltable.core,
15
 org.txm.statsengine.r.core
16
Bundle-RequiredExecutionEnvironment: JavaSE-1.6
17
Bundle-ActivationPolicy: lazy
18
Export-Package: cql2lsarcp,
19
 org.txm.functions.cql2lsa,
20
 org.txm.rcpapplication.commands.function
21
Bundle-Vendor: Textometrie.org
0 22

  
tmp/org.txm.cql2lsa.rcp/.project (revision 468)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<projectDescription>
3
	<name>CQL2LSARCP</name>
4
	<comment></comment>
5
	<projects>
6
	</projects>
7
	<buildSpec>
8
		<buildCommand>
9
			<name>org.eclipse.jdt.core.javabuilder</name>
10
			<arguments>
11
			</arguments>
12
		</buildCommand>
13
		<buildCommand>
14
			<name>org.eclipse.pde.ManifestBuilder</name>
15
			<arguments>
16
			</arguments>
17
		</buildCommand>
18
		<buildCommand>
19
			<name>org.eclipse.pde.SchemaBuilder</name>
20
			<arguments>
21
			</arguments>
22
		</buildCommand>
23
	</buildSpec>
24
	<natures>
25
		<nature>org.eclipse.pde.PluginNature</nature>
26
		<nature>org.eclipse.jdt.core.javanature</nature>
27
	</natures>
28
</projectDescription>
0 29

  
tmp/org.txm.cql2lsa.rcp/src/cql2lsarcp/Activator.java (revision 468)
1
package cql2lsarcp;
2

  
3
import org.eclipse.jface.resource.ImageDescriptor;
4
import org.eclipse.ui.plugin.AbstractUIPlugin;
5
import org.osgi.framework.BundleContext;
6

  
7
/**
8
 * The activator class controls the plug-in life cycle
9
 */
10
public class Activator extends AbstractUIPlugin {
11

  
12
	// The plug-in ID
13
	public static final String PLUGIN_ID = "CQL2LSA"; //$NON-NLS-1$
14

  
15
	// The shared instance
16
	private static Activator plugin;
17
	
18
	/**
19
	 * The constructor
20
	 */
21
	public Activator() {
22
	}
23

  
24
	/*
25
	 * (non-Javadoc)
26
	 * @see org.eclipse.ui.plugin.AbstractUIPlugin#start(org.osgi.framework.BundleContext)
27
	 */
28
	public void start(BundleContext context) throws Exception {
29
		super.start(context);
30
		plugin = this;
31
	}
32

  
33
	/*
34
	 * (non-Javadoc)
35
	 * @see org.eclipse.ui.plugin.AbstractUIPlugin#stop(org.osgi.framework.BundleContext)
36
	 */
37
	public void stop(BundleContext context) throws Exception {
38
		plugin = null;
39
		super.stop(context);
40
	}
41

  
42
	/**
43
	 * Returns the shared instance
44
	 *
45
	 * @return the shared instance
46
	 */
47
	public static Activator getDefault() {
48
		return plugin;
49
	}
50

  
51
	/**
52
	 * Returns an image descriptor for the image file at the given
53
	 * plug-in relative path
54
	 *
55
	 * @param path the path
56
	 * @return the image descriptor
57
	 */
58
	public static ImageDescriptor getImageDescriptor(String path) {
59
		return imageDescriptorFromPlugin(PLUGIN_ID, path);
60
	}
61
}
0 62

  
tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/ExpII.java (revision 468)
1
package org.txm.functions.cql2lsa;
2

  
3
import java.io.BufferedReader;
4
import java.io.BufferedWriter;
5
import java.io.File;
6
import java.io.FileInputStream;
7
import java.io.FileOutputStream;
8
import java.io.IOException;
9
import java.io.InputStreamReader;
10
import java.io.OutputStreamWriter;
11
import java.io.PrintWriter;
12
import java.util.ArrayList;
13
import java.util.Collection;
14
import java.util.Collections;
15
import java.util.Comparator;
16
import java.util.HashMap;
17
import java.util.LinkedHashMap;
18
import java.util.List;
19
import java.util.Map;
20

  
21
import org.eclipse.core.runtime.IAdaptable;
22
import org.eclipse.jface.resource.ImageDescriptor;
23
import org.eclipse.ui.model.IWorkbenchAdapter;
24
import org.txm.core.messages.TXMCoreMessages;
25
import org.txm.core.results.TXMResult;
26
import org.txm.functions.ProgressWatcher;
27
import org.txm.functions.TXMCommand;
28
import org.txm.index.core.functions.LineComparator.SortMode;
29
import org.txm.index.core.messages.IndexCoreMessages;
30
import org.txm.lexicaltable.core.functions.LexicalTable;
31
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl;
32
import org.txm.rcp.Messages;
33
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
34
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException;
35
import org.txm.searchengine.cqp.corpus.Corpus;
36
import org.txm.searchengine.cqp.corpus.QueryResult;
37
import org.txm.searchengine.cqp.corpus.query.Match;
38
import org.txm.searchengine.cqp.corpus.query.Query;
39
import org.txm.searchengine.cqp.serverException.CqiServerError;
40
import org.txm.statsengine.r.core.exceptions.RWorkspaceException;
41
import org.txm.utils.logger.Log;
42

  
43
public class ExpII extends TXMCommand implements IAdaptable {
44
	Corpus corpus;
45
	List<String> texts;
46
	int[] textBoundaries;
47

  
48
	LinkedHashMap<String, QueryIndexLine> lines = new LinkedHashMap<String, QueryIndexLine>();
49

  
50
	/** The writer. */
51
	private OutputStreamWriter writer;
52

  
53
	/**
	 * Creates the command for a corpus and caches the corpus text identifiers
	 * and text end limits.
	 *
	 * @param corpus the corpus the queries will be run on
	 */
	public ExpII(Corpus corpus) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException {
		this.corpus = corpus;
		this.texts = corpus.getTextsID();
		this.textBoundaries = corpus.getTextEndLimits();
	}
59

  
60
	public int getT() {
61
		int t = 0;
62
		for (QueryIndexLine line : lines.values()) {
63
			t += line.getFrequency();
64
		}
65
		return t;
66
	}
67

  
68
	public int getFmin() {
69
		int t = 999999999;
70
		for (QueryIndexLine line : lines.values()) {
71
			int f = line.getFrequency();
72
			if (f < t) t = f;
73
		}
74
		return t;
75
	}
76

  
77
	public int getFmax() {
78
		int t = 0;
79
		for (QueryIndexLine line : lines.values()) {
80
			int f = line.getFrequency();
81
			if (f > t) t = f;
82
		}
83
		return t;
84
	}
85

  
86
	public int getV() {
87
		return lines.values().size();
88
	}
89

  
90
	public String getName() {
91
		return corpus.getName();
92
	}
93

  
94
	public List<String> getTextNames() {
95
		return texts;
96
	}
97

  
98
	public TXMResult getParent() {
99
		return corpus;
100
	}
101

  
102
	public Corpus getCorpus() {
103
		return corpus;
104
	}
105

  
106
	public Collection<QueryIndexLine> getLines() {
107
		return lines.values();
108
	}
109

  
110
	public LinkedHashMap<String, QueryIndexLine> getLinesHash() {
111
		return lines;
112
	}
113

  
114
	int multi = 1;
115
	public void sortLines(SortMode mode, boolean revert) {
116

  
117
		multi = 1;
118
		if (revert) multi = -1;
119
		List<Map.Entry<String, QueryIndexLine>> entries =
120
				new ArrayList<Map.Entry<String, QueryIndexLine>>(lines.entrySet());
121

  
122
		if (mode == SortMode.FREQUNIT) {
123
			Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() {
124
				public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){
125
					int ret = multi * (a.getValue().getFrequency() - b.getValue().getFrequency());
126
					if (ret == 0) {
127
						return multi * a.getValue().getName().compareTo(b.getValue().getName());
128
					}
129
					return ret;
130
				}
131
			});
132
		} else if (mode == SortMode.FREQ) {
133
			Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() {
134
				public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){
135
					return multi * (a.getValue().getFrequency() - b.getValue().getFrequency());
136
				}
137
			});
138
		} else if (mode == SortMode.UNIT) {
139
			Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() {
140
				public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){
141
					return multi * a.getValue().getName().compareTo(b.getValue().getName());
142
				}
143
			});
144
		} else if (mode == SortMode.UNITFREQ) {
145
			Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() {
146
				public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){
147
					int ret = multi * a.getValue().getName().compareTo(b.getValue().getName());
148
					if (ret == 0) {
149
						return multi * (a.getValue().getFrequency() - b.getValue().getFrequency());
150
					}
151
					return ret;
152
				}
153
			});
154
		}
155

  
156
		LinkedHashMap<String, QueryIndexLine> sortedMap = new LinkedHashMap<String, QueryIndexLine>();
157
		for (Map.Entry<String, QueryIndexLine> entry : entries) {
158
			sortedMap.put(entry.getKey(), entry.getValue());
159
		}
160

  
161
		lines = sortedMap;
162
	}
163

  
164
	HashMap<String, int[]> keywordStartPositions;
165
	HashMap<String, int[]> keywordEndPositions;
166

  
167
	public void compute(File queriesFile, File keywordFile, File outputDir) throws CqiClientException, IOException, CqiServerError {
168
		System.out.println("Starting ExpII");
169
		
170
		File outputFile = new File(outputDir, "doc_word_freq_keyword_2.txt");
171
		File lexiconFile = new File(outputDir, "lexicon_2.txt");
172
		
173
		PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8")));
174
		
175
		keywordStartPositions = new HashMap<String, int[]>();
176
		keywordEndPositions = new HashMap<String, int[]>();
177
		HashMap<String, String> keywordQueriestoName = new HashMap<String, String>();
178
		int idx_lexicon_counter = 0;
179
		
180
		// assos query et idx
181
		// si queries est dans keywords alors, pas la peine de calculer keyword, parce que la query est un keyword
182
		LinkedHashMap<String, Integer> idxLexicon = new LinkedHashMap<String, Integer>();
183
		LinkedHashMap<String, int[]> keywordsMaxCountsLexicon = new LinkedHashMap<String, int[]>(); 
184
		
185
		BufferedReader keywordFileReader = new BufferedReader(new InputStreamReader(
186
				new FileInputStream(keywordFile), "UTF-8")); //$NON-NLS-1$
187
		ArrayList<String> keywordLines = new ArrayList<String>();
188
		String l = keywordFileReader.readLine();
189
		while (l != null) {
190
			keywordLines.add(l);
191
			l = keywordFileReader.readLine();
192
		}
193
		keywordFileReader.close();
194
		System.out.println("Number of keywords lines: "+keywordLines.size());
195
		monitor.beginTask("Querying keywords...", keywordLines.size());
196

  
197
		int nkeyword = 0;
198
		for (String line : keywordLines) {
199
			String[] split = line.split("=", 2); //$NON-NLS-1$
200
			if (split.length == 2) {
201
				Query q = new Query(split[1]);
202
				keywordQueriestoName.put(split[1], split[0]);
203
				QueryResult result = corpus.query(q, "K"+nkeyword++, false); //$NON-NLS-1$
204
				keywordStartPositions.put(split[1], result.getStarts());
205
				keywordEndPositions.put(split[1], result.getEnds());
206
				keywordsMaxCountsLexicon.put(split[1], new int[textBoundaries.length]);
207
				idxLexicon.put(split[1], idx_lexicon_counter++);
208
				monitor.worked(1);
209
			}
210
		}
211
		keywordFileReader.close();
212
		
213
		BufferedReader queriesFileReader = new BufferedReader(new InputStreamReader(new FileInputStream(queriesFile), "UTF-8")); //$NON-NLS-1$
214
		ArrayList<String> lines = new ArrayList<String>();
215
		l = queriesFileReader.readLine();
216
		while (l != null) {
217
			lines.add(l);
218
			l = queriesFileReader.readLine();
219
		}
220
		queriesFileReader.close();
221

  
222
		System.out.println("Number of lemma lines: "+lines.size());
223
		monitor.beginTask("Querying...", lines.size());
224

  
225
		int nquery = 0;
226
		for (String line : lines) {
227
			String[] split = line.split("=", 2); //$NON-NLS-1$
228
			if (split.length == 2) {
229
				if (hasLine(split[0])) {
230
					System.out.println(TXMCoreMessages.QueryIndex_2+line);
231
				} else {
232
					QueryResult result = corpus.query(new Query(split[1]), "Q"+nquery++, false); //$NON-NLS-1$
233
					int[] starts = result.getStarts();
234
					int[] ends =  result.getEnds();
235

  
236
					int[] counts = new int[textBoundaries.length];
237
					int count = 0;
238
					int noText = 0;
239
					int endOfCurrentText = textBoundaries[noText];
240
					for (int i : starts) {
241
						while (i >= endOfCurrentText) {
242
							if (noText >= textBoundaries.length) break;
243
							counts[noText] = count;
244
							noText++;
245
							if (noText >= textBoundaries.length) break;
246
							endOfCurrentText = textBoundaries[noText];
247
							counts[noText] = 0;
248
							count = 0;
249
						}
250
						count++;
251
					}
252
					counts[noText] = count;
253

  
254
					if (idxLexicon.containsKey(split[1])) { // the query is already computed !!
255
						for (noText = 0 ; noText < textBoundaries.length ; noText++) {
256
							if (counts[noText] > 0) {
257
								//System.out.println("MCL-"+split[1]+"\t"+noText+"\t"+idxLexicon.get(split[1])+"\t"+counts[noText]+"\t"+idxLexicon.get(split[1]));
258
								writer.println(noText+"\t"+idxLexicon.get(split[1])+"\t"+counts[noText]+"\t"+idxLexicon.get(split[1]));
259
							}
260
						}
261
					} else { // test if match is covered by keywords
262
						idxLexicon.put(split[1], idx_lexicon_counter++); // put Lemma query
263
						
264
						//int max_sum_f = 0;
265
						int[] max_f = new int[textBoundaries.length];
266
						String[] max_key = new String[textBoundaries.length];
267
						
268
						for (String key : keywordEndPositions.keySet()) {
269
							int[] keyMaxValues = keywordsMaxCountsLexicon.get(key);
270
							int[] f = covered(starts, ends, keywordStartPositions.get(key), keywordEndPositions.get(key));
271
							for (noText = 0 ; noText < textBoundaries.length ; noText++) {
272
								if (f[noText] > max_f[noText]) {
273
									max_f[noText] = f[noText];
274
									max_key[noText] = key;
275
								}
276
								if (f[noText] > keyMaxValues[noText]) {
277
									keyMaxValues[noText] = f[noText];
278
								}
279
							}
280
						}
281
//						System.out.println("max_f="+Arrays.toString(counts));
282
//						System.out.println("max_f="+Arrays.toString(max_f));
283
						for (noText = 0 ; noText < textBoundaries.length ; noText++) {
284
							int F = counts[noText] - max_f[noText];
285
							if (F > 0) {
286
								//System.out.println("L-"+split[1]+"\t"+noText+"\t"+idxLexicon.get(split[1])+"\t"+F+"\t0");
287
								writer.println(noText+"\t"+idxLexicon.get(split[1])+"\t"+F+"\t0");
288
							}
289
						}
290
					}				
291
				}
292
			}
293
			monitor.worked(1);
294
		}
295
		
296
		monitor.setMessage("Finalizing doc_word_freq_2.txt file...");
297
		for (String key : keywordsMaxCountsLexicon.keySet()) {
298
			int[] keyMaxValues = keywordsMaxCountsLexicon.get(key);
299
			for (int noText = 0 ; noText < textBoundaries.length ; noText++) {
300
				int F = keyMaxValues[noText];
301
				if (F > 0) {
302
					//System.out.println("MC-"+key+"\t"+noText+"\t"+idxLexicon.get(key)+"\t"+F+"\t"+idxLexicon.get(key));
303
					writer.println(noText+"\t"+idxLexicon.get(key)+"\t"+F+"\t"+idxLexicon.get(key));
304
				}
305
			}
306
		}
307
		queriesFileReader.close();
308
		writer.close();
309
		
310
		monitor.setMessage("Writing lexicon file...");
311
		writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(lexiconFile), "UTF-8")));
312
		for (String query : idxLexicon.keySet()) {
313
			String name = keywordQueriestoName.get(query);
314
			if (name != null) {
315
				writer.println(keywordQueriestoName.get(query));
316
			} else {
317
				writer.println(query);
318
			}
319
			
320
		}
321
		writer.close();
322
		
323
		System.out.println("Done, result saved in: \n - "+outputFile.getAbsolutePath()+"\n - "+lexiconFile.getAbsolutePath());
324
	}
325

  
326
	/**
327
	 * Write all the lines on a writer.
328
	 *
329
	 * @param outfile the outfile
330
	 * @param encoding the encoding
331
	 * @param colseparator the colseparator
332
	 * @param txtseparator the txtseparator
333
	 * @return true, if successful
334
	 */
335
	public boolean toTxt(File outfile, String encoding, String colseparator, String txtseparator) {
336
		try {
337
			toTxt(outfile, 0, lines.size(), encoding, colseparator, txtseparator);
338
		} catch (Exception e) {
339
			System.err.println(IndexCoreMessages.Index_7 + Log.toString(e));
340
			return false;
341
		}
342
		return true;
343
	}
344

  
345
	/**
346
	 * Write the lines between from and to on a writer.
347
	 *
348
	 * @param outfile the outfile
349
	 * @param from The first line to be written
350
	 * @param to The last line to be writen
351
	 * @param encoding the encoding
352
	 * @param colseparator the colseparator
353
	 * @param txtseparator the txtseparator
354
	 * @throws CqiClientException the cqi client exception
355
	 * @throws IOException Signals that an I/O exception has occurred.
356
	 */
357
	public void toTxt(File outfile, int from, int to, String encoding, String colseparator, String txtseparator)
358
			throws CqiClientException, IOException {
359
		// NK: writer declared as class attribute to perform a clean if the operation is interrupted
360
		this.writer = new OutputStreamWriter(new FileOutputStream(outfile),
361
				encoding); 
362
		String header = "Queries"; //$NON-NLS-1$
363
		header = txtseparator+ header.substring(0, header.length() - 1) +txtseparator;
364
		header += colseparator+ txtseparator+ "F" + txtseparator; //$NON-NLS-1$
365

  
366
		for (String t : texts)
367
			header += colseparator + txtseparator+ t.replace(txtseparator, txtseparator+txtseparator)+txtseparator; 
368
		header += "\n"; //$NON-NLS-1$
369
		writer.write(header);
370

  
371
		// for(Line ligne: lines)
372
		for (String name : lines.keySet()) {
373
			QueryIndexLine ligne = lines.get(name);
374
			writer.write(txtseparator+ ligne.getName().replace(txtseparator, txtseparator+txtseparator)+ txtseparator + colseparator + ligne.getFrequency()); 
375

  
376
			for (int j = 0; j < texts.size(); j++)
377
				writer.write(colseparator + ligne.getFrequency(j)); 
378
			writer.write("\n"); //$NON-NLS-1$
379
		}
380
		writer.flush();
381
		writer.close();
382
	}
383

  
384
	/**
385
	 * Only one query result and uses texts boundaries to count frequencies for each text
386
	 * @param name
387
	 * @param query
388
	 * @return
389
	 * @throws CqiClientException
390
	 */
391
	public QueryIndexLine addLine(String name, Query query) throws CqiClientException {
392
		if (lines.containsKey(name)) return null;
393
		QueryResult qresult = corpus.query(query, "tmp", true); //$NON-NLS-1$
394
		//System.out.println(query.toString()+" "+qresult.getNMatch());
395
		int[] counts = new int[textBoundaries.length];
396
		int count = 0;
397
		int noText = 0;
398
		int endOfCurrentText = textBoundaries[noText];
399
		for (Match m : qresult.getMatches()) {
400
			while (m.getStart() >= endOfCurrentText) {
401
				//System.out.println(m.getStart() +">="+endOfCurrentText);
402
				if (noText >= textBoundaries.length) break;
403
				//System.out.println("Text: "+texts.get(noText)+" count="+count+" notext="+noText);
404
				counts[noText] = count;
405
				noText++;
406
				if (noText >= textBoundaries.length) break;
407
				endOfCurrentText = textBoundaries[noText];
408
				counts[noText] = 0;
409
				count = 0;
410
			}
411
			count++;
412
		}
413
		//System.out.println(noText +"<?"+textBoundaries.length+" count="+count);
414
		if (noText < textBoundaries.length) // last text
415
			counts[noText] = count;
416

  
417
		qresult.drop();
418

  
419
		QueryIndexLine line = new QueryIndexLine(name, query, null);
420
		line.setFrequencies(counts);
421
		lines.put(name, line);
422
		return line;
423
	}
424

  
425
	public LexicalTable toLexicalTable() {
426

  
427
		int npart = texts.size();
428
		int[][] freqs = new int[lines.size()][npart];
429
		String[] rownames = new String[lines.size()];
430
		String[] colnames = new String[npart];
431

  
432
		for (int i = 0 ; i < lines.size() ; i++) {
433
			QueryIndexLine line = lines.get(i);
434
			int[] linefreqs = line.getFreqs();
435
			rownames[i] = line.getName();
436
			for (int j = 0 ; j < npart ; j++) {
437
				freqs[i][j] = linefreqs[j];
438
			}
439
		}
440
		int j = 0;
441
		for (String t : texts) {
442
			colnames[j] = t;
443
			j++;
444
		}
445

  
446
		try {
447
			LexicalTable lt = new LexicalTable(corpus, corpus.getProperty("word"), new LexicalTableImpl(freqs, rownames, colnames));
448
			return lt;
449
		} catch (RWorkspaceException e) {
450
			org.txm.utils.logger.Log.printStackTrace(e);
451
		} catch (CqiClientException e) {
452
			org.txm.utils.logger.Log.printStackTrace(e);
453
		}
454
		return null;
455
	}
456

  
457
	public boolean removeLine(String name) {
458
		if (lines.containsKey(name)) {
459
			this.acquireSemaphore();
460
			lines.remove(name);
461
			this.releaseSemaphore();
462
			return true;
463
		} else {
464
			return false;
465
		}
466
	}
467

  
468
	public boolean hasLine(String name) {
469
		return lines.containsKey(name);
470
	}
471

  
472
	public ArrayList<QueryIndexLine> getLines(int from, int to) {
473
		if (lines.size() == 0) return new ArrayList<QueryIndexLine>();
474

  
475
		if (from < 0) from = 0;
476
		if (to < 0) to = 0;
477
		if (to > lines.size()) to = lines.size();
478
		if (from > to) from = to - 1;
479
		ArrayList<QueryIndexLine> tmp = new ArrayList<QueryIndexLine>();
480
		int i = 0;
481
		for (QueryIndexLine line : lines.values()) {
482
			if (i >= from && i < to) {
483
				tmp.add(line);
484
			}
485
			i++;
486
		}
487

  
488
		return tmp;
489
	}
490

  
491
	@Override
492
	public boolean delete() {
493
		if (corpus != null) return corpus.removeResult(this);
494
		return false;
495
	}
496

  
497
	@Override
498
	public String[] getExportTXTExtensions() {
499
		return new String[]{"*.csv"};
500
	}
501

  
502
	@Override
503
	public void clean() {
504
		try {
505
			this.writer.flush();
506
			this.writer.close();	
507
		} catch (IOException e) {
508
			// TODO Auto-generated catch block
509
			org.txm.utils.logger.Log.printStackTrace(e);
510
		}
511
	}
512

  
513
	@Override
514
	public Object getAdapter(Class adapterType) {
515
		if (adapterType == IWorkbenchAdapter.class)
516
			return ExpIIAAdapter;
517
		return null;
518
	}
519

  
520
	/** The WordCloud adapter. */
521
	private static IWorkbenchAdapter ExpIIAAdapter = new IWorkbenchAdapter() {
522

  
523
		@Override
524
		public Object[] getChildren(Object o) {
525
			return new Object[0];
526
		}
527

  
528
		@Override
529
		public ImageDescriptor getImageDescriptor(Object object) {
530
			return null;
531
		}
532

  
533
		@Override
534
		public String getLabel(Object o) {
535
			return ((ExpII) o).getName();
536
		}
537

  
538
		@Override
539
		public Object getParent(Object o) {
540
			return ((ExpII) o).getCorpus();
541
		}
542
	};
543

  
544

  
545

  
546
	private int[] covered(int[] starts, int[] ends, int[] keywordStarts, int[] keywordEnds) {
547
		int[] counts = new int[textBoundaries.length];
548
		int noText = 0;
549
		int endOfCurrentText = textBoundaries[noText];
550
		int f = 0;
551
		//		printMatchs(starts, ends, keywordStarts, keywordEnds);
552

  
553
		int i_keyword = 0;
554
		//System.out.println("len i="+starts.length+ " len i_keyword="+keywordStarts.length);
555
		for (int i = 0 ; i < starts.length ; ) {
556

  
557
			//System.out.println("i="+i+ " i_keyword="+i_keyword);
558
			if (i_keyword >= keywordStarts.length) break; // no more keyword positions
559
			//System.out.println(""+starts[i]+"->"+ends[i]+" : "+keywordStarts[i_keyword]+"->"+keywordEnds[i_keyword]);
560

  
561
			while (starts[i] >= endOfCurrentText) {
562
				if (noText >= textBoundaries.length) break;
563
				counts[noText] = f;
564
				noText++;
565
				if (noText >= textBoundaries.length) break;
566
				endOfCurrentText = textBoundaries[noText];
567
				counts[noText] = 0;
568
				f = 0;
569
			}
570
			if (starts[i] < keywordStarts[i_keyword]) {
571
				//System.out.println(" match start is not covered");
572
				i++;
573
			} else if (starts[i] > keywordEnds[i_keyword]) {
574
				//System.out.println(" next keyword");
575
				i_keyword++; // see next keyword match
576
			} else if (ends[i] <= keywordEnds[i_keyword]) {
577
				//System.out.println(" next match");
578
				//System.out.println(">>>> "+starts[i]+"->"+ends[i]+" : "+keywordStarts[i_keyword]+"->"+keywordEnds[i_keyword]);
579
				i++; // OK, test next match
580
				f++;
581
			} else {
582
				//System.out.println(" match end is not covered");
583
				i++; // OK, test next match
584
			}
585
		}
586
		counts[noText] = f;
587
		return counts;
588
	}
589

  
590
	private static void printMatchs(int[] starts, int[] ends, int[] keywordStarts, int[] keywordEnds) {
591
		int min = 999999999;
592
		int max = 0;
593
		for (int i : starts) if (i < min) min = i;
594
		for (int i : keywordStarts) if (i < min) min = i;
595
		for (int i : ends) if (i > max) max = i;
596
		for (int i : keywordEnds) if (i > max) max = i;
597
		for (int i = min ; i <= max ; i++) System.out.print(""+i+"\t");
598
		System.out.println();
599
		int j = 0;
600
		int k = 0;
601
		boolean inout = false;
602
		for (int i = min ; i <= max ; i++) {
603
			if (j < starts.length && starts[j] == i) {
604
				if (k < ends.length && ends[k] == i) {
605
					System.out.print(""+starts[j++]+"><\t");
606
					k++;
607
				} else {
608
					System.out.print(""+starts[j++]+">\t");
609
					inout = true;
610
				}
611
			} else if (k < ends.length && ends[k] == i) {
612
				System.out.print("<"+ends[k++]+"\t");
613
				inout = false;
614
			} else {
615
				if (inout) {
616
					System.out.print("-\t");
617
				} else {
618
					System.out.print("\t");
619
				}
620
			}
621
		}
622
		System.out.println();
623

  
624
		j = 0;
625
		k = 0;
626
		inout = false;
627
		for (int i = min ; i <= max ; i++) {
628
			if (j < keywordStarts.length && keywordStarts[j] == i) {
629
				if (k < keywordEnds.length && keywordEnds[k] == i) {
630
					System.out.print(""+keywordStarts[j++]+"><\t");
631
					k++;
632
				} else {
633
					System.out.print(""+keywordStarts[j++]+">\t");
634
					inout = true;
635
				}
636
			} else if (k < keywordEnds.length && keywordEnds[k] == i) {
637
				System.out.print("<"+keywordEnds[k++]+"\t");
638
				inout = false;
639
			} else {
640
				if (inout) {
641
					System.out.print("-\t");
642
				} else {
643
					System.out.print("\t");
644
				}
645

  
646
			}
647
		}
648
		System.out.println();
649
	}
650

  
651
	public static void main(String[] args) {
652
		//		int starts[] = {1, 10, 16, 24};
653
		//		int ends[] = {2, 12, 16, 30};
654
		//		int kstarts[] = {5, 9, 16, 22};
655
		//		int kends[] = {8, 12, 16, 35};
656
		//		System.out.println(covered(starts, ends, kstarts, kends));
657

  
658
		//		int starts[] = {10, 15 ,24, 50};
659
		//		int ends[] = {12, 16 ,30, 60};
660
		//		int kstarts[] = {5, 9, 14, 20};
661
		//		int kends[] = {8, 12, 18, 40};
662
		//		System.out.println(covered(starts, ends, kstarts, kends));
663

  
664
		//		int starts[] = {1, 10, 15 ,24};
665
		//		int ends[] = {2, 12, 16 ,30};
666
		//		int kstarts[] = {5, 9, 14, 20};
667
		//		int kends[] = {8, 12, 18, 40};
668
		//		System.out.println(covered(starts, ends, kstarts, kends));
669

  
670
		//		int starts[] = {4, 10, 16};
671
		//		int ends[] = {5, 11, 17};
672
		//		int kstarts[] = {3, 11, 15};
673
		//		int kends[] = {8, 13, 20};
674
		//		System.out.println(covered(starts, ends, kstarts, kends));
675

  
676
		int starts[] = {4, 10, 16, 20};
677
		int ends[] = {5, 11, 17, 22};
678
		int kstarts[] = {3, 11, 15};
679
		int kends[] = {8, 13, 20};
680
		//System.out.println(covered(starts, ends, kstarts, kends));
681
	}
682

  
683
	@Override
684
	public boolean compute(ProgressWatcher watcher) throws Exception {
685
		System.out.println("ExpII.compute(monitor): not implemented.");
686
		return false;
687
	}
688

  
689
	@Override
690
	public String getSimpleName() {
691
		return "ExpII";
692
	}
693

  
694
	@Override
695
	public String getDetails() {
696
		return texts.toString();
697
	}
698
}
0 699

  
tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/package.html (revision 468)
1
<html>
2
<body>
3
<p>Prototype of Query index.</p>
4
</body>
5
</html>
0 6

  
tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/ExpI.java (revision 468)
1
package org.txm.functions.cql2lsa;
2

  
3
import java.io.BufferedReader;
4
import java.io.File;
5
import java.io.FileInputStream;
6
import java.io.FileOutputStream;
7
import java.io.IOException;
8
import java.io.InputStreamReader;
9
import java.io.OutputStreamWriter;
10
import java.util.ArrayList;
11
import java.util.Collection;
12
import java.util.Collections;
13
import java.util.Comparator;
14
import java.util.LinkedHashMap;
15
import java.util.List;
16
import java.util.Map;
17

  
18
import org.eclipse.core.runtime.IAdaptable;
19
import org.eclipse.jface.resource.ImageDescriptor;
20
import org.eclipse.ui.model.IWorkbenchAdapter;
21
import org.txm.core.messages.TXMCoreMessages;
22
import org.txm.functions.ProgressWatcher;
23
import org.txm.functions.TXMCommand;
24
import org.txm.index.core.functions.LineComparator.SortMode;
25
import org.txm.index.core.messages.IndexCoreMessages;
26
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl;
27
import org.txm.rcp.IImageKeys;
28
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
29
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException;
30
import org.txm.searchengine.cqp.corpus.Corpus;
31
import org.txm.searchengine.cqp.corpus.QueryResult;
32
import org.txm.searchengine.cqp.corpus.query.Match;
33
import org.txm.searchengine.cqp.corpus.query.Query;
34
import org.txm.searchengine.cqp.serverException.CqiServerError;
35
import org.txm.statsengine.r.core.exceptions.RWorkspaceException;
36
import org.txm.utils.logger.Log;
37

  
38
public class ExpI extends TXMCommand implements IAdaptable {
39
	Corpus corpus;
40
	List<String> texts;
41
	int[] textBoundaries;
42

  
43
	LinkedHashMap<String, QueryIndexLine> lines = new LinkedHashMap<String, QueryIndexLine>();
44

  
45
	/** The writer. */
46
	private OutputStreamWriter writer;
47

  
48
	/**
	 * Creates the command for a corpus and caches the corpus text identifiers
	 * and text end limits.
	 *
	 * @param corpus the corpus the queries will be run on
	 */
	public ExpI(Corpus corpus) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException {
		this.corpus = corpus;
		// text identifiers first, then the end limit of each text
		this.texts = corpus.getTextsID();
		this.textBoundaries = corpus.getTextEndLimits();
	}
61

  
62
	public String getName() {
63
		return corpus.getName();
64
	}
65
	
66
	public List<String> getTextNames() {
67
		return texts;
68
	}
69

  
70
	public org.txm.core.results.TXMResult getParent() {
71
		return corpus;
72
	}
73

  
74
	public Corpus getCorpus() {
75
		return corpus;
76
	}
77

  
78
	public Collection<QueryIndexLine> getLines() {
79
		return lines.values();
80
	}
81

  
82
	public LinkedHashMap<String, QueryIndexLine> getLinesHash() {
83
		return lines;
84
	}
85

  
86
	int multi = 1;
87
	public void sortLines(SortMode mode, boolean revert) {
88

  
89
		multi = 1;
90
		if (revert) multi = -1;
91
		List<Map.Entry<String, QueryIndexLine>> entries =
92
				new ArrayList<Map.Entry<String, QueryIndexLine>>(lines.entrySet());
93

  
94
		if (mode == SortMode.FREQUNIT) {
95
			Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() {
96
				public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){
97
					int ret = multi * (a.getValue().getFrequency() - b.getValue().getFrequency());
98
					if (ret == 0) {
99
						return multi * a.getValue().getName().compareTo(b.getValue().getName());
100
					}
101
					return ret;
102
				}
103
			});
104
		} else if (mode == SortMode.FREQ) {
105
			Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() {
106
				public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){
107
					return multi * (a.getValue().getFrequency() - b.getValue().getFrequency());
108
				}
109
			});
110
		} else if (mode == SortMode.UNIT) {
111
			Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() {
112
				public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){
113
					return multi * a.getValue().getName().compareTo(b.getValue().getName());
114
				}
115
			});
116
		} else if (mode == SortMode.UNITFREQ) {
117
			Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() {
118
				public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){
119
					int ret = multi * a.getValue().getName().compareTo(b.getValue().getName());
120
					if (ret == 0) {
121
						return multi * (a.getValue().getFrequency() - b.getValue().getFrequency());
122
					}
123
					return ret;
124
				}
125
			});
126
		}
127

  
128
		LinkedHashMap<String, QueryIndexLine> sortedMap = new LinkedHashMap<String, QueryIndexLine>();
129
		for (Map.Entry<String, QueryIndexLine> entry : entries) {
130
			sortedMap.put(entry.getKey(), entry.getValue());
131
		}
132

  
133
		lines = sortedMap;
134
	}
135

  
136
	public void addLinesFromFile(File propFile) throws CqiClientException, IOException {
137
		BufferedReader reader = new BufferedReader(new InputStreamReader(
138
				new FileInputStream(propFile), "UTF-8")); //$NON-NLS-1$
139
		ArrayList<String> lines = new ArrayList<String>();
140
		String l = reader.readLine();
141
		while (l != null) {
142
			lines.add(l);
143
			l = reader.readLine();
144
		}
145
		reader.close();
146
		
147
		System.out.println("Number of query lines: "+lines.size());
148
		monitor.beginTask("Querying...", lines.size());
149
		
150
		for (String line : lines) {
151
			String[] split = line.split("=", 2); //$NON-NLS-1$
152
			if (split.length == 2) {
153
				if (hasLine(split[0])) {
154
					System.out.println(TXMCoreMessages.QueryIndex_2+line);
155
				} else {
156
					if (addLine(split[0], new Query(split[1])) == null) {
157
						System.out.println(TXMCoreMessages.QueryIndex_3+line);
158
					}
159
					monitor.worked(1);
160
				}
161
			}
162
		}
163
	}
164

  
165
	/**
166
	 * Write all the lines on a writer.
167
	 *
168
	 * @param outfile the outfile
169
	 * @param encoding the encoding
170
	 * @param colseparator the colseparator
171
	 * @param txtseparator the txtseparator
172
	 * @return true, if successful
173
	 */
174
	public boolean toTxt(File outfile, String encoding, String colseparator, String txtseparator) {
175
		try {
176
			toTxt(outfile, 0, lines.size(), encoding, colseparator, txtseparator);
177
		} catch (Exception e) {
178
			System.err.println(IndexCoreMessages.Index_7 + Log.toString(e));
179
			return false;
180
		}
181
		return true;
182
	}
183

  
184
	/**
185
	 * Write the lines between from and to on a writer.
186
	 *
187
	 * @param outfile the outfile
188
	 * @param from The first line to be written
189
	 * @param to The last line to be writen
190
	 * @param encoding the encoding
191
	 * @param colseparator the colseparator
192
	 * @param txtseparator the txtseparator
193
	 * @throws CqiClientException the cqi client exception
194
	 * @throws IOException Signals that an I/O exception has occurred.
195
	 */
196
	public void toTxt(File outfile, int from, int to, String encoding, String colseparator, String txtseparator)
197
			throws CqiClientException, IOException {
198
		// NK: writer declared as class attribute to perform a clean if the operation is interrupted
199
		this.writer = new OutputStreamWriter(new FileOutputStream(outfile),
200
				encoding); 
201
		String header = "Queries"; //$NON-NLS-1$
202
		header = txtseparator+ header.substring(0, header.length() - 1) +txtseparator;
203
		header += colseparator+ txtseparator+ "F" + txtseparator; //$NON-NLS-1$
204

  
205
		for (String t : texts)
206
			header += colseparator + txtseparator+ t.replace(txtseparator, txtseparator+txtseparator)+txtseparator; 
207
		header += "\n"; //$NON-NLS-1$
208
		writer.write(header);
209

  
210
		// for(Line ligne: lines)
211
		for (String name : lines.keySet()) {
212
			QueryIndexLine ligne = lines.get(name);
213
			writer.write(txtseparator+ ligne.getName().replace(txtseparator, txtseparator+txtseparator)+ txtseparator + colseparator + ligne.getFrequency()); 
214

  
215
			for (int j = 0; j < texts.size(); j++)
216
				writer.write(colseparator + ligne.getFrequency(j)); 
217
			writer.write("\n"); //$NON-NLS-1$
218
		}
219
		writer.flush();
220
		writer.close();
221
	}
222

  
223
	/**
224
	 * Only one query result and uses texts boundaries to count frequencies for each text
225
	 * @param name
226
	 * @param query
227
	 * @return
228
	 * @throws CqiClientException
229
	 */
230
	public QueryIndexLine addLine(String name, Query query) throws CqiClientException {
231
		if (lines.containsKey(name)) return null;
232
		QueryResult qresult = corpus.query(query, "tmp", true); //$NON-NLS-1$
233
		//System.out.println(query.toString()+" "+qresult.getNMatch());
234
		int[] counts = new int[textBoundaries.length];
235
		int count = 0;
236
		int noText = 0;
237
		int endOfCurrentText = textBoundaries[noText];
238
		for (Match m : qresult.getMatches()) {
239
			while (m.getStart() >= endOfCurrentText) {
240
				//System.out.println(m.getStart() +">="+endOfCurrentText);
241
				if (noText >= textBoundaries.length) break;
242
				//System.out.println("Text: "+texts.get(noText)+" count="+count+" notext="+noText);
243
				counts[noText] = count;
244
				noText++;
245
				if (noText >= textBoundaries.length) break;
246
				endOfCurrentText = textBoundaries[noText];
247
				counts[noText] = 0;
248
				count = 0;
249
			}
250
			count++;
251
		}
252
		//System.out.println(noText +"<?"+textBoundaries.length+" count="+count);
253
		if (noText < textBoundaries.length) // last text
254
			counts[noText] = count;
255
		
256
		qresult.drop();
257
		
258
		QueryIndexLine line = new QueryIndexLine(name, query, null);
259
		line.setFrequencies(counts);
260
		lines.put(name, line);
261
		return line;
262
	}
263

  
264
	public LexicalTableImpl toLexicalTable() {
265

  
266
		int npart = texts.size();
267
		int[][] freqs = new int[lines.size()][npart];
268
		String[] rownames = new String[lines.size()];
269
		String[] colnames = new String[npart];
270

  
271
		for (int i = 0 ; i < lines.size() ; i++) {
272
			QueryIndexLine line = lines.get(i);
273
			int[] linefreqs = line.getFreqs();
274
			rownames[i] = line.getName();
275
			for (int j = 0 ; j < npart ; j++) {
276
				freqs[i][j] = linefreqs[j];
277
			}
278
		}
279
		int j = 0;
280
		for (String t : texts) {
281
			colnames[j] = t;
282
			j++;
283
		}
284

  
285
		try {
286
			LexicalTableImpl lt = new LexicalTableImpl(freqs, rownames, colnames);
287
			return lt;
288
		} catch (RWorkspaceException e) {
289
			org.txm.utils.logger.Log.printStackTrace(e);
290
		}
291
		return null;
292
	}
293

  
294
	public boolean removeLine(String name) {
295
		if (lines.containsKey(name)) {
296
			this.acquireSemaphore();
297
			lines.remove(name);
298
			this.releaseSemaphore();
299
			return true;
300
		} else {
301
			return false;
302
		}
303
	}
304

  
305
	public boolean hasLine(String name) {
306
		return lines.containsKey(name);
307
	}
308

  
309
	public ArrayList<QueryIndexLine> getLines(int from, int to) {
310
		if (lines.size() == 0) return new ArrayList<QueryIndexLine>();
311

  
312
		if (from < 0) from = 0;
313
		if (to < 0) to = 0;
314
		if (to > lines.size()) to = lines.size();
315
		if (from > to) from = to - 1;
316
		ArrayList<QueryIndexLine> tmp = new ArrayList<QueryIndexLine>();
317
		int i = 0;
318
		for (QueryIndexLine line : lines.values()) {
319
			if (i >= from && i < to) {
320
				tmp.add(line);
321
			}
322
			i++;
323
		}
324

  
325
		return tmp;
326
	}
327
	
328
	@Override
329
	public boolean delete() {
330
		if (corpus != null) return corpus.removeResult(this);
331
		return false;
332
	}
333

  
334
	@Override
335
	public String[] getExportTXTExtensions() {
336
		return new String[]{"*.csv"};
337
	}
338

  
339
	@Override
340
	public void clean() {
341
		try {
342
			this.writer.flush();
343
			this.writer.close();	
344
		} catch (IOException e) {
345
			// TODO Auto-generated catch block
346
			org.txm.utils.logger.Log.printStackTrace(e);
347
		}
348
	}
349

  
350
	@Override
351
	public Object getAdapter(Class adapterType) {
352
		// TODO Auto-generated method stub
353
		if (adapterType == IWorkbenchAdapter.class)
354
			return queryIndexOfTextAdapter;
355
		return null;
356
	}
357

  
358
	/** The WordCloud adapter. */
359
	private static IWorkbenchAdapter queryIndexOfTextAdapter = new IWorkbenchAdapter() {
360

  
361
		@Override
362
		public Object[] getChildren(Object o) {
363
			return new Object[0];
364
		}
365

  
366
		@Override
367
		public ImageDescriptor getImageDescriptor(Object object) {
368
			return IImageKeys.getImageDescriptor(IImageKeys.QUERYINDEX);
369
		}
370

  
371
		@Override
372
		public String getLabel(Object o) {
373
			return ((ExpI) o).getName();
374
		}
375

  
376
		@Override
377
		public Object getParent(Object o) {
378
			return ((ExpI) o).getCorpus();
379
		}
380
	};
381

  
382
	@Override
383
	public boolean compute(ProgressWatcher watcher) throws Exception {
384
		System.out.println("ExpI.compute(monitor): not implemented.");
385
		return false;
386
	}
387

  
388
	@Override
389
	public String getSimpleName() {
390
		return "ExpI";
391
	}
392

  
393
	@Override
394
	public String getDetails() {
395
		return texts.toString();
396
	}
397
}
0 398

  
tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/QueryIndexLine.java (revision 468)
1
package org.txm.functions.cql2lsa;
2

  
3
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
4
import org.txm.searchengine.cqp.corpus.QueryResult;
5
import org.txm.searchengine.cqp.corpus.query.Query;
6

  
7
public class QueryIndexLine {
8
	String name;
9
	private Query query;
10
	QueryResult[] qresults;
11
	int[] freqs;
12
	int total;
13

  
14

  
15
	public QueryIndexLine(String name, Query query, QueryResult[] qresults) {
16
		this.qresults = qresults;
17
		this.query = query;
18
		this.name = name;
19
	}
20

  
21
	public String getName() {
22
		return name;
23
	}
24

  
25
	public Query getQuery() {
26
		return query;
27
	}
28
	
29
	public int[] getFreqs() {
30
		computeFreqsAndTotal();
31
		return freqs;
32
	}
33
	
34
	private void computeFreqsAndTotal() {
35
		if (freqs == null) {
36
			//System.out.println("compute freqs");
37
			total = 0;
38
			try {
39
				freqs = new int[qresults.length];
40
				for (int i = 0 ; i < freqs.length ; i++) {
41
					//System.out.println("add "+qresults[i].getNMatch());
42
					freqs[i] = qresults[i].getNMatch();
43
					total += freqs[i];
44
					qresults[i].drop();
45
				}
46
				qresults = null;
47
			} catch (CqiClientException e) {
48
				// TODO Auto-generated catch block
49
				org.txm.utils.logger.Log.printStackTrace(e);
50
				freqs = new int[0];
51
			}
52
		} else {
53
			total = 0;
54
			for (int i = 0 ; i < freqs.length ; i++) {
55
				total += freqs[i];
56
			}
57
		}
58
	}
59

  
60
	public int getFrequency() {
61
		computeFreqsAndTotal();
62
		return total;
63
	}
64
	
65
	public int getFrequency(int i) {
66
		computeFreqsAndTotal();
67
		return freqs[i];
68
	}
69
	
70
	public void setFrequencies(int[] freqs) {
71
		this.freqs = freqs;
72
//		total = 0 ;
73
//		for (int f : freqs) total+=f;
74
//		System.out.println("total : "+total);
75
	}
76

  
77
	/*private static void mergeMatches(int from, List<Match> matches, List<Match> tmp) {
78
		int im1 = 0;
79
		for (int im2 = 0 ; im2 < tmp.size() && im1 < matches.size() ;) {
80
			Match m1 = matches.get(im1);
81
			Match m2 = tmp.get(im2);
82

  
83
			if (m1.getStart() < m2.getStart()) {
84
				im1++;
85
			} else if (m1.getStart() >  m2.getStart()) {
86
				im2++;
87
				matches.add(im1,m2);
88
			} else {
89
				if (m1.getEnd() <  m2.getEnd()) {
90
					im1++;
91
				} else if (m1.getEnd() >  m2.getEnd()) {
92
					im2++;
93
					matches.add(im1,m2);
94
				} else {
95
					System.out.println("ERROR while merging : m1 == m2");
96
				}
97
			}
98
		} // end of match merge
99
	}
100

  
101
	public void union(List<QueryIndexLine> lines) throws CqiClientException {
102
		for (QueryIndexLine line : lines) { // process lines	
103
			for (QueryResult qresult : line.getQResult()) {
104

  
105
				List<Match> matches = qresult.getMatches();
106

  
107
				int nmatches = matches.size();
108
				int im1 = 0;
109
				List<Match> matches2 = qresult.getMatches();
110
				int nmatches2 = matches2.size();
111
				ArrayList<Match> tmp = new ArrayList<Match>(); // receive the match to add
112

  
113
				// 1-5 2-5 3-6 7-9
114
				// 1.4 1-5 2-5 3-5 7-8 7-9
115
				// select all without doublons
116
				int im2;
117
				for (im2 = 0 ; im2 < nmatches2 && im1 < nmatches ;) {
118
					Match m1 = matches.get(im1);
119
					Match m2 = matches2.get(im2);
120

  
121
					if (m1.getStart() == m2.getStart() && m1.getEnd() == m2.getEnd()) {
122
						// don't add
123
						im2++; // test next match2
124
					} else {
125
						tmp.add(m2);
126
						if (m1.getStart() <  m2.getStart()) {
127
							im1++;
128
						} else if (m1.getStart() >  m2.getStart()){
129
							im2++;
130
						} else {
131
							if (m1.getEnd() <  m2.getEnd()) {
132
								im1++;
133
							} else {
134
								im2++;
135
							} 
136
						}
137
					}
138
				} // almost end of match selection
139
				for (int i = im2 ; i < nmatches2 ; i++) 
140
					tmp.add(matches2.get(im2)); // get the last ones if any
141

  
142
				// merge
143
				mergeMatches(0, matches, tmp);
144
			} // end of union
145
			freqs = null; // will be recomputed next time
146
		}
147
	}*/
148

  
149
	/*public void inter(List<QueryIndexLine> lines) throws CqiClientException {
150
		List<Match> matches = qresult.getMatches();
151
		for (QueryIndexLine line : lines) {
152
			List<Match> matches2 = line.getQResult().getMatches();
153
		}
154
		freq = -1;
155
	}
156

  
157
	public void minus(List<QueryIndexLine> lines)throws CqiClientException {
158
		List<Match> matches = qresult.getMatches();
159
		for (QueryIndexLine line : lines) {
160
			List<Match> matches2 = line.getQResult().getMatches();
161
		}
162
		freq = -1;
163
	}
164
	 */
165
}
0 166

  
tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/ContextSubcorpus.java (revision 468)
1
package org.txm.functions.cql2lsa;
2

  
3
import java.io.BufferedReader;
4
import java.io.File;
5
import java.io.FileInputStream;
6
import java.io.IOException;
7
import java.io.InputStreamReader;
8
import java.util.ArrayList;
9
import java.util.HashMap;
10

  
11
import org.txm.core.results.TXMResult;
12
import org.txm.functions.ProgressWatcher;
13
import org.txm.functions.TXMCommand;
14
import org.txm.searchengine.cqp.CQPEngine;
15
import org.txm.searchengine.cqp.MemCqiClient;
16
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
17
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException;
18
import org.txm.searchengine.cqp.corpus.Corpus;
19
import org.txm.searchengine.cqp.corpus.QueryResult;
20
import org.txm.searchengine.cqp.corpus.Subcorpus;
21
import org.txm.searchengine.cqp.corpus.query.Query;
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff