Révision 2407

tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/CQPLexicon.java (revision 2407)
44 44
import org.txm.searchengine.cqp.AbstractCqiClient;
45 45
import org.txm.searchengine.cqp.ICqiClient;
46 46
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
47
import org.txm.searchengine.cqp.corpus.query.MatchUtils;
47 48
import org.txm.searchengine.cqp.serverException.CqiServerError;
48 49
//import org.txm.statsengine.core.StatException;
49 50
//import org.txm.statsengine.core.data.Vector;
......
145 146
		return true;
146 147
	}
147 148
	
149
	// /**
150
	// *
151
	// * @param corpus
152
	// * @param property
153
	// * @param monitor
154
	// * @return
155
	// * @throws CqiClientException
156
	// */
157
	// // FIXME: why this method needs to create and delete some new subcorpus???? the computing can't be done directly on the corpus argument???
158
	// // eg. dist = CorpusManager.getCorpusManager().getCqiClient().fdist1(corpus.getQualifiedCqpId(), 0, ICqiClient.CQI_CONST_FIELD_MATCH, property.getName());
159
	// protected boolean computewithSubCorpus(Subcorpus corpus, Property property, IProgressMonitor monitor) throws CqiClientException {
160
	//
161
	// //System.out.println("not found");
162
	//// Log.finest(NLS.bind(IndexCoreMessages.computingTheLexiconOfSubcorpusP0, corpus.getName()));
163
	// //long start = System.currentTimeMillis();
164
	// int[][] fdist = null;
165
	// AbstractCqiClient cqi = CorpusManager.getCorpusManager().getCqiClient();
166
	// String tmp = "TMP" + CQPCorpus.getNextSubcorpusCounter();
167
	// String qtmp = corpus.getMainCorpus().getQualifiedCqpId()+":"+tmp;
168
	// try {
169
	//// System.out.println("subcorpus: "+corpus.getQualifiedCqpId());
170
	//// System.out.println("query subcorpus: "+qtmp);
171
	// cqi.cqpQuery(corpus.getQualifiedCqpId(), tmp, "[]"); //$NON-NLS-1$
172
	// fdist = CorpusManager.getCorpusManager().getCqiClient().fdist1(qtmp, 0, ICqiClient.CQI_CONST_FIELD_MATCH, property.getName());
173
	// //System.out.println("nb lines: "+fdist.length);
174
	// } catch (Exception e) {
175
	// throw new CqiClientException(e);
176
	// } finally {
177
	// try {
178
	// cqi.dropSubCorpus(qtmp);
179
	// } catch (IOException e) {
180
	// // TODO Auto-generated catch block
181
	// e.printStackTrace();
182
	// } catch (CqiServerError e) {
183
	// // TODO Auto-generated catch block
184
	// e.printStackTrace();
185
	// }
186
	// }
187
	// int lexiconSize = fdist.length;
188
	//
189
	// int[] freqs = new int[lexiconSize];
190
	// int[] ids = new int[lexiconSize];
191
	// for (int i = 0; i < fdist.length; i++) {
192
	// ids[i] = fdist[i][0];
193
	// freqs[i] = fdist[i][1];
194
	// }
195
	//
196
	// init(corpus, property, freqs, ids);
197
	// return true;
198
	// }
199
	
148 200
	/**
149 201
	 * 
150 202
	 * @param corpus
......
153 205
	 * @return
154 206
	 * @throws CqiClientException
155 207
	 */
156
	// FIXME: why this method needs to create and delete some new subcorpus???? the computing can't be done directly on the corpus argument???
208
	// FIXME: SJ: why does this method need to create and then delete a new subcorpus? Can't the computation be done directly on the corpus argument?
157 209
	// eg. dist = CorpusManager.getCorpusManager().getCqiClient().fdist1(corpus.getQualifiedCqpId(), 0, ICqiClient.CQI_CONST_FIELD_MATCH, property.getName());
158 210
	protected boolean computewithSubCorpus(Subcorpus corpus, Property property, IProgressMonitor monitor) throws CqiClientException {
159 211
		
......
161 213
		// Log.finest(NLS.bind(IndexCoreMessages.computingTheLexiconOfSubcorpusP0, corpus.getName()));
162 214
		// long start = System.currentTimeMillis();
163 215
		int[][] fdist = null;
164
		AbstractCqiClient cqi = CorpusManager.getCorpusManager().getCqiClient();
165
		String tmp = "TMP" + CQPCorpus.getNextSubcorpusCounter();
166
		String qtmp = corpus.getMainCorpus().getQualifiedCqpId() + ":" + tmp;
167
		try {
168
			// System.out.println("subcorpus: "+corpus.getQualifiedCqpId());
169
			// System.out.println("query subcorpus: "+qtmp);
170
			cqi.cqpQuery(corpus.getQualifiedCqpId(), tmp, "[]"); //$NON-NLS-1$
171
			fdist = CorpusManager.getCorpusManager().getCqiClient().fdist1(qtmp, 0, ICqiClient.CQI_CONST_FIELD_MATCH, property.getName());
172
			// System.out.println("nb lines: "+fdist.length);
216
		
217
		boolean fast = false;
218
		
219
		// FIXME: SJ: old version
220
		if (!fast) {
221
			
222
			AbstractCqiClient cqi = CorpusManager.getCorpusManager().getCqiClient();
223
			String tmp = "TMP" + CQPCorpus.getNextSubcorpusCounter();
224
			String qtmp = corpus.getMainCorpus().getQualifiedCqpId() + ":" + tmp;
225
			
226
			try {
227
				// System.out.println("subcorpus: "+corpus.getQualifiedCqpId());
228
				// System.out.println("query subcorpus: "+qtmp);
229
				
230
				cqi.cqpQuery(corpus.getQualifiedCqpId(), tmp, "[]"); //$NON-NLS-1$
231
				fdist = CorpusManager.getCorpusManager().getCqiClient().fdist1(qtmp, 0, ICqiClient.CQI_CONST_FIELD_MATCH, property.getName());
232
				// System.out.println("nb lines: "+fdist.length);
233
			}
234
			catch (Exception e) {
235
				throw new CqiClientException(e);
236
			}
237
			finally {
238
				try {
239
					cqi.dropSubCorpus(qtmp);
240
				}
241
				catch (IOException e) {
242
					// TODO Auto-generated catch block
243
					e.printStackTrace();
244
				}
245
				catch (CqiServerError e) {
246
					// TODO Auto-generated catch block
247
					e.printStackTrace();
248
				}
249
			}
173 250
		}
174
		catch (Exception e) {
175
			throw new CqiClientException(e);
176
		}
177
		finally {
251
		// FIXME: SJ: new version by MD that seems faster; it needs to be tested on all types of lexicons and corpora before it replaces the old version
252
		else {
253
			
178 254
			try {
179
				cqi.dropSubCorpus(qtmp);
255
				int[] positions = MatchUtils.toPositions(corpus.getMatches());
256
				int[] indexes = CorpusManager.getCorpusManager().getCqiClient().cpos2Id(property.getQualifiedName(), positions);
257
				int n_indexes = CorpusManager.getCorpusManager().getCqiClient().attributeSize(property.getQualifiedName());
258
				int[] counts = new int[n_indexes];
259
				for (int i : indexes) {
260
					counts[i]++;
261
				}
262
				fdist = new int[indexes.length][2];
263
				
264
				int n = 0;
265
				for (int i : indexes) {
266
					fdist[n][0] = i;
267
					fdist[n][1] = counts[i];
268
					n++;
269
				}
180 270
			}
181
			catch (IOException e) {
271
			catch (Exception e) {
182 272
				// TODO Auto-generated catch block
183 273
				e.printStackTrace();
184 274
			}
185
			catch (CqiServerError e) {
186
				// TODO Auto-generated catch block
187
				e.printStackTrace();
188
			}
189 275
		}
190 276
		int lexiconSize = fdist.length;
191 277
		
......
200 286
		return true;
201 287
	}
202 288
	
289
	
203 290
	/**
204 291
	 * Compute number of tokens. / this.nbr
205 292
	 */
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/query/MatchUtils.java (revision 2407)
4 4
import java.util.List;
5 5

  
6 6
public class MatchUtils {
7
	
7 8
	/**
8 9
	 * 
9 10
	 * @param list the matches to cover
......
11 12
	 * @return the indexes of the 'b' matches that cover the 'a' matches
12 13
	 */
13 14
	public static ArrayList<Integer> matchesCoveredByB(List<Match> list, List<Match> b) {
14
		int ai=0, bi=0;
15
		ArrayList<Integer> result = new ArrayList<Integer>();
15
		int ai = 0, bi = 0;
16
		ArrayList<Integer> result = new ArrayList<>();
16 17
		
17
		while (ai < list.size() && bi < b.size() ) {
18
			if (b.get(bi).contains(list.get(ai))) { 
18
		while (ai < list.size() && bi < b.size()) {
19
			if (b.get(bi).contains(list.get(ai))) {
19 20
				result.add(bi);
20
				bi++; 
21
			} else { 
22
				ai++; 
23
			} 
21
				bi++;
22
			}
23
			else {
24
				ai++;
25
			}
24 26
		}
25 27
		
26 28
		return result;
27 29
	}
28 30
	
29 31
	/**
32
	 * Gets an array of all positions of the specified list of matches.
30 33
	 * 
34
	 * @param list the list of matches
35
	 * @return an array of all positions of the specified list of matches
36
	 */
37
	public static int[] toPositions(List<Match> list) {
38
		ArrayList<Integer> result = new ArrayList<>();
39
		for (Match m : list) {
40
			if (m.getStart() == m.getEnd()) {
41
				result.add(m.getStart());
42
			}
43
			else {
44
				for (int i = m.getStart(); i <= m.getEnd(); i++) {
45
					result.add(i);
46
				}
47
			}
48
		}
49
		
50
		int[] positions = new int[result.size()];
51
		for (int i = 0; i < result.size(); i++) {
52
			positions[i] = result.get(i);
53
		}
54
		return positions;
55
	}
56
	
57
	/**
58
	 * 
31 59
	 * @param match
32 60
	 * @return array of int position from a Match
33 61
	 */
......
44 72
	public static int[] toPositions(Match match1, Match match2) {
45 73
		int len = match2.end - match1.start + 1;
46 74
		
47
		if (len <= 0 ) {
75
		if (len <= 0) {
48 76
			return new int[0];
49
		} else if (len == 1) {
50
			return new int[] {match1.start};
51
		} else if (len == 2) {
52
			return new int[] {match1.start, match2.end};
53
		} else {
77
		}
78
		else if (len == 1) {
79
			return new int[] { match1.start };
80
		}
81
		else if (len == 2) {
82
			return new int[] { match1.start, match2.end };
83
		}
84
		else {
54 85
			int[] positions = new int[len];
55 86
			int n = 0;
56
			for (int i = match1.start ; i <= match2.end ; i++) {
87
			for (int i = match1.start; i <= match2.end; i++) {
57 88
				positions[n++] = i;
58 89
			}
59 90
			return positions;

Formats disponibles : Unified diff