44 |
44 |
import org.txm.searchengine.cqp.AbstractCqiClient;
|
45 |
45 |
import org.txm.searchengine.cqp.ICqiClient;
|
46 |
46 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
|
|
47 |
import org.txm.searchengine.cqp.corpus.query.MatchUtils;
|
47 |
48 |
import org.txm.searchengine.cqp.serverException.CqiServerError;
|
48 |
49 |
//import org.txm.statsengine.core.StatException;
|
49 |
50 |
//import org.txm.statsengine.core.data.Vector;
|
... | ... | |
145 |
146 |
return true;
|
146 |
147 |
}
|
147 |
148 |
|
|
149 |
// /**
|
|
150 |
// *
|
|
151 |
// * @param corpus
|
|
152 |
// * @param property
|
|
153 |
// * @param monitor
|
|
154 |
// * @return
|
|
155 |
// * @throws CqiClientException
|
|
156 |
// */
|
|
157 |
// // FIXME: why this method needs to create and delete some new subcorpus???? the computing can't be done directly on the corpus argument???
|
|
158 |
// // eg. dist = CorpusManager.getCorpusManager().getCqiClient().fdist1(corpus.getQualifiedCqpId(), 0, ICqiClient.CQI_CONST_FIELD_MATCH, property.getName());
|
|
159 |
// protected boolean computewithSubCorpus(Subcorpus corpus, Property property, IProgressMonitor monitor) throws CqiClientException {
|
|
160 |
//
|
|
161 |
// //System.out.println("not found");
|
|
162 |
//// Log.finest(NLS.bind(IndexCoreMessages.computingTheLexiconOfSubcorpusP0, corpus.getName()));
|
|
163 |
// //long start = System.currentTimeMillis();
|
|
164 |
// int[][] fdist = null;
|
|
165 |
// AbstractCqiClient cqi = CorpusManager.getCorpusManager().getCqiClient();
|
|
166 |
// String tmp = "TMP" + CQPCorpus.getNextSubcorpusCounter();
|
|
167 |
// String qtmp = corpus.getMainCorpus().getQualifiedCqpId()+":"+tmp;
|
|
168 |
// try {
|
|
169 |
//// System.out.println("subcorpus: "+corpus.getQualifiedCqpId());
|
|
170 |
//// System.out.println("query subcorpus: "+qtmp);
|
|
171 |
// cqi.cqpQuery(corpus.getQualifiedCqpId(), tmp, "[]"); //$NON-NLS-1$
|
|
172 |
// fdist = CorpusManager.getCorpusManager().getCqiClient().fdist1(qtmp, 0, ICqiClient.CQI_CONST_FIELD_MATCH, property.getName());
|
|
173 |
// //System.out.println("nb lines: "+fdist.length);
|
|
174 |
// } catch (Exception e) {
|
|
175 |
// throw new CqiClientException(e);
|
|
176 |
// } finally {
|
|
177 |
// try {
|
|
178 |
// cqi.dropSubCorpus(qtmp);
|
|
179 |
// } catch (IOException e) {
|
|
180 |
// // TODO Auto-generated catch block
|
|
181 |
// e.printStackTrace();
|
|
182 |
// } catch (CqiServerError e) {
|
|
183 |
// // TODO Auto-generated catch block
|
|
184 |
// e.printStackTrace();
|
|
185 |
// }
|
|
186 |
// }
|
|
187 |
// int lexiconSize = fdist.length;
|
|
188 |
//
|
|
189 |
// int[] freqs = new int[lexiconSize];
|
|
190 |
// int[] ids = new int[lexiconSize];
|
|
191 |
// for (int i = 0; i < fdist.length; i++) {
|
|
192 |
// ids[i] = fdist[i][0];
|
|
193 |
// freqs[i] = fdist[i][1];
|
|
194 |
// }
|
|
195 |
//
|
|
196 |
// init(corpus, property, freqs, ids);
|
|
197 |
// return true;
|
|
198 |
// }
|
|
199 |
|
148 |
200 |
/**
|
149 |
201 |
*
|
150 |
202 |
* @param corpus
|
... | ... | |
153 |
205 |
* @return
|
154 |
206 |
* @throws CqiClientException
|
155 |
207 |
*/
|
156 |
|
// FIXME: why this method needs to create and delete some new subcorpus???? the computing can't be done directly on the corpus argument???
|
|
208 |
// FIXME: SJ: why does this method need to create and delete a new subcorpus? Can't the computation be done directly on the corpus argument?
|
157 |
209 |
// eg. dist = CorpusManager.getCorpusManager().getCqiClient().fdist1(corpus.getQualifiedCqpId(), 0, ICqiClient.CQI_CONST_FIELD_MATCH, property.getName());
|
158 |
210 |
protected boolean computewithSubCorpus(Subcorpus corpus, Property property, IProgressMonitor monitor) throws CqiClientException {
|
159 |
211 |
|
... | ... | |
161 |
213 |
// Log.finest(NLS.bind(IndexCoreMessages.computingTheLexiconOfSubcorpusP0, corpus.getName()));
|
162 |
214 |
// long start = System.currentTimeMillis();
|
163 |
215 |
int[][] fdist = null;
|
164 |
|
AbstractCqiClient cqi = CorpusManager.getCorpusManager().getCqiClient();
|
165 |
|
String tmp = "TMP" + CQPCorpus.getNextSubcorpusCounter();
|
166 |
|
String qtmp = corpus.getMainCorpus().getQualifiedCqpId() + ":" + tmp;
|
167 |
|
try {
|
168 |
|
// System.out.println("subcorpus: "+corpus.getQualifiedCqpId());
|
169 |
|
// System.out.println("query subcorpus: "+qtmp);
|
170 |
|
cqi.cqpQuery(corpus.getQualifiedCqpId(), tmp, "[]"); //$NON-NLS-1$
|
171 |
|
fdist = CorpusManager.getCorpusManager().getCqiClient().fdist1(qtmp, 0, ICqiClient.CQI_CONST_FIELD_MATCH, property.getName());
|
172 |
|
// System.out.println("nb lines: "+fdist.length);
|
|
216 |
|
|
217 |
boolean fast = false;
|
|
218 |
|
|
219 |
// FIXME: SJ: old version
|
|
220 |
if (!fast) {
|
|
221 |
|
|
222 |
AbstractCqiClient cqi = CorpusManager.getCorpusManager().getCqiClient();
|
|
223 |
String tmp = "TMP" + CQPCorpus.getNextSubcorpusCounter();
|
|
224 |
String qtmp = corpus.getMainCorpus().getQualifiedCqpId() + ":" + tmp;
|
|
225 |
|
|
226 |
try {
|
|
227 |
// System.out.println("subcorpus: "+corpus.getQualifiedCqpId());
|
|
228 |
// System.out.println("query subcorpus: "+qtmp);
|
|
229 |
|
|
230 |
cqi.cqpQuery(corpus.getQualifiedCqpId(), tmp, "[]"); //$NON-NLS-1$
|
|
231 |
fdist = CorpusManager.getCorpusManager().getCqiClient().fdist1(qtmp, 0, ICqiClient.CQI_CONST_FIELD_MATCH, property.getName());
|
|
232 |
// System.out.println("nb lines: "+fdist.length);
|
|
233 |
}
|
|
234 |
catch (Exception e) {
|
|
235 |
throw new CqiClientException(e);
|
|
236 |
}
|
|
237 |
finally {
|
|
238 |
try {
|
|
239 |
cqi.dropSubCorpus(qtmp);
|
|
240 |
}
|
|
241 |
catch (IOException e) {
|
|
242 |
// TODO Auto-generated catch block
|
|
243 |
e.printStackTrace();
|
|
244 |
}
|
|
245 |
catch (CqiServerError e) {
|
|
246 |
// TODO Auto-generated catch block
|
|
247 |
e.printStackTrace();
|
|
248 |
}
|
|
249 |
}
|
173 |
250 |
}
|
174 |
|
catch (Exception e) {
|
175 |
|
throw new CqiClientException(e);
|
176 |
|
}
|
177 |
|
finally {
|
|
251 |
// FIXME: SJ: new version by MD that seems faster; it needs to be tested on all types of lexicons/corpora before replacing the old version
|
|
252 |
else {
|
|
253 |
|
178 |
254 |
try {
|
179 |
|
cqi.dropSubCorpus(qtmp);
|
|
255 |
int[] positions = MatchUtils.toPositions(corpus.getMatches());
|
|
256 |
int[] indexes = CorpusManager.getCorpusManager().getCqiClient().cpos2Id(property.getQualifiedName(), positions);
|
|
257 |
int n_indexes = CorpusManager.getCorpusManager().getCqiClient().attributeSize(property.getQualifiedName());
|
|
258 |
int[] counts = new int[n_indexes];
|
|
259 |
for (int i : indexes) {
|
|
260 |
counts[i]++;
|
|
261 |
}
|
|
262 |
fdist = new int[indexes.length][2];
|
|
263 |
|
|
264 |
int n = 0;
|
|
265 |
for (int i : indexes) {
|
|
266 |
fdist[n][0] = i;
|
|
267 |
fdist[n][1] = counts[i];
|
|
268 |
n++;
|
|
269 |
}
|
180 |
270 |
}
|
181 |
|
catch (IOException e) {
|
|
271 |
catch (Exception e) {
|
182 |
272 |
// TODO Auto-generated catch block
|
183 |
273 |
e.printStackTrace();
|
184 |
274 |
}
|
185 |
|
catch (CqiServerError e) {
|
186 |
|
// TODO Auto-generated catch block
|
187 |
|
e.printStackTrace();
|
188 |
|
}
|
189 |
275 |
}
|
190 |
276 |
int lexiconSize = fdist.length;
|
191 |
277 |
|
... | ... | |
200 |
286 |
return true;
|
201 |
287 |
}
|
202 |
288 |
|
|
289 |
|
203 |
290 |
/**
|
204 |
291 |
* Compute number of tokens. / this.nbr
|
205 |
292 |
*/
|