| 33 |
33 |
import java.util.ArrayList;
|
| 34 |
34 |
import java.util.Arrays;
|
| 35 |
35 |
import java.util.HashMap;
|
|
36 |
import java.util.HashSet;
|
| 36 |
37 |
import java.util.LinkedList;
|
| 37 |
38 |
import java.util.List;
|
| 38 |
39 |
import java.util.UUID;
|
| ... | ... | |
| 50 |
51 |
import org.txm.searchengine.core.Selection;
|
| 51 |
52 |
import org.txm.searchengine.core.messages.SearchEngineCoreMessages;
|
| 52 |
53 |
import org.txm.searchengine.cqp.AbstractCqiClient;
|
|
54 |
import org.txm.searchengine.cqp.CQPPreferences;
|
| 53 |
55 |
import org.txm.searchengine.cqp.CQPSearchEngine;
|
| 54 |
56 |
import org.txm.searchengine.cqp.NetCqiClient;
|
| 55 |
57 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
|
| ... | ... | |
| 71 |
73 |
* @author jmague
|
| 72 |
74 |
*/
|
| 73 |
75 |
public class Subcorpus extends CQPCorpus {
|
| 74 |
|
|
|
76 |
|
| 75 |
77 |
/**
|
| 76 |
78 |
* Query used to build the corpus.
|
| 77 |
79 |
*/
|
| 78 |
80 |
@Parameter(key = TXMPreferences.QUERY)
|
| 79 |
81 |
protected IQuery pQuery;
|
| 80 |
|
|
|
82 |
|
|
83 |
/**
|
|
84 |
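* Matching strategy to set on the CQP engine while the query runs (only applied when it is one of the values listed in matchingStrategies).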
|
|
85 |
*/
|
|
86 |
@Parameter(key = CQPPreferences.MATCHINGSTRATEGY)
|
|
87 |
protected String pMatchingStrategy;
|
|
88 |
|
| 81 |
89 |
/**
 * Result of the query defining this subcorpus: assigned by __compute() and by
 * setParameters(String, String, QueryResult), reset to null by clean().
 */
protected Selection qresult;
|
| 82 |
|
|
|
90 |
|
| 83 |
91 |
/**
 * Selection attached to this subcorpus: filled by initSelectionResult(Element) or
 * setSelectionResult(SelectionResult), reset to null by clean().
 */
protected SelectionResult selectionResult;
|
| 84 |
|
|
|
92 |
|
| 85 |
93 |
/**
|
| 86 |
94 |
* CQP text ids cache
|
| 87 |
95 |
*/
|
| 88 |
96 |
protected String[] cqpTextIDS = null;
|
| 89 |
97 |
|
|
98 |
/**
 * Strategy names accepted for the CQP "MatchingStrategy" option.
 * __compute() only changes that option when pMatchingStrategy is one of these values.
 */
public static final HashSet<String> matchingStrategies = new HashSet<String>(Arrays.asList("shortest","standard","longest","traditional"));
|
| 90 |
99 |
|
| 91 |
|
|
| 92 |
100 |
/**
|
| 93 |
101 |
*
|
| 94 |
102 |
* @param corpus
|
| ... | ... | |
| 96 |
104 |
public Subcorpus(CQPCorpus corpus) {
	// nothing Subcorpus-specific to set up here: defer to the superclass constructor
	super(corpus);
}
|
| 99 |
|
|
|
107 |
|
| 100 |
108 |
/**
|
| 101 |
109 |
*
|
| 102 |
110 |
* @param partition
|
| ... | ... | |
| 104 |
112 |
public Subcorpus(Partition partition) {
	// nothing Subcorpus-specific to set up here: defer to the superclass constructor
	super(partition);
}
|
| 107 |
|
|
|
115 |
|
| 108 |
116 |
/**
|
| 109 |
117 |
*
|
| 110 |
118 |
* @param result parent result
|
| ... | ... | |
| 114 |
122 |
this.pQuery = result.getQuery();
|
| 115 |
123 |
this.setUserName(pQuery.toString());
|
| 116 |
124 |
}
|
| 117 |
|
|
|
125 |
|
| 118 |
126 |
/**
|
| 119 |
127 |
*
|
| 120 |
128 |
* @param parametersNodePath
|
| ... | ... | |
| 122 |
130 |
public Subcorpus(String parametersNodePath) {
	// nothing Subcorpus-specific to set up here: defer to the superclass constructor
	super(parametersNodePath);
}
|
| 125 |
|
|
|
133 |
|
| 126 |
134 |
/**
|
| 127 |
135 |
* Instantiates a new subcorpus.
|
| 128 |
136 |
*
|
| ... | ... | |
| 133 |
141 |
*/
|
| 134 |
142 |
@Override
|
| 135 |
143 |
protected boolean __compute(TXMProgressMonitor monitor) throws Exception {
|
| 136 |
|
|
|
144 |
|
| 137 |
145 |
if (this.getParent() instanceof QueryBasedTXMResult) { // update the pQuery
|
| 138 |
146 |
this.pQuery = ((QueryBasedTXMResult)this.getParent()).getQuery();
|
| 139 |
147 |
this.setName(pQuery.getName());
|
| 140 |
148 |
}
|
| 141 |
|
|
|
149 |
|
| 142 |
150 |
if (pID == null || pID.length() == 0) {
|
| 143 |
151 |
pID = subcorpusNamePrefix + getNextSubcorpusCounter().toString();
|
| 144 |
152 |
}
|
| 145 |
|
|
|
153 |
|
| 146 |
154 |
if (this.pQuery != null) {
|
| 147 |
155 |
this.qresult = null; // reset
|
| 148 |
156 |
String parent_id = this.getCorpusParent().getQualifiedCqpId();
|
| ... | ... | |
| 150 |
158 |
Log.warning(NLS.bind("Error: CQL length limit reached ({0}) with {1} length.", CQLQuery.MAX_CQL_LENGTH, pQuery.getQueryString().length()));
|
| 151 |
159 |
return false;
|
| 152 |
160 |
}
|
| 153 |
|
|
|
161 |
|
|
162 |
AbstractCqiClient CQI = CorpusManager.getCorpusManager().getCqiClient();
|
| 154 |
163 |
String q = this.pQuery.getQueryString();
|
| 155 |
164 |
if (q.matches("undump \"[^\"]+\";")) { // undump query
|
| 156 |
165 |
String path = q.substring(8, q.length() - 2);
|
| ... | ... | |
| 158 |
167 |
Log.severe(NLS.bind("Error: could not compute subcorpus from the dump file: {0}.", path));
|
| 159 |
168 |
return false;
|
| 160 |
169 |
}
|
| 161 |
|
CorpusManager.getCorpusManager().getCqiClient().query(this.getCorpusParent().getQualifiedCqpId()+";");
|
| 162 |
|
CorpusManager.getCorpusManager().getCqiClient().query("undump "+this.pID+" < "+q.substring(7));
|
|
170 |
CQI.query(this.getCorpusParent().getQualifiedCqpId()+";");
|
|
171 |
CQI.query("undump "+this.pID+" < "+q.substring(7));
|
| 163 |
172 |
} else if (q.matches("(union|join|intersect|inter|difference|diff) [^ ]+ [^ ]+;")) {
|
| 164 |
|
CorpusManager.getCorpusManager().getCqiClient().query(this.getCorpusParent().getQualifiedCqpId()+";");
|
| 165 |
|
CorpusManager.getCorpusManager().getCqiClient().query(this.pID+" = "+q);
|
|
173 |
CQI.query(this.getCorpusParent().getQualifiedCqpId()+";");
|
|
174 |
CQI.query(this.pID+" = "+q);
|
| 166 |
175 |
} else {
|
| 167 |
|
CorpusManager.getCorpusManager().getCqiClient().cqpQuery(parent_id, this.pID, CQLQuery.fixQuery(q, this.getLang()));
|
| 168 |
|
// if (q.contains("@[") || q.contains("@\"")) {
|
| 169 |
|
// Log.fine("Target detected in "+q+", recentering on target.");
|
| 170 |
|
// CorpusManager.getCorpusManager().getCqiClient().cqpQuery(this.getQualifiedCqpId(), this.pID, "<target>[]");
|
| 171 |
|
// }
|
|
176 |
String previousMatchingStrategy = null;
|
|
177 |
if (matchingStrategies.contains(pMatchingStrategy)) { // set MatchingStrategy for this query
|
|
178 |
previousMatchingStrategy = CQI.getOption("MatchingStrategy");
|
|
179 |
if (previousMatchingStrategy.equals(pMatchingStrategy)) {
|
|
180 |
previousMatchingStrategy = null; // finally no need to change strategy
|
|
181 |
} else { // ok we need to change strategy
|
|
182 |
CQI.setOption("MatchingStrategy", pMatchingStrategy);
|
|
183 |
}
|
|
184 |
|
|
185 |
}
|
|
186 |
|
|
187 |
CQI.cqpQuery(parent_id, this.pID, CQLQuery.fixQuery(q, this.getLang()));
|
|
188 |
if (previousMatchingStrategy != null) { // restore MatchingStrategy for this query
|
|
189 |
CQI.setOption("MatchingStrategy", previousMatchingStrategy);
|
|
190 |
}
|
|
191 |
// if (q.contains("@[") || q.contains("@\"")) {
|
|
192 |
// Log.fine("Target detected in "+q+", recentering on target.");
|
|
193 |
// CorpusManager.getCorpusManager().getCqiClient().cqpQuery(this.getQualifiedCqpId(), this.pID, "<target>[]");
|
|
194 |
// }
|
| 172 |
195 |
}
|
| 173 |
|
|
|
196 |
|
| 174 |
197 |
//this.qresult = this.pQuery.getSearchEngine().query(this.getCorpusParent(), this.pQuery, this.getUserName(), true);
|
| 175 |
|
|
|
198 |
|
| 176 |
199 |
this.qresult = new QueryResult(this.pID, this.getUserName(), this.getCorpusParent(), (CQLQuery) this.pQuery); // getCorpusParent().query(pQuery, this.pID, true);
|
| 177 |
200 |
// queryResult = new QueryResult(queryResultId, queryResultName, this, query);
|
| 178 |
201 |
}
|
| 179 |
|
|
|
202 |
|
| 180 |
203 |
cqpTextIDS = null; // reset text ids cache
|
| 181 |
|
|
|
204 |
|
| 182 |
205 |
return qresult != null;
|
| 183 |
206 |
}
|
|
207 |
|
|
208 |
public void setMatchingStrategy(String strategy) {
|
|
209 |
this.pMatchingStrategy = strategy;
|
|
210 |
}
|
| 184 |
211 |
|
| 185 |
|
|
|
212 |
public String getMatchingStrategy() {
|
|
213 |
return this.pMatchingStrategy;
|
|
214 |
}
|
|
215 |
|
| 186 |
216 |
@Override
|
| 187 |
217 |
public String getComputingStartMessage() {
|
| 188 |
|
|
|
218 |
|
| 189 |
219 |
if (this.pQuery == null) {
|
| 190 |
220 |
return TXMCoreMessages.bind(SearchEngineCoreMessages.info_computingTheP0SubCorpusOfP1CommaQueryP2, this.getSimpleName(), this.getParent().getName(), "<no query>");
|
| 191 |
221 |
}
|
| 192 |
|
|
|
222 |
|
| 193 |
223 |
// created with structure, property and value(s)
|
| 194 |
224 |
if (this.pQuery instanceof SubcorpusCQLQuery) {
|
| 195 |
225 |
SubcorpusCQLQuery query = (SubcorpusCQLQuery) this.pQuery;
|
| ... | ... | |
| 201 |
231 |
return TXMCoreMessages.bind(SearchEngineCoreMessages.info_computingTheP0SubCorpusOfP1CommaQueryP2, this.getSimpleName(), this.getParent().getName(), this.getQuery().asString());
|
| 202 |
232 |
}
|
| 203 |
233 |
}
|
| 204 |
|
|
|
234 |
|
| 205 |
235 |
/*
|
| 206 |
236 |
* retro compatibility method for import.xml file
|
| 207 |
237 |
* @see org.txm.searchengine.cqp.corpus.Corpus#getLocale()
|
| ... | ... | |
| 212 |
242 |
this.setUserName(e.getAttribute(NAME));
|
| 213 |
243 |
this.pID = "S" + getNextSubcorpusCounter();
|
| 214 |
244 |
this.pQuery = new CQLQuery(e.getAttribute("query")); //$NON-NLS-1$
|
| 215 |
|
|
|
245 |
|
| 216 |
246 |
NodeList subcorpusElems = e.getElementsByTagName("subcorpus"); //$NON-NLS-1$
|
| 217 |
247 |
for (int i = 0; i < subcorpusElems.getLength(); i++) {
|
| 218 |
248 |
Element subcorpusElem = (Element) subcorpusElems.item(i);
|
| ... | ... | |
| 224 |
254 |
Log.warning(TXMCoreMessages.bind(TXMCoreMessages.failedToRestoreTheP0SubcorpusOfP1P2, this.pID, subcorpusElem.getAttribute("name"), ex)); //$NON-NLS-1$
|
| 225 |
255 |
}
|
| 226 |
256 |
}
|
| 227 |
|
|
|
257 |
|
| 228 |
258 |
NodeList partitionElems = e.getElementsByTagName("partition"); //$NON-NLS-1$
|
| 229 |
259 |
for (int i = 0; i < partitionElems.getLength(); i++) {
|
| 230 |
260 |
Element partitionElem = (Element) partitionElems.item(i);
|
| 231 |
261 |
String name = partitionElem.getAttribute("name"); //$NON-NLS-1$
|
| 232 |
262 |
List<String> names = new ArrayList<>();
|
| 233 |
263 |
List<String> queries = new ArrayList<>();
|
| 234 |
|
|
|
264 |
|
| 235 |
265 |
NodeList partElems = partitionElem.getElementsByTagName("part"); //$NON-NLS-1$
|
| 236 |
266 |
for (int j = 0; j < partElems.getLength(); j++) {
|
| 237 |
267 |
Element part = (Element) partElems.item(j);
|
| ... | ... | |
| 255 |
285 |
// return super.load();
|
| 256 |
286 |
return true;
|
| 257 |
287 |
}
|
| 258 |
|
|
|
288 |
|
| 259 |
289 |
@Override
|
| 260 |
290 |
public boolean canCompute() {
|
| 261 |
291 |
return getUserName() != null && getUserName().length() > 0 &&
|
| 262 |
292 |
((pQuery != null && pQuery.getQueryString().length() > 0) || qresult != null);
|
| 263 |
293 |
}
|
| 264 |
|
|
|
294 |
|
| 265 |
295 |
/*
|
| 266 |
296 |
* (non-Javadoc)
|
| 267 |
297 |
* @see
|
| ... | ... | |
| 274 |
304 |
throw new InvalidCqpIdException(TXMCoreMessages.bind(CQPSearchEngineCoreMessages.p0IsNotAValidCQPIDForASubcorpusItMustBeAnUppercaseCharacterFollowedByLowercaseCharacters, pID));
|
| 275 |
305 |
return true;
|
| 276 |
306 |
}
|
| 277 |
|
|
|
307 |
|
| 278 |
308 |
@Override
|
| 279 |
309 |
public void clean() {
|
| 280 |
310 |
super.clean();
|
| 281 |
|
|
|
311 |
|
| 282 |
312 |
// nothing to do if the corpus has not yet been computed
|
| 283 |
313 |
if (this.hasBeenComputedOnce()) {
|
| 284 |
314 |
try {
|
| ... | ... | |
| 293 |
323 |
Log.printStackTrace(e);
|
| 294 |
324 |
}
|
| 295 |
325 |
}
|
| 296 |
|
|
|
326 |
|
| 297 |
327 |
qresult = null;
|
| 298 |
328 |
selectionResult = null;
|
| 299 |
329 |
}
|
| 300 |
|
|
| 301 |
|
|
|
330 |
|
|
331 |
|
| 302 |
332 |
@Override
|
| 303 |
333 |
public List<? extends Match> getMatches() {
|
| 304 |
334 |
if (qresult == null) { // not computed
|
| ... | ... | |
| 312 |
342 |
return new ArrayList<>();
|
| 313 |
343 |
}
|
| 314 |
344 |
}
|
| 315 |
|
|
|
345 |
|
| 316 |
346 |
public int getNMatch() {
|
| 317 |
347 |
try {
|
| 318 |
348 |
return qresult.getNMatch();
|
| ... | ... | |
| 323 |
353 |
return 0;
|
| 324 |
354 |
}
|
| 325 |
355 |
}
|
| 326 |
|
|
|
356 |
|
| 327 |
357 |
// protected void finalize() throws Throwable {
|
| 328 |
358 |
// try {
|
| 329 |
359 |
// //
|
| ... | ... | |
| 338 |
368 |
// super.finalize();
|
| 339 |
369 |
// }
|
| 340 |
370 |
// }
|
| 341 |
|
|
|
371 |
|
| 342 |
372 |
/*
|
| 343 |
373 |
* (non-Javadoc)
|
| 344 |
374 |
* @see
|
| ... | ... | |
| 348 |
378 |
public String getQualifiedCqpId() {
|
| 349 |
379 |
return getMainCorpus().getCqpId() + ":" + this.pID; //$NON-NLS-1$
|
| 350 |
380 |
}
|
| 351 |
|
|
|
381 |
|
| 352 |
382 |
// /**
|
| 353 |
383 |
// * Register to parent.
|
| 354 |
384 |
// *
|
| ... | ... | |
| 381 |
411 |
//
|
| 382 |
412 |
// this._load();
|
| 383 |
413 |
// }
|
| 384 |
|
|
|
414 |
|
| 385 |
415 |
/**
|
| 386 |
416 |
* Gets the query.
|
| 387 |
417 |
*
|
| ... | ... | |
| 390 |
420 |
public IQuery getQuery() {
|
| 391 |
421 |
return pQuery;
|
| 392 |
422 |
}
|
| 393 |
|
|
|
423 |
|
| 394 |
424 |
@Override
|
| 395 |
425 |
public String getResultType() {
|
| 396 |
426 |
return "Corpus";
|
| 397 |
427 |
}
|
| 398 |
|
|
|
428 |
|
| 399 |
429 |
public SelectionResult getSelectionResult() {
|
| 400 |
430 |
return selectionResult;
|
| 401 |
431 |
}
|
| 402 |
|
|
| 403 |
|
|
|
432 |
|
|
433 |
|
| 404 |
434 |
/**
|
| 405 |
435 |
* Returns the number of occurrences in the subcorpus.
|
| 406 |
436 |
*
|
| ... | ... | |
| 414 |
444 |
@Override
|
| 415 |
445 |
public int getSize() throws CqiClientException {
|
| 416 |
446 |
if (!this.hasBeenComputedOnce()) return 0;
|
| 417 |
|
|
|
447 |
|
| 418 |
448 |
if (this.size == -1) {
|
| 419 |
449 |
// Log.finest(TXMCoreMessages.bind(TXMCoreMessages.SUBCORPUS_SIZE, new Object[]{this.pName, "N/A"}));
|
| 420 |
450 |
long start = System.currentTimeMillis();
|
| 421 |
|
|
|
451 |
|
| 422 |
452 |
try {
|
| 423 |
453 |
AbstractCqiClient cqiClient = CorpusManager.getCorpusManager().getCqiClient();
|
| 424 |
454 |
String qid = this.getQualifiedCqpId();
|
| ... | ... | |
| 434 |
464 |
throw new UnexpectedAnswerException();
|
| 435 |
465 |
}
|
| 436 |
466 |
this.size = 0;
|
| 437 |
|
|
|
467 |
|
| 438 |
468 |
for (int i = 0; i < match.length; i++) {
|
| 439 |
469 |
size += matchend[i] - match[i] + 1;
|
| 440 |
470 |
}
|
| ... | ... | |
| 448 |
478 |
}
|
| 449 |
479 |
return this.size;
|
| 450 |
480 |
}
|
| 451 |
|
|
|
481 |
|
| 452 |
482 |
@Override
|
| 453 |
483 |
public int[] getStartLimits(String sup) throws IOException,
|
| 454 |
484 |
CqiServerError, InvalidCqpIdException, CqiClientException {
|
| 455 |
485 |
return this.getMainCorpus().getStartLimits(sup);
|
| 456 |
486 |
}
|
| 457 |
|
|
|
487 |
|
| 458 |
488 |
/*
|
| 459 |
489 |
* (non-Javadoc)
|
| 460 |
490 |
* @see
|
| ... | ... | |
| 466 |
496 |
throws CqiClientException {
|
| 467 |
497 |
return getMainCorpus().getStructuralUnit(name);
|
| 468 |
498 |
}
|
| 469 |
|
|
|
499 |
|
| 470 |
500 |
/*
|
| 471 |
501 |
* (non-Javadoc)
|
| 472 |
502 |
* @see org.txm.searchengine.cqp.corpus.Corpus#getStructuralUnits()
|
| ... | ... | |
| 475 |
505 |
public List<StructuralUnit> getStructuralUnits() throws CqiClientException {
|
| 476 |
506 |
return getMainCorpus().getStructuralUnits();
|
| 477 |
507 |
}
|
| 478 |
|
|
|
508 |
|
| 479 |
509 |
public SelectionResult initSelectionResult(Element e) {
|
| 480 |
|
|
|
510 |
|
| 481 |
511 |
if (e != null) {
|
| 482 |
512 |
selectionResult = new SelectionResult();
|
| 483 |
513 |
NodeList selectionList = e.getElementsByTagName("selection"); //$NON-NLS-1$
|
| 484 |
514 |
for (int i = 0; i < selectionList.getLength();) {
|
| 485 |
515 |
Element selection = (Element) selectionList.item(i); // the selection element
|
| 486 |
|
|
|
516 |
|
| 487 |
517 |
NodeList textList = selection.getElementsByTagName("selText"); // get text ids //$NON-NLS-1$
|
| 488 |
518 |
for (int j = 0; j < textList.getLength(); j++) {
|
| 489 |
519 |
Element text = (Element) textList.item(j);
|
| 490 |
520 |
selectionResult.add(text.getAttribute("id")); //$NON-NLS-1$
|
| 491 |
521 |
}
|
| 492 |
|
|
|
522 |
|
| 493 |
523 |
NodeList critList = selection.getElementsByTagName("selCrit"); // get the criteria //$NON-NLS-1$
|
| 494 |
524 |
for (int j = 0; j < critList.getLength(); j++) {
|
| 495 |
525 |
Element crit = (Element) critList.item(j);
|
| ... | ... | |
| 503 |
533 |
}
|
| 504 |
534 |
return selectionResult;
|
| 505 |
535 |
}
|
| 506 |
|
|
|
536 |
|
| 507 |
537 |
@Override
|
| 508 |
538 |
public boolean loadParameters() throws Exception {
|
| 509 |
539 |
String q = this.getStringParameterValue(TXMPreferences.QUERY);
|
| 510 |
540 |
if (!q.isEmpty()) {
|
| 511 |
541 |
pQuery = new CQLQuery(q);
|
| 512 |
542 |
}
|
| 513 |
|
|
|
543 |
|
| 514 |
544 |
return true;
|
| 515 |
545 |
}
|
| 516 |
|
|
|
546 |
|
| 517 |
547 |
@Override
|
| 518 |
548 |
public boolean saveParameters() throws Exception {
|
| 519 |
549 |
if (pQuery != null) {
|
| ... | ... | |
| 521 |
551 |
}
|
| 522 |
552 |
return true;
|
| 523 |
553 |
}
|
| 524 |
|
|
|
554 |
|
| 525 |
555 |
/**
|
| 526 |
556 |
* Instantiates a new subcorpus.
|
| 527 |
557 |
*
|
| ... | ... | |
| 534 |
564 |
* {@link CQPCorpus#createSubcorpus(CQLQuery, String)}
|
| 535 |
565 |
*/
|
| 536 |
566 |
protected void setParameters(String pID, String name, CQLQuery query) throws InvalidCqpIdException {
|
| 537 |
|
|
|
567 |
|
| 538 |
568 |
this.pID = pID;
|
| 539 |
569 |
this.pQuery = query;
|
| 540 |
570 |
this.setUserName(name);
|
| 541 |
571 |
}
|
| 542 |
|
|
|
572 |
|
| 543 |
573 |
/**
|
| 544 |
574 |
*
|
| 545 |
575 |
* @param cqpId
|
| ... | ... | |
| 547 |
577 |
* @param queryResult
|
| 548 |
578 |
*/
|
| 549 |
579 |
public void setParameters(String cqpId, String name, QueryResult queryResult) {
|
| 550 |
|
|
|
580 |
|
| 551 |
581 |
this.pID = cqpId;
|
| 552 |
582 |
this.setUserName(name);
|
| 553 |
583 |
this.pQuery = queryResult.getQuery();
|
| 554 |
584 |
this.qresult = queryResult;
|
| 555 |
|
|
|
585 |
|
| 556 |
586 |
this.setDirty();
|
| 557 |
587 |
}
|
| 558 |
|
|
|
588 |
|
| 559 |
589 |
/**
|
| 560 |
590 |
* Sets the query to use.
|
| 561 |
591 |
*
|
| ... | ... | |
| 564 |
594 |
public void setQuery(CQLQuery query) {
|
| 565 |
595 |
this.pQuery = query;
|
| 566 |
596 |
}
|
| 567 |
|
|
| 568 |
|
|
|
597 |
|
|
598 |
|
| 569 |
599 |
// TODO merge SelectionResult and Subcorpus ?
|
| 570 |
600 |
public void setSelectionResult(SelectionResult selectionResult) {
|
| 571 |
601 |
this.selectionResult = selectionResult;
|
| ... | ... | |
| 597 |
627 |
// getSelfElement().appendChild(selectionElem);// append selection elem to self
|
| 598 |
628 |
// }
|
| 599 |
629 |
}
|
| 600 |
|
|
|
630 |
|
| 601 |
631 |
/*
|
| 602 |
632 |
* (non-Javadoc)
|
| 603 |
633 |
* @see java.lang.Object#toString()
|
| ... | ... | |
| 606 |
636 |
public String toString() {
|
| 607 |
637 |
return this.getName();
|
| 608 |
638 |
}
|
| 609 |
|
|
|
639 |
|
| 610 |
640 |
@Override
|
| 611 |
641 |
@Deprecated
|
| 612 |
642 |
public boolean _toTxt(File outfile, String encoding, String colseparator, String txtseparator) throws Exception {
|
| 613 |
643 |
return false;
|
| 614 |
644 |
}
|
| 615 |
|
|
|
645 |
|
| 616 |
646 |
@Override
|
| 617 |
647 |
public void setIsModified(boolean b) {
|
| 618 |
648 |
// nothing
|
| 619 |
649 |
}
|
| 620 |
|
|
|
650 |
|
| 621 |
651 |
@Override
|
| 622 |
652 |
public CorpusBuild getRootCorpusBuild() {
|
| 623 |
653 |
return getMainCorpus();
|
| 624 |
654 |
}
|
| 625 |
|
|
|
655 |
|
| 626 |
656 |
/**
|
| 627 |
657 |
* Return the CQP START positions of the main corpus texts
|
| 628 |
658 |
*/
|
| ... | ... | |
| 633 |
663 |
}
|
| 634 |
664 |
return textLimits;
|
| 635 |
665 |
}
|
| 636 |
|
|
|
666 |
|
| 637 |
667 |
/**
|
| 638 |
668 |
* Return the CQP END positions of the main corpus texts
|
| 639 |
669 |
*/
|
| ... | ... | |
| 648 |
678 |
}
|
| 649 |
679 |
return textEndLimits;
|
| 650 |
680 |
}
|
| 651 |
|
|
|
681 |
|
| 652 |
682 |
/**
|
| 653 |
683 |
* Gets the nb texts.
|
| 654 |
684 |
*
|
| ... | ... | |
| 660 |
690 |
@Override
|
| 661 |
691 |
public int getNbTexts() throws CqiClientException, IOException, CqiServerError {
|
| 662 |
692 |
if (nbtext == -1) {
|
| 663 |
|
|
|
693 |
|
| 664 |
694 |
List<Integer> structsIncorpus = getTextNumberInCorpus();
|
| 665 |
|
|
|
695 |
|
| 666 |
696 |
nbtext = structsIncorpus.size();
|
| 667 |
697 |
}
|
| 668 |
698 |
return nbtext;
|
| 669 |
699 |
}
|
| 670 |
|
|
|
700 |
|
| 671 |
701 |
private List<Integer> getTextNumberInCorpus() throws CqiClientException, IOException, CqiServerError {
|
| 672 |
|
|
|
702 |
|
| 673 |
703 |
StructuralUnit text_su = this.getStructuralUnit("text"); //$NON-NLS-1$
|
| 674 |
704 |
StructuralUnitProperty text_id_sup = text_su.getProperty("id"); //$NON-NLS-1$
|
| 675 |
705 |
nbtext = CorpusManager.getCorpusManager().getCqiClient().attributeSize(text_id_sup.getQualifiedName());
|
| 676 |
|
|
|
706 |
|
| 677 |
707 |
int[] structs = new int[nbtext];
|
| 678 |
708 |
int[][] structs_positions = new int[structs.length][2];
|
| 679 |
709 |
for (int i = 0; i < nbtext; i++) {
|
| 680 |
710 |
structs[i] = i;
|
| 681 |
711 |
structs_positions[i] = CorpusManager.getCorpusManager().getCqiClient().struc2Cpos(text_id_sup.getQualifiedName(), i);
|
| 682 |
712 |
}
|
| 683 |
|
|
|
713 |
|
| 684 |
714 |
List<Integer> structsIncorpus = new ArrayList<>();
|
| 685 |
715 |
// filter structs with matches
|
| 686 |
716 |
List<? extends Match> matches = this.getMatches();
|
| ... | ... | |
| 701 |
731 |
}
|
| 702 |
732 |
return structsIncorpus;
|
| 703 |
733 |
}
|
| 704 |
|
|
|
734 |
|
| 705 |
735 |
/**
|
| 706 |
736 |
*
|
| 707 |
737 |
* @return the text_id values of the CQP corpus ordered by position
|
| ... | ... | |
| 711 |
741 |
*/
|
| 712 |
742 |
@Override
|
| 713 |
743 |
public String[] getCorpusTextIdsList() throws CqiClientException, IOException, CqiServerError {
|
| 714 |
|
|
|
744 |
|
| 715 |
745 |
if (cqpTextIDS == null) {
|
| 716 |
746 |
StructuralUnit text_su = this.getStructuralUnit("text"); //$NON-NLS-1$
|
| 717 |
747 |
StructuralUnitProperty text_id_sup = text_su.getProperty("id"); //$NON-NLS-1$
|
| 718 |
|
|
|
748 |
|
| 719 |
749 |
List<Integer> list = getTextNumberInCorpus();
|
| 720 |
750 |
int[] structs = new int[list.size()];
|
| 721 |
751 |
for (int i = 0; i < list.size(); i++) {
|
| 722 |
752 |
structs[i] = list.get(i);
|
| 723 |
753 |
}
|
| 724 |
|
|
|
754 |
|
| 725 |
755 |
cqpTextIDS = CorpusManager.getCorpusManager().getCqiClient().struc2Str(text_id_sup.getQualifiedName(), structs);
|
| 726 |
756 |
}
|
| 727 |
757 |
return cqpTextIDS;
|
| 728 |
758 |
}
|
| 729 |
|
|
|
759 |
|
| 730 |
760 |
/**
|
| 731 |
761 |
* Gets the texts ids and order number in corpus.
|
| 732 |
762 |
*
|
| ... | ... | |
| 744 |
774 |
for (int i = 0; i < list.size(); i++) {
|
| 745 |
775 |
structs[i] = list.get(i);
|
| 746 |
776 |
}
|
| 747 |
|
|
|
777 |
|
| 748 |
778 |
StructuralUnit text_su = this.getStructuralUnit("text"); //$NON-NLS-1$
|
| 749 |
779 |
StructuralUnitProperty text_id_sup = text_su.getProperty("id"); //$NON-NLS-1$
|
| 750 |
|
|
|
780 |
|
| 751 |
781 |
String[] ids = CorpusManager.getCorpusManager().getCqiClient().struc2Str(text_id_sup.getQualifiedName(), structs);
|
| 752 |
782 |
for (int i = 0; i < ids.length; i++) {
|
| 753 |
783 |
textids.put(ids[i], structs[i]);
|
| ... | ... | |
| 755 |
785 |
}
|
| 756 |
786 |
return textids;
|
| 757 |
787 |
}
|
| 758 |
|
|
|
788 |
|
| 759 |
789 |
@Override
|
| 760 |
790 |
public String getName() {
|
| 761 |
791 |
return this.getCorpusParent().getSimpleName() + TXMPreferences.PARENT_NAME_SEPARATOR + this.getSimpleName();
|
| 762 |
792 |
}
|
| 763 |
|
|
|
793 |
|
| 764 |
794 |
@Override
|
| 765 |
795 |
public String getDetails() {
|
| 766 |
796 |
try {
|
| ... | ... | |
| 777 |
807 |
|
| 778 |
808 |
@Override
|
| 779 |
809 |
public List<?> isBuildValid() {
|
| 780 |
|
|
|
810 |
|
| 781 |
811 |
if (this.getParent() != null) return new LinkedList<String>(); // ok
|
| 782 |
|
|
|
812 |
|
| 783 |
813 |
return Arrays.asList("No parent corpus result");
|
| 784 |
814 |
}
|
| 785 |
815 |
|
| 786 |
816 |
@Override
|
| 787 |
817 |
public String isReady() {
|
| 788 |
|
|
|
818 |
|
| 789 |
819 |
if (!hasBeenComputedOnce()) return "the subcorpus is not a computed TXMResult";
|
| 790 |
|
|
|
820 |
|
| 791 |
821 |
try {
|
| 792 |
822 |
int s = this.getSize();
|
| 793 |
823 |
if (s >= 0) return null;
|