33 |
33 |
import java.util.ArrayList;
|
34 |
34 |
import java.util.Arrays;
|
35 |
35 |
import java.util.HashMap;
|
|
36 |
import java.util.HashSet;
|
36 |
37 |
import java.util.LinkedList;
|
37 |
38 |
import java.util.List;
|
38 |
39 |
import java.util.UUID;
|
... | ... | |
50 |
51 |
import org.txm.searchengine.core.Selection;
|
51 |
52 |
import org.txm.searchengine.core.messages.SearchEngineCoreMessages;
|
52 |
53 |
import org.txm.searchengine.cqp.AbstractCqiClient;
|
|
54 |
import org.txm.searchengine.cqp.CQPPreferences;
|
53 |
55 |
import org.txm.searchengine.cqp.CQPSearchEngine;
|
54 |
56 |
import org.txm.searchengine.cqp.NetCqiClient;
|
55 |
57 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
|
... | ... | |
71 |
73 |
* @author jmague
|
72 |
74 |
*/
|
73 |
75 |
public class Subcorpus extends CQPCorpus {
|
74 |
|
|
|
76 |
|
75 |
77 |
/**
|
76 |
78 |
* Query used to build the corpus.
|
77 |
79 |
*/
|
78 |
80 |
@Parameter(key = TXMPreferences.QUERY)
|
79 |
81 |
protected IQuery pQuery;
|
80 |
|
|
|
82 |
|
|
83 |
/**
|
|
84 |
* CQP matching strategy to apply while running the query; only values listed in matchingStrategies are applied.
|
|
85 |
*/
|
|
86 |
@Parameter(key = CQPPreferences.MATCHINGSTRATEGY)
|
|
87 |
protected String pMatchingStrategy;
|
|
88 |
|
81 |
89 |
protected Selection qresult;
|
82 |
|
|
|
90 |
|
83 |
91 |
protected SelectionResult selectionResult;
|
84 |
|
|
|
92 |
|
85 |
93 |
/**
|
86 |
94 |
* CQP text ids cache
|
87 |
95 |
*/
|
88 |
96 |
protected String[] cqpTextIDS = null;
|
89 |
97 |
|
|
98 |
public static final HashSet<String> matchingStrategies = new HashSet<String>(Arrays.asList("shortest","standard","longest","traditional"));
|
90 |
99 |
|
91 |
|
|
92 |
100 |
/**
|
93 |
101 |
*
|
94 |
102 |
* @param corpus
|
... | ... | |
96 |
104 |
public Subcorpus(CQPCorpus corpus) {
	// no subcorpus-specific state is set here: construction is fully delegated to the superclass
	super(corpus);
}
|
99 |
|
|
|
107 |
|
100 |
108 |
/**
|
101 |
109 |
*
|
102 |
110 |
* @param partition
|
... | ... | |
104 |
112 |
public Subcorpus(Partition partition) {
	// no subcorpus-specific state is set here: construction is fully delegated to the superclass
	super(partition);
}
|
107 |
|
|
|
115 |
|
108 |
116 |
/**
|
109 |
117 |
*
|
110 |
118 |
* @param result parent result
|
... | ... | |
114 |
122 |
this.pQuery = result.getQuery();
|
115 |
123 |
this.setUserName(pQuery.toString());
|
116 |
124 |
}
|
117 |
|
|
|
125 |
|
118 |
126 |
/**
|
119 |
127 |
*
|
120 |
128 |
* @param parametersNodePath
|
... | ... | |
122 |
130 |
public Subcorpus(String parametersNodePath) {
	// no subcorpus-specific state is set here: the parameters node path is handed to the superclass as is
	super(parametersNodePath);
}
|
125 |
|
|
|
133 |
|
126 |
134 |
/**
|
127 |
135 |
* Instantiates a new subcorpus.
|
128 |
136 |
*
|
... | ... | |
133 |
141 |
*/
|
134 |
142 |
@Override
|
135 |
143 |
protected boolean __compute(TXMProgressMonitor monitor) throws Exception {
|
136 |
|
|
|
144 |
|
137 |
145 |
if (this.getParent() instanceof QueryBasedTXMResult) { // update the pQuery
|
138 |
146 |
this.pQuery = ((QueryBasedTXMResult)this.getParent()).getQuery();
|
139 |
147 |
this.setName(pQuery.getName());
|
140 |
148 |
}
|
141 |
|
|
|
149 |
|
142 |
150 |
if (pID == null || pID.length() == 0) {
|
143 |
151 |
pID = subcorpusNamePrefix + getNextSubcorpusCounter().toString();
|
144 |
152 |
}
|
145 |
|
|
|
153 |
|
146 |
154 |
if (this.pQuery != null) {
|
147 |
155 |
this.qresult = null; // reset
|
148 |
156 |
String parent_id = this.getCorpusParent().getQualifiedCqpId();
|
... | ... | |
150 |
158 |
Log.warning(NLS.bind("Error: CQL length limit reached ({0}) with {1} length.", CQLQuery.MAX_CQL_LENGTH, pQuery.getQueryString().length()));
|
151 |
159 |
return false;
|
152 |
160 |
}
|
153 |
|
|
|
161 |
|
|
162 |
AbstractCqiClient CQI = CorpusManager.getCorpusManager().getCqiClient();
|
154 |
163 |
String q = this.pQuery.getQueryString();
|
155 |
164 |
if (q.matches("undump \"[^\"]+\";")) { // undump query
|
156 |
165 |
String path = q.substring(8, q.length() - 2);
|
... | ... | |
158 |
167 |
Log.severe(NLS.bind("Error: could not compute subcorpus from the dump file: {0}.", path));
|
159 |
168 |
return false;
|
160 |
169 |
}
|
161 |
|
CorpusManager.getCorpusManager().getCqiClient().query(this.getCorpusParent().getQualifiedCqpId()+";");
|
162 |
|
CorpusManager.getCorpusManager().getCqiClient().query("undump "+this.pID+" < "+q.substring(7));
|
|
170 |
CQI.query(this.getCorpusParent().getQualifiedCqpId()+";");
|
|
171 |
CQI.query("undump "+this.pID+" < "+q.substring(7));
|
163 |
172 |
} else if (q.matches("(union|join|intersect|inter|difference|diff) [^ ]+ [^ ]+;")) {
|
164 |
|
CorpusManager.getCorpusManager().getCqiClient().query(this.getCorpusParent().getQualifiedCqpId()+";");
|
165 |
|
CorpusManager.getCorpusManager().getCqiClient().query(this.pID+" = "+q);
|
|
173 |
CQI.query(this.getCorpusParent().getQualifiedCqpId()+";");
|
|
174 |
CQI.query(this.pID+" = "+q);
|
166 |
175 |
} else {
|
167 |
|
CorpusManager.getCorpusManager().getCqiClient().cqpQuery(parent_id, this.pID, CQLQuery.fixQuery(q, this.getLang()));
|
168 |
|
// if (q.contains("@[") || q.contains("@\"")) {
|
169 |
|
// Log.fine("Target detected in "+q+", recentering on target.");
|
170 |
|
// CorpusManager.getCorpusManager().getCqiClient().cqpQuery(this.getQualifiedCqpId(), this.pID, "<target>[]");
|
171 |
|
// }
|
|
176 |
// Temporarily switch the client "MatchingStrategy" option when a known strategy was requested.
String previousMatchingStrategy = null;
boolean strategyChanged = false;
if (matchingStrategies.contains(pMatchingStrategy)) { // set MatchingStrategy for this query
	previousMatchingStrategy = CQI.getOption("MatchingStrategy");
	// compare from the requested value: it is a member of the set, whereas the client answer may be null
	if (!pMatchingStrategy.equals(previousMatchingStrategy)) { // ok we need to change strategy
		CQI.setOption("MatchingStrategy", pMatchingStrategy);
		strategyChanged = true;
	}
}

try {
	CQI.cqpQuery(parent_id, this.pID, CQLQuery.fixQuery(q, this.getLang()));
} finally {
	// the option is set on the shared client: put the previous value back even when the query fails
	if (strategyChanged && previousMatchingStrategy != null) { // restore MatchingStrategy for this query
		CQI.setOption("MatchingStrategy", previousMatchingStrategy);
	}
}
|
|
191 |
// if (q.contains("@[") || q.contains("@\"")) {
|
|
192 |
// Log.fine("Target detected in "+q+", recentering on target.");
|
|
193 |
// CorpusManager.getCorpusManager().getCqiClient().cqpQuery(this.getQualifiedCqpId(), this.pID, "<target>[]");
|
|
194 |
// }
|
172 |
195 |
}
|
173 |
|
|
|
196 |
|
174 |
197 |
//this.qresult = this.pQuery.getSearchEngine().query(this.getCorpusParent(), this.pQuery, this.getUserName(), true);
|
175 |
|
|
|
198 |
|
176 |
199 |
this.qresult = new QueryResult(this.pID, this.getUserName(), this.getCorpusParent(), (CQLQuery) this.pQuery); // getCorpusParent().query(pQuery, this.pID, true);
|
177 |
200 |
// queryResult = new QueryResult(queryResultId, queryResultName, this, query);
|
178 |
201 |
}
|
179 |
|
|
|
202 |
|
180 |
203 |
cqpTextIDS = null; // reset text ids cache
|
181 |
|
|
|
204 |
|
182 |
205 |
return qresult != null;
|
183 |
206 |
}
|
|
207 |
|
|
208 |
public void setMatchingStrategy(String strategy) {
|
|
209 |
this.pMatchingStrategy = strategy;
|
|
210 |
}
|
184 |
211 |
|
185 |
|
|
|
212 |
public String getMatchingStrategy() {
|
|
213 |
return this.pMatchingStrategy;
|
|
214 |
}
|
|
215 |
|
186 |
216 |
@Override
|
187 |
217 |
public String getComputingStartMessage() {
|
188 |
|
|
|
218 |
|
189 |
219 |
if (this.pQuery == null) {
|
190 |
220 |
return TXMCoreMessages.bind(SearchEngineCoreMessages.info_computingTheP0SubCorpusOfP1CommaQueryP2, this.getSimpleName(), this.getParent().getName(), "<no query>");
|
191 |
221 |
}
|
192 |
|
|
|
222 |
|
193 |
223 |
// created with structure, property and value(s)
|
194 |
224 |
if (this.pQuery instanceof SubcorpusCQLQuery) {
|
195 |
225 |
SubcorpusCQLQuery query = (SubcorpusCQLQuery) this.pQuery;
|
... | ... | |
201 |
231 |
return TXMCoreMessages.bind(SearchEngineCoreMessages.info_computingTheP0SubCorpusOfP1CommaQueryP2, this.getSimpleName(), this.getParent().getName(), this.getQuery().asString());
|
202 |
232 |
}
|
203 |
233 |
}
|
204 |
|
|
|
234 |
|
205 |
235 |
/*
|
206 |
236 |
* retro compatibility method for import.xml file
|
207 |
237 |
* @see org.txm.searchengine.cqp.corpus.Corpus#getLocale()
|
... | ... | |
212 |
242 |
this.setUserName(e.getAttribute(NAME));
|
213 |
243 |
this.pID = "S" + getNextSubcorpusCounter();
|
214 |
244 |
this.pQuery = new CQLQuery(e.getAttribute("query")); //$NON-NLS-1$
|
215 |
|
|
|
245 |
|
216 |
246 |
NodeList subcorpusElems = e.getElementsByTagName("subcorpus"); //$NON-NLS-1$
|
217 |
247 |
for (int i = 0; i < subcorpusElems.getLength(); i++) {
|
218 |
248 |
Element subcorpusElem = (Element) subcorpusElems.item(i);
|
... | ... | |
224 |
254 |
Log.warning(TXMCoreMessages.bind(TXMCoreMessages.failedToRestoreTheP0SubcorpusOfP1P2, this.pID, subcorpusElem.getAttribute("name"), ex)); //$NON-NLS-1$
|
225 |
255 |
}
|
226 |
256 |
}
|
227 |
|
|
|
257 |
|
228 |
258 |
NodeList partitionElems = e.getElementsByTagName("partition"); //$NON-NLS-1$
|
229 |
259 |
for (int i = 0; i < partitionElems.getLength(); i++) {
|
230 |
260 |
Element partitionElem = (Element) partitionElems.item(i);
|
231 |
261 |
String name = partitionElem.getAttribute("name"); //$NON-NLS-1$
|
232 |
262 |
List<String> names = new ArrayList<>();
|
233 |
263 |
List<String> queries = new ArrayList<>();
|
234 |
|
|
|
264 |
|
235 |
265 |
NodeList partElems = partitionElem.getElementsByTagName("part"); //$NON-NLS-1$
|
236 |
266 |
for (int j = 0; j < partElems.getLength(); j++) {
|
237 |
267 |
Element part = (Element) partElems.item(j);
|
... | ... | |
255 |
285 |
// return super.load();
|
256 |
286 |
return true;
|
257 |
287 |
}
|
258 |
|
|
|
288 |
|
259 |
289 |
@Override
|
260 |
290 |
public boolean canCompute() {
|
261 |
291 |
return getUserName() != null && getUserName().length() > 0 &&
|
262 |
292 |
((pQuery != null && pQuery.getQueryString().length() > 0) || qresult != null);
|
263 |
293 |
}
|
264 |
|
|
|
294 |
|
265 |
295 |
/*
|
266 |
296 |
* (non-Javadoc)
|
267 |
297 |
* @see
|
... | ... | |
274 |
304 |
throw new InvalidCqpIdException(TXMCoreMessages.bind(CQPSearchEngineCoreMessages.p0IsNotAValidCQPIDForASubcorpusItMustBeAnUppercaseCharacterFollowedByLowercaseCharacters, pID));
|
275 |
305 |
return true;
|
276 |
306 |
}
|
277 |
|
|
|
307 |
|
278 |
308 |
@Override
|
279 |
309 |
public void clean() {
|
280 |
310 |
super.clean();
|
281 |
|
|
|
311 |
|
282 |
312 |
// nothing to do if the corpus has not yet been computed
|
283 |
313 |
if (this.hasBeenComputedOnce()) {
|
284 |
314 |
try {
|
... | ... | |
293 |
323 |
Log.printStackTrace(e);
|
294 |
324 |
}
|
295 |
325 |
}
|
296 |
|
|
|
326 |
|
297 |
327 |
qresult = null;
|
298 |
328 |
selectionResult = null;
|
299 |
329 |
}
|
300 |
|
|
301 |
|
|
|
330 |
|
|
331 |
|
302 |
332 |
@Override
|
303 |
333 |
public List<? extends Match> getMatches() {
|
304 |
334 |
if (qresult == null) { // not computed
|
... | ... | |
312 |
342 |
return new ArrayList<>();
|
313 |
343 |
}
|
314 |
344 |
}
|
315 |
|
|
|
345 |
|
316 |
346 |
public int getNMatch() {
|
317 |
347 |
try {
|
318 |
348 |
return qresult.getNMatch();
|
... | ... | |
323 |
353 |
return 0;
|
324 |
354 |
}
|
325 |
355 |
}
|
326 |
|
|
|
356 |
|
327 |
357 |
// protected void finalize() throws Throwable {
|
328 |
358 |
// try {
|
329 |
359 |
// //
|
... | ... | |
338 |
368 |
// super.finalize();
|
339 |
369 |
// }
|
340 |
370 |
// }
|
341 |
|
|
|
371 |
|
342 |
372 |
/*
|
343 |
373 |
* (non-Javadoc)
|
344 |
374 |
* @see
|
... | ... | |
348 |
378 |
public String getQualifiedCqpId() {
|
349 |
379 |
return getMainCorpus().getCqpId() + ":" + this.pID; //$NON-NLS-1$
|
350 |
380 |
}
|
351 |
|
|
|
381 |
|
352 |
382 |
// /**
|
353 |
383 |
// * Register to parent.
|
354 |
384 |
// *
|
... | ... | |
381 |
411 |
//
|
382 |
412 |
// this._load();
|
383 |
413 |
// }
|
384 |
|
|
|
414 |
|
385 |
415 |
/**
|
386 |
416 |
* Gets the query.
|
387 |
417 |
*
|
... | ... | |
390 |
420 |
public IQuery getQuery() {
|
391 |
421 |
return pQuery;
|
392 |
422 |
}
|
393 |
|
|
|
423 |
|
394 |
424 |
@Override
|
395 |
425 |
public String getResultType() {
|
396 |
426 |
return "Corpus";
|
397 |
427 |
}
|
398 |
|
|
|
428 |
|
399 |
429 |
public SelectionResult getSelectionResult() {
|
400 |
430 |
return selectionResult;
|
401 |
431 |
}
|
402 |
|
|
403 |
|
|
|
432 |
|
|
433 |
|
404 |
434 |
/**
|
405 |
435 |
* Returns the number of occurrences in the subcorpus.
|
406 |
436 |
*
|
... | ... | |
414 |
444 |
@Override
|
415 |
445 |
public int getSize() throws CqiClientException {
|
416 |
446 |
if (!this.hasBeenComputedOnce()) return 0;
|
417 |
|
|
|
447 |
|
418 |
448 |
if (this.size == -1) {
|
419 |
449 |
// Log.finest(TXMCoreMessages.bind(TXMCoreMessages.SUBCORPUS_SIZE, new Object[]{this.pName, "N/A"}));
|
420 |
450 |
long start = System.currentTimeMillis();
|
421 |
|
|
|
451 |
|
422 |
452 |
try {
|
423 |
453 |
AbstractCqiClient cqiClient = CorpusManager.getCorpusManager().getCqiClient();
|
424 |
454 |
String qid = this.getQualifiedCqpId();
|
... | ... | |
434 |
464 |
throw new UnexpectedAnswerException();
|
435 |
465 |
}
|
436 |
466 |
this.size = 0;
|
437 |
|
|
|
467 |
|
438 |
468 |
for (int i = 0; i < match.length; i++) {
|
439 |
469 |
size += matchend[i] - match[i] + 1;
|
440 |
470 |
}
|
... | ... | |
448 |
478 |
}
|
449 |
479 |
return this.size;
|
450 |
480 |
}
|
451 |
|
|
|
481 |
|
452 |
482 |
@Override
|
453 |
483 |
public int[] getStartLimits(String sup) throws IOException,
|
454 |
484 |
CqiServerError, InvalidCqpIdException, CqiClientException {
|
455 |
485 |
return this.getMainCorpus().getStartLimits(sup);
|
456 |
486 |
}
|
457 |
|
|
|
487 |
|
458 |
488 |
/*
|
459 |
489 |
* (non-Javadoc)
|
460 |
490 |
* @see
|
... | ... | |
466 |
496 |
throws CqiClientException {
|
467 |
497 |
return getMainCorpus().getStructuralUnit(name);
|
468 |
498 |
}
|
469 |
|
|
|
499 |
|
470 |
500 |
/*
|
471 |
501 |
* (non-Javadoc)
|
472 |
502 |
* @see org.txm.searchengine.cqp.corpus.Corpus#getStructuralUnits()
|
... | ... | |
475 |
505 |
public List<StructuralUnit> getStructuralUnits() throws CqiClientException {
|
476 |
506 |
return getMainCorpus().getStructuralUnits();
|
477 |
507 |
}
|
478 |
|
|
|
508 |
|
479 |
509 |
public SelectionResult initSelectionResult(Element e) {
|
480 |
|
|
|
510 |
|
481 |
511 |
if (e != null) {
|
482 |
512 |
selectionResult = new SelectionResult();
|
483 |
513 |
NodeList selectionList = e.getElementsByTagName("selection"); //$NON-NLS-1$
|
484 |
514 |
for (int i = 0; i < selectionList.getLength();) {
|
485 |
515 |
Element selection = (Element) selectionList.item(i); // the selection element
|
486 |
|
|
|
516 |
|
487 |
517 |
NodeList textList = selection.getElementsByTagName("selText"); // get text ids //$NON-NLS-1$
|
488 |
518 |
for (int j = 0; j < textList.getLength(); j++) {
|
489 |
519 |
Element text = (Element) textList.item(j);
|
490 |
520 |
selectionResult.add(text.getAttribute("id")); //$NON-NLS-1$
|
491 |
521 |
}
|
492 |
|
|
|
522 |
|
493 |
523 |
NodeList critList = selection.getElementsByTagName("selCrit"); // get the criteria //$NON-NLS-1$
|
494 |
524 |
for (int j = 0; j < critList.getLength(); j++) {
|
495 |
525 |
Element crit = (Element) critList.item(j);
|
... | ... | |
503 |
533 |
}
|
504 |
534 |
return selectionResult;
|
505 |
535 |
}
|
506 |
|
|
|
536 |
|
507 |
537 |
@Override
|
508 |
538 |
public boolean loadParameters() throws Exception {
|
509 |
539 |
String q = this.getStringParameterValue(TXMPreferences.QUERY);
|
510 |
540 |
if (!q.isEmpty()) {
|
511 |
541 |
pQuery = new CQLQuery(q);
|
512 |
542 |
}
|
513 |
|
|
|
543 |
|
514 |
544 |
return true;
|
515 |
545 |
}
|
516 |
|
|
|
546 |
|
517 |
547 |
@Override
|
518 |
548 |
public boolean saveParameters() throws Exception {
|
519 |
549 |
if (pQuery != null) {
|
... | ... | |
521 |
551 |
}
|
522 |
552 |
return true;
|
523 |
553 |
}
|
524 |
|
|
|
554 |
|
525 |
555 |
/**
|
526 |
556 |
* Instantiates a new subcorpus.
|
527 |
557 |
*
|
... | ... | |
534 |
564 |
* {@link CQPCorpus#createSubcorpus(CQLQuery, String)}
|
535 |
565 |
*/
|
536 |
566 |
protected void setParameters(String pID, String name, CQLQuery query) throws InvalidCqpIdException {
|
537 |
|
|
|
567 |
|
538 |
568 |
this.pID = pID;
|
539 |
569 |
this.pQuery = query;
|
540 |
570 |
this.setUserName(name);
|
541 |
571 |
}
|
542 |
|
|
|
572 |
|
543 |
573 |
/**
|
544 |
574 |
*
|
545 |
575 |
* @param cqpId
|
... | ... | |
547 |
577 |
* @param queryResult
|
548 |
578 |
*/
|
549 |
579 |
public void setParameters(String cqpId, String name, QueryResult queryResult) {
|
550 |
|
|
|
580 |
|
551 |
581 |
this.pID = cqpId;
|
552 |
582 |
this.setUserName(name);
|
553 |
583 |
this.pQuery = queryResult.getQuery();
|
554 |
584 |
this.qresult = queryResult;
|
555 |
|
|
|
585 |
|
556 |
586 |
this.setDirty();
|
557 |
587 |
}
|
558 |
|
|
|
588 |
|
559 |
589 |
/**
|
560 |
590 |
* Sets the query to use.
|
561 |
591 |
*
|
... | ... | |
564 |
594 |
public void setQuery(CQLQuery query) {
|
565 |
595 |
this.pQuery = query;
|
566 |
596 |
}
|
567 |
|
|
568 |
|
|
|
597 |
|
|
598 |
|
569 |
599 |
// TODO merge SelectionResult and Subcorpus ?
|
570 |
600 |
public void setSelectionResult(SelectionResult selectionResult) {
|
571 |
601 |
this.selectionResult = selectionResult;
|
... | ... | |
597 |
627 |
// getSelfElement().appendChild(selectionElem);// append selection elem to self
|
598 |
628 |
// }
|
599 |
629 |
}
|
600 |
|
|
|
630 |
|
601 |
631 |
/*
|
602 |
632 |
* (non-Javadoc)
|
603 |
633 |
* @see java.lang.Object#toString()
|
... | ... | |
606 |
636 |
public String toString() {
|
607 |
637 |
return this.getName();
|
608 |
638 |
}
|
609 |
|
|
|
639 |
|
610 |
640 |
@Override
|
611 |
641 |
@Deprecated
|
612 |
642 |
public boolean _toTxt(File outfile, String encoding, String colseparator, String txtseparator) throws Exception {
|
613 |
643 |
return false;
|
614 |
644 |
}
|
615 |
|
|
|
645 |
|
616 |
646 |
@Override
|
617 |
647 |
public void setIsModified(boolean b) {
|
618 |
648 |
// nothing
|
619 |
649 |
}
|
620 |
|
|
|
650 |
|
621 |
651 |
@Override
|
622 |
652 |
public CorpusBuild getRootCorpusBuild() {
|
623 |
653 |
return getMainCorpus();
|
624 |
654 |
}
|
625 |
|
|
|
655 |
|
626 |
656 |
/**
|
627 |
657 |
* Return the CQP START positions of the main corpus texts
|
628 |
658 |
*/
|
... | ... | |
633 |
663 |
}
|
634 |
664 |
return textLimits;
|
635 |
665 |
}
|
636 |
|
|
|
666 |
|
637 |
667 |
/**
|
638 |
668 |
* Return the CQP END positions of the main corpus texts
|
639 |
669 |
*/
|
... | ... | |
648 |
678 |
}
|
649 |
679 |
return textEndLimits;
|
650 |
680 |
}
|
651 |
|
|
|
681 |
|
652 |
682 |
/**
|
653 |
683 |
* Gets the nb texts.
|
654 |
684 |
*
|
... | ... | |
660 |
690 |
@Override
|
661 |
691 |
public int getNbTexts() throws CqiClientException, IOException, CqiServerError {
|
662 |
692 |
if (nbtext == -1) {
|
663 |
|
|
|
693 |
|
664 |
694 |
List<Integer> structsIncorpus = getTextNumberInCorpus();
|
665 |
|
|
|
695 |
|
666 |
696 |
nbtext = structsIncorpus.size();
|
667 |
697 |
}
|
668 |
698 |
return nbtext;
|
669 |
699 |
}
|
670 |
|
|
|
700 |
|
671 |
701 |
private List<Integer> getTextNumberInCorpus() throws CqiClientException, IOException, CqiServerError {
|
672 |
|
|
|
702 |
|
673 |
703 |
StructuralUnit text_su = this.getStructuralUnit("text"); //$NON-NLS-1$
|
674 |
704 |
StructuralUnitProperty text_id_sup = text_su.getProperty("id"); //$NON-NLS-1$
|
675 |
705 |
nbtext = CorpusManager.getCorpusManager().getCqiClient().attributeSize(text_id_sup.getQualifiedName());
|
676 |
|
|
|
706 |
|
677 |
707 |
int[] structs = new int[nbtext];
|
678 |
708 |
int[][] structs_positions = new int[structs.length][2];
|
679 |
709 |
for (int i = 0; i < nbtext; i++) {
|
680 |
710 |
structs[i] = i;
|
681 |
711 |
structs_positions[i] = CorpusManager.getCorpusManager().getCqiClient().struc2Cpos(text_id_sup.getQualifiedName(), i);
|
682 |
712 |
}
|
683 |
|
|
|
713 |
|
684 |
714 |
List<Integer> structsIncorpus = new ArrayList<>();
|
685 |
715 |
// filter structs with matches
|
686 |
716 |
List<? extends Match> matches = this.getMatches();
|
... | ... | |
701 |
731 |
}
|
702 |
732 |
return structsIncorpus;
|
703 |
733 |
}
|
704 |
|
|
|
734 |
|
705 |
735 |
/**
|
706 |
736 |
*
|
707 |
737 |
* @return the text_id values of the CQP corpus ordered by position
|
... | ... | |
711 |
741 |
*/
|
712 |
742 |
@Override
|
713 |
743 |
public String[] getCorpusTextIdsList() throws CqiClientException, IOException, CqiServerError {
|
714 |
|
|
|
744 |
|
715 |
745 |
if (cqpTextIDS == null) {
|
716 |
746 |
StructuralUnit text_su = this.getStructuralUnit("text"); //$NON-NLS-1$
|
717 |
747 |
StructuralUnitProperty text_id_sup = text_su.getProperty("id"); //$NON-NLS-1$
|
718 |
|
|
|
748 |
|
719 |
749 |
List<Integer> list = getTextNumberInCorpus();
|
720 |
750 |
int[] structs = new int[list.size()];
|
721 |
751 |
for (int i = 0; i < list.size(); i++) {
|
722 |
752 |
structs[i] = list.get(i);
|
723 |
753 |
}
|
724 |
|
|
|
754 |
|
725 |
755 |
cqpTextIDS = CorpusManager.getCorpusManager().getCqiClient().struc2Str(text_id_sup.getQualifiedName(), structs);
|
726 |
756 |
}
|
727 |
757 |
return cqpTextIDS;
|
728 |
758 |
}
|
729 |
|
|
|
759 |
|
730 |
760 |
/**
|
731 |
761 |
* Gets the texts ids and order number in corpus.
|
732 |
762 |
*
|
... | ... | |
744 |
774 |
for (int i = 0; i < list.size(); i++) {
|
745 |
775 |
structs[i] = list.get(i);
|
746 |
776 |
}
|
747 |
|
|
|
777 |
|
748 |
778 |
StructuralUnit text_su = this.getStructuralUnit("text"); //$NON-NLS-1$
|
749 |
779 |
StructuralUnitProperty text_id_sup = text_su.getProperty("id"); //$NON-NLS-1$
|
750 |
|
|
|
780 |
|
751 |
781 |
String[] ids = CorpusManager.getCorpusManager().getCqiClient().struc2Str(text_id_sup.getQualifiedName(), structs);
|
752 |
782 |
for (int i = 0; i < ids.length; i++) {
|
753 |
783 |
textids.put(ids[i], structs[i]);
|
... | ... | |
755 |
785 |
}
|
756 |
786 |
return textids;
|
757 |
787 |
}
|
758 |
|
|
|
788 |
|
759 |
789 |
@Override
|
760 |
790 |
public String getName() {
|
761 |
791 |
return this.getCorpusParent().getSimpleName() + TXMPreferences.PARENT_NAME_SEPARATOR + this.getSimpleName();
|
762 |
792 |
}
|
763 |
|
|
|
793 |
|
764 |
794 |
@Override
|
765 |
795 |
public String getDetails() {
|
766 |
796 |
try {
|
... | ... | |
777 |
807 |
|
778 |
808 |
@Override
|
779 |
809 |
public List<?> isBuildValid() {
|
780 |
|
|
|
810 |
|
781 |
811 |
if (this.getParent() != null) return new LinkedList<String>(); // ok
|
782 |
|
|
|
812 |
|
783 |
813 |
return Arrays.asList("No parent corpus result");
|
784 |
814 |
}
|
785 |
815 |
|
786 |
816 |
@Override
|
787 |
817 |
public String isReady() {
|
788 |
|
|
|
818 |
|
789 |
819 |
if (!hasBeenComputedOnce()) return "the subcorpus is not a computed TXMResult";
|
790 |
|
|
|
820 |
|
791 |
821 |
try {
|
792 |
822 |
int s = this.getSize();
|
793 |
823 |
if (s >= 0) return null;
|