81 |
81 |
*/
|
82 |
82 |
public class Concordance extends TXMResult {
|
83 |
83 |
|
|
84 |
/** Target strategy name "Select". NOTE(review): in the currently commented-out target handling, this strategy would use the match target position as both keyword start and keyword end - confirm before relying on it. */
public static final String TARGET_SELECT = "Select";
|
|
85 |
/** Target strategy name "Show". NOTE(review): in the currently commented-out target handling, this strategy would keep the keyword spanning from match start to match end - confirm before relying on it. */
public static final String TARGET_SHOW = "Show";
|
|
86 |
/** Target strategy name "Keep left". NOTE(review): in the currently commented-out target handling, this strategy would make the keyword span from match start to the match target position - confirm before relying on it. */
public static final String TARGET_KEEPLEFT = "Keep left";
|
|
87 |
/** Target strategy name "Keep right". NOTE(review): in the currently commented-out target handling, this strategy would make the keyword span from the match target position to match end - confirm before relying on it. */
public static final String TARGET_KEEPRIGHT = "Keep right";
|
|
88 |
|
84 |
89 |
/** The noconc. */
|
85 |
90 |
protected static int noconc = 1;;
|
86 |
91 |
|
... | ... | |
170 |
175 |
@Parameter(key = ConcordancePreferences.RIGHT_VIEW_PROPERTIES)
|
171 |
176 |
protected List<WordProperty> pViewRightProperties;
|
172 |
177 |
|
|
178 |
/** The target strategy; presumably one of the TARGET_* constants (to be confirmed). */
|
|
179 |
@Parameter(key = ConcordancePreferences.TARGET_STRATEGY)
|
|
180 |
protected String pTargetStrategy;
|
|
181 |
|
173 |
182 |
/**
|
174 |
183 |
* Optional parameter: a query result that is already resolved. If set, the pQuery is optional.
|
175 |
184 |
*/
|
... | ... | |
331 |
340 |
}
|
332 |
341 |
|
333 |
342 |
this.pTopIndex = 0;
|
|
343 |
|
334 |
344 |
this.nLines = queryResult.getNMatch();
|
335 |
345 |
this.lines = new ArrayList<>(Collections.nCopies(nLines, (Line) null));// lines are lazily fetched; we force an
|
336 |
346 |
|
... | ... | |
391 |
401 |
+ ", rightcontext=" + this.pRightContextSize //$NON-NLS-1$
|
392 |
402 |
// + ", query=\""+this.query.getQueryString()+"\""
|
393 |
403 |
+ ")"); //$NON-NLS-1$
|
394 |
|
|
|
404 |
|
395 |
405 |
return symbol;
|
396 |
406 |
}
|
397 |
407 |
|
... | ... | |
455 |
465 |
buf.append("Concordance:\n"); //$NON-NLS-1$
|
456 |
466 |
buf.append("\tCorpus: " + this.getCorpus() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
457 |
467 |
buf
|
458 |
|
.append("\tQuery: " + this.getQuery() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
|
468 |
.append("\tQuery: " + this.getQuery() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
459 |
469 |
buf
|
460 |
|
.append("\tLeft Context View property: " + this.getLeftViewProperties() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
|
470 |
.append("\tLeft Context View property: " + this.getLeftViewProperties() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
461 |
471 |
buf
|
462 |
|
.append("\tKeyword View property: " + this.getKeywordViewProperties() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
|
472 |
.append("\tKeyword View property: " + this.getKeywordViewProperties() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
463 |
473 |
buf
|
464 |
|
.append("\tRight Context View property: " + this.getRightViewProperties() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
|
474 |
.append("\tRight Context View property: " + this.getRightViewProperties() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
465 |
475 |
buf
|
466 |
|
.append("\tLeft Context Sort property: " + this.getLeftAnalysisProperties() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
|
476 |
.append("\tLeft Context Sort property: " + this.getLeftAnalysisProperties() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
467 |
477 |
buf
|
468 |
|
.append("\tKeyword Sort property: " + this.getKeywordAnalysisProperties() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
|
478 |
.append("\tKeyword Sort property: " + this.getKeywordAnalysisProperties() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
469 |
479 |
buf
|
470 |
|
.append("\tRight Context Sort property: " + this.getRightAnalysisProperties() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
|
480 |
.append("\tRight Context Sort property: " + this.getRightAnalysisProperties() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
471 |
481 |
buf.append("\tReference View Pattern: " + this.getRefViewPattern() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
472 |
482 |
buf.append("\tReference Sort Pattern: " + this.getRefAnalysePattern() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
473 |
483 |
return buf.toString();
|
... | ... | |
593 |
603 |
if (to >= nLines) to = nLines - 1;
|
594 |
604 |
|
595 |
605 |
List<? extends Match> matches = queryResult.getMatches(from, to);
|
|
606 |
// boolean hasTarget = false;
|
|
607 |
// for (Match m : matches) {
|
|
608 |
// if (m.getTarget() >= 0) {
|
|
609 |
// hasTarget = true;
|
|
610 |
// break;
|
|
611 |
// }
|
|
612 |
// }
|
596 |
613 |
|
597 |
614 |
// System.out.println("cqllimit="+cql_limit);
|
598 |
615 |
// System.out.println("CQLLimitStarts="+CQLLimitStarts);
|
... | ... | |
638 |
655 |
List<Integer> lengthOfLeftCtx = new ArrayList<>(j - i);
|
639 |
656 |
|
640 |
657 |
// get the first token and the last token of all lines
|
641 |
|
int currentKeywordPos;// = matches.get(0).getStart();
|
|
658 |
int currentKeywordPos, currentKeywordEndPos;// = matches.get(0).getStart();
|
642 |
659 |
// currentText = previousPivotText;
|
643 |
660 |
|
644 |
|
ArrayList<Match> subsetMatch = new ArrayList<>();
|
|
661 |
//ArrayList<Match> subsetMatch = new ArrayList<>();
|
645 |
662 |
|
646 |
663 |
for (int k = 0; k < j - i; k++) {
|
647 |
664 |
Match match = matches.get(i - from + k);
|
648 |
|
subsetMatch.add(match);
|
|
665 |
//subsetMatch.add(match);
|
649 |
666 |
// System.out.println("match: "+match);
|
650 |
667 |
// 1) find current text using keyword position
|
651 |
668 |
currentKeywordPos = match.getStart();
|
|
669 |
currentKeywordEndPos = match.getEnd();
|
652 |
670 |
|
|
671 |
// if (hasTarget) {
|
|
672 |
// if (Concordance.TARGET_SELECT.equals(pTargetStrategy)) {
|
|
673 |
// currentKeywordPos = match.getTarget();
|
|
674 |
// currentKeywordEndPos = match.getTarget();
|
|
675 |
// } else if (Concordance.TARGET_KEEPLEFT.equals(pTargetStrategy)) {
|
|
676 |
// currentKeywordPos = match.getStart();
|
|
677 |
// currentKeywordEndPos = match.getTarget();
|
|
678 |
// } else if (Concordance.TARGET_KEEPRIGHT.equals(pTargetStrategy)) {
|
|
679 |
// currentKeywordPos = match.getTarget();
|
|
680 |
// currentKeywordEndPos = match.getEnd();
|
|
681 |
// } else if (Concordance.TARGET_SHOW.equals(pTargetStrategy)) {
|
|
682 |
// currentKeywordPos = match.getStart();
|
|
683 |
// currentKeywordEndPos = match.getEnd();
|
|
684 |
// }
|
|
685 |
// }
|
|
686 |
|
653 |
687 |
if (CQLLimitStarts != null && currentCQLLimit < CQLLimitStarts.length) {
|
654 |
688 |
while (currentCQLLimit < CQLLimitStarts.length && CQLLimitStarts[currentCQLLimit] <= currentKeywordPos) { // find the match's text
|
655 |
689 |
currentCQLLimit++; // matches are ordered
|
... | ... | |
669 |
703 |
}
|
670 |
704 |
}
|
671 |
705 |
beginingOfLeftCtxPositions.add(ctxPosition);
|
672 |
|
lengthOfLeftCtx.add(match.getStart() - ctxPosition); // distance between left context start position and keyword position
|
|
706 |
lengthOfLeftCtx.add(currentKeywordPos - ctxPosition); // distance between left context start position and keyword position
|
673 |
707 |
// System.out.println("left: "+ctxPosition+ "len: "+(match.getStart() - ctxPosition));
|
674 |
708 |
|
675 |
709 |
// KEYWORD LIMITS
|
676 |
|
beginingOfKeywordsPositions.add(match.getStart());
|
677 |
|
lengthOfKeywords.add(match.getEnd() - match.getStart() + 1);
|
|
710 |
beginingOfKeywordsPositions.add(currentKeywordPos);
|
|
711 |
lengthOfKeywords.add(currentKeywordEndPos - currentKeywordPos + 1);
|
678 |
712 |
|
679 |
713 |
// TARGET POSITION
|
680 |
|
targetPositions.add(match.getTarget());
|
681 |
714 |
|
|
715 |
|
|
716 |
// if (Concordance.TARGET_SELECT.equals(pTargetStrategy) || Concordance.TARGET_KEEPLEFT.equals(pTargetStrategy) || Concordance.TARGET_KEEPRIGHT.equals(pTargetStrategy)) {
|
|
717 |
// targetPositions.add(match.getTarget());
|
|
718 |
// } else if (Concordance.TARGET_SHOW.equals(pTargetStrategy)) {
|
|
719 |
targetPositions.add(match.getTarget());
|
|
720 |
// }
|
|
721 |
|
682 |
722 |
// check if the end of the keyword passes a text limit
|
683 |
|
currentKeywordPos = match.getEnd();
|
|
723 |
currentKeywordPos = currentKeywordEndPos;
|
684 |
724 |
|
685 |
725 |
if (CQLLimitStarts != null)
|
686 |
726 |
if (currentCQLLimit < CQLLimitStarts.length) {
|
687 |
|
while (currentCQLLimit < CQLLimitStarts.length && CQLLimitStarts[currentCQLLimit] <= currentKeywordPos) { // find the match's text
|
688 |
|
currentCQLLimit++; // matches are ordered
|
|
727 |
while (currentCQLLimit < CQLLimitStarts.length && CQLLimitStarts[currentCQLLimit] <= currentKeywordPos) { // find the match's text
|
|
728 |
currentCQLLimit++; // matches are ordered
|
|
729 |
}
|
|
730 |
if (currentCQLLimit > 0) {
|
|
731 |
currentCQLLimit--; // get the previous text
|
|
732 |
}
|
689 |
733 |
}
|
690 |
|
if (currentCQLLimit > 0)
|
691 |
|
currentCQLLimit--; // get the previous text
|
692 |
|
}
|
693 |
734 |
// find the right limit
|
694 |
|
ctxPosition = match.getEnd() + pRightContextSize;
|
|
735 |
ctxPosition = currentKeywordEndPos + pRightContextSize;
|
695 |
736 |
|
696 |
737 |
if (CQLLimitStarts != null)
|
697 |
738 |
if (currentCQLLimit + 1 < CQLLimitStarts.length) {
|
698 |
|
if (CQLLimitStarts[currentCQLLimit + 1] < ctxPosition) { // test if the right context last position is in the same text as the pivot
|
699 |
|
ctxPosition = CQLLimitStarts[currentCQLLimit + 1] - 1; // the context end is the next text start position
|
|
739 |
if (CQLLimitStarts[currentCQLLimit + 1] < ctxPosition) { // test if the right context last position is in the same text as the pivot
|
|
740 |
ctxPosition = CQLLimitStarts[currentCQLLimit + 1] - 1; // the context end is the next text start position
|
|
741 |
}
|
700 |
742 |
}
|
701 |
|
}
|
702 |
|
|
|
743 |
|
703 |
744 |
// if (ctxPosition > corpus.getSize()) ctxPosition = corpus.getSize();
|
704 |
745 |
|
705 |
|
beginingOfRightCtxPositions.add(match.getEnd() + 1);
|
706 |
|
lengthOfRightCtx.add(ctxPosition - match.getEnd());
|
|
746 |
beginingOfRightCtxPositions.add(currentKeywordEndPos + 1);
|
|
747 |
lengthOfRightCtx.add(ctxPosition - currentKeywordEndPos);
|
707 |
748 |
// System.out.println("right: "+ctxPosition+" len: "+(ctxPosition - match.getEnd() + 1)+"\n");
|
708 |
749 |
}
|
709 |
750 |
|
... | ... | |
717 |
758 |
lengthOfLeftCtx));
|
718 |
759 |
}
|
719 |
760 |
for (Property property : pViewRightProperties) {
|
720 |
|
rightCtxViewPropValues.put(property, cqiClient.getData(property, beginingOfRightCtxPositions,
|
721 |
|
lengthOfRightCtx));
|
|
761 |
rightCtxViewPropValues.put(property, cqiClient.getData(property, beginingOfRightCtxPositions, lengthOfRightCtx));
|
722 |
762 |
}
|
723 |
763 |
for (Property property : pViewKeywordProperties) {
|
724 |
|
keywordsViewPropValues.put(property, cqiClient.getData(property, beginingOfKeywordsPositions,
|
725 |
|
lengthOfKeywords));
|
|
764 |
keywordsViewPropValues.put(property, cqiClient.getData(property, beginingOfKeywordsPositions, lengthOfKeywords));
|
726 |
765 |
}
|
727 |
766 |
|
728 |
767 |
// get all analysis property values of all lines
|
... | ... | |
764 |
803 |
/** The occurrences Ids&Text => BackToText */
|
765 |
804 |
keywordsIdValues = cqiClient.getData(id, beginingOfKeywordsPositions, lengthOfKeywords);
|
766 |
805 |
keywordsTextValues = cqiClient.getData(text_id, beginingOfKeywordsPositions,
|
767 |
|
Collections.nCopies(beginingOfKeywordsPositions
|
768 |
|
.size(), 1));
|
|
806 |
Collections.nCopies(beginingOfKeywordsPositions.size(), 1));
|
769 |
807 |
}
|
770 |
808 |
else {
|
771 |
809 |
keywordsTextValues = null;
|
... | ... | |
798 |
836 |
Map<Property, List<String>> lineKeywordsViewPropValue = new HashMap<>();
|
799 |
837 |
Map<Property, List<String>> lineRightCtxViewPropValue = new HashMap<>();
|
800 |
838 |
for (Property property : pViewLeftProperties) {
|
801 |
|
lineLeftCtxViewPropValue.put(property,
|
802 |
|
leftCtxViewPropValues.get(property).get(k));
|
|
839 |
lineLeftCtxViewPropValue.put(property, leftCtxViewPropValues.get(property).get(k));
|
803 |
840 |
}
|
804 |
841 |
for (Property property : pViewRightProperties) {
|
805 |
|
lineRightCtxViewPropValue.put(property,
|
806 |
|
rightCtxViewPropValues.get(property).get(k));
|
|
842 |
lineRightCtxViewPropValue.put(property, rightCtxViewPropValues.get(property).get(k));
|
807 |
843 |
}
|
808 |
844 |
for (Property property : pViewKeywordProperties) {
|
809 |
|
lineKeywordsViewPropValue.put(property,
|
810 |
|
keywordsViewPropValues.get(property).get(k));
|
|
845 |
lineKeywordsViewPropValue.put(property, keywordsViewPropValues.get(property).get(k));
|
811 |
846 |
}
|
812 |
847 |
|
813 |
848 |
// get the kth line reference values
|
... | ... | |
836 |
871 |
|
837 |
872 |
int targetpos = targetPositions.get(k);
|
838 |
873 |
|
839 |
|
|
840 |
874 |
Match match = matches.get(i - from + k);
|
841 |
875 |
|
842 |
876 |
Line line = new Line(this, wordids, TextId,
|
... | ... | |
1647 |
1681 |
keyColSeparator = "";
|
1648 |
1682 |
writer.write(
|
1649 |
1683 |
txtseparator + ConcordanceCoreMessages.reference + txtseparator
|
1650 |
|
+ colseparator + txtseparator + ConcordanceCoreMessages.context + txtseparator
|
1651 |
|
+ "\n"); //$NON-NLS-1$
|
|
1684 |
+ colseparator + txtseparator + ConcordanceCoreMessages.context + txtseparator
|
|
1685 |
+ "\n"); //$NON-NLS-1$
|
1652 |
1686 |
}
|
1653 |
1687 |
else {
|
1654 |
1688 |
writer.write(
|
1655 |
1689 |
addTxtSep(ConcordanceCoreMessages.reference, txtseparator) + colseparator
|
1656 |
|
+ addTxtSep(ConcordanceCoreMessages.leftContext, txtseparator) + colseparator
|
1657 |
|
+ addTxtSep(ConcordanceCoreMessages.keyword, txtseparator) + colseparator
|
1658 |
|
+ addTxtSep(ConcordanceCoreMessages.rightContext, txtseparator)
|
1659 |
|
+ "\n"); //$NON-NLS-1$
|
|
1690 |
+ addTxtSep(ConcordanceCoreMessages.leftContext, txtseparator) + colseparator
|
|
1691 |
+ addTxtSep(ConcordanceCoreMessages.keyword, txtseparator) + colseparator
|
|
1692 |
+ addTxtSep(ConcordanceCoreMessages.rightContext, txtseparator)
|
|
1693 |
+ "\n"); //$NON-NLS-1$
|
1660 |
1694 |
}
|
1661 |
1695 |
|
1662 |
1696 |
// write lines
|
... | ... | |
1671 |
1705 |
for (Line line : sublines) {
|
1672 |
1706 |
writer.write(
|
1673 |
1707 |
addTxtSep(line.getViewRef().toString(), txtseparator)
|
1674 |
|
+ colseparator + addTxtSep(line.leftContextToString(), txtseparator)
|
1675 |
|
+ keyColSeparator + addTxtSep(line.keywordToString(), txtseparator)
|
1676 |
|
+ keyColSeparator + addTxtSep(line.rightContextToString(), txtseparator)
|
1677 |
|
+ "\n"); //$NON-NLS-1$
|
|
1708 |
+ colseparator + addTxtSep(line.leftContextToString(), txtseparator)
|
|
1709 |
+ keyColSeparator + addTxtSep(line.keywordToString(), txtseparator)
|
|
1710 |
+ keyColSeparator + addTxtSep(line.rightContextToString(), txtseparator)
|
|
1711 |
+ "\n"); //$NON-NLS-1$
|
1678 |
1712 |
writer.flush();
|
1679 |
1713 |
}
|
1680 |
1714 |
// println "Lines1: "+lines
|