Révision 3198
tmp/org.txm.cooccurrence.core/src/org/txm/cooccurrence/core/functions/Cooccurrence.java (revision 3198) | ||
---|---|---|
41 | 41 |
import java.util.Set; |
42 | 42 |
|
43 | 43 |
import org.eclipse.osgi.util.NLS; |
44 |
import org.rosuda.REngine.REXPMismatchException; |
|
45 | 44 |
import org.txm.concordance.core.functions.Concordance; |
46 | 45 |
import org.txm.concordance.core.functions.Line; |
47 | 46 |
import org.txm.cooccurrence.core.functions.comparators.CLineComparator; |
... | ... | |
57 | 56 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
58 | 57 |
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException; |
59 | 58 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
60 |
import org.txm.searchengine.cqp.corpus.CorpusManager; |
|
61 | 59 |
import org.txm.searchengine.cqp.corpus.Property; |
62 | 60 |
import org.txm.searchengine.cqp.corpus.QueryResult; |
63 | 61 |
import org.txm.searchengine.cqp.corpus.StructuralUnit; |
64 |
import org.txm.searchengine.cqp.corpus.StructuralUnitProperty; |
|
65 |
import org.txm.searchengine.cqp.corpus.VirtualProperty; |
|
66 | 62 |
import org.txm.searchengine.cqp.corpus.WordProperty; |
67 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
68 | 63 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery; |
64 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
69 | 65 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
70 | 66 |
import org.txm.specificities.core.statsengine.r.function.SpecificitiesR; |
71 | 67 |
import org.txm.statsengine.core.StatException; |
... | ... | |
76 | 72 |
import org.txm.utils.logger.Log; |
77 | 73 |
|
78 | 74 |
/** |
79 |
* Compute a cooccurrence from a concordance.
|
|
75 |
* Compute a cooccurrence from a pivot query.
|
|
80 | 76 |
* |
81 | 77 |
* @author mdecorde |
82 | 78 |
* |
83 | 79 |
*/ |
84 | 80 |
public class Cooccurrence extends TXMResult { |
85 | 81 |
|
82 |
/** |
|
83 |
* The Class CLine. |
|
84 |
*/ |
|
85 |
public class CLine { |
|
86 |
|
|
87 |
/** The cooc. */ |
|
88 |
Cooccurrence cooc; |
|
89 |
|
|
90 |
/** The distmoyenne. */ |
|
91 |
public float distmoyenne; |
|
92 |
|
|
93 |
/** The freq. */ |
|
94 |
public int freq; |
|
95 |
|
|
96 |
/** The id. */ |
|
97 |
public int id; |
|
98 |
|
|
99 |
/** The mode. */ |
|
100 |
public long mode; |
|
101 |
|
|
102 |
/** The nbocc. */ |
|
103 |
public int nbocc; |
|
104 |
|
|
105 |
/** The occ. */ |
|
106 |
public String occ; |
|
107 |
|
|
108 |
/** The props. */ |
|
109 |
public List<String> props; |
|
110 |
|
|
111 |
/** The score. */ |
|
112 |
public double score; |
|
113 |
|
|
114 |
/** |
|
115 |
* Instantiates a new c line. |
|
116 |
* |
|
117 |
* @param cooc the cooc |
|
118 |
* @param occ the occ |
|
119 |
* @param props the props |
|
120 |
* @param nbocc the nbocc |
|
121 |
* @param freq the freq |
|
122 |
* @param score the score |
|
123 |
* @param distmoyenne the distmoyenne |
|
124 |
* @param mode the mode |
|
125 |
*/ |
|
126 |
public CLine(Cooccurrence cooc, String occ, List<String> props, |
|
127 |
int nbocc, int freq, double score, Float distmoyenne, long mode) { |
|
128 |
this.occ = occ; |
|
129 |
this.props = props; |
|
130 |
this.freq = freq; |
|
131 |
this.nbocc = nbocc; |
|
132 |
this.score = score; |
|
133 |
this.distmoyenne = distmoyenne; |
|
134 |
this.mode = mode; |
|
135 |
this.cooc = cooc; |
|
136 |
} |
|
137 |
|
|
138 |
/** |
|
139 |
* Adds the txt sep. |
|
140 |
* |
|
141 |
* @param str the str |
|
142 |
* @param sep the sep |
|
143 |
* @return the string |
|
144 |
*/ |
|
145 |
private String addTxtSep(String str, String sep) { |
|
146 |
return sep + str.replace(sep, sep + sep) + sep; |
|
147 |
} |
|
148 |
|
|
149 |
/** |
|
150 |
* Gets the cooc. |
|
151 |
* |
|
152 |
* @return the cooc |
|
153 |
*/ |
|
154 |
public Cooccurrence getCooc() { |
|
155 |
return cooc; |
|
156 |
} |
|
157 |
|
|
158 |
/** |
|
159 |
* Resume. |
|
160 |
* |
|
161 |
* @param colseparator the colseparator |
|
162 |
* @param txtseparator the txtseparator |
|
163 |
* @return the string |
|
164 |
*/ |
|
165 |
public String resume(String colseparator, String txtseparator) { |
|
166 |
return addTxtSep("" + occ, txtseparator) //$NON-NLS-1$ |
|
167 |
+ colseparator + freq + colseparator + nbocc + colseparator + score + colseparator + distmoyenne; |
|
168 |
} |
|
169 |
|
|
170 |
/** |
|
171 |
* Sets the count and dist. |
|
172 |
* |
|
173 |
* @param count the count |
|
174 |
* @param dist the dist |
|
175 |
*/ |
|
176 |
public void setCountAndDist(int count, int dist) { |
|
177 |
this.nbocc = count; |
|
178 |
this.distmoyenne = dist; |
|
179 |
} |
|
180 |
|
|
181 |
/** |
|
182 |
* Sets the freq. |
|
183 |
* |
|
184 |
* @param freq the new freq |
|
185 |
*/ |
|
186 |
public void setFreq(int freq) { |
|
187 |
this.freq = freq; |
|
188 |
} |
|
189 |
|
|
190 |
/** |
|
191 |
* Sets the score. |
|
192 |
*/ |
|
193 |
public void setScore() {// FB == freq, R = nbocc |
|
194 |
this.score = (this.nbocc + this.freq + this.distmoyenne); |
|
195 |
} |
|
196 |
|
|
197 |
@Override |
|
198 |
public String toString() { |
|
199 |
return occ + CooccurrenceCoreMessages.fColon + freq + CooccurrenceCoreMessages.occColon + nbocc + CooccurrenceCoreMessages.scoreColon + score + CooccurrenceCoreMessages.meanDistColon |
|
200 |
+ distmoyenne + CooccurrenceCoreMessages.propertiesColon + props; |
|
201 |
} |
|
202 |
} |
|
203 |
|
|
86 | 204 |
/** The nocooc. */ |
87 | 205 |
protected static int nocooc = 1; |
88 | 206 |
|
... | ... | |
92 | 210 |
/** The allsignaturesstr. */ |
93 | 211 |
private HashMap<Integer, String> allsignaturesstr; |
94 | 212 |
|
95 |
/** The anticontextquery. */
|
|
213 |
/** The anti-context query used to trim the nearest contexts cooccurrents. */
|
|
96 | 214 |
private CQLQuery anticontextquery; |
97 | 215 |
|
98 | 216 |
private boolean buildLexicalTableWithCooccurrents; |
... | ... | |
243 | 361 |
super(parametersNodePath); |
244 | 362 |
} |
245 | 363 |
|
246 |
|
|
247 |
|
|
248 | 364 |
@Override |
249 | 365 |
protected boolean _compute(TXMProgressMonitor monitor) throws CqiClientException, IOException, CqiServerError, StatException { |
250 | 366 |
// System.out.println("cooc: "+corpus+" "+query+" "+properties+" "+limit+" "+maxLeft+" "+minLeft+" "+minRight+" "+maxRight+" "+minFreq+" "+minCof+" "+minScore+" "+includeXpivot); |
... | ... | |
307 | 423 |
return true; |
308 | 424 |
} |
309 | 425 |
|
426 |
/** |
|
427 |
* To txt. |
|
428 |
* |
|
429 |
* @param outfile the outfile |
|
430 |
* @param encoding the encoding |
|
431 |
* @param colseparator the colseparator |
|
432 |
* @param txtseparator the txtseparator |
|
433 |
* @return true, if successful |
|
434 |
*/ |
|
310 | 435 |
@Override |
311 |
public boolean loadParameters() throws CqiClientException { |
|
312 |
pProperties = (List<WordProperty>) Property.stringToProperties(getCorpus(), this.getStringParameterValue(TXMPreferences.UNIT_PROPERTIES)); |
|
313 |
pQuery = new CQLQuery(this.getStringParameterValue(TXMPreferences.QUERY)); |
|
314 |
pStructuralUnitLimit = this.getCorpus().getStructuralUnit(this.getStringParameterValue(CooccurrencePreferences.STRUCTURAL_UNIT_LIMIT)); |
|
315 |
return true; |
|
316 |
} |
|
317 |
|
|
318 |
@Override |
|
319 |
public boolean saveParameters() { |
|
320 |
this.saveParameter(TXMPreferences.UNIT_PROPERTIES, Property.propertiesToString(this.pProperties)); |
|
321 |
|
|
322 |
if (pQuery != null) { |
|
323 |
this.saveParameter(TXMPreferences.QUERY, pQuery.getQueryString()); |
|
436 |
public boolean _toTxt(File outfile, String encoding, String colseparator, String txtseparator) { |
|
437 |
try { |
|
438 |
// NK: writer declared as class attribute to perform a clean if the operation is interrupted |
|
439 |
this.writer = new BufferedWriter(new OutputStreamWriter( |
|
440 |
new FileOutputStream(outfile), encoding)); |
|
441 |
// if ("UTF-8".equals(encoding)) writer.write('\ufeff'); // UTF-8 BOM |
|
442 |
toTxt(writer, colseparator, txtseparator); |
|
324 | 443 |
} |
325 |
|
|
326 |
if (pStructuralUnitLimit != null) {
|
|
327 |
this.saveParameter(CooccurrencePreferences.STRUCTURAL_UNIT_LIMIT, this.pStructuralUnitLimit.getName());
|
|
444 |
catch (Exception e) { |
|
445 |
Log.severe(TXMCoreMessages.bind(TXMCoreMessages.error_error, e));
|
|
446 |
return false;
|
|
328 | 447 |
} |
329 |
|
|
330 | 448 |
return true; |
331 | 449 |
} |
332 | 450 |
|
... | ... | |
493 | 611 |
// } |
494 | 612 |
|
495 | 613 |
/** |
496 |
* Gets the corpus.
|
|
614 |
* Creates a CQL query string from the specified lines.
|
|
497 | 615 |
* |
498 |
* @return the corpus |
|
616 |
* @param lines |
|
617 |
* @return the query |
|
499 | 618 |
*/ |
500 |
public CQPCorpus getCorpus() { |
|
501 |
return (CQPCorpus) this.getParent(); |
|
502 |
} |
|
619 |
public String createQuery(List<CLine> lines) { |
|
503 | 620 |
|
504 |
@Override |
|
505 |
public String getDetails() { |
|
506 |
Object[] params = new Object[] { this.getParent(), this.pQuery, this.pProperties, this.pStructuralUnitLimit, (this.pMinLeftContextSize - 1), (this.pMaxLeftContextSize - 1), |
|
507 |
(this.pMinRightContextSize - 1), |
|
508 |
(this.pMaxRightContextSize - 1), this.pFminFilter, this.pFCoocFilter, this.pScoreMinFilter }; |
|
509 |
return NLS.bind(CooccurrenceCoreMessages.info_details, params); |
|
510 |
} |
|
511 |
|
|
512 |
@Override |
|
513 |
public String getName() { |
|
514 |
try { |
|
515 |
return this.getParent().getName() + TXMPreferences.PARENT_NAME_SEPARATOR + this.getSimpleName(); |
|
621 |
if (this.getQuery().isEmpty()) { |
|
622 |
return ""; //$NON-NLS-1$ |
|
516 | 623 |
} |
517 |
catch (Exception e) { |
|
518 |
return this.getSimpleName(); |
|
519 |
} |
|
520 |
} |
|
521 | 624 |
|
522 | 625 |
|
523 |
@Override |
|
524 |
public String getSimpleName() { |
|
525 |
if (this.pQuery != null && !this.pQuery.isEmpty()) { |
|
526 |
StringBuffer output = new StringBuffer(); |
|
527 |
output.append(this.pQuery.asString()); |
|
528 |
output.append(WordProperty.asString(this.pProperties)); |
|
626 |
int nbProps = this.getProperties().size(); |
|
627 |
List<WordProperty> props = this.getProperties(); |
|
529 | 628 |
|
530 |
if (this.pMaxLeftContextSize > 0 && this.pMaxRightContextSize > 0) { |
|
531 |
output.append(" " + (this.pMaxLeftContextSize - 1) + " " + (this.pMaxRightContextSize - 1)); //$NON-NLS-1$ //$NON-NLS-2$ |
|
629 |
String query = "@["; //$NON-NLS-1$ |
|
630 |
for (int p = 0; p < nbProps; p++) { |
|
631 |
|
|
632 |
if (props.get(p) instanceof WordProperty) { |
|
633 |
ArrayList<String> values = new ArrayList<String>(); |
|
634 |
for (int l = 0; l < lines.size(); l++) { |
|
635 |
CLine line = lines.get(l); |
|
636 |
String s = line.props.get(p); |
|
637 |
values.add(s); |
|
638 |
} |
|
639 |
String test = ((WordProperty)props.get(p)).getCQLTest(values); |
|
640 |
if (test != null) { |
|
641 |
query += test; |
|
642 |
} |
|
532 | 643 |
} |
533 |
output.append(TXMCoreMessages.formatMinFilter(this.pFminFilter)); |
|
534 |
output.append(TXMCoreMessages.formatMinFilter(this.pFCoocFilter)); |
|
535 |
output.append(TXMCoreMessages.formatMinFilter(this.pScoreMinFilter)); |
|
644 |
if (p < nbProps-1) { |
|
645 |
query += " & "; //$NON-NLS-1$ |
|
646 |
} |
|
647 |
} |
|
648 |
query += "] "; //$NON-NLS-1$ |
|
536 | 649 |
|
537 |
// TODO: SJ: improve the hiding or display of value according to the default preferences values |
|
538 |
// output.append(TXMCoreMessages.formatMinFilter(this.pFminFilter, CooccurrencePreferences.getInstance().getInt(CooccurrencePreferences.F_MIN) + 1)); |
|
539 |
// output.append(TXMCoreMessages.formatMinFilter(this.pFCoocFilter, CooccurrencePreferences.getInstance().getInt(CooccurrencePreferences.MIN_COUNT) + 1)); |
|
540 |
// output.append(TXMCoreMessages.formatMinFilter(this.pScoreMinFilter, CooccurrencePreferences.getInstance().getDouble(CooccurrencePreferences.MIN_SCORE) + 1)); |
|
650 |
int maxempan = Math.max(this.getMaxLeft(), this.getMaxRight()); |
|
651 |
if (this.getIncludeXPivot() && maxempan == 0) maxempan = 1; |
|
541 | 652 |
|
653 |
String maxempanstr = "within " + maxempan + " "; //$NON-NLS-1$ //$NON-NLS-2$ |
|
654 |
if (this.getStructuralUnitLimit() != null) maxempanstr += this.getStructuralUnitLimit().getName(); |
|
542 | 655 |
|
543 |
return output.toString(); |
|
656 |
|
|
657 |
String pquery = CQLQuery.fixQuery(this.getQuery().getQueryString()); |
|
658 |
if (this.getMaxLeft() == 0) { |
|
659 |
query = "" + pquery + " []* " + query + " " + maxempanstr; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
|
544 | 660 |
} |
661 |
else if (this.getMaxRight() == 0) { |
|
662 |
query = "" + query + " []* " + pquery + " " + maxempanstr; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
|
663 |
} |
|
545 | 664 |
else { |
546 |
return this.getEmptyName();
|
|
665 |
query = "(" + pquery + " []* " + query + ") | (" + query + " []* " + pquery + ") " + maxempanstr; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$
|
|
547 | 666 |
} |
548 |
} |
|
549 | 667 |
|
550 |
|
|
551 |
@Override |
|
552 |
public String getComputingStartMessage() { |
|
553 |
return TXMCoreMessages.bind(CooccurrenceCoreMessages.cooccurrentsOfP0PropertieP1InTheP2Corpus, (this.pQuery != null?this.pQuery.asString():"<no query>"), WordProperty.asString(this.pProperties), |
|
554 |
(this.pMaxLeftContextSize - 1), (this.pMaxRightContextSize - 1), this.pFminFilter, this.pFCoocFilter, this.pScoreMinFilter, this.getCorpus().getName()); |
|
668 |
return query; |
|
555 | 669 |
} |
556 | 670 |
|
557 |
|
|
558 | 671 |
@Override |
559 | 672 |
public String getComputingDoneMessage() { |
560 | 673 |
if (this.lines.isEmpty()) { |
... | ... | |
565 | 678 |
} |
566 | 679 |
} |
567 | 680 |
|
681 |
@Override |
|
682 |
public String getComputingStartMessage() { |
|
683 |
return TXMCoreMessages.bind(CooccurrenceCoreMessages.cooccurrentsOfP0PropertieP1InTheP2Corpus, (this.pQuery != null?this.pQuery.asString():"<no query>"), WordProperty.asString(this.pProperties), |
|
684 |
(this.pMaxLeftContextSize - 1), (this.pMaxRightContextSize - 1), this.pFminFilter, this.pFCoocFilter, this.pScoreMinFilter, this.getCorpus().getName()); |
|
685 |
} |
|
568 | 686 |
|
569 | 687 |
|
570 | 688 |
/** |
689 |
* Gets the corpus. |
|
690 |
* |
|
691 |
* @return the corpus |
|
692 |
*/ |
|
693 |
public CQPCorpus getCorpus() { |
|
694 |
return (CQPCorpus) this.getParent(); |
|
695 |
} |
|
696 |
|
|
697 |
|
|
698 |
@Override |
|
699 |
public String getDetails() { |
|
700 |
Object[] params = new Object[] { this.getParent(), this.pQuery, this.pProperties, this.pStructuralUnitLimit, (this.pMinLeftContextSize - 1), (this.pMaxLeftContextSize - 1), |
|
701 |
(this.pMinRightContextSize - 1), |
|
702 |
(this.pMaxRightContextSize - 1), this.pFminFilter, this.pFCoocFilter, this.pScoreMinFilter }; |
|
703 |
return NLS.bind(CooccurrenceCoreMessages.info_details, params); |
|
704 |
} |
|
705 |
|
|
706 |
|
|
707 |
/** |
|
571 | 708 |
* Gets the fA. |
572 | 709 |
* |
573 | 710 |
* @return the fA |
... | ... | |
625 | 762 |
return pMinRightContextSize; |
626 | 763 |
} |
627 | 764 |
|
765 |
@Override |
|
766 |
public String getName() { |
|
767 |
try { |
|
768 |
return this.getParent().getName() + TXMPreferences.PARENT_NAME_SEPARATOR + this.getSimpleName(); |
|
769 |
} |
|
770 |
catch (Exception e) { |
|
771 |
return this.getSimpleName(); |
|
772 |
} |
|
773 |
} |
|
774 |
|
|
628 | 775 |
/** |
629 | 776 |
* Gets the lines. |
630 | 777 |
* |
... | ... | |
689 | 836 |
} |
690 | 837 |
|
691 | 838 |
|
839 |
@Override |
|
840 |
public String getResultType() { |
|
841 |
return CooccurrenceCoreMessages.RESULT_TYPE; |
|
842 |
} |
|
843 |
|
|
844 |
@Override |
|
845 |
public String getSimpleName() { |
|
846 |
if (this.pQuery != null && !this.pQuery.isEmpty()) { |
|
847 |
StringBuffer output = new StringBuffer(); |
|
848 |
output.append(this.pQuery.asString()); |
|
849 |
output.append(WordProperty.asString(this.pProperties)); |
|
850 |
|
|
851 |
if (this.pMaxLeftContextSize > 0 && this.pMaxRightContextSize > 0) { |
|
852 |
output.append(" " + (this.pMaxLeftContextSize - 1) + " " + (this.pMaxRightContextSize - 1)); //$NON-NLS-1$ //$NON-NLS-2$ |
|
853 |
} |
|
854 |
output.append(TXMCoreMessages.formatMinFilter(this.pFminFilter)); |
|
855 |
output.append(TXMCoreMessages.formatMinFilter(this.pFCoocFilter)); |
|
856 |
output.append(TXMCoreMessages.formatMinFilter(this.pScoreMinFilter)); |
|
857 |
|
|
858 |
// TODO: SJ: improve the hiding or display of value according to the default preferences values |
|
859 |
// output.append(TXMCoreMessages.formatMinFilter(this.pFminFilter, CooccurrencePreferences.getInstance().getInt(CooccurrencePreferences.F_MIN) + 1)); |
|
860 |
// output.append(TXMCoreMessages.formatMinFilter(this.pFCoocFilter, CooccurrencePreferences.getInstance().getInt(CooccurrencePreferences.MIN_COUNT) + 1)); |
|
861 |
// output.append(TXMCoreMessages.formatMinFilter(this.pScoreMinFilter, CooccurrencePreferences.getInstance().getDouble(CooccurrencePreferences.MIN_SCORE) + 1)); |
|
862 |
|
|
863 |
|
|
864 |
return output.toString(); |
|
865 |
} |
|
866 |
else { |
|
867 |
return this.getEmptyName(); |
|
868 |
} |
|
869 |
} |
|
870 |
|
|
692 | 871 |
/** |
693 | 872 |
* Gets the structural unit limit. |
694 | 873 |
* |
... | ... | |
772 | 951 |
return false; |
773 | 952 |
} |
774 | 953 |
|
954 |
@Override |
|
955 |
public boolean loadParameters() throws CqiClientException { |
|
956 |
pProperties = (List<WordProperty>) Property.stringToProperties(getCorpus(), this.getStringParameterValue(TXMPreferences.UNIT_PROPERTIES)); |
|
957 |
pQuery = new CQLQuery(this.getStringParameterValue(TXMPreferences.QUERY)); |
|
958 |
pStructuralUnitLimit = this.getCorpus().getStructuralUnit(this.getStringParameterValue(CooccurrencePreferences.STRUCTURAL_UNIT_LIMIT)); |
|
959 |
return true; |
|
960 |
} |
|
961 |
|
|
775 | 962 |
/** |
776 | 963 |
* Prints the. |
777 | 964 |
*/ |
... | ... | |
784 | 971 |
System.out.println(line.resume("\t", "")); //$NON-NLS-1$ //$NON-NLS-2$ |
785 | 972 |
} |
786 | 973 |
|
974 |
@Override |
|
975 |
public boolean saveParameters() { |
|
976 |
this.saveParameter(TXMPreferences.UNIT_PROPERTIES, Property.propertiesToString(this.pProperties)); |
|
977 |
|
|
978 |
if (pQuery != null) { |
|
979 |
this.saveParameter(TXMPreferences.QUERY, pQuery.getQueryString()); |
|
980 |
} |
|
981 |
|
|
982 |
if (pStructuralUnitLimit != null) { |
|
983 |
this.saveParameter(CooccurrencePreferences.STRUCTURAL_UNIT_LIMIT, this.pStructuralUnitLimit.getName()); |
|
984 |
} |
|
985 |
|
|
986 |
return true; |
|
987 |
} |
|
988 |
|
|
787 | 989 |
public void setCoocQuery(String q) { |
788 | 990 |
pCooccurentQueryFilter = q; |
789 | 991 |
} |
790 | 992 |
|
993 |
public void setIncludeXpivot(boolean b) { |
|
994 |
pIncludeXpivot = b; |
|
995 |
} |
|
996 |
|
|
791 | 997 |
/** |
792 | 998 |
* Sets the max left. |
793 | 999 |
* |
... | ... | |
841 | 1047 |
this.buildLexicalTableWithCooccurrents = buildLexicalTableWithCooccurrents; |
842 | 1048 |
} |
843 | 1049 |
|
844 |
public void setIncludeXpivot(boolean b) { |
|
845 |
pIncludeXpivot = b; |
|
846 |
} |
|
847 |
|
|
848 | 1050 |
@Override |
849 | 1051 |
public boolean setParameters(TXMParameters parameters) { |
850 | 1052 |
try { |
... | ... | |
888 | 1090 |
return true; |
889 | 1091 |
} |
890 | 1092 |
|
1093 |
/** |
|
1094 |
* Sets the query. |
|
1095 |
* |
|
1096 |
* @param query |
|
1097 |
*/ |
|
1098 |
public void setQuery(CQLQuery query) { |
|
1099 |
this.pQuery = query; |
|
1100 |
} |
|
1101 |
|
|
891 | 1102 |
public void setReferenceCorpus(String symbol) { |
892 | 1103 |
referenceCorpus = symbol; |
893 | 1104 |
} |
... | ... | |
1448 | 1659 |
return toTxt(outfile, encoding, "\t", ""); //$NON-NLS-1$ //$NON-NLS-2$ |
1449 | 1660 |
} |
1450 | 1661 |
|
1451 |
/** |
|
1452 |
* To txt. |
|
1453 |
* |
|
1454 |
* @param outfile the outfile |
|
1455 |
* @param encoding the encoding |
|
1456 |
* @param colseparator the colseparator |
|
1457 |
* @param txtseparator the txtseparator |
|
1458 |
* @return true, if successful |
|
1459 |
*/ |
|
1460 |
@Override |
|
1461 |
public boolean _toTxt(File outfile, String encoding, String colseparator, String txtseparator) { |
|
1462 |
try { |
|
1463 |
// NK: writer declared as class attribute to perform a clean if the operation is interrupted |
|
1464 |
this.writer = new BufferedWriter(new OutputStreamWriter( |
|
1465 |
new FileOutputStream(outfile), encoding)); |
|
1466 |
// if ("UTF-8".equals(encoding)) writer.write('\ufeff'); // UTF-8 BOM |
|
1467 |
toTxt(writer, colseparator, txtseparator); |
|
1468 |
} |
|
1469 |
catch (Exception e) { |
|
1470 |
Log.severe(TXMCoreMessages.bind(TXMCoreMessages.error_error, e)); |
|
1471 |
return false; |
|
1472 |
} |
|
1473 |
return true; |
|
1474 |
} |
|
1475 | 1662 |
|
1476 | 1663 |
/** |
1477 | 1664 |
* To txt. |
... | ... | |
1502 | 1689 |
} |
1503 | 1690 |
|
1504 | 1691 |
|
1505 |
/** |
|
1506 |
* Sets the query. |
|
1507 |
* |
|
1508 |
* @param query |
|
1509 |
*/ |
|
1510 |
public void setQuery(CQLQuery query) { |
|
1511 |
this.pQuery = query; |
|
1512 |
} |
|
1513 |
|
|
1514 |
/** |
|
1515 |
* Creates a CQL query string from the specified lines. |
|
1516 |
* |
|
1517 |
* @param lines |
|
1518 |
* @return the query |
|
1519 |
*/ |
|
1520 |
public String createQuery(List<CLine> lines) { |
|
1521 |
|
|
1522 |
if (this.getQuery().isEmpty()) { |
|
1523 |
return ""; //$NON-NLS-1$ |
|
1524 |
} |
|
1525 |
|
|
1526 |
|
|
1527 |
int nbProps = this.getProperties().size(); |
|
1528 |
List<WordProperty> props = this.getProperties(); |
|
1529 |
|
|
1530 |
String query = "@["; //$NON-NLS-1$ |
|
1531 |
for (int p = 0; p < nbProps; p++) { |
|
1532 |
|
|
1533 |
if (props.get(p) instanceof WordProperty) { |
|
1534 |
ArrayList<String> values = new ArrayList<String>(); |
|
1535 |
for (int l = 0; l < lines.size(); l++) { |
|
1536 |
CLine line = lines.get(l); |
|
1537 |
String s = line.props.get(p); |
|
1538 |
values.add(s); |
|
1539 |
} |
|
1540 |
String test = ((WordProperty)props.get(p)).getCQLTest(values); |
|
1541 |
if (test != null) { |
|
1542 |
query += test; |
|
1543 |
} |
|
1544 |
} |
|
1545 |
if (p < nbProps-1) { |
|
1546 |
query += " & "; //$NON-NLS-1$ |
|
1547 |
} |
|
1548 |
} |
|
1549 |
query += "] "; //$NON-NLS-1$ |
|
1550 |
|
|
1551 |
int maxempan = Math.max(this.getMaxLeft(), this.getMaxRight()); |
|
1552 |
if (this.getIncludeXPivot() && maxempan == 0) maxempan = 1; |
|
1553 |
|
|
1554 |
String maxempanstr = "within " + maxempan + " "; //$NON-NLS-1$ //$NON-NLS-2$ |
|
1555 |
if (this.getStructuralUnitLimit() != null) maxempanstr += this.getStructuralUnitLimit().getName(); |
|
1556 |
|
|
1557 |
|
|
1558 |
String pquery = CQLQuery.fixQuery(this.getQuery().getQueryString()); |
|
1559 |
if (this.getMaxLeft() == 0) { |
|
1560 |
query = "" + pquery + " []* " + query + " " + maxempanstr; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
|
1561 |
} |
|
1562 |
else if (this.getMaxRight() == 0) { |
|
1563 |
query = "" + query + " []* " + pquery + " " + maxempanstr; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
|
1564 |
} |
|
1565 |
else { |
|
1566 |
query = "(" + pquery + " []* " + query + ") | (" + query + " []* " + pquery + ") " + maxempanstr; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ |
|
1567 |
} |
|
1568 |
|
|
1569 |
return query; |
|
1570 |
} |
|
1571 |
|
|
1572 |
|
|
1573 |
/** |
|
1574 |
* The Class CLine. |
|
1575 |
*/ |
|
1576 |
public class CLine { |
|
1577 |
|
|
1578 |
/** The cooc. */ |
|
1579 |
Cooccurrence cooc; |
|
1580 |
|
|
1581 |
/** The distmoyenne. */ |
|
1582 |
public float distmoyenne; |
|
1583 |
|
|
1584 |
/** The freq. */ |
|
1585 |
public int freq; |
|
1586 |
|
|
1587 |
/** The id. */ |
|
1588 |
public int id; |
|
1589 |
|
|
1590 |
/** The mode. */ |
|
1591 |
public long mode; |
|
1592 |
|
|
1593 |
/** The nbocc. */ |
|
1594 |
public int nbocc; |
|
1595 |
|
|
1596 |
/** The occ. */ |
|
1597 |
public String occ; |
|
1598 |
|
|
1599 |
/** The props. */ |
|
1600 |
public List<String> props; |
|
1601 |
|
|
1602 |
/** The score. */ |
|
1603 |
public double score; |
|
1604 |
|
|
1605 |
/** |
|
1606 |
* Instantiates a new c line. |
|
1607 |
* |
|
1608 |
* @param cooc the cooc |
|
1609 |
* @param occ the occ |
|
1610 |
* @param props the props |
|
1611 |
* @param nbocc the nbocc |
|
1612 |
* @param freq the freq |
|
1613 |
* @param score the score |
|
1614 |
* @param distmoyenne the distmoyenne |
|
1615 |
* @param mode the mode |
|
1616 |
*/ |
|
1617 |
public CLine(Cooccurrence cooc, String occ, List<String> props, |
|
1618 |
int nbocc, int freq, double score, Float distmoyenne, long mode) { |
|
1619 |
this.occ = occ; |
|
1620 |
this.props = props; |
|
1621 |
this.freq = freq; |
|
1622 |
this.nbocc = nbocc; |
|
1623 |
this.score = score; |
|
1624 |
this.distmoyenne = distmoyenne; |
|
1625 |
this.mode = mode; |
|
1626 |
this.cooc = cooc; |
|
1627 |
} |
|
1628 |
|
|
1629 |
/** |
|
1630 |
* Adds the txt sep. |
|
1631 |
* |
|
1632 |
* @param str the str |
|
1633 |
* @param sep the sep |
|
1634 |
* @return the string |
|
1635 |
*/ |
|
1636 |
private String addTxtSep(String str, String sep) { |
|
1637 |
return sep + str.replace(sep, sep + sep) + sep; |
|
1638 |
} |
|
1639 |
|
|
1640 |
/** |
|
1641 |
* Gets the cooc. |
|
1642 |
* |
|
1643 |
* @return the cooc |
|
1644 |
*/ |
|
1645 |
public Cooccurrence getCooc() { |
|
1646 |
return cooc; |
|
1647 |
} |
|
1648 |
|
|
1649 |
/** |
|
1650 |
* Resume. |
|
1651 |
* |
|
1652 |
* @param colseparator the colseparator |
|
1653 |
* @param txtseparator the txtseparator |
|
1654 |
* @return the string |
|
1655 |
*/ |
|
1656 |
public String resume(String colseparator, String txtseparator) { |
|
1657 |
return addTxtSep("" + occ, txtseparator) //$NON-NLS-1$ |
|
1658 |
+ colseparator + freq + colseparator + nbocc + colseparator + score + colseparator + distmoyenne; |
|
1659 |
} |
|
1660 |
|
|
1661 |
/** |
|
1662 |
* Sets the count and dist. |
|
1663 |
* |
|
1664 |
* @param count the count |
|
1665 |
* @param dist the dist |
|
1666 |
*/ |
|
1667 |
public void setCountAndDist(int count, int dist) { |
|
1668 |
this.nbocc = count; |
|
1669 |
this.distmoyenne = dist; |
|
1670 |
} |
|
1671 |
|
|
1672 |
/** |
|
1673 |
* Sets the freq. |
|
1674 |
* |
|
1675 |
* @param freq the new freq |
|
1676 |
*/ |
|
1677 |
public void setFreq(int freq) { |
|
1678 |
this.freq = freq; |
|
1679 |
} |
|
1680 |
|
|
1681 |
/** |
|
1682 |
* Sets the score. |
|
1683 |
*/ |
|
1684 |
public void setScore() {// FB == freq, R = nbocc |
|
1685 |
this.score = (this.nbocc + this.freq + this.distmoyenne); |
|
1686 |
} |
|
1687 |
|
|
1688 |
@Override |
|
1689 |
public String toString() { |
|
1690 |
return occ + CooccurrenceCoreMessages.fColon + freq + CooccurrenceCoreMessages.occColon + nbocc + CooccurrenceCoreMessages.scoreColon + score + CooccurrenceCoreMessages.meanDistColon |
|
1691 |
+ distmoyenne + CooccurrenceCoreMessages.propertiesColon + props; |
|
1692 |
} |
|
1693 |
} |
|
1694 |
|
|
1695 |
|
|
1696 |
@Override |
|
1697 |
public String getResultType() { |
|
1698 |
return CooccurrenceCoreMessages.RESULT_TYPE; |
|
1699 |
} |
|
1700 |
|
|
1701 |
|
|
1702 | 1692 |
} |
Formats disponibles : Unified diff