34 |
34 |
import java.io.IOException;
|
35 |
35 |
import java.io.OutputStreamWriter;
|
36 |
36 |
import java.io.UnsupportedEncodingException;
|
37 |
|
import java.util.ArrayList;
|
38 |
37 |
import java.util.Arrays;
|
39 |
|
import java.util.HashSet;
|
40 |
38 |
import java.util.List;
|
41 |
|
import java.util.Set;
|
42 |
39 |
|
43 |
40 |
import org.rosuda.REngine.REXPMismatchException;
|
44 |
41 |
import org.rosuda.REngine.Rserve.RserveException;
|
... | ... | |
48 |
45 |
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
|
49 |
46 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
|
50 |
47 |
import org.txm.searchengine.cqp.corpus.Corpus;
|
51 |
|
import org.txm.searchengine.cqp.corpus.Part;
|
52 |
48 |
import org.txm.searchengine.cqp.corpus.Partition;
|
53 |
49 |
import org.txm.searchengine.cqp.corpus.Property;
|
54 |
50 |
import org.txm.searchengine.cqp.corpus.Subcorpus;
|
... | ... | |
132 |
128 |
*/
|
133 |
129 |
protected double[][] selectedSpecificitiesIndex;
|
134 |
130 |
|
|
131 |
|
|
132 |
|
|
133 |
|
|
134 |
/**
 * Builds the specificities result of a partition for a given property.
 *
 * The lexical table is obtained from
 * {@code LexicalTableFactory.getLexicalTable(partition, analysisProperty, Fmin)}
 * and handed, together with {@code maxScore}, to
 * {@link #SpecificitesResult(LexicalTable, int)}, which performs the
 * actual computation.
 *
 * @param partition the partition; it must hold at least two parts
 * @param analysisProperty the property given to the lexical table factory
 * @param Fmin the threshold given to the lexical table factory
 *            (presumably a minimum frequency -- to be confirmed)
 * @param maxScore forwarded unchanged to the delegated constructor
 * @throws CqiClientException the cqi client exception
 * @throws StatException the stat exception
 * @throws IllegalArgumentException if the partition holds fewer than two parts
 */
public SpecificitesResult(Partition partition, Property analysisProperty, int Fmin, int maxScore)
        throws CqiClientException, StatException {
    // The delegation has to be the first statement of a constructor, so the
    // number of parts can only be verified once it has completed.
    this(LexicalTableFactory.getLexicalTable(partition, analysisProperty, Fmin), maxScore);

    if (partition.getParts().size() >= 2) {
        return;
    }
    throw new IllegalArgumentException(SpecificitiesCoreMessages.ComputeError_NEED_AT_LEAST_2_PARTS);
}
|
|
157 |
|
136 |
158 |
/**
|
|
159 |
* Specificites.
|
|
160 |
*
|
|
161 |
* @param table the table
|
|
162 |
* @return the specificites result
|
|
163 |
* @throws CqiClientException the cqi client exception
|
|
164 |
* @throws StatException the stat exception
|
|
165 |
*/
|
|
166 |
public SpecificitesResult(LexicalTable table, int maxScore) throws CqiClientException, StatException {
|
|
167 |
if (table.getNColumns() < 2) {
|
|
168 |
throw new IllegalArgumentException(SpecificitiesCoreMessages.ComputeError_NEED_AT_LEAST_2_PARTS);
|
|
169 |
}
|
|
170 |
|
|
171 |
SpecificitiesImpl si = new SpecificitiesImpl(table.getData());
|
|
172 |
|
|
173 |
double[][] specIndex = si.getScores();
|
|
174 |
|
|
175 |
if (table.getPartition() != null) {
|
|
176 |
// System.out.println("table="+table);
|
|
177 |
// System.out.println("rows="+table.getRowNames());
|
|
178 |
// System.out.println("cols="+table.getColNames());
|
|
179 |
// System.out.println("prop="+table.getProperty());
|
|
180 |
init(symbol, specIndex, table, Arrays
|
|
181 |
.asList(table.getRowNames().asStringsArray()), Arrays.asList(table.getColNames().asStringsArray()), table.getProperty().getName(), maxScore);
|
|
182 |
} else {
|
|
183 |
init(si.getSymbol(), specIndex, table, Arrays
|
|
184 |
.asList(table.getRowNames().asStringsArray()), Arrays.asList(table.getColNames().asStringsArray()),
|
|
185 |
"TLNONAME: " + table.getProperty().getName(), maxScore); //$NON-NLS-1$
|
|
186 |
}
|
|
187 |
}
|
|
188 |
|
|
189 |
/**
|
|
190 |
* Specificites.
|
|
191 |
*
|
|
192 |
* @param corpus the corpus
|
|
193 |
* @param subcorpus the subcorpus
|
|
194 |
* @param property the property
|
|
195 |
* @return the specificites result
|
|
196 |
* @throws CqiClientException the cqi client exception
|
|
197 |
* @throws StatException the stat exception
|
|
198 |
* @throws REXPMismatchException
|
|
199 |
* @throws RserveException
|
|
200 |
*/
|
|
201 |
public SpecificitesResult(Corpus corpus, Subcorpus subcorpus, Property property, int maxScore) throws CqiClientException,
|
|
202 |
StatException, RserveException, REXPMismatchException {
|
|
203 |
|
|
204 |
|
|
205 |
|
|
206 |
Lexicon totalFrequencies = Lexicon.getLexicon(corpus, property);
|
|
207 |
Lexicon subFrequencies = Lexicon.getLexicon(subcorpus, property);
|
|
208 |
|
|
209 |
//System.out.println("Send corpus vector");
|
|
210 |
Vector totalFSymbol;
|
|
211 |
Vector subFSymbol;
|
|
212 |
try {
|
|
213 |
totalFSymbol = totalFrequencies.asVector();
|
|
214 |
subFSymbol = subFrequencies.asVector();
|
|
215 |
} catch (StatException e) {
|
|
216 |
throw new RWorkspaceException(e);
|
|
217 |
}
|
|
218 |
|
|
219 |
SpecificitiesImpl si = new SpecificitiesImpl(totalFSymbol, subFSymbol);
|
|
220 |
double[][] specIndex = si.getScores();
|
|
221 |
|
|
222 |
init(si.getSymbol(), specIndex, totalFrequencies, subFrequencies, corpus.getName()+"\\"+subcorpus.getName(), maxScore);
|
|
223 |
}
|
|
224 |
|
|
225 |
|
|
226 |
/**
|
137 |
227 |
* Instantiates a new specificities result : for subcorpus specificities.
|
138 |
228 |
*
|
139 |
229 |
* @param symbol the symbol
|
... | ... | |
663 |
753 |
return this.getName();
|
664 |
754 |
}
|
665 |
755 |
|
666 |
|
|
667 |
|
/**
 * Builds the specificities result of a partition for a given property.
 *
 * The lexical table is obtained from
 * {@code LexicalTableFactory.getLexicalTable(partition, analysisProperty, Fmin)}
 * and handed, together with {@code maxScore}, to
 * {@link #SpecificitesResult(LexicalTable, int)}, which performs the
 * actual computation.
 *
 * @param partition the partition; it must hold at least two parts
 * @param analysisProperty the property given to the lexical table factory
 * @param Fmin the threshold given to the lexical table factory
 *            (presumably a minimum frequency -- to be confirmed)
 * @param maxScore forwarded unchanged to the delegated constructor
 * @throws CqiClientException the cqi client exception
 * @throws StatException the stat exception
 * @throws IllegalArgumentException if the partition holds fewer than two parts
 */
public SpecificitesResult(Partition partition, Property analysisProperty, int Fmin, int maxScore)
        throws CqiClientException, StatException {
    // The delegation has to be the first statement of a constructor, so the
    // number of parts can only be verified once it has completed.
    this(LexicalTableFactory.getLexicalTable(partition, analysisProperty, Fmin), maxScore);

    if (partition.getParts().size() >= 2) {
        return;
    }
    throw new IllegalArgumentException(SpecificitiesCoreMessages.ComputeError_NEED_AT_LEAST_2_PARTS);
}
|
690 |
|
|
691 |
|
/**
|
692 |
|
* Specificites.
|
693 |
|
*
|
694 |
|
* @param table the table
|
695 |
|
* @return the specificites result
|
696 |
|
* @throws CqiClientException the cqi client exception
|
697 |
|
* @throws StatException the stat exception
|
698 |
|
*/
|
699 |
|
public SpecificitesResult(LexicalTable table, int maxScore)
|
700 |
|
throws CqiClientException, StatException {
|
701 |
|
if (table.getNColumns() < 2) {
|
702 |
|
throw new IllegalArgumentException(SpecificitiesCoreMessages.ComputeError_NEED_AT_LEAST_2_PARTS);
|
703 |
|
}
|
704 |
|
|
705 |
|
SpecificitiesImpl si = new SpecificitiesImpl(table.getData());
|
706 |
|
|
707 |
|
double[][] specIndex = si.getScores();
|
708 |
|
|
709 |
|
if (table.getPartition() != null) {
|
710 |
|
// System.out.println("table="+table);
|
711 |
|
// System.out.println("rows="+table.getRowNames());
|
712 |
|
// System.out.println("cols="+table.getColNames());
|
713 |
|
// System.out.println("prop="+table.getProperty());
|
714 |
|
init(symbol, specIndex, table, Arrays
|
715 |
|
.asList(table.getRowNames().asStringsArray()), Arrays.asList(table.getColNames().asStringsArray()), table.getProperty().getName(), maxScore);
|
716 |
|
} else {
|
717 |
|
init(si.getSymbol(), specIndex, table, Arrays
|
718 |
|
.asList(table.getRowNames().asStringsArray()), Arrays.asList(table.getColNames().asStringsArray()),
|
719 |
|
"TLNONAME: " + table.getProperty().getName(), maxScore); //$NON-NLS-1$
|
720 |
|
}
|
721 |
|
}
|
722 |
|
|
723 |
|
/**
|
724 |
|
* Specificites.
|
725 |
|
*
|
726 |
|
* @param corpus the corpus
|
727 |
|
* @param subcorpus the subcorpus
|
728 |
|
* @param property the property
|
729 |
|
* @return the specificites result
|
730 |
|
* @throws CqiClientException the cqi client exception
|
731 |
|
* @throws StatException the stat exception
|
732 |
|
* @throws REXPMismatchException
|
733 |
|
* @throws RserveException
|
734 |
|
*/
|
735 |
|
public SpecificitesResult(Corpus corpus,
|
736 |
|
Subcorpus subcorpus, Property property, int maxScore) throws CqiClientException,
|
737 |
|
StatException, RserveException, REXPMismatchException {
|
738 |
|
|
739 |
|
|
740 |
|
|
741 |
|
Lexicon totalFrequencies = Lexicon.getLexicon(corpus, property);
|
742 |
|
Lexicon subFrequencies = Lexicon.getLexicon(subcorpus, property);
|
743 |
|
|
744 |
|
//System.out.println("Send corpus vector");
|
745 |
|
Vector totalFSymbol;
|
746 |
|
Vector subFSymbol;
|
747 |
|
try {
|
748 |
|
totalFSymbol = totalFrequencies.asVector();
|
749 |
|
subFSymbol = subFrequencies.asVector();
|
750 |
|
} catch (StatException e) {
|
751 |
|
throw new RWorkspaceException(e);
|
752 |
|
}
|
753 |
|
|
754 |
|
SpecificitiesImpl si = new SpecificitiesImpl(totalFSymbol, subFSymbol);
|
755 |
|
double[][] specIndex = si.getScores();
|
756 |
|
|
757 |
|
init(si.getSymbol(), specIndex, totalFrequencies, subFrequencies, corpus.getName()+"\\"+subcorpus.getName(), maxScore);
|
758 |
|
}
|
759 |
757 |
}
|