Révision 313
tmp/org.txm.wordcloud.core/META-INF/MANIFEST.MF (revision 313) | ||
---|---|---|
9 | 9 |
org.txm.core;bundle-version="0.7.0", |
10 | 10 |
org.txm.chartsengine.jfreechart.core;bundle-version="1.0.0", |
11 | 11 |
org.txm.chartsengine.r.core;bundle-version="1.0.0", |
12 |
org.txm.index.core;bundle-version="1.0.0" |
|
12 |
org.txm.index.core;bundle-version="1.0.0", |
|
13 |
org.txm.lexicon.core |
|
13 | 14 |
Export-Package: org.txm.wordcloud.core.chartsengine.jfreechart, |
14 | 15 |
org.txm.wordcloud.core.chartsengine.r, |
15 | 16 |
org.txm.wordcloud.core.functions, |
tmp/org.txm.index.core/src/org/txm/index/core/functions/Index.java (revision 313) | ||
---|---|---|
226 | 226 |
this.props = new ArrayList<Property>(); |
227 | 227 |
this.props.add(property); |
228 | 228 |
|
229 |
lexicon = corpus.getLexicon(property);
|
|
229 |
lexicon = Lexicon.getLexicon(corpus, property);
|
|
230 | 230 |
|
231 | 231 |
lexicon.addResult(this); |
232 | 232 |
|
tmp/org.txm.utils/src/org/txm/utils/io/IOUtils.java (revision 313) | ||
---|---|---|
11 | 11 |
import java.io.OutputStreamWriter; |
12 | 12 |
import java.io.PrintWriter; |
13 | 13 |
import java.io.UnsupportedEncodingException; |
14 |
import java.util.ArrayList; |
|
14 | 15 |
|
15 | 16 |
import org.txm.utils.i18n.DetectBOM; |
16 | 17 |
|
... | ... | |
60 | 61 |
public static PrintWriter getWriter(File file, String encoding) throws UnsupportedEncodingException, FileNotFoundException { |
61 | 62 |
return getWriter(file, encoding, false); |
62 | 63 |
} |
63 |
|
|
64 |
|
|
64 | 65 |
public static PrintWriter getWriter(File file, boolean append) throws UnsupportedEncodingException, FileNotFoundException { |
65 | 66 |
return getWriter(file, "UTF-8", append); |
66 | 67 |
} |
67 |
|
|
68 |
|
|
68 | 69 |
public static PrintWriter getWriter(File file, String encoding, boolean append) throws UnsupportedEncodingException, FileNotFoundException { // opens a buffered PrintWriter on file; 'append' is handed to FileOutputStream |
69 | 70 |
return new PrintWriter(new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(file, append)), encoding)); // honor the requested charset instead of a hard-coded UTF-8 |
70 | 71 |
} |
... | ... | |
77 | 78 |
PrintWriter writer = getWriter(file); |
78 | 79 |
writer.write(str); |
79 | 80 |
writer.close(); |
80 |
|
|
81 |
|
|
81 | 82 |
} |
83 |
|
|
84 |
public static ArrayList<String> getLines(File file, String encoding) { |
|
85 |
ArrayList<String> lines = new ArrayList<String>(); |
|
86 |
try { |
|
87 |
BufferedReader reader = IOUtils.getReader(file, encoding); |
|
88 |
String line = reader.readLine(); |
|
89 |
while (line != null) { |
|
90 |
lines.add(line); |
|
91 |
line = reader.readLine(); |
|
92 |
} |
|
93 |
} catch(Exception e) { |
|
94 |
System.out.println("Error while getting lines: "+e.getLocalizedMessage()); |
|
95 |
} |
|
96 |
return lines; |
|
97 |
} |
|
82 | 98 |
} |
tmp/org.txm.cooccurrence.core/src/org/txm/cooccurrence/core/functions/Cooccurrence.java (revision 313) | ||
---|---|---|
48 | 48 |
import org.txm.functions.concordances.Concordance; |
49 | 49 |
import org.txm.functions.concordances.Line; |
50 | 50 |
import org.txm.index.core.functions.Index; |
51 |
import org.txm.lexicaltable.core.statsengine.data.LexicalTable; |
|
51 |
import org.txm.lexicaltable.core.functions.LexicalTable; |
|
52 |
import org.txm.lexicaltable.core.functions.LexicalTableFactory; |
|
52 | 53 |
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl; |
53 | 54 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
54 | 55 |
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException; |
... | ... | |
60 | 61 |
import org.txm.searchengine.cqp.corpus.query.Match; |
61 | 62 |
import org.txm.searchengine.cqp.corpus.query.Query; |
62 | 63 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
63 |
import org.txm.specificities.core.functions.Specificites; |
|
64 | 64 |
import org.txm.specificities.core.functions.SpecificitesResult; |
65 |
import org.txm.specificities.core.statsengine.r.function.SpecificitiesImpl; |
|
66 |
import org.txm.lexicaltable.core.statsengine.data.ILexicalTable; |
|
65 | 67 |
import org.txm.statsengine.core.StatException; |
66 | 68 |
import org.txm.statsengine.r.core.RWorkspace; |
67 | 69 |
import org.txm.statsengine.r.core.exceptions.RWorkspaceException; |
... | ... | |
198 | 200 |
private HashMap<Integer, String> allsignaturesstr; |
199 | 201 |
|
200 | 202 |
/** The lt. */ |
201 |
private LexicalTable lt; |
|
203 |
private LexicalTableImpl lt;
|
|
202 | 204 |
|
203 | 205 |
/** The keys to string. */ |
204 | 206 |
private HashMap<String, String> keysToString; |
... | ... | |
676 | 678 |
} |
677 | 679 |
if (Thread.interrupted()) return false; // stop if interrupted by user |
678 | 680 |
|
679 |
// ALTER THE INDEX IF A REFERENCE CORPUS IS SET |
|
680 |
if(referenceCorpus != null && referenceCorpus.length() > 0) { |
|
681 |
// ALTER THE INDEX IF A REFERENCE CORPUS IS SET -> this changes the base frequencies
|
|
682 |
if (referenceCorpus != null && referenceCorpus.length() > 0) {
|
|
681 | 683 |
//voc.toTxt(new File("/home/mdecorde/TEMP/before.tsv"), "UTF-8", "\t", ""); |
682 | 684 |
try { |
683 | 685 |
voc.setIsAltered(true); |
... | ... | |
774 | 776 |
// //writer.println("Cols: "+Arrays.toString(colnames)); |
775 | 777 |
// } catch(Exception e) {e.printStackTrace();} |
776 | 778 |
|
777 |
lt = LexicalTableImpl.createLexicalTable(freqs, properties.get(0), rownames, colnames, 1); |
|
778 |
lt.setCorpus(corpus); |
|
779 |
lt = new LexicalTableImpl(freqs, rownames, colnames); |
|
779 | 780 |
|
780 | 781 |
// if(referenceCorpus != null && referenceCorpus.length() > 0) { |
781 | 782 |
// //lt.removeCol(0, false); |
... | ... | |
794 | 795 |
*/ |
795 | 796 |
public boolean stepGetScores() throws CqiClientException, StatException |
796 | 797 |
{ |
797 |
SpecificitesResult specif = Specificites.specificites(lt, 1000);
|
|
798 |
SpecificitiesImpl specif = new SpecificitiesImpl(lt);
|
|
798 | 799 |
//System.out.println("Specif N part: "+specif.getNbrPart()); //$NON-NLS-1$ |
799 | 800 |
//System.out.println("Specif N lines number: "+specif.getSpecificitesIndex().length); //$NON-NLS-1$ |
800 | 801 |
//System.out.println("T specif e: "+(System.currentTimeMillis()- time)); //$NON-NLS-1$ |
801 | 802 |
//specif.toTxt(new File("~/Bureau/coocresults/specif Cooc")); //$NON-NLS-1$ |
802 |
String[] specifrownames = specif.getTypeNames();
|
|
803 |
double[][] scores = specif.getSpecificitesIndex();
|
|
803 |
String[] specifrownames = specif.getRowNames().asStringsArray();
|
|
804 |
double[][] scores = specif.getScores();
|
|
804 | 805 |
//System.out.println("Nb specif result: "+specif.getSpecificitesIndex().length); |
805 | 806 |
|
806 | 807 |
int iimax = Math.min(specifrownames.length, scores.length); |
tmp/org.txm.cooccurrence.core/META-INF/MANIFEST.MF (revision 313) | ||
---|---|---|
3 | 3 |
Bundle-Name: Cooccurrence Core |
4 | 4 |
Bundle-SymbolicName: org.txm.cooccurrence.core;singleton:=true |
5 | 5 |
Bundle-Version: 1.0.0.qualifier |
6 |
Require-Bundle: org.txm.lexicaltable.core,
|
|
6 |
Require-Bundle: org.txm.concordance.core;bundle-version="1.0.0",
|
|
7 | 7 |
org.txm.core;bundle-version="0.7.0";visibility:=reexport, |
8 |
org.txm.lexicaltable.core, |
|
8 | 9 |
org.txm.specificities.core;bundle-version="1.0.0", |
9 | 10 |
org.eclipse.core.runtime, |
10 | 11 |
org.txm.index.core;bundle-version="1.0.0" |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/functions/Specificites.java (revision 313) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate: 2016-11-29 16:47:07 +0100 (Tue, 29 Nov 2016) $ |
|
25 |
// $LastChangedRevision: 3349 $ |
|
26 |
// $LastChangedBy: mdecorde $ |
|
27 |
// |
|
28 |
package org.txm.specificities.core.functions; |
|
29 |
|
|
30 |
import java.util.ArrayList; |
|
31 |
import java.util.Arrays; |
|
32 |
import java.util.HashSet; |
|
33 |
import java.util.List; |
|
34 |
import java.util.Set; |
|
35 |
|
|
36 |
import org.txm.lexicaltable.core.functions.LexicalTableFactory; |
|
37 |
import org.txm.lexicaltable.core.statsengine.data.LexicalTable; |
|
38 |
import org.txm.lexicon.core.corpusengine.cqp.Lexicon; |
|
39 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
40 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
41 |
import org.txm.searchengine.cqp.corpus.Part; |
|
42 |
import org.txm.searchengine.cqp.corpus.Partition; |
|
43 |
import org.txm.searchengine.cqp.corpus.Property; |
|
44 |
import org.txm.searchengine.cqp.corpus.Subcorpus; |
|
45 |
import org.txm.searchengine.cqp.corpus.query.Focus; |
|
46 |
import org.txm.specificities.core.messages.SpecificitiesCoreMessages; |
|
47 |
import org.txm.statsengine.core.StatException; |
|
48 |
import org.txm.statsengine.core.data.Vector; |
|
49 |
import org.txm.statsengine.r.core.exceptions.RWorkspaceException; |
|
50 |
|
|
51 |
// TODO: Auto-generated Javadoc |
|
52 |
/** |
|
53 |
* High level access to the Specificities. |
|
54 |
* |
|
55 |
* The methods in this class access the Specificities function through |
|
56 |
* high-level objects (representation of CQP corpus ({@link Corpus}, |
|
57 |
* {@link Subcorpus}, {@link Partition}), representation of queries ( |
|
58 |
* {@link Focus}, {@link Property}), and lexicon {@link Lexicon})). |
|
59 |
* |
|
60 |
* @author sloiseau |
|
61 |
* |
|
62 |
*/ |
|
63 |
public class Specificites { |
|
64 |
|
|
65 |
/** The specif_counter. */ |
|
66 |
private static int specif_counter = 1; |
|
67 |
private static ArrayList<Integer> selectedColIdx; |
|
68 |
|
|
69 |
/** |
|
70 |
* Compute the specificity index for all the cells of a complete lexical |
|
71 |
* table, as defined by a {@link Partition} and a {@link Property}. |
|
72 |
* |
|
73 |
* This may be used for extracting, for all part, the forms the most or the |
|
74 |
* less specif ic. |
|
75 |
* |
|
76 |
* @param partition the partition |
|
77 |
* @param analysisProperty the analysis property |
|
78 |
* @param formFocus the form focus |
|
79 |
* @param partsFocus the parts focus |
|
80 |
* @param Fmin the fmin |
|
81 |
* @return the specificites result |
|
82 |
* @throws CqiClientException the cqi client exception |
|
83 |
* @throws StatException the stat exception |
|
84 |
*/ |
|
85 |
public static SpecificitesResult specificites(Partition partition, Property analysisProperty, |
|
86 |
List<Part> partsFocus, int Fmin, int maxScore) throws CqiClientException, StatException { |
|
87 |
if (partition.getParts().size() < 2) { |
|
88 |
throw new IllegalArgumentException(SpecificitiesCoreMessages.ComputeError_NEED_AT_LEAST_2_PARTS); |
|
89 |
} |
|
90 |
// complication due to Jean-Philippe special interest in usage of Part. |
|
91 |
List<String> partNamesFocus = new ArrayList<String>(); |
|
92 |
|
|
93 |
if (partsFocus != null) { |
|
94 |
partNamesFocus = new ArrayList<String>(); |
|
95 |
for (Part p : partsFocus) { |
|
96 |
partNamesFocus.add(p.getShortName()); |
|
97 |
} |
|
98 |
} |
|
99 |
// System.out.println("get/create lexical table"); |
|
100 |
LexicalTable table = LexicalTableFactory.getLexicalTable(partition, analysisProperty, Fmin); |
|
101 |
partition.addResult(table); |
|
102 |
|
|
103 |
// Create a set of the existing types |
|
104 |
String[] rownames = table.getRowNames().asStringsArray(); |
|
105 |
ArrayList<String> found = new ArrayList<String>(Arrays.asList(rownames)); |
|
106 |
|
|
107 |
String symbol = org.txm.specificities.core.statsengine.r.function.Specificites.prefixR+(specif_counter++); |
|
108 |
|
|
109 |
//System.out.println("Av specif "); |
|
110 |
double[][] specIndex = org.txm.specificities.core.statsengine.r.function.Specificites |
|
111 |
.specificites(symbol, table, found.toArray(new String[] {}), partNamesFocus.toArray(new String[] {})); |
|
112 |
//System.out.println("Ap specif "); |
|
113 |
|
|
114 |
return new SpecificitesResult( |
|
115 |
symbol, |
|
116 |
specIndex, |
|
117 |
table, |
|
118 |
found, |
|
119 |
partNamesFocus, |
|
120 |
analysisProperty.getName(), |
|
121 |
maxScore); |
|
122 |
} |
|
123 |
|
|
124 |
/** |
|
125 |
* Specificites. |
|
126 |
* |
|
127 |
* @param table the table |
|
128 |
* @return the specificites result |
|
129 |
* @throws CqiClientException the cqi client exception |
|
130 |
* @throws StatException the stat exception |
|
131 |
*/ |
|
132 |
public static SpecificitesResult specificites(LexicalTable table, int maxScore) |
|
133 |
throws CqiClientException, StatException { |
|
134 |
if (table.getNColumns() < 2) { |
|
135 |
throw new IllegalArgumentException(SpecificitiesCoreMessages.ComputeError_NEED_AT_LEAST_2_PARTS); |
|
136 |
} |
|
137 |
|
|
138 |
String[] rownames = table.getRowNames().asStringsArray(); |
|
139 |
String[] colnames = table.getColNames().asStringsArray(); |
|
140 |
String symbol = org.txm.specificities.core.statsengine.r.function.Specificites.prefixR+(specif_counter++); |
|
141 |
|
|
142 |
double[][] specIndex = org.txm.specificities.core.statsengine.r.function.Specificites.specificites(symbol, table, rownames, colnames); |
|
143 |
|
|
144 |
if (table.getPartition() != null) { |
|
145 |
return new SpecificitesResult(symbol, specIndex, table, Arrays |
|
146 |
.asList(rownames), Arrays.asList(colnames), table.getProperty().getName(), maxScore); |
|
147 |
} else { |
|
148 |
return new SpecificitesResult(symbol, specIndex, table, Arrays |
|
149 |
.asList(rownames), Arrays.asList(colnames), |
|
150 |
"TLNONAME: " + table.getProperty().getName(), maxScore); //$NON-NLS-1$ |
|
151 |
} |
|
152 |
} |
|
153 |
|
|
154 |
/** |
|
155 |
* Specificites. |
|
156 |
* |
|
157 |
* @param corpus the corpus |
|
158 |
* @param subcorpus the subcorpus |
|
159 |
* @param property the property |
|
160 |
* @return the specificites result |
|
161 |
* @throws CqiClientException the cqi client exception |
|
162 |
* @throws StatException the stat exception |
|
163 |
*/ |
|
164 |
public static SpecificitesResult specificites(Corpus corpus, |
|
165 |
Subcorpus subcorpus, Property property, int maxScore) throws CqiClientException, |
|
166 |
StatException { |
|
167 |
|
|
168 |
Lexicon totalFrequencies = corpus.getLexicon(property); |
|
169 |
Lexicon subFrequencies = subcorpus.getLexicon(property); |
|
170 |
|
|
171 |
//System.out.println("Send corpus vector"); |
|
172 |
Vector totalFSymbol; |
|
173 |
Vector subFSymbol; |
|
174 |
try { |
|
175 |
totalFSymbol = totalFrequencies.asVector(); |
|
176 |
subFSymbol = subFrequencies.asVector(); |
|
177 |
} catch (StatException e) { |
|
178 |
throw new RWorkspaceException(e); |
|
179 |
} |
|
180 |
|
|
181 |
//System.out.println("compute specifs"); |
|
182 |
String symbol = org.txm.specificities.core.statsengine.r.function.Specificites.prefixR+(specif_counter++); |
|
183 |
double[][] specIndex = org.txm.specificities.core.statsengine.r.function.Specificites.specificites(symbol, totalFSymbol, subFSymbol); |
|
184 |
|
|
185 |
//System.out.println("build SpecificitesResult"); |
|
186 |
SpecificitesResult specif = new SpecificitesResult(symbol, specIndex, totalFrequencies, |
|
187 |
subFrequencies, property.getName(), maxScore); |
|
188 |
|
|
189 |
specif.setCorpus(subcorpus); |
|
190 |
return specif ; |
|
191 |
} |
|
192 |
|
|
193 |
/** |
|
194 |
* TODO to be replaced by |
|
195 |
* "org.apache.commons.collections.CollectionUtils.subtract" when someone |
|
196 |
* figure out where is the eclipse plugin for apache commons collections. SL |
|
197 |
* |
|
198 |
* @param leftoperande the leftoperande |
|
199 |
* @param rightoperande the rightoperande |
|
200 |
* @return an array of two sets: the first one contains the element of |
|
201 |
* leftoperande not in rightoperande, the second the element of |
|
202 |
* leftoperande present in rightoperande. |
|
203 |
*/ |
|
204 |
private static final Set<String>[] subtract(Set<String> leftoperande, |
|
205 |
Set<String> rightoperande) { |
|
206 |
Set<String>[] returned = new HashSet[] { new HashSet<String>(), |
|
207 |
new HashSet<String>() }; |
|
208 |
for (String s : leftoperande) { |
|
209 |
if (!rightoperande.contains(s)) { |
|
210 |
returned[0].add(s); |
|
211 |
} else { |
|
212 |
returned[1].add(s); |
|
213 |
} |
|
214 |
} |
|
215 |
return returned; |
|
216 |
} |
|
217 |
} |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/functions/Chi2.java (revision 313) | ||
---|---|---|
1 | 1 |
package org.txm.specificities.core.functions; |
2 | 2 |
|
3 | 3 |
import org.txm.functions.contrasts.Contrast; |
4 |
import org.txm.lexicaltable.core.statsengine.data.LexicalTable; |
|
4 |
import org.txm.lexicaltable.core.functions.LexicalTable; |
|
5 |
import org.txm.lexicaltable.core.statsengine.data.ILexicalTable; |
|
5 | 6 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
6 | 7 |
import org.txm.searchengine.cqp.corpus.Corpus; |
7 | 8 |
import org.txm.searchengine.cqp.corpus.Partition; |
... | ... | |
11 | 12 |
import org.txm.statsengine.r.core.RWorkspace; |
12 | 13 |
|
13 | 14 |
public class Chi2 extends Contrast { |
15 |
|
|
16 |
private ILexicalTable itable; |
|
17 |
|
|
14 | 18 |
public Chi2(Corpus corpus, Subcorpus subcorpus, Property property) |
15 | 19 |
throws StatException, CqiClientException { |
16 | 20 |
super(corpus, subcorpus, property); |
17 |
// TODO Auto-generated constructor stub |
|
18 | 21 |
} |
19 | 22 |
|
20 | 23 |
public Chi2(Partition partition, Property property, int Fmin) |
21 | 24 |
throws StatException, CqiClientException { |
22 | 25 |
super(partition, property, Fmin); |
23 |
// TODO Auto-generated constructor stub |
|
24 | 26 |
} |
25 | 27 |
|
26 | 28 |
|
27 | 29 |
public Chi2(LexicalTable table) { |
28 | 30 |
super(table); |
29 |
// TODO Auto-generated constructor stub |
|
30 | 31 |
} |
31 | 32 |
|
32 | 33 |
@Override |
33 | 34 |
public boolean compute() throws StatException { |
34 |
colNames = table.getColNames().asStringsArray(); |
|
35 |
rowNames = table.getRowNames().asStringsArray(); |
|
36 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(table.getSymbol()); |
|
35 |
itable = table.getData(); |
|
36 |
colNames = itable.getColNames().asStringsArray(); |
|
37 |
rowNames = itable.getRowNames().asStringsArray(); |
|
38 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(itable.getSymbol()); |
|
37 | 39 |
|
38 | 40 |
// compute |
39 |
String cmd = "mat <- "+table.getSymbol()+"\n"+ //$NON-NLS-1$ //$NON-NLS-2$ |
|
41 |
String cmd = "mat <- "+itable.getSymbol()+"\n"+ //$NON-NLS-1$ //$NON-NLS-2$
|
|
40 | 42 |
" mat2 <- rowSums(mat) %o% colSums(mat) / sum(mat)"+"\n"+ //$NON-NLS-1$ //$NON-NLS-2$ |
41 | 43 |
symbol +" <- (mat - mat2) ^ 2 / mat2"; //$NON-NLS-1$ |
42 | 44 |
indices = RWorkspace.getRWorkspaceInstance().evalToDouble2D(cmd); |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/functions/SpecificitesResult.java (revision 313) | ||
---|---|---|
34 | 34 |
import java.io.IOException; |
35 | 35 |
import java.io.OutputStreamWriter; |
36 | 36 |
import java.io.UnsupportedEncodingException; |
37 |
import java.util.ArrayList; |
|
38 |
import java.util.Arrays; |
|
39 |
import java.util.HashSet; |
|
37 | 40 |
import java.util.List; |
41 |
import java.util.Set; |
|
38 | 42 |
|
43 |
import org.rosuda.REngine.REXPMismatchException; |
|
44 |
import org.rosuda.REngine.Rserve.RserveException; |
|
39 | 45 |
import org.txm.functions.Function; |
40 |
import org.txm.lexicaltable.core.statsengine.data.LexicalTable; |
|
46 |
import org.txm.lexicaltable.core.functions.LexicalTable; |
|
47 |
import org.txm.lexicaltable.core.functions.LexicalTableFactory; |
|
41 | 48 |
import org.txm.lexicon.core.corpusengine.cqp.Lexicon; |
49 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
42 | 50 |
import org.txm.searchengine.cqp.corpus.Corpus; |
51 |
import org.txm.searchengine.cqp.corpus.Part; |
|
43 | 52 |
import org.txm.searchengine.cqp.corpus.Partition; |
53 |
import org.txm.searchengine.cqp.corpus.Property; |
|
44 | 54 |
import org.txm.searchengine.cqp.corpus.Subcorpus; |
45 | 55 |
import org.txm.specificities.core.messages.SpecificitiesCoreMessages; |
56 |
import org.txm.specificities.core.statsengine.r.function.SpecificitiesImpl; |
|
46 | 57 |
import org.txm.statsengine.core.StatException; |
47 | 58 |
import org.txm.statsengine.core.data.Vector; |
48 | 59 |
import org.txm.statsengine.core.utils.ArrayIndex; |
49 | 60 |
import org.txm.statsengine.core.utils.CheckArray; |
50 | 61 |
import org.txm.statsengine.r.core.RWorkspace; |
62 |
import org.txm.statsengine.r.core.exceptions.RWorkspaceException; |
|
51 | 63 |
|
52 |
// TODO: Auto-generated Javadoc |
|
53 | 64 |
/** |
54 | 65 |
* Hold specificites computation result. |
55 | 66 |
* |
... | ... | |
61 | 72 |
public static int MAXSPECIF = 1000; |
62 | 73 |
|
63 | 74 |
/** The indices. */ |
64 |
private final double[][] indices;
|
|
75 |
private double[][] indices; |
|
65 | 76 |
|
66 | 77 |
/** The table. */ |
67 |
private final LexicalTable table;
|
|
78 |
private LexicalTable table; |
|
68 | 79 |
|
69 | 80 |
/** The rowindex. */ |
70 | 81 |
private int[] rowindex = null; |
... | ... | |
131 | 142 |
* @param subLexicon the sub lexicon |
132 | 143 |
* @param name the name |
133 | 144 |
*/ |
134 |
protected SpecificitesResult(String symbol, double[][] specIndex, Lexicon lexicon, Lexicon subLexicon, String name, int maxScore) {
|
|
145 |
protected void init(String symbol, double[][] specIndex, Lexicon lexicon, Lexicon subLexicon, String name, int maxScore) {
|
|
135 | 146 |
|
136 |
super(subLexicon.getCorpus()); |
|
137 |
|
|
147 |
//super(subLexicon.getCorpus());
|
|
148 |
this.corpus = subLexicon.getCorpus(); |
|
138 | 149 |
this.symbol = symbol; |
139 | 150 |
this.indices = specIndex; |
140 | 151 |
|
... | ... | |
180 | 191 |
* @param maxScore if abs(score) > maxScore -> score = +/- maxScore |
181 | 192 |
* @throws StatException the stat exception |
182 | 193 |
*/ |
183 |
protected SpecificitesResult(String symbol, double[][] specIndex, LexicalTable table,
|
|
194 |
protected void init(String symbol, double[][] specIndex, LexicalTable table,
|
|
184 | 195 |
List<String> typeFocus, List<String> partFocus, String name, int maxScore) |
185 | 196 |
throws StatException { |
186 | 197 |
|
187 |
super(table); |
|
188 |
|
|
198 |
this.table = table; |
|
189 | 199 |
this.symbol = symbol; |
190 | 200 |
|
191 | 201 |
if (table == null) { |
... | ... | |
297 | 307 |
* @throws StatException the stat exception |
298 | 308 |
*/ |
299 | 309 |
public int getCorpusSize() throws StatException { |
300 |
return table != null ? table.getTotal() : lexicon.nbrOfToken(); |
|
310 |
return table != null ? table.getData().getTotal() : lexicon.nbrOfToken();
|
|
301 | 311 |
} |
302 | 312 |
|
303 | 313 |
/** |
... | ... | |
368 | 378 |
//System.out.println("no frequencies"); |
369 | 379 |
if (table != null) { |
370 | 380 |
//System.out.println("FROM TABLE"); |
371 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(table.getSymbol()); |
|
381 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(table.getData().getSymbol());
|
|
372 | 382 |
|
373 | 383 |
} else {// if table == null : subcorpus specif |
374 | 384 |
//System.out.println("FROM LEXICON"); |
... | ... | |
653 | 663 |
public String getDetails() { |
654 | 664 |
return this.getName(); |
655 | 665 |
} |
666 |
|
|
667 |
|
|
668 |
/** |
|
669 |
* Compute the specificity index for all the cells of a complete lexical |
|
670 |
* table, as defined by a {@link Partition} and a {@link Property}. |
|
671 |
* |
|
672 |
* This may be used for extracting, for each part, the most or the |
|
673 |
* least specific forms. |
|
674 |
* |
|
675 |
* @param partition the partition |
|
676 |
* @param analysisProperty the analysis property |
|
677 |
* @param Fmin the fmin, passed to LexicalTableFactory.getLexicalTable |
|
678 |
* @param maxScore the max score, passed on to the table-based constructor |
|
679 |
* @throws IllegalArgumentException if the partition has fewer than 2 parts |
|
680 |
*         (this check only runs after the table-based constructor has completed) |
|
681 |
* @throws CqiClientException the cqi client exception |
|
682 |
* @throws StatException the stat exception |
|
683 |
*/ |
|
684 |
public SpecificitesResult(Partition partition, Property analysisProperty, int Fmin, int maxScore) throws CqiClientException, StatException { |
|
685 |
this(LexicalTableFactory.getLexicalTable(partition, analysisProperty, Fmin), maxScore); |
|
686 |
|
|
687 |
if (partition.getParts().size() < 2) { // NOTE(review): runs after this(...) above; presumably the table-based constructor already rejects such input via its column check - confirm |
|
688 |
throw new IllegalArgumentException(SpecificitiesCoreMessages.ComputeError_NEED_AT_LEAST_2_PARTS); |
|
689 |
} |
|
690 |
} |
|
691 |
|
|
692 |
/** |
|
693 |
* Computes the specificity scores of a lexical table through SpecificitiesImpl and initializes this result with them. |
|
694 |
* |
|
695 |
* @param table the table; an IllegalArgumentException is thrown when it has fewer than 2 columns |
|
696 |
* @param maxScore the max score, forwarded to init |
|
697 |
* @throws CqiClientException the cqi client exception |
|
698 |
* @throws StatException the stat exception |
|
699 |
*/ |
|
700 |
public SpecificitesResult(LexicalTable table, int maxScore) |
|
701 |
throws CqiClientException, StatException { |
|
702 |
if (table.getNColumns() < 2) { |
|
703 |
throw new IllegalArgumentException(SpecificitiesCoreMessages.ComputeError_NEED_AT_LEAST_2_PARTS); |
|
704 |
} |
|
705 |
|
|
706 |
SpecificitiesImpl si = new SpecificitiesImpl(table.getData()); |
|
707 |
|
|
708 |
double[][] specIndex = si.getScores(); |
|
709 |
|
|
710 |
if (table.getPartition() != null) { |
|
711 |
init(si.getSymbol(), specIndex, table, Arrays // use the symbol of the computed result, as the else branch does; the 'symbol' field is only assigned inside init |
|
712 |
.asList(table.getRowNames().asStringsArray()), Arrays.asList(table.getColNames().asStringsArray()), table.getProperty().getName(), maxScore); |
|
713 |
} else { |
|
714 |
init(si.getSymbol(), specIndex, table, Arrays |
|
715 |
.asList(table.getRowNames().asStringsArray()), Arrays.asList(table.getColNames().asStringsArray()), |
|
716 |
"TLNONAME: " + table.getProperty().getName(), maxScore); //$NON-NLS-1$ |
|
717 |
} |
|
718 |
} |
|
719 |
|
|
720 |
/** |
|
721 |
* Specificites. |
|
722 |
* |
|
723 |
* @param corpus the corpus |
|
724 |
* @param subcorpus the subcorpus |
|
725 |
* @param property the property |
|
726 |
* @return the specificites result |
|
727 |
* @throws CqiClientException the cqi client exception |
|
728 |
* @throws StatException the stat exception |
|
729 |
* @throws REXPMismatchException |
|
730 |
* @throws RserveException |
|
731 |
*/ |
|
732 |
public SpecificitesResult(Corpus corpus, |
|
733 |
Subcorpus subcorpus, Property property, int maxScore) throws CqiClientException, |
|
734 |
StatException, RserveException, REXPMismatchException { |
|
735 |
|
|
736 |
|
|
737 |
|
|
738 |
Lexicon totalFrequencies = Lexicon.getLexicon(corpus, property); |
|
739 |
Lexicon subFrequencies = Lexicon.getLexicon(subcorpus, property); |
|
740 |
|
|
741 |
//System.out.println("Send corpus vector"); |
|
742 |
Vector totalFSymbol; |
|
743 |
Vector subFSymbol; |
|
744 |
try { |
|
745 |
totalFSymbol = totalFrequencies.asVector(); |
|
746 |
subFSymbol = subFrequencies.asVector(); |
|
747 |
} catch (StatException e) { |
|
748 |
throw new RWorkspaceException(e); |
|
749 |
} |
|
750 |
|
|
751 |
SpecificitiesImpl si = new SpecificitiesImpl(totalFSymbol, subFSymbol); |
|
752 |
double[][] specIndex = si.getScores(); |
|
753 |
|
|
754 |
init(si.getSymbol(), specIndex, totalFrequencies, subFrequencies, corpus.getName()+"\\"+subcorpus.getName(), maxScore); |
|
755 |
} |
|
656 | 756 |
} |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/functions/Specificites2.java (revision 313) | ||
---|---|---|
1 | 1 |
package org.txm.specificities.core.functions; |
2 | 2 |
|
3 | 3 |
import org.txm.functions.contrasts.Contrast; |
4 |
import org.txm.lexicaltable.core.statsengine.data.LexicalTable; |
|
4 |
import org.txm.lexicaltable.core.functions.LexicalTable; |
|
5 |
import org.txm.lexicaltable.core.statsengine.data.ILexicalTable; |
|
5 | 6 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
6 | 7 |
import org.txm.searchengine.cqp.corpus.Corpus; |
7 | 8 |
import org.txm.searchengine.cqp.corpus.Partition; |
8 | 9 |
import org.txm.searchengine.cqp.corpus.Property; |
9 | 10 |
import org.txm.searchengine.cqp.corpus.Subcorpus; |
11 |
import org.txm.specificities.core.statsengine.r.function.SpecificitiesImpl; |
|
10 | 12 |
import org.txm.statsengine.core.StatException; |
11 | 13 |
import org.txm.statsengine.r.core.RWorkspace; |
12 | 14 |
|
13 | 15 |
public class Specificites2 extends Contrast { |
14 |
|
|
16 |
|
|
17 |
private ILexicalTable itable; |
|
18 |
|
|
15 | 19 |
public Specificites2(Corpus corpus, Subcorpus subcorpus, Property property) |
16 | 20 |
throws StatException, CqiClientException { |
17 | 21 |
super(corpus, subcorpus, property); |
18 |
// TODO Auto-generated constructor stub |
|
19 | 22 |
} |
20 | 23 |
|
21 | 24 |
public Specificites2(Partition partition, Property property, int Fmin) |
22 | 25 |
throws StatException, CqiClientException { |
23 | 26 |
super(partition, property, Fmin); |
24 |
// TODO Auto-generated constructor stub |
|
25 | 27 |
} |
26 | 28 |
|
27 | 29 |
|
28 | 30 |
public Specificites2(LexicalTable table) { |
29 | 31 |
super(table); |
30 |
// TODO Auto-generated constructor stub |
|
31 | 32 |
} |
32 | 33 |
|
33 | 34 |
public boolean compute() throws StatException { |
34 |
|
|
35 |
colNames = table.getColNames().asStringsArray(); |
|
36 |
rowNames = table.getRowNames().asStringsArray(); |
|
37 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(table.getSymbol()); |
|
38 |
|
|
39 |
// compute |
|
40 |
indices = org.txm.specificities.core.statsengine.r.function.Specificites.specificites("SYMBOL", table, null, null); //$NON-NLS-1$ |
|
41 |
|
|
42 |
return true; |
|
43 |
} |
|
35 |
itable = table.getData(); |
|
36 |
colNames = itable.getColNames().asStringsArray(); |
|
37 |
rowNames = itable.getRowNames().asStringsArray(); |
|
38 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(itable.getSymbol()); |
|
44 | 39 |
|
40 |
// compute |
|
41 |
indices = new SpecificitiesImpl(itable).getScores(); //$NON-NLS-1$ |
|
42 |
return true; |
|
43 |
} |
|
44 |
|
|
45 | 45 |
@Override |
46 | 46 |
public String getName() { |
47 |
// TODO Auto-generated method stub |
|
48 | 47 |
return "Specif2"; //$NON-NLS-1$ |
49 | 48 |
} |
50 | 49 |
|
... | ... | |
63 | 62 |
@Override |
64 | 63 |
public void clean() { |
65 | 64 |
// TODO Auto-generated method stub |
66 |
|
|
65 |
|
|
67 | 66 |
} |
68 | 67 |
} |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/statsengine/data/ISpecificities.java (revision 313) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate: 2016-09-19 10:31:00 +0200 (Mon, 19 Sep 2016) $ |
|
25 |
// $LastChangedRevision: 3298 $ |
|
26 |
// $LastChangedBy: mdecorde $ |
|
27 |
// |
|
28 |
package org.txm.specificities.core.statsengine.data; |
|
29 |
|
|
30 |
import org.txm.searchengine.cqp.corpus.Partition; |
|
31 |
import org.txm.searchengine.cqp.corpus.Property; |
|
32 |
import org.txm.statsengine.core.data.ContingencyTable; |
|
33 |
import org.txm.statsengine.core.data.Matrix; |
|
34 |
|
|
35 |
// TODO: Auto-generated Javadoc |
|
36 |
/** |
|
37 |
* A LexicalTable is a special kind of {@link ContingencyTable} extracted from a |
|
38 |
* corpora given a {@link Partition} (the columns) and a {@link Property} (the |
|
39 |
* rows). |
|
40 |
* |
|
41 |
* Can be edited: rows and columns can be deleted. |
|
42 |
* |
|
43 |
* Can be exported to or imported from a file. |
|
44 |
* |
|
45 |
* @author sloiseau |
|
46 |
*/ |
|
47 |
public interface ISpecificities extends Matrix { |
|
48 |
|
|
49 |
} |
|
0 | 50 |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/statsengine/r/function/Specificites.java (revision 313) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate: 2017-01-24 18:11:42 +0100 (Tue, 24 Jan 2017) $ |
|
25 |
// $LastChangedRevision: 3400 $ |
|
26 |
// $LastChangedBy: mdecorde $ |
|
27 |
// |
|
28 |
package org.txm.specificities.core.statsengine.r.function; |
|
29 |
|
|
30 |
import org.rosuda.REngine.REXP; |
|
31 |
import org.rosuda.REngine.REXPMismatchException; |
|
32 |
import org.rosuda.REngine.Rserve.RserveException; |
|
33 |
import org.txm.lexicaltable.core.statsengine.data.LexicalTable; |
|
34 |
import org.txm.specificities.core.messages.SpecificitiesCoreMessages; |
|
35 |
import org.txm.statsengine.core.StatException; |
|
36 |
import org.txm.statsengine.core.data.QuantitativeDataStructure; |
|
37 |
import org.txm.statsengine.core.data.Vector; |
|
38 |
import org.txm.statsengine.r.core.RWorkspace; |
|
39 |
import org.txm.statsengine.r.core.data.VectorImpl; |
|
40 |
import org.txm.utils.logger.Log; |
|
41 |
|
|
42 |
// TODO: Auto-generated Javadoc |
|
43 |
/** |
|
44 |
* Bridge between the statistical engine and the high-level specificities |
|
45 |
* function {@link org.txm.functions.specificities.Specificities}. |
|
46 |
* |
|
47 |
* This class contains only one methods; it takes as input low-level object (a |
|
48 |
* {@link QuantitativeDataStructure}) and return a low level object (a |
|
49 |
* <code>double[][]</code> array of specificity indices). |
|
50 |
* |
|
51 |
* @author sloiseau |
|
52 |
* |
|
53 |
*/ |
|
54 |
public class Specificites { |
|
55 |
|
|
56 |
/** The DEFAULT symbol. */ |
|
57 |
public static final String prefixR = "SpecifResult"; //$NON-NLS-1$ |
|
58 |
|
|
59 |
/** |
|
60 |
* Specificites. |
|
61 |
* |
|
62 |
* @param lexicon the lexicon |
|
63 |
* @param subLexicon the sub lexicon |
|
64 |
* @return the double[][] |
|
65 |
* @throws StatException the stat exception |
|
66 |
*/ |
|
67 |
public static double[][] specificites(String symbol, Vector lexicon, Vector subLexicon) |
|
68 |
throws StatException { |
|
69 |
RWorkspace rw = RWorkspace.getRWorkspaceInstance(); |
|
70 |
try { |
|
71 |
// System.out.println("lexicon "+lexicon.getLength()); |
|
72 |
//System.out.println(" call specificites.lexicon.new"); |
|
73 |
rw.safeEval("library(textometry)"); |
|
74 |
REXP r = rw.callFunction( |
|
75 |
"specificities.lexicon.new", new QuantitativeDataStructure[] { lexicon, subLexicon }, symbol); //$NON-NLS-1$ |
|
76 |
// double[] res = RWorkspace.toDouble(r); |
|
77 |
//System.out.println(" build double matrix"); |
|
78 |
double[][] res; |
|
79 |
try { |
|
80 |
res = r.asDoubleMatrix(); |
|
81 |
} catch (REXPMismatchException e) { |
|
82 |
throw new StatException(SpecificitiesCoreMessages.ComputeError_FAILED_TO_GET_SPECIFICITIES |
|
83 |
+ e.getMessage(), e); |
|
84 |
} |
|
85 |
Log.finest(res.length + SpecificitiesCoreMessages.Specificites_2); |
|
86 |
return res; |
|
87 |
|
|
88 |
} catch (Exception e) { |
|
89 |
System.out.println(e.getLocalizedMessage()); |
|
90 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
91 |
} |
|
92 |
return null; |
|
93 |
} |
|
94 |
|
|
95 |
/** |
|
96 |
* Specificites. |
|
97 |
* |
|
98 |
* @param table the table |
|
99 |
* @param typeFocus the type focus |
|
100 |
* @param partFocus the part focus |
|
101 |
* @return the double[][] |
|
102 |
* @throws StatException the stat exception |
|
103 |
*/ |
|
104 |
public static double[][] specificites(String symbol, LexicalTable table, |
|
105 |
String[] typeFocus, String[] partFocus) throws StatException { |
|
106 |
|
|
107 |
RWorkspace rw = RWorkspace.getRWorkspaceInstance(); |
|
108 |
|
|
109 |
VectorImpl row = null; |
|
110 |
if (typeFocus != null && typeFocus.length != 0) |
|
111 |
row = new VectorImpl(typeFocus); |
|
112 |
|
|
113 |
VectorImpl column = null; |
|
114 |
if (partFocus != null && partFocus.length != 0) |
|
115 |
column = new VectorImpl(partFocus); |
|
116 |
|
|
117 |
try { |
|
118 |
rw.safeEval("library(textometry)"); |
|
119 |
} catch (RserveException e1) { |
|
120 |
// TODO Auto-generated catch block |
|
121 |
e1.printStackTrace(); |
|
122 |
} catch (REXPMismatchException e1) { |
|
123 |
// TODO Auto-generated catch block |
|
124 |
e1.printStackTrace(); |
|
125 |
} |
|
126 |
REXP r = rw.callFunction("specificities", new QuantitativeDataStructure[] { table, row, column }, symbol); //$NON-NLS-1$ |
|
127 |
|
|
128 |
double[][] res; |
|
129 |
try { |
|
130 |
res = r.asDoubleMatrix(); |
|
131 |
} catch (REXPMismatchException e) { |
|
132 |
throw new StatException(SpecificitiesCoreMessages.ComputeError_FAILED_TO_GET_SPECIFICITIES + e.getMessage(), e); |
|
133 |
} |
|
134 |
Log.finest((res.length * res[0].length) + SpecificitiesCoreMessages.Specificites_2); |
|
135 |
|
|
136 |
return res; |
|
137 |
} |
|
138 |
|
|
139 |
/** |
|
140 |
* Gets the default symbol. |
|
141 |
* |
|
142 |
* @param symbol the symbol |
|
143 |
* @return the default symbol |
|
144 |
*/ |
|
145 |
public static double[][] getDefaultSymbol(String symbol) |
|
146 |
{ |
|
147 |
return null; |
|
148 |
} |
|
149 |
|
|
150 |
} |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/statsengine/r/function/SpecificitiesImpl.java (revision 313) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate: 2017-01-24 18:11:42 +0100 (Tue, 24 Jan 2017) $ |
|
25 |
// $LastChangedRevision: 3400 $ |
|
26 |
// $LastChangedBy: mdecorde $ |
|
27 |
// |
|
28 |
package org.txm.specificities.core.statsengine.r.function; |
|
29 |
|
|
30 |
import org.rosuda.REngine.REXP; |
|
31 |
import org.rosuda.REngine.REXPMismatchException; |
|
32 |
import org.rosuda.REngine.Rserve.RserveException; |
|
33 |
import org.txm.lexicaltable.core.statsengine.data.ILexicalTable; |
|
34 |
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl; |
|
35 |
import org.txm.specificities.core.messages.SpecificitiesCoreMessages; |
|
36 |
import org.txm.specificities.core.statsengine.data.ISpecificities; |
|
37 |
import org.txm.statsengine.core.StatException; |
|
38 |
import org.txm.statsengine.core.data.QuantitativeDataStructure; |
|
39 |
import org.txm.statsengine.core.data.Vector; |
|
40 |
import org.txm.statsengine.r.core.RWorkspace; |
|
41 |
import org.txm.statsengine.r.core.data.MatrixImpl; |
|
42 |
import org.txm.statsengine.r.core.data.VectorImpl; |
|
43 |
import org.txm.statsengine.r.core.exceptions.RWorkspaceException; |
|
44 |
import org.txm.utils.logger.Log; |
|
45 |
|
|
46 |
// TODO: Auto-generated Javadoc |
|
47 |
/** |
|
48 |
* Bridge between the statistical engine and the high-level specificities |
|
49 |
* function {@link org.txm.functions.specificities.Specificities}. |
|
50 |
* |
|
51 |
* This class contains only one methods; it takes as input low-level object (a |
|
52 |
* {@link QuantitativeDataStructure}) and return a low level object (a |
|
53 |
* <code>double[][]</code> array of specificity indices). |
|
54 |
* |
|
55 |
* @author sloiseau |
|
56 |
* |
|
57 |
*/ |
|
58 |
public class SpecificitiesImpl extends MatrixImpl implements ISpecificities { |
|
59 |
|
|
60 |
protected static int specif_counter = 1; |
|
61 |
/** The DEFAULT symbol. */ |
|
62 |
public static final String prefixR = "SpecifResult"; //$NON-NLS-1$ |
|
63 |
|
|
64 |
protected double[][] specIndex; |
|
65 |
|
|
66 |
/** |
|
67 |
* With an already computed specificities score matrix |
|
68 |
* @param symbol |
|
69 |
* @throws RWorkspaceException |
|
70 |
*/ |
|
71 |
public SpecificitiesImpl(String symbol) throws RWorkspaceException { |
|
72 |
super(symbol); |
|
73 |
} |
|
74 |
|
|
75 |
public SpecificitiesImpl(ILexicalTable lt) throws StatException { |
|
76 |
super(prefixR+(specif_counter++)); |
|
77 |
|
|
78 |
|
|
79 |
// Create a set of the existing types |
|
80 |
String[] rownames = lt.getRowNames().asStringsArray(); |
|
81 |
String[] colnames = lt.getColNames().asStringsArray(); |
|
82 |
|
|
83 |
//System.out.println("Av specif "); |
|
84 |
specIndex = compute(symbol, lt, rownames, colnames); |
|
85 |
} |
|
86 |
|
|
87 |
/** |
|
88 |
* compute Specificites of subcorpus using the textometry package functions. |
|
89 |
* |
|
90 |
* @param lexicon the lexicon |
|
91 |
* @param subLexicon the sub lexicon |
|
92 |
* @return the double[][] |
|
93 |
* @throws StatException the stat exception |
|
94 |
* @throws REXPMismatchException |
|
95 |
* @throws RserveException |
|
96 |
*/ |
|
97 |
public SpecificitiesImpl(Vector lexicon, Vector subLexicon) |
|
98 |
throws StatException, RserveException, REXPMismatchException { |
|
99 |
super(prefixR+(specif_counter++)); |
|
100 |
|
|
101 |
RWorkspace rw = RWorkspace.getRWorkspaceInstance(); |
|
102 |
rw.safeEval("library(textometry)"); |
|
103 |
REXP r = rw.callFunction( |
|
104 |
"specificities.lexicon.new", new QuantitativeDataStructure[] { lexicon, subLexicon }, symbol); //$NON-NLS-1$ |
|
105 |
// double[] res = RWorkspace.toDouble(r); |
|
106 |
//System.out.println(" build double matrix"); |
|
107 |
specIndex = r.asDoubleMatrix(); |
|
108 |
} |
|
109 |
|
|
110 |
/** |
|
111 |
* compute Specificites of lexical table using the textometry package functions. |
|
112 |
* |
|
113 |
* @param table the table |
|
114 |
* @param typeFocus the type focus |
|
115 |
* @param partFocus the part focus |
|
116 |
* @return the double[][] |
|
117 |
* @throws StatException the stat exception |
|
118 |
*/ |
|
119 |
protected static double[][] compute(String symbol, ILexicalTable table, |
|
120 |
String[] typeFocus, String[] partFocus) throws StatException { |
|
121 |
|
|
122 |
RWorkspace rw = RWorkspace.getRWorkspaceInstance(); |
|
123 |
|
|
124 |
VectorImpl row = null; |
|
125 |
if (typeFocus != null && typeFocus.length != 0) |
|
126 |
row = new VectorImpl(typeFocus); |
|
127 |
|
|
128 |
VectorImpl column = null; |
|
129 |
if (partFocus != null && partFocus.length != 0) |
|
130 |
column = new VectorImpl(partFocus); |
|
131 |
|
|
132 |
try { |
|
133 |
rw.safeEval("library(textometry)"); |
|
134 |
} catch (RserveException e1) { |
|
135 |
// TODO Auto-generated catch block |
|
136 |
e1.printStackTrace(); |
|
137 |
} catch (REXPMismatchException e1) { |
|
138 |
// TODO Auto-generated catch block |
|
139 |
e1.printStackTrace(); |
|
140 |
} |
|
141 |
REXP r = rw.callFunction("specificities", new QuantitativeDataStructure[] { table, row, column }, symbol); //$NON-NLS-1$ |
|
142 |
|
|
143 |
double[][] res; |
|
144 |
try { |
|
145 |
res = r.asDoubleMatrix(); |
|
146 |
} catch (REXPMismatchException e) { |
|
147 |
throw new StatException(SpecificitiesCoreMessages.ComputeError_FAILED_TO_GET_SPECIFICITIES + e.getMessage(), e); |
|
148 |
} |
|
149 |
Log.finest((res.length * res[0].length) + SpecificitiesCoreMessages.Specificites_2); |
|
150 |
|
|
151 |
return res; |
|
152 |
} |
|
153 |
|
|
154 |
public double[][] getScores() { |
|
155 |
return specIndex; |
|
156 |
} |
|
157 |
} |
|
0 | 158 |
tmp/org.txm.specificities.core/src/org/txm/functions/contrasts/Contrast.java (revision 313) | ||
---|---|---|
9 | 9 |
|
10 | 10 |
import org.txm.chartsengine.r.core.themes.DefaultTheme; |
11 | 11 |
import org.txm.core.results.TXMResult; |
12 |
import org.txm.lexicaltable.core.functions.LexicalTable; |
|
12 | 13 |
import org.txm.lexicaltable.core.functions.LexicalTableFactory; |
13 |
import org.txm.lexicaltable.core.statsengine.data.LexicalTable; |
|
14 |
import org.txm.lexicaltable.core.statsengine.data.ILexicalTable;
|
|
14 | 15 |
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl; |
15 | 16 |
import org.txm.lexicon.core.corpusengine.cqp.Lexicon; |
16 | 17 |
//import org.txm.progression.core.functions.Progression; |
... | ... | |
28 | 29 |
|
29 | 30 |
public abstract class Contrast extends TXMResult { |
30 | 31 |
|
31 |
protected String symbol; |
|
32 |
protected static int no_contrast = 1; |
|
33 |
|
|
34 |
protected String symbol; // the R table that stores the freqs + indices |
|
32 | 35 |
protected LexicalTable table; |
33 | 36 |
|
34 | 37 |
protected double[][] indices; |
... | ... | |
40 | 43 |
throw new IllegalArgumentException(SpecificitiesCoreMessages.ComputeError_NEED_AT_LEAST_2_PARTS); |
41 | 44 |
} |
42 | 45 |
table = LexicalTableFactory.getLexicalTable(partition, property, Fmin); |
43 |
|
|
46 |
symbol = "Contrast_"+this.getClass().getSimpleName()+"_"+(no_contrast++); |
|
44 | 47 |
} |
45 | 48 |
|
46 | 49 |
public Contrast(LexicalTable table) { |
47 | 50 |
this.table = table; |
51 |
symbol = "Contrast_"+this.getClass().getSimpleName()+"_"+(no_contrast++); |
|
48 | 52 |
} |
49 | 53 |
|
50 | 54 |
public Contrast(Corpus corpus, Subcorpus subcorpus, Property property) throws StatException, CqiClientException { |
51 |
Lexicon totalFrequencies = corpus.getLexicon(property); |
|
52 |
Lexicon subFrequencies = subcorpus.getLexicon(property); |
|
53 |
|
|
54 |
Vector totalFSymbol = totalFrequencies.asVector(); |
|
55 |
Vector subFSymbol = subFrequencies.asVector(); |
|
56 |
|
|
57 |
String symbol = RWorkspace.getRandomSymbol(); |
|
58 |
RWorkspace.getRWorkspaceInstance().eval(symbol + " <- lexiconsToLexicalTable("+totalFSymbol.getSymbol()+", "+subFSymbol.getSymbol()+");"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
|
59 |
table = new LexicalTableImpl(symbol); |
|
60 |
table.setCorpus(corpus); |
|
55 |
table = new LexicalTable(corpus, subcorpus); |
|
56 |
symbol = "Contrast_"+this.getClass().getSimpleName()+"_"+(no_contrast++); |
|
61 | 57 |
} |
62 | 58 |
|
63 | 59 |
public abstract boolean compute() throws Exception; |
... | ... | |
85 | 81 |
} |
86 | 82 |
|
87 | 83 |
public void reloadValues() throws StatException { |
88 |
colNames = table.getColNames().asStringsArray(); |
|
89 |
rowNames = table.getRowNames().asStringsArray(); |
|
90 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(table.getSymbol()); |
|
84 |
|
|
85 |
colNames = table.getData().getColNames().asStringsArray(); |
|
86 |
rowNames = table.getData().getRowNames().asStringsArray(); |
|
87 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(table.getData().getSymbol()); |
|
88 |
|
|
91 | 89 |
indices = RWorkspace.getRWorkspaceInstance().evalToDouble2D(symbol); |
92 | 90 |
} |
93 | 91 |
|
tmp/org.txm.specificities.core/src/org/txm/functions/contrasts/RelativeFrequency.java (revision 313) | ||
---|---|---|
1 | 1 |
package org.txm.functions.contrasts; |
2 | 2 |
|
3 | 3 |
import org.txm.core.messages.TXMCoreMessages; |
4 |
import org.txm.lexicaltable.core.statsengine.data.LexicalTable; |
|
4 |
import org.txm.lexicaltable.core.functions.LexicalTable; |
|
5 |
import org.txm.lexicaltable.core.statsengine.data.ILexicalTable; |
|
5 | 6 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
6 | 7 |
import org.txm.searchengine.cqp.corpus.Corpus; |
7 | 8 |
import org.txm.searchengine.cqp.corpus.Partition; |
... | ... | |
12 | 13 |
|
13 | 14 |
public class RelativeFrequency extends Contrast { |
14 | 15 |
|
16 |
private ILexicalTable itable; |
|
17 |
|
|
15 | 18 |
public RelativeFrequency(Corpus corpus, Subcorpus subcorpus, Property property) |
16 | 19 |
throws StatException, CqiClientException { |
17 | 20 |
super(corpus, subcorpus, property); |
... | ... | |
31 | 34 |
|
32 | 35 |
@Override |
33 | 36 |
public boolean compute() throws StatException { |
34 |
colNames = table.getColNames().asStringsArray(); |
|
35 |
rowNames = table.getRowNames().asStringsArray(); |
|
36 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(table.getSymbol()); |
|
37 |
itable = table.getData(); |
|
38 |
colNames = itable.getColNames().asStringsArray(); |
|
39 |
rowNames = itable.getRowNames().asStringsArray(); |
|
40 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(itable.getSymbol()); |
|
37 | 41 |
|
38 | 42 |
// compute |
39 |
String cmd = symbol +" <- 100*"+table.getSymbol()+" / rowSums("+table.getSymbol()+")"; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
|
|
43 |
String cmd = symbol +" <- 100*"+itable.getSymbol()+" / rowSums("+itable.getSymbol()+")"; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
|
|
40 | 44 |
indices = RWorkspace.getRWorkspaceInstance().evalToDouble2D(cmd); |
41 | 45 |
return true; |
42 | 46 |
} |
tmp/org.txm.specificities.core/src/org/txm/functions/contrasts/AbsoluteFrequency.java (revision 313) | ||
---|---|---|
1 | 1 |
package org.txm.functions.contrasts; |
2 | 2 |
|
3 | 3 |
import org.txm.core.messages.TXMCoreMessages; |
4 |
import org.txm.lexicaltable.core.statsengine.data.LexicalTable; |
|
4 |
import org.txm.lexicaltable.core.functions.LexicalTable; |
|
5 |
import org.txm.lexicaltable.core.statsengine.data.ILexicalTable; |
|
5 | 6 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
6 | 7 |
import org.txm.searchengine.cqp.corpus.Corpus; |
7 | 8 |
import org.txm.searchengine.cqp.corpus.Partition; |
... | ... | |
12 | 13 |
|
13 | 14 |
public class AbsoluteFrequency extends Contrast { |
14 | 15 |
|
16 |
private ILexicalTable itable; |
|
17 |
|
|
15 | 18 |
public AbsoluteFrequency(Corpus corpus, Subcorpus subcorpus, Property property) |
16 | 19 |
throws StatException, CqiClientException { |
17 | 20 |
super(corpus, subcorpus, property); |
18 |
// TODO Auto-generated constructor stub |
|
19 | 21 |
} |
20 | 22 |
|
21 | 23 |
public AbsoluteFrequency(Partition partition, Property property, int Fmin) |
22 | 24 |
throws StatException, CqiClientException { |
23 | 25 |
super(partition, property, Fmin); |
24 |
// TODO Auto-generated constructor stub |
|
25 | 26 |
} |
26 | 27 |
|
27 | 28 |
|
28 | 29 |
public AbsoluteFrequency(LexicalTable table) { |
29 | 30 |
super(table); |
30 |
// TODO Auto-generated constructor stub |
|
31 | 31 |
} |
32 | 32 |
|
33 | 33 |
@Override |
34 | 34 |
public boolean compute() throws StatException { |
35 |
colNames = table.getColNames().asStringsArray(); |
|
36 |
rowNames = table.getRowNames().asStringsArray(); |
|
37 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(table.getSymbol()); |
|
35 |
itable = table.getData(); |
|
36 |
colNames = itable.getColNames().asStringsArray(); |
|
37 |
rowNames = itable.getRowNames().asStringsArray(); |
|
38 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(itable.getSymbol()); |
|
38 | 39 |
|
39 | 40 |
// compute |
40 |
String cmd = symbol +" <- "+table.getSymbol(); //$NON-NLS-1$ |
|
41 |
String cmd = symbol +" <- "+itable.getSymbol(); //$NON-NLS-1$
|
|
41 | 42 |
indices = RWorkspace.getRWorkspaceInstance().evalToDouble2D(cmd); |
42 | 43 |
return true; |
43 | 44 |
} |
tmp/org.txm.specificities.core/src/org/txm/functions/contrasts/Contrasts.java (revision 313) | ||
---|---|---|
3 | 3 |
import java.util.ArrayList; |
4 | 4 |
|
5 | 5 |
import org.txm.core.messages.TXMCoreMessages; |
6 |
import org.txm.lexicaltable.core.statsengine.data.LexicalTable;
|
|
6 |
import org.txm.lexicaltable.core.functions.LexicalTable;
|
|
7 | 7 |
import org.txm.specificities.core.functions.Chi2; |
8 | 8 |
import org.txm.specificities.core.functions.Specificites2; |
9 | 9 |
|
10 | 10 |
public class Contrasts { |
11 | 11 |
LexicalTable table; |
12 |
|
|
12 | 13 |
ArrayList<Contrast> mesures = new ArrayList<Contrast>(); |
13 | 14 |
ArrayList<ContrastMethod> methods; |
14 | 15 |
|
tmp/org.txm.specificities.core/META-INF/MANIFEST.MF (revision 313) | ||
---|---|---|
4 | 4 |
Bundle-SymbolicName: org.txm.specificities.core;singleton:=true |
5 | 5 |
Bundle-Version: 1.0.0.qualifier |
6 | 6 |
Bundle-RequiredExecutionEnvironment: JavaSE-1.6 |
7 |
Require-Bundle: org.txm.statsengine.core;bundle-version="1.0.0", |
|
7 |
Require-Bundle: org.txm.statsengine.r.core, |
|
8 |
org.txm.lexicon.core;bundle-version="1.0.0", |
|
9 |
org.txm.lexicaltable.core;bundle-version="1.0.0", |
|
10 |
org.txm.statsengine.core;bundle-version="1.0.0", |
|
8 | 11 |
org.txm.core;bundle-version="0.7.0", |
9 | 12 |
org.txm.progression.core;bundle-version="1.0.0", |
10 | 13 |
org.txm.chartsengine.core;bundle-version="1.0.0", |
11 | 14 |
org.txm.chartsengine.jfreechart.core;bundle-version="1.0.0", |
12 | 15 |
org.txm.chartsengine.r.core;bundle-version="1.0.0", |
13 |
org.eclipse.core.runtime;bundle-version="3.10.0", |
|
14 |
org.txm.lexicaltable.core;bundle-version="1.0.0", |
|
15 |
org.txm.lexicon.core;bundle-version="1.0.0" |
|
16 |
org.eclipse.core.runtime;bundle-version="3.10.0" |
|
16 | 17 |
Export-Package: org.txm.functions.contrasts, |
17 | 18 |
org.txm.specificities.core.chartsengine.jfreechart, |
18 | 19 |
org.txm.specificities.core.chartsengine.r, |
tmp/org.txm.statsengine.core/META-INF/MANIFEST.MF (revision 313) | ||
---|---|---|
11 | 11 |
Require-Bundle: org.txm.core;bundle-version="0.7.0", |
12 | 12 |
org.txm.utils;bundle-version="1.0.0" |
13 | 13 |
Bundle-Vendor: Textometrie.org |
14 |
Import-Package: org.eclipse.osgi.util |
tmp/org.txm.statsengine.core/src/org/txm/statsengine/core/messages/StatsEngineCoreMessages.java (revision 313) | ||
---|---|---|
1 | 1 |
package org.txm.statsengine.core.messages; |
2 | 2 |
|
3 |
import org.eclipse.osgi.util.NLS; |
|
3 | 4 |
import org.txm.utils.messages.Utf8NLS; |
4 | 5 |
|
5 |
public class StatsEngineCoreMessages extends Utf8NLS {
|
|
6 |
public class StatsEngineCoreMessages extends NLS { |
|
6 | 7 |
|
7 | 8 |
private static final String BUNDLE_NAME = "org.txm.statsengine.core.messages.messages"; //$NON-NLS-1$ |
8 | 9 |
|
tmp/org.txm.statsengine.core/src/org/txm/statsengine/core/data/QuantitativeDataStructure.java (revision 313) | ||
---|---|---|
27 | 27 |
// |
28 | 28 |
package org.txm.statsengine.core.data; |
29 | 29 |
|
30 |
import org.txm.core.results.ITXMResult; |
|
31 |
|
|
32 |
|
|
33 |
// TODO: Auto-generated Javadoc |
|
34 | 30 |
/** |
35 | 31 |
* The root of all other data structures. |
36 | 32 |
* |
37 | 33 |
* @author sloiseau |
38 | 34 |
*/ |
39 |
public interface QuantitativeDataStructure extends ITXMResult {
|
|
35 |
public interface QuantitativeDataStructure { |
|
40 | 36 |
|
41 | 37 |
/** |
42 | 38 |
* The symbol is the name of the data structure in a wrapped statistical |
tmp/org.txm.cah.core/src/org/txm/cah/core/functions/CAH.java (revision 313) | ||
---|---|---|
35 | 35 |
import org.txm.cah.core.messages.CAHCoreMessages; |
36 | 36 |
import org.txm.core.results.ITXMResult; |
37 | 37 |
import org.txm.functions.Function; |
38 |
import org.txm.lexicaltable.core.statsengine.data.LexicalTable;
|
|
38 |
import org.txm.lexicaltable.core.functions.LexicalTable;
|
|
39 | 39 |
import org.txm.searchengine.cqp.corpus.Corpus; |
40 | 40 |
import org.txm.searchengine.cqp.corpus.Partition; |
41 | 41 |
import org.txm.statsengine.r.core.RWorkspace; |
... | ... | |
113 | 113 |
protected CA ca; |
114 | 114 |
protected LexicalTable table; |
115 | 115 |
|
116 |
|
|
117 | 116 |
/** |
118 | 117 |
* Instantiates a new CAH. |
119 | 118 |
* |
... | ... | |
160 | 159 |
this.source = table; |
161 | 160 |
this.partition = table.getPartition(); |
162 | 161 |
this.corpus = table.getCorpus(); |
163 |
this.target = table.getSymbol(); |
|
162 |
this.target = table.getData().getSymbol();
|
|
164 | 163 |
this.table = table; |
165 | 164 |
} |
166 | 165 |
|
tmp/org.txm.lexicon.core/src/org/txm/lexicon/core/corpusengine/cqp/Lexicon.java (revision 313) | ||
---|---|---|
33 | 33 |
import java.io.IOException; |
34 | 34 |
import java.io.OutputStreamWriter; |
35 | 35 |
import java.io.UnsupportedEncodingException; |
36 |
import java.util.ArrayList; |
|
36 | 37 |
import java.util.Arrays; |
37 | 38 |
import java.util.Map; |
38 | 39 |
|
40 |
import org.txm.core.messages.TXMCoreMessages; |
|
41 |
import org.txm.core.results.ITXMResult; |
|
39 | 42 |
import org.txm.functions.Function; |
40 | 43 |
import org.txm.lexicon.core.messages.LexiconCoreMessages; |
41 |
import org.txm.searchengine.cqp.CqpDataProxy; |
|
44 |
import org.txm.searchengine.cqp.ICqiClient; |
|
45 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
42 | 46 |
import org.txm.searchengine.cqp.corpus.Corpus; |
43 | 47 |
import org.txm.searchengine.cqp.corpus.CorpusManager; |
48 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
44 | 49 |
import org.txm.searchengine.cqp.corpus.Property; |
45 | 50 |
import org.txm.searchengine.cqp.corpus.Subcorpus; |
51 |
import org.txm.searchengine.cqp.corpus.query.Query; |
|
46 | 52 |
import org.txm.statsengine.core.StatException; |
47 | 53 |
import org.txm.statsengine.core.data.Vector; |
48 | 54 |
import org.txm.statsengine.r.core.data.VectorImpl; |
55 |
import org.txm.utils.logger.Log; |
|
49 | 56 |
|
50 | 57 |
// TODO: Auto-generated Javadoc |
51 | 58 |
// TODO should be put into stat.data package ? |
... | ... | |
65 | 72 |
protected static int nolex = 1; |
66 | 73 |
|
67 | 74 |
/** The freqs. */ |
68 |
private final int[] freqs;
|
|
75 |
private int[] freqs; |
|
69 | 76 |
|
70 | 77 |
/** The ids. */ |
71 |
private final int[] ids;
|
|
78 |
private int[] ids; |
|
72 | 79 |
|
73 | 80 |
/** The forms. */ |
74 | 81 |
private String[] forms; |
75 | 82 |
|
76 | 83 |
/** The property. */ |
77 |
private final Property property;
|
|
84 |
private Property property; |
|
78 | 85 |
|
79 | 86 |
/** The number of tokens. */ |
80 | 87 |
int numberOfTokens = -1; |
81 | 88 |
|
82 |
/** The cache. */ |
|
83 |
private Map<Property, CqpDataProxy> cache; |
|
84 |
|
|
85 | 89 |
/** The symbol. */ |
86 | 90 |
private String symbol; |
87 | 91 |
|
88 | 92 |
/** The writer. */ |
89 | 93 |
private OutputStreamWriter writer; |
90 | 94 |
|
95 |
public Lexicon(Corpus corpus, Property property) throws CqiClientException { |
|
96 |
if (corpus instanceof MainCorpus) initLexiconWith((MainCorpus)corpus, property); |
|
97 |
else if (corpus instanceof Subcorpus) initLexiconWith((MainCorpus)corpus, property); |
|
98 |
else { |
|
99 |
throw new IllegalArgumentException("Corpus class not managed: "+corpus); |
|
100 |
} |
|
101 |
} |
|
102 |
|
|
91 | 103 |
/** |
92 |
* Protected on purpose: should be accessed through. |
|
93 |
* |
|
94 |
* @param corpus the corpus |
|
95 |
* @param property the property |
|
96 |
* @param freq the freq |
|
97 |
* @param ids the ids |
|
98 |
* {@link Corpus#getLexicon(Property)} or |
|
99 |
* {@link Subcorpus#getLexicon(Property)}. |
|
104 |
* Gets the lexicon relative to a given property. |
|
105 |
* |
|
106 |
* @param property |
|
107 |
* the property |
|
108 |
* |
|
109 |
* @return the lexicon |
|
110 |
* |
|
111 |
* @throws CqiClientException |
|
112 |
* the cqi client exception |
|
100 | 113 |
*/ |
101 |
public Lexicon(Corpus corpus, Property property, final int[] freq, final int[] ids) { |
|
102 |
super(corpus); |
|
103 |
if (freq.length != ids.length) |
|
104 |
throw new IllegalArgumentException(LexiconCoreMessages.Lexicon_0); |
|
105 |
this.freqs = freq; |
|
106 |
this.ids = ids; |
|
107 |
this.forms = null; |
|
108 |
this.property = property; |
|
114 |
protected void initLexiconWith(MainCorpus corpus, Property property) throws CqiClientException { |
|
115 |
// System.out.println("in "+this.getCqpId()+" look for cached lexicon "+property); |
|
116 |
|
|
117 |
|
|
118 |
|
|
119 |
// System.out.println("not found"); |
|
120 |
Log.finest(TXMCoreMessages.LEXICON + corpus.getName()); |
|
121 |
int lexiconSize; |
|
122 |
try { |
|
123 |
lexiconSize = CorpusManager.getCorpusManager().getCqiClient() |
|
124 |
.lexiconSize(property.getQualifiedName()); |
|
125 |
} catch (Exception e) { |
|
126 |
throw new CqiClientException(e); |
|
127 |
} |
|
128 |
|
|
129 |
int[] ids = new int[lexiconSize]; |
|
130 |
for (int i = 0; i < ids.length; i++) { |
|
131 |
ids[i] = i; |
|
132 |
} |
|
133 |
|
|
134 |
/* |
|
135 |
* String[] forms = null; try { forms = |
|
136 |
* CorpusManager.getCorpusManager(). |
Formats disponibles : Unified diff