Révision 73
tmp/org.txm.specificities.core/.classpath (revision 73) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<classpath> |
|
3 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/> |
|
4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/> |
|
5 |
<classpathentry kind="src" path="src"/> |
|
6 |
<classpathentry kind="output" path="bin"/> |
|
7 |
</classpath> |
|
0 | 8 |
tmp/org.txm.specificities.core/META-INF/MANIFEST.MF (revision 73) | ||
---|---|---|
1 |
Manifest-Version: 1.0 |
|
2 |
Bundle-ManifestVersion: 2 |
|
3 |
Bundle-Name: Specificities Core |
|
4 |
Bundle-SymbolicName: org.txm.specificities.core;singleton:=true |
|
5 |
Bundle-Version: 1.0.0.qualifier |
|
6 |
Bundle-RequiredExecutionEnvironment: JavaSE-1.6 |
|
7 |
Require-Bundle: org.txm.chartsengine.core;bundle-version="1.0.0", |
|
8 |
org.txm.core, |
|
9 |
org.txm.chartsengine.jfreechart.core;bundle-version="1.0.0", |
|
10 |
org.txm.chartsengine.r.core;bundle-version="1.0.0", |
|
11 |
org.eclipse.core.runtime;bundle-version="3.10.0" |
|
12 |
Export-Package: org.txm.specificities.core.chartsengine.jfreechart, |
|
13 |
org.txm.specificities.core.chartsengine.r, |
|
14 |
org.txm.specificities.core.functions, |
|
15 |
org.txm.specificities.core.messages, |
|
16 |
org.txm.specificities.core.preferences |
|
0 | 17 |
tmp/org.txm.specificities.core/.project (revision 73) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<projectDescription> |
|
3 |
<name>org.txm.specificities.core</name> |
|
4 |
<comment></comment> |
|
5 |
<projects> |
|
6 |
</projects> |
|
7 |
<buildSpec> |
|
8 |
<buildCommand> |
|
9 |
<name>org.eclipse.jdt.core.javabuilder</name> |
|
10 |
<arguments> |
|
11 |
</arguments> |
|
12 |
</buildCommand> |
|
13 |
<buildCommand> |
|
14 |
<name>org.eclipse.pde.ManifestBuilder</name> |
|
15 |
<arguments> |
|
16 |
</arguments> |
|
17 |
</buildCommand> |
|
18 |
<buildCommand> |
|
19 |
<name>org.eclipse.pde.SchemaBuilder</name> |
|
20 |
<arguments> |
|
21 |
</arguments> |
|
22 |
</buildCommand> |
|
23 |
</buildSpec> |
|
24 |
<natures> |
|
25 |
<nature>org.eclipse.pde.PluginNature</nature> |
|
26 |
<nature>org.eclipse.jdt.core.javanature</nature> |
|
27 |
</natures> |
|
28 |
</projectDescription> |
|
0 | 29 |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/preferences/SpecificitiesPreferences.java (revision 73) | ||
---|---|---|
1 |
package org.txm.specificities.core.preferences; |
|
2 |
|
|
3 |
|
|
4 |
import org.eclipse.core.runtime.preferences.DefaultScope; |
|
5 |
import org.osgi.framework.FrameworkUtil; |
|
6 |
import org.osgi.service.prefs.Preferences; |
|
7 |
import org.txm.core.preferences.TXMPreferences; |
|
8 |
|
|
9 |
/** |
|
10 |
* Default preferences initializer. |
|
11 |
* @author sjacquot |
|
12 |
* |
|
13 |
*/ |
|
14 |
public class SpecificitiesPreferences extends TXMPreferences { |
|
15 |
|
|
16 |
|
|
17 |
// auto populate the preference node qualifier from the current bundle id |
|
18 |
public static final String PREFERENCES_NODE = FrameworkUtil.getBundle(SpecificitiesPreferences.class).getSymbolicName(); |
|
19 |
|
|
20 |
public static final String PREFERENCES_PREFIX = "specificities_"; //$NON-NLS-1$ |
|
21 |
|
|
22 |
|
|
23 |
public static final String MAX_SCORE = PREFERENCES_PREFIX + "maxscore"; //$NON-NLS-1$ |
|
24 |
public static final String FORMAT = PREFERENCES_PREFIX + "format"; //$NON-NLS-1$ |
|
25 |
|
|
26 |
// charts |
|
27 |
public static final String CHART_TRANSPOSE = PREFERENCES_PREFIX + "chart_transpose"; //$NON-NLS-1$ |
|
28 |
public static final String CHART_DRAW_BARS = PREFERENCES_PREFIX + "chart_draw_bars"; //$NON-NLS-1$ |
|
29 |
public static final String CHART_DRAW_LINES = PREFERENCES_PREFIX + "chart_draw_lines"; //$NON-NLS-1$ |
|
30 |
public static final String CHART_BANALITY = PREFERENCES_PREFIX + "chart_banality"; //$NON-NLS-1$ |
|
31 |
|
|
32 |
public SpecificitiesPreferences() { |
|
33 |
// TODO Auto-generated constructor stub |
|
34 |
} |
|
35 |
|
|
36 |
@Override |
|
37 |
public void initializeDefaultPreferences() { |
|
38 |
Preferences preferences = DefaultScope.INSTANCE.getNode(PREFERENCES_NODE); |
|
39 |
|
|
40 |
preferences.put(FORMAT, "%,.1f"); |
|
41 |
preferences.putInt(MAX_SCORE, 1); |
|
42 |
|
|
43 |
preferences.putBoolean(CHART_TRANSPOSE, false); |
|
44 |
preferences.putBoolean(CHART_DRAW_BARS, true); |
|
45 |
preferences.putBoolean(CHART_DRAW_LINES, false); |
|
46 |
preferences.putFloat(CHART_BANALITY, 2); |
|
47 |
|
|
48 |
|
|
49 |
// shared charts rendering preferences |
|
50 |
super.initializeChartsEngineSharedPreferences(preferences); |
|
51 |
} |
|
52 |
|
|
53 |
} |
|
0 | 54 |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/messages/TBXSpecificitiesMessages.java (revision 73) | ||
---|---|---|
1 |
package org.txm.specificities.core.messages; |
|
2 |
|
|
3 |
import org.eclipse.osgi.util.NLS; |
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
public class TBXSpecificitiesMessages extends NLS { |
|
8 |
|
|
9 |
private static final String BUNDLE_NAME = "org.txm.specificities.core.messages.messages"; //$NON-NLS-1$ |
|
10 |
|
|
11 |
public static String ChartsEngine_SPECIFICITIES_X_AXIS_LABEL; |
|
12 |
public static String ChartsEngine_SPECIFICITIES_Y_AXIS_LABEL; |
|
13 |
public static String ChartsEngine_SPECIFICITIES_BANALITY_MARKER_LABEL; |
|
14 |
|
|
15 |
|
|
16 |
static { |
|
17 |
// initialize resource bundle |
|
18 |
NLS.initializeMessages(BUNDLE_NAME, TBXSpecificitiesMessages.class); |
|
19 |
} |
|
20 |
|
|
21 |
} |
|
0 | 22 |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/messages/messages_fr.properties (revision 73) | ||
---|---|---|
1 |
ChartsEngine_SPECIFICITIES_BANALITY_MARKER_LABEL=Banalit\u00E9 : {0} |
|
2 |
ChartsEngine_SPECIFICITIES_X_AXIS_LABEL=Partie |
|
3 |
ChartsEngine_SPECIFICITIES_Y_AXIS_LABEL=Score |
|
0 | 4 |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/messages/messages.properties (revision 73) | ||
---|---|---|
1 |
ChartsEngine_SPECIFICITIES_BANALITY_MARKER_LABEL=Banality: {0} |
|
2 |
ChartsEngine_SPECIFICITIES_X_AXIS_LABEL=Part |
|
3 |
ChartsEngine_SPECIFICITIES_Y_AXIS_LABEL=Score |
|
0 | 4 |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/functions/Chi2.java (revision 73) | ||
---|---|---|
1 |
package org.txm.specificities.core.functions; |
|
2 |
|
|
3 |
import org.txm.functions.contrasts.Contrast; |
|
4 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
5 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
6 |
import org.txm.searchengine.cqp.corpus.Partition; |
|
7 |
import org.txm.searchengine.cqp.corpus.Property; |
|
8 |
import org.txm.searchengine.cqp.corpus.Subcorpus; |
|
9 |
import org.txm.stat.StatException; |
|
10 |
import org.txm.stat.data.LexicalTable; |
|
11 |
import org.txm.stat.engine.r.RWorkspace; |
|
12 |
|
|
13 |
public class Chi2 extends Contrast { |
|
14 |
public Chi2(Corpus corpus, Subcorpus subcorpus, Property property) |
|
15 |
throws StatException, CqiClientException { |
|
16 |
super(corpus, subcorpus, property); |
|
17 |
// TODO Auto-generated constructor stub |
|
18 |
} |
|
19 |
|
|
20 |
public Chi2(Partition partition, Property property, int Fmin) |
|
21 |
throws StatException, CqiClientException { |
|
22 |
super(partition, property, Fmin); |
|
23 |
// TODO Auto-generated constructor stub |
|
24 |
} |
|
25 |
|
|
26 |
|
|
27 |
public Chi2(LexicalTable table) { |
|
28 |
super(table); |
|
29 |
// TODO Auto-generated constructor stub |
|
30 |
} |
|
31 |
|
|
32 |
@Override |
|
33 |
public boolean compute() throws StatException { |
|
34 |
colNames = table.getColNames().asStringsArray(); |
|
35 |
rowNames = table.getRowNames().asStringsArray(); |
|
36 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(table.getSymbol()); |
|
37 |
|
|
38 |
// compute |
|
39 |
String cmd = "mat <- "+table.getSymbol()+"\n"+ //$NON-NLS-1$ //$NON-NLS-2$ |
|
40 |
" mat2 <- rowSums(mat) %o% colSums(mat) / sum(mat)"+"\n"+ //$NON-NLS-1$ //$NON-NLS-2$ |
|
41 |
symbol +" <- (mat - mat2) ^ 2 / mat2"; //$NON-NLS-1$ |
|
42 |
indices = RWorkspace.getRWorkspaceInstance().evalToDouble2D(cmd); |
|
43 |
return true; |
|
44 |
} |
|
45 |
|
|
46 |
@Override |
|
47 |
public String getName() { |
|
48 |
return "Chi2"; //$NON-NLS-1$ |
|
49 |
} |
|
50 |
} |
|
0 | 51 |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/functions/SpecificitesException.java (revision 73) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun., 06 mai 2013) $ |
|
25 |
// $LastChangedRevision: 2386 $ |
|
26 |
// $LastChangedBy: mdecorde $ |
|
27 |
// |
|
28 |
package org.txm.specificities.core.functions; |
|
29 |
|
|
30 |
// TODO: Auto-generated Javadoc |
|
31 |
/**
 * Checked exception of the specificities computation. It adds nothing to
 * {@link Exception}: each constructor forwards its arguments to the matching
 * super constructor.
 *
 * @author sloiseau
 */
public class SpecificitesException extends Exception {

    /** Serialization version identifier. */
    private static final long serialVersionUID = 4206288552541531386L;

    /**
     * Creates an exception with neither detail message nor cause.
     */
    public SpecificitesException() {
        // the no-argument super constructor is invoked implicitly
    }

    /**
     * Creates an exception with a detail message.
     *
     * @param arg0 the detail message
     */
    public SpecificitesException(String arg0) {
        super(arg0);
    }

    /**
     * Creates an exception that wraps a cause.
     *
     * @param arg0 the cause
     */
    public SpecificitesException(Throwable arg0) {
        super(arg0);
    }

    /**
     * Creates an exception with a detail message and a cause.
     *
     * @param arg0 the detail message
     * @param arg1 the cause
     */
    public SpecificitesException(String arg0, Throwable arg1) {
        super(arg0, arg1);
    }
}
|
0 | 80 |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/functions/SpecificitesResult.java (revision 73) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate: 2017-01-24 18:11:42 +0100 (Tue, 24 Jan 2017) $ |
|
25 |
// $LastChangedRevision: 3400 $ |
|
26 |
// $LastChangedBy: mdecorde $ |
|
27 |
// |
|
28 |
package org.txm.specificities.core.functions; |
|
29 |
|
|
30 |
import java.io.BufferedWriter; |
|
31 |
import java.io.File; |
|
32 |
import java.io.FileNotFoundException; |
|
33 |
import java.io.FileOutputStream; |
|
34 |
import java.io.IOException; |
|
35 |
import java.io.OutputStreamWriter; |
|
36 |
import java.io.UnsupportedEncodingException; |
|
37 |
import java.util.ArrayList; |
|
38 |
import java.util.List; |
|
39 |
|
|
40 |
import org.rosuda.REngine.REXPMismatchException; |
|
41 |
import org.txm.core.chartsengine.r.__RDevice; |
|
42 |
import org.txm.functions.Function; |
|
43 |
import org.txm.functions.TXMResult; |
|
44 |
import org.txm.functions.progression.Progression; |
|
45 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
46 |
import org.txm.searchengine.cqp.corpus.Lexicon; |
|
47 |
import org.txm.searchengine.cqp.corpus.Partition; |
|
48 |
import org.txm.searchengine.cqp.corpus.Subcorpus; |
|
49 |
import org.txm.stat.StatException; |
|
50 |
import org.txm.stat.data.LexicalTable; |
|
51 |
import org.txm.stat.data.Matrix; |
|
52 |
import org.txm.stat.data.Vector; |
|
53 |
import org.txm.stat.engine.r.RWorkspace; |
|
54 |
import org.txm.stat.engine.r.data.DoubleMatrix; |
|
55 |
import org.txm.stat.engine.r.data.VectorImpl; |
|
56 |
import org.txm.stat.utils.ArrayIndex; |
|
57 |
import org.txm.stat.utils.CheckArray; |
|
58 |
import org.txm.stat.utils.VectorizeArray; |
|
59 |
import org.txm.HasResults; |
|
60 |
import org.txm.Messages; |
|
61 |
|
|
62 |
// TODO: Auto-generated Javadoc |
|
63 |
/** |
|
64 |
* Hold specificites computation result. |
|
65 |
* |
|
66 |
* @author sloiseau |
|
67 |
* |
|
68 |
*/ |
|
69 |
public class SpecificitesResult extends Function implements TXMResult { |
|
70 |
|
|
71 |
public static int MAXSPECIF = 1000; |
|
72 |
|
|
73 |
/** The indices. */ |
|
74 |
private final double[][] indices; |
|
75 |
|
|
76 |
/** The table. */ |
|
77 |
private final LexicalTable table; |
|
78 |
|
|
79 |
/** The rowindex. */ |
|
80 |
private int[] rowindex = null; |
|
81 |
|
|
82 |
/** The colindex. */ |
|
83 |
private int[] colindex = null; |
|
84 |
|
|
85 |
/** The frequencies. */ |
|
86 |
private int[][] frequencies = null; |
|
87 |
|
|
88 |
/** The lexicon. */ |
|
89 |
private Lexicon lexicon; |
|
90 |
|
|
91 |
/** The sub lexicon. */ |
|
92 |
private Lexicon subLexicon; |
|
93 |
|
|
94 |
/** The colnames. */ |
|
95 |
private List<String> colnames = null; |
|
96 |
|
|
97 |
/** The rownames. */ |
|
98 |
private List<String> rownames = null; |
|
99 |
|
|
100 |
/** The corpus. */ |
|
101 |
private Corpus corpus = null; |
|
102 |
|
|
103 |
/** The sub corpus. */ |
|
104 |
private Corpus subCorpus = null; |
|
105 |
|
|
106 |
/** The partition. */ |
|
107 |
private Partition partition; |
|
108 |
|
|
109 |
/** The name. */ |
|
110 |
private String name; |
|
111 |
|
|
112 |
/** The symbol. */ |
|
113 |
private String symbol; |
|
114 |
|
|
115 |
/** The subcorpus. */ |
|
116 |
private Corpus subcorpus; |
|
117 |
|
|
118 |
/** The writer. */ |
|
119 |
private BufferedWriter writer; |
|
120 |
|
|
121 |
/** |
|
122 |
* The selected type names to focus on. |
|
123 |
*/ |
|
124 |
protected String[] selectedTypeNames; |
|
125 |
|
|
126 |
/** |
|
127 |
* The selected part names to focus on. |
|
128 |
*/ |
|
129 |
protected String[] selectedPartNames; |
|
130 |
|
|
131 |
/** |
|
132 |
* The selected specificities indices to focus on. |
|
133 |
*/ |
|
134 |
protected double[][] selectedSpecificitiesIndex; |
|
135 |
|
|
136 |
|
|
137 |
/** |
|
138 |
* Instantiates a new specificites result : for subcorpus specif ities. |
|
139 |
* |
|
140 |
* @param symbol the symbol |
|
141 |
* @param specIndex the spec index |
|
142 |
* @param lexicon the lexicon |
|
143 |
* @param subLexicon the sub lexicon |
|
144 |
* @param name the name |
|
145 |
*/ |
|
146 |
protected SpecificitesResult(String symbol, double[][] specIndex, Lexicon lexicon, |
|
147 |
Lexicon subLexicon, String name, int maxScore) { |
|
148 |
this.symbol = symbol; |
|
149 |
this.indices = specIndex; |
|
150 |
|
|
151 |
this.lexicon = lexicon; |
|
152 |
this.subLexicon = subLexicon; |
|
153 |
this.corpus = lexicon.getCorpus(); |
|
154 |
this.subCorpus = subLexicon.getCorpus(); |
|
155 |
this.name = name; |
|
156 |
|
|
157 |
int MAX = MAXSPECIF; |
|
158 |
int MIN = -MAXSPECIF; |
|
159 |
|
|
160 |
if (maxScore > 0) { |
|
161 |
MAX = maxScore; |
|
162 |
MIN = -maxScore; |
|
163 |
} |
|
164 |
for (int i = 0 ; i < indices.length ; i++) { |
|
165 |
for (int j = 0 ; j < indices[i].length ; j++) { |
|
166 |
if (indices[i][j] > MAX) { |
|
167 |
indices[i][j] = MAX; |
|
168 |
} else if (indices[i][j] < MIN) { |
|
169 |
indices[i][j] = MIN; |
|
170 |
} |
|
171 |
} |
|
172 |
} |
|
173 |
|
|
174 |
this.table = null; |
|
175 |
} |
|
176 |
|
|
177 |
/** |
|
178 |
* Instantiates a new specificites result. |
|
179 |
* |
|
180 |
* @param symbol the symbol |
|
181 |
* @param specIndex the spec index (as double[row][column]) |
|
182 |
* @param table the table |
|
183 |
* @param typeFocus names of the rows of the lexical table with corresponding rows |
|
184 |
* in the specIndex array; <code>null</code> if all the rows are |
|
185 |
* represented in specIndex. |
|
186 |
* @param partFocus names of the cols of the lexical table with corresponding cols |
|
187 |
* in the specIndex array; <code>null</code> if all the cols are |
|
188 |
* represented in specIndex. |
|
189 |
* @param name the name |
|
190 |
* @param maxScore if abs(score) > maxScore -> score = +/- maxScore |
|
191 |
* @throws StatException the stat exception |
|
192 |
*/ |
|
193 |
protected SpecificitesResult(String symbol, double[][] specIndex, LexicalTable table, |
|
194 |
List<String> typeFocus, List<String> partFocus, String name, int maxScore) |
|
195 |
throws StatException { |
|
196 |
this.symbol = symbol; |
|
197 |
|
|
198 |
if (table == null) { |
|
199 |
throw new IllegalArgumentException(Messages.SpecificitesResult_2); |
|
200 |
} |
|
201 |
this.partition = table.getPartition(); |
|
202 |
this.name = name; |
|
203 |
|
|
204 |
if (specIndex == null || specIndex.length == 0) { |
|
205 |
throw new IllegalArgumentException(Messages.SpecificitesResult_0); |
|
206 |
} |
|
207 |
boolean ok = CheckArray.checkMatrixRepresentation(specIndex); |
|
208 |
if (!ok) { |
|
209 |
throw new IllegalArgumentException(Messages.SpecificitesResult_1); |
|
210 |
} |
|
211 |
|
|
212 |
this.indices = specIndex; |
|
213 |
this.table = table; |
|
214 |
this.colnames = partFocus; |
|
215 |
this.rownames = typeFocus; |
|
216 |
|
|
217 |
// filter by max&min |
|
218 |
int MAX = MAXSPECIF; |
|
219 |
int MIN = -MAXSPECIF; |
|
220 |
|
|
221 |
if (maxScore > 0) { |
|
222 |
MAX = maxScore; |
|
223 |
MIN = -maxScore; |
|
224 |
} |
|
225 |
|
|
226 |
for (int i = 0 ; i < indices.length ; i++) { |
|
227 |
for (int j = 0 ; j < indices[i].length ; j++) { |
|
228 |
if (indices[i][j] > MAX) { |
|
229 |
indices[i][j] = MAX; |
|
230 |
} else if (indices[i][j] < MIN) { |
|
231 |
indices[i][j] = MIN; |
|
232 |
} |
|
233 |
} |
|
234 |
} |
|
235 |
|
|
236 |
if (typeFocus != null && typeFocus.size() != 0) { |
|
237 |
if (typeFocus.size() != specIndex.length) { |
|
238 |
throw new IllegalArgumentException( |
|
239 |
Messages.SpecificitesResult_3 + typeFocus.size() |
|
240 |
+ Messages.SpecificitesResult_4 |
|
241 |
+ specIndex.length |
|
242 |
+ Messages.SpecificitesResult_5); |
|
243 |
} |
|
244 |
|
|
245 |
rowindex = ArrayIndex.getIndex( |
|
246 |
table.getRowNames().asStringsArray(), typeFocus |
|
247 |
.toArray(new String[] {})); |
|
248 |
for (int i : rowindex) { |
|
249 |
if (i == -1) { |
|
250 |
throw new IllegalArgumentException( |
|
251 |
Messages.SpecificitesResult_6); |
|
252 |
} |
|
253 |
} |
|
254 |
} |
|
255 |
if (partFocus != null && partFocus.size() != 0) { |
|
256 |
if (partFocus.size() != specIndex[0].length) { |
|
257 |
throw new IllegalArgumentException( |
|
258 |
Messages.SpecificitesResult_7 + partFocus.size() |
|
259 |
+ ", " + specIndex[0].length + ")."); //$NON-NLS-1$//$NON-NLS-2$ |
|
260 |
} |
|
261 |
colindex = ArrayIndex.getIndex( |
|
262 |
table.getColNames().asStringsArray(), partFocus |
|
263 |
.toArray(new String[] {})); |
|
264 |
for (int i : colindex) { |
|
265 |
if (i == -1) { |
|
266 |
throw new IllegalArgumentException( |
|
267 |
Messages.SpecificitesResult_10); |
|
268 |
} |
|
269 |
} |
|
270 |
} |
|
271 |
} |
|
272 |
|
|
273 |
/** |
|
274 |
* Gets the type focus. |
|
275 |
* |
|
276 |
* @return the type focus |
|
277 |
*/ |
|
278 |
public List<String> getTypeFocus() { |
|
279 |
return rownames; |
|
280 |
} |
|
281 |
|
|
282 |
/** |
|
283 |
* Gets the part focus. |
|
284 |
* |
|
285 |
* @return the part focus |
|
286 |
*/ |
|
287 |
public List<String> getPartFocus() { |
|
288 |
return colnames; |
|
289 |
} |
|
290 |
|
|
291 |
/** |
|
292 |
* Gets the nbr part. |
|
293 |
* |
|
294 |
* @return the nbr part |
|
295 |
* @throws StatException the stat exception |
|
296 |
*/ |
|
297 |
public int getNbrPart() throws StatException { |
|
298 |
if (table != null) |
|
299 |
return table.getNColumns(); |
|
300 |
else |
|
301 |
return getPartShortNames().length; |
|
302 |
} |
|
303 |
|
|
304 |
/** |
|
305 |
* Gets the corpus size. |
|
306 |
* |
|
307 |
* @return the corpus size |
|
308 |
* @throws StatException the stat exception |
|
309 |
*/ |
|
310 |
public int getCorpusSize() throws StatException { |
|
311 |
return table != null ? table.getTotal() : lexicon.nbrOfToken(); |
|
312 |
} |
|
313 |
|
|
314 |
/** |
|
315 |
* Gets the specificites index. |
|
316 |
* |
|
317 |
* @return the specificites index |
|
318 |
*/ |
|
319 |
public double[][] getSpecificitesIndex() { |
|
320 |
return indices; |
|
321 |
} |
|
322 |
|
|
323 |
/** |
|
324 |
* Name of the type for which specificite are computed. |
|
325 |
* |
|
326 |
* @return the type names |
|
327 |
* @throws StatException the stat exception |
|
328 |
*/ |
|
329 |
public String[] getTypeNames() throws StatException { |
|
330 |
if (table != null) { |
|
331 |
return (rownames != null && rownames.size() != 0) ? rownames |
|
332 |
.toArray(new String[] {}) : table.getRowNames() |
|
333 |
.asStringsArray(); |
|
334 |
} else { |
|
335 |
return lexicon.getForms(); |
|
336 |
} |
|
337 |
} |
|
338 |
|
|
339 |
/** |
|
340 |
* Name of the part for which specif icite are computed. |
|
341 |
* |
|
342 |
* @return the part short names |
|
343 |
* @throws StatException the stat exception |
|
344 |
*/ |
|
345 |
public String[] getPartShortNames() throws StatException { |
|
346 |
// System.out.println("get col names: "+table); |
|
347 |
// System.out.println("colnames: "+colnames); |
|
348 |
if (colnames != null && colnames.size() > 0) |
|
349 |
return colnames.toArray(new String[]{}); |
|
350 |
else if (table != null) { |
|
351 |
// return (colnames != null && colnames.size() != 0) ? |
|
352 |
// colnames.toArray(new String[]{}) : |
|
353 |
// table.getColNames().asStringsArray(); |
|
354 |
/* |
|
355 |
* List<Part> parts = table.getPartition().getOrderedParts(); String[] |
|
356 |
* partShortNames = new String[parts.size()]; for (int i = 0; i < |
|
357 |
* partShortNames.length; i++) { partShortNames[i] = |
|
358 |
* parts.get(i).getShortName(); } |
|
359 |
*/ |
|
360 |
return table.getColNames().asStringsArray(); |
|
361 |
|
|
362 |
} else { |
|
363 |
if(this.subCorpus != null) |
|
364 |
return new String[] { this.subCorpus.getName(), |
|
365 |
this.corpus.getName() + " \\ " + this.subCorpus.getName() }; //$NON-NLS-1$ |
|
366 |
} |
|
367 |
return new String[0]; |
|
368 |
} |
|
369 |
|
|
370 |
/** |
|
371 |
* Gets the frequency. |
|
372 |
* |
|
373 |
* @return the frequency |
|
374 |
* @throws StatException the stat exception |
|
375 |
*/ |
|
376 |
public int[][] getFrequency() throws StatException { |
|
377 |
//System.out.println("GET FREQUENCIES"); |
|
378 |
if (frequencies == null) { |
|
379 |
//System.out.println("no frequencies"); |
|
380 |
if (table != null) { |
|
381 |
//System.out.println("FROM TABLE"); |
|
382 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(table.getSymbol()); |
|
383 |
|
|
384 |
} else {// if table == null : subcorpus specif |
|
385 |
//System.out.println("FROM LEXICON"); |
|
386 |
frequencies = new int[lexicon.getFreq().length][2]; // build a frenquency table from lexicons |
|
387 |
String[] corpusforms = lexicon.getForms(); // all the forms |
|
388 |
String[] subcorpusforms = subLexicon.getForms(); // all the forms |
|
389 |
int[] corpusfreq = lexicon.getFreq(); // all the freqs |
|
390 |
int[] subcorpusfreq = subLexicon.getFreq(); // all the freqs in the subcorpus |
|
391 |
|
|
392 |
// System.out.println("len subforms: "+subcorpusforms.length); |
|
393 |
// System.out.println("len subfreqs: "+subcorpusfreq.length); |
|
394 |
for (int i = 0 ; i < corpusforms.length ; i++) //get all forms |
|
395 |
{ |
|
396 |
int j = 0; // find the index of the form in the subcorpus arrays |
|
397 |
for (j = 0; j < subcorpusforms.length; j++) { |
|
398 |
if (subcorpusforms[j].equals(corpusforms[i])) |
|
399 |
break;// found the good form |
|
400 |
} |
|
401 |
if(j < subcorpusforms.length) |
|
402 |
frequencies[i][0] = subcorpusfreq[j]; |
|
403 |
frequencies[i][1] = corpusfreq[i] - frequencies[i][0]; |
|
404 |
} |
|
405 |
} |
|
406 |
} |
|
407 |
return frequencies; |
|
408 |
} |
|
409 |
|
|
410 |
/** |
|
411 |
* Gets the lexical table. |
|
412 |
* |
|
413 |
* @return the lexical table |
|
414 |
*/ |
|
415 |
public LexicalTable getLexicalTable() { |
|
416 |
return table; |
|
417 |
} |
|
418 |
|
|
419 |
/** |
|
420 |
* Gets the lexicon. |
|
421 |
* |
|
422 |
* @return the lexicon |
|
423 |
*/ |
|
424 |
public Lexicon getLexicon() { |
|
425 |
return lexicon; |
|
426 |
} |
|
427 |
|
|
428 |
/** |
|
429 |
* Gets the part size. |
|
430 |
* |
|
431 |
* @return the part size |
|
432 |
* @throws StatException the stat exception |
|
433 |
*/ |
|
434 |
public int[] getPartSize() throws StatException { |
|
435 |
if (table != null) { |
|
436 |
Vector partsize = table.getColMarginsVector(); |
|
437 |
if (colindex != null) { |
|
438 |
partsize = partsize.get(colindex); |
|
439 |
} |
|
440 |
return partsize.asIntArray(); |
|
441 |
} else { |
|
442 |
return new int[] { subLexicon.nbrOfToken(), |
|
443 |
lexicon.nbrOfToken() - subLexicon.nbrOfToken() }; |
|
444 |
} |
|
445 |
} |
|
446 |
|
|
447 |
/** |
|
448 |
* The frequency in the whole corpus. |
|
449 |
* |
|
450 |
* @return the form frequencies |
|
451 |
* @throws StatException the stat exception |
|
452 |
*/ |
|
453 |
public int[] getFormFrequencies() throws StatException { |
|
454 |
if (table != null) { |
|
455 |
//System.out.println("get freq by table"); |
|
456 |
Vector formFrequencies = table.getRowMarginsVector(); |
|
457 |
if (rowindex != null) { |
|
458 |
formFrequencies = formFrequencies.get(rowindex); |
|
459 |
} |
|
460 |
return formFrequencies.asIntArray(); |
|
461 |
} else { |
|
462 |
//System.out.println("freqs by lexicon of "+lexicon.getCorpus()); |
|
463 |
return lexicon.getFreq(); |
|
464 |
} |
|
465 |
} |
|
466 |
|
|
467 |
/** |
|
468 |
* To txt. |
|
469 |
* |
|
470 |
* @param outfile the outfile |
|
471 |
* @param encoding the encoding |
|
472 |
* @param colseparator the colseparator |
|
473 |
* @param txtseparator the txtseparator |
|
474 |
* @return true, if successful |
|
475 |
* @throws StatException the stat exception |
|
476 |
*/ |
|
477 |
public boolean toTxt(File outfile, String encoding, String colseparator, String txtseparator) throws StatException { |
|
478 |
|
|
479 |
// NK: Declared as class attribute to perform a clean if the operation is interrupted |
|
480 |
//OutputStreamWriter writer; |
|
481 |
try { |
|
482 |
this.writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), encoding)); |
|
483 |
} catch (UnsupportedEncodingException e1) { |
|
484 |
org.txm.utils.logger.Log.printStackTrace(e1); |
|
485 |
return false; |
|
486 |
} catch (FileNotFoundException e1) { |
|
487 |
org.txm.utils.logger.Log.printStackTrace(e1); |
|
488 |
return false; |
|
489 |
} |
|
490 |
|
|
491 |
//int[] T = getPartSize(); |
|
492 |
acquireSemaphore(); |
|
493 |
try { |
|
494 |
getFrequency(); |
|
495 |
getFormFrequencies(); |
|
496 |
getLexicon(); |
|
497 |
String[] names = getPartShortNames(); |
|
498 |
// if ("UTF-8".equals(encoding)) writer.write('\ufeff'); // UTF-8 BOM |
|
499 |
//write column header |
|
500 |
String txt = Messages.SpecificitesResult_8+colseparator+"F"; //$NON-NLS-1$ |
|
501 |
for(String colname : names) { |
|
502 |
txt += colseparator+ "f_"+colname + colseparator + "Score"; //$NON-NLS-1$ //$NON-NLS-2$ |
|
503 |
} |
|
504 |
writer.write(txt +"\n"); //$NON-NLS-1$ |
|
505 |
|
|
506 |
//write data |
|
507 |
for (int ii = 0; ii < frequencies.length; ii++) { |
|
508 |
txt = txtseparator + getTypeNames()[ii].replace(txtseparator, txtseparator+txtseparator)+txtseparator; |
|
509 |
int somme = 0; |
|
510 |
String txtcols = ""; //$NON-NLS-1$ |
|
511 |
for (int j = 0; j < frequencies[ii].length; j++) { |
|
512 |
somme += frequencies[ii][j]; |
|
513 |
txtcols += (colseparator + frequencies[ii][j]); |
|
514 |
txtcols += (colseparator + indices[ii][j]); |
|
515 |
} |
|
516 |
writer.write(txt + colseparator + somme+ txtcols+ "\n"); //$NON-NLS-1$ |
|
517 |
writer.flush(); |
|
518 |
} |
|
519 |
writer.close(); |
|
520 |
} catch (IOException e) { |
|
521 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
522 |
return false; |
|
523 |
} finally { |
|
524 |
releaseSemaphore(); |
|
525 |
} |
|
526 |
|
|
527 |
return true; |
|
528 |
} |
|
529 |
|
|
530 |
/** |
|
531 |
* To svg. |
|
532 |
* |
|
533 |
* @param file the file |
|
534 |
* @param typeNames the type names |
|
535 |
* @param partNames the part names |
|
536 |
* @param specIndex the spec index |
|
537 |
* @param transpose the transpose |
|
538 |
* @param drawBarPlot the draw bar plot |
|
539 |
* @param drawLines the draw lines |
|
540 |
* @param device the device |
|
541 |
* @throws StatException the stat exception |
|
542 |
*/ |
|
543 |
@Deprecated |
|
544 |
// FIXME : to remove when charts engine will be validated |
|
545 |
public final void toSVG(File file, String[] typeNames, |
|
546 |
String[] partNames, double[][] specIndex, boolean transpose, boolean drawBarPlot, boolean drawLines, __RDevice device, float BANALITE, boolean monochrome) throws StatException { |
|
547 |
|
|
548 |
// TODO: should filter max and min here |
|
549 |
// specIndex |
|
550 |
|
|
551 |
Matrix spec = new DoubleMatrix(specIndex); |
|
552 |
Vector types = new VectorImpl(typeNames); |
|
553 |
Vector parts = new VectorImpl(partNames); |
|
554 |
|
|
555 |
RWorkspace rw = RWorkspace.getRWorkspaceInstance(); |
|
556 |
rw.eval("rownames(" + spec.getSymbol() + ") <- " + types.getSymbol() + ";"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
|
557 |
rw.eval("colnames(" + spec.getSymbol() + ") <- " + parts.getSymbol() + ";"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
|
558 |
try { |
|
559 |
//System.out.println(""+"barplot(" + spec.getSymbol() + ", beside=T, legend.text=rownames(" + spec.getSymbol() + "));"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ |
|
560 |
|
|
561 |
String matrix = spec.getSymbol(); |
|
562 |
if (transpose) |
|
563 |
matrix = "t("+matrix+")"; //$NON-NLS-1$ //$NON-NLS-2$ |
|
564 |
|
|
565 |
|
|
566 |
String names = parts.getSymbol(); |
|
567 |
if (transpose) |
|
568 |
names = types.getSymbol(); |
|
569 |
|
|
570 |
String legend = types.getSymbol(); |
|
571 |
if (transpose) |
|
572 |
legend = parts.getSymbol(); |
|
573 |
|
|
574 |
String title = this.getName(); |
|
575 |
|
|
576 |
String cmd = ""; //$NON-NLS-1$ |
|
577 |
|
|
578 |
String[] colors; |
|
579 |
String[] allcolors = Progression.colors; |
|
580 |
if (transpose) { // one color per part |
|
581 |
colors = new String[partNames.length]; |
|
582 |
for (int j = 0 ; j < partNames.length ; j++) { |
|
583 |
colors[j] = allcolors[j%allcolors.length]; |
|
584 |
} |
|
585 |
} else { // one color per word |
|
586 |
colors = new String[typeNames.length]; |
|
587 |
for (int j = 0 ; j < typeNames.length ; j++) |
|
588 |
colors[j] = allcolors[j%allcolors.length]; |
|
589 |
} |
|
590 |
|
|
591 |
RWorkspace.getRWorkspaceInstance().eval("rm(colors)"); //$NON-NLS-1$ |
|
592 |
RWorkspace.getRWorkspaceInstance().addVectorToWorkspace("colors", colors); //$NON-NLS-1$ |
|
593 |
//System.out.println("colors: "+Arrays.toString(colors)); |
|
594 |
|
|
595 |
String colorString; |
|
596 |
|
|
597 |
if (monochrome) |
|
598 |
colorString = ""; //$NON-NLS-1$ |
|
599 |
else |
|
600 |
colorString = ", col=colors"; //$NON-NLS-1$ |
|
601 |
|
|
602 |
cmd += "op <- par(mar=c(5,5,5,10), xpd=TRUE);\n"; //$NON-NLS-1$ |
|
603 |
|
|
604 |
if (drawBarPlot) { |
|
605 |
cmd += "mat <- "+matrix+";\n" + //$NON-NLS-1$ //$NON-NLS-2$ |
|
606 |
"draw <- barplot(mat, space=c(1,3)"+colorString+ //$NON-NLS-1$ |
|
607 |
", horiz=F, las=2, names.arg="+names+", main=\""+title+ //$NON-NLS-1$ //$NON-NLS-2$ |
|
608 |
"\", beside=T, xpd=TRUE);\n" + //$NON-NLS-1$ //$NON-NLS-2$ |
|
609 |
// removed : legend.text="+legend + " |
|
610 |
//"rm(op);" + //$NON-NLS-1$ |
|
611 |
"W2 <- 100000;\n"+//$NON-NLS-1$ |
|
612 |
"lines(c(0,W2), c(-"+BANALITE+", -"+BANALITE+"), lty=2);\n" + // seuil banalité inférieur //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
|
613 |
"lines(c(0,W2), c("+BANALITE+", "+BANALITE+"), lty=2);\n" + // seuil banalité supérieur //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
|
614 |
"lines(c(0,W2), c(0,0), lty=1);\n"+ //$NON-NLS-1$ |
|
615 |
"\n"; //$NON-NLS-1$ |
|
616 |
} else { |
|
617 |
//compute the hist but dont show the bar |
|
618 |
cmd += "mat <- "+matrix+";\n" + //$NON-NLS-1$ //$NON-NLS-2$ |
|
619 |
"draw <- barplot(mat, space=c(1,3), col=\"white\", border=NA, horiz=F, las=2, names.arg="+names+", main=\""+title+"\", beside=T);\n"; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
|
620 |
//"rm(op);\n"; //$NON-NLS-1$ |
|
621 |
} |
|
622 |
|
|
623 |
if (drawLines) { |
|
624 |
if (transpose) { |
|
625 |
cmd += "for(i in 1:length(draw[1,])){\n" + //$NON-NLS-1$ |
|
626 |
"lines(draw[,i], mat[,i], col=colors[[i]]);\n" + //$NON-NLS-1$ |
|
627 |
"}\n"; //$NON-NLS-1$ |
|
628 |
} else { |
|
629 |
cmd += "for(i in 1:length(draw[,1])){\n" + //$NON-NLS-1$ |
|
630 |
"lines(draw[i,], mat[i,], col=colors[[i]]);\n" + //$NON-NLS-1$ |
|
631 |
"}\n"; //$NON-NLS-1$ |
|
632 |
} |
|
633 |
if (!drawBarPlot) { |
|
634 |
if (transpose) { |
|
635 |
cmd += "for(i in 1:length(draw[1,])){\n" + //$NON-NLS-1$ |
|
636 |
"text(draw[,i], 0, label="+legend+", offset=0, srt=90);\n" + //$NON-NLS-1$ //$NON-NLS-2$ |
|
637 |
"lines(c(draw[[1,i]],draw[[length(draw[,i]),i]]), c(0,0))" + //$NON-NLS-1$ |
|
638 |
"}\n"; //$NON-NLS-1$ |
|
639 |
} else { |
|
640 |
cmd += "lines( c(0,99999), c(0,0))\n"; //$NON-NLS-1$ |
|
641 |
} |
|
642 |
} |
|
643 |
} |
|
644 |
|
|
645 |
// if (transpose) |
|
646 |
// cmd += "legend(\"topright\", "+names+", inset = c(-0.2, 0), col = colors, lty = 1, xpd=TRUE);\n"; //$NON-NLS-1$ //$NON-NLS-2$ |
|
647 |
// else |
|
648 |
if (drawBarPlot) |
|
649 |
cmd += "legend(\"topright\", "+legend+", inset = c(-0.2,0), col = colors, pch = 0, xpd=TRUE);\n"; //$NON-NLS-1$ //$NON-NLS-2$ |
|
650 |
else |
|
651 |
cmd += "legend(\"topright\", "+legend+", inset = c(-0.2,0), col = colors, lty = 1, xpd=TRUE);\n"; //$NON-NLS-1$ //$NON-NLS-2$ |
|
652 |
|
|
653 |
rw.plot(file, cmd, device); |
|
654 |
} catch (Exception e) { |
|
655 |
System.out.println(e.getLocalizedMessage()); |
|
656 |
throw new StatException(e); |
|
657 |
} |
|
658 |
} |
|
659 |
|
|
660 |
/** |
|
661 |
* SL |
|
662 |
* |
|
663 |
* TODO should be a generic function for drawing barplot with vector. |
|
664 |
* |
|
665 |
* @param file the file |
|
666 |
* @throws StatException the stat exception |
|
667 |
*/ |
|
668 |
@Deprecated |
|
669 |
// FIXME : to remove when charts engine will be validated |
|
670 |
public void toSVG(File file) throws StatException { |
|
671 |
toSVG(file, __RDevice.SVG); |
|
672 |
} |
|
673 |
|
|
674 |
|
|
675 |
/** |
|
676 |
* To svg. |
|
677 |
* |
|
678 |
* @param file the file |
|
679 |
* @param device the device |
|
680 |
* @throws StatException the stat exception |
|
681 |
*/ |
|
682 |
@Deprecated |
|
683 |
// FIXME : to remove when charts engine will be validated |
|
684 |
public void toSVG(File file, __RDevice device) throws StatException { |
|
685 |
// TODO Auto-generated method stub |
|
686 |
double[][] specindex = getSpecificitesIndex(); |
|
687 |
try { |
|
688 |
RWorkspace rw = RWorkspace.getRWorkspaceInstance(); |
|
689 |
double[] i = VectorizeArray.vectorizeByInner(specindex); |
|
690 |
VectorImpl v = new VectorImpl(i); |
|
691 |
//System.out.println("R : "+v.getSymbol()); //$NON-NLS-1$ |
|
692 |
rw.plot(file, "barplot(" + v.getSymbol() + ")", device); //$NON-NLS-1$ //$NON-NLS-2$ |
|
693 |
} catch (REXPMismatchException e) { |
|
694 |
throw new StatException(e); |
|
695 |
} |
|
696 |
} |
|
697 |
|
|
698 |
/** |
|
699 |
* Gets the parent, i.e. the subcorpus if the specif ities were computed on a |
|
700 |
* subcorpus, and the partition if teh specificities were computed on a |
|
701 |
* partition |
|
702 |
* |
|
703 |
* @return the parent |
|
704 |
*/ |
|
705 |
public HasResults getParent() { |
|
706 |
if (partition != null) return table; |
|
707 |
if (subcorpus != null) return subcorpus; |
|
708 |
return corpus; // computed on subcorpus |
|
709 |
} |
|
710 |
|
|
711 |
/** |
|
712 |
* Gets the name. |
|
713 |
* |
|
714 |
* @return the name |
|
715 |
*/ |
|
716 |
public String getName() { |
|
717 |
return name; |
|
718 |
} |
|
719 |
|
|
720 |
/** |
|
721 |
* Gets the symbol. |
|
722 |
* |
|
723 |
* @return the symbol |
|
724 |
*/ |
|
725 |
public String getSymbol() { |
|
726 |
return symbol; |
|
727 |
} |
|
728 |
|
|
729 |
/** |
|
730 |
* Gets the sorted part indexes. |
|
731 |
* |
|
732 |
* @return the sorted part indexes |
|
733 |
*/ |
|
734 |
public int[] getSortedPartIndexes() { |
|
735 |
|
|
736 |
try { |
|
737 |
int[] indexes = new int[this.getNbrPart()]; |
|
738 |
String[] partsname = this.getPartShortNames().clone(); |
|
739 |
for (int i = 0; i < this.getNbrPart(); i++) { |
|
740 |
indexes[i] = i; |
|
741 |
} |
|
742 |
|
|
743 |
for (int i = 0; i < this.getNbrPart(); i++) { |
|
744 |
int imin = i; |
|
745 |
for (int j = i; j < this.getNbrPart(); j++) { |
|
746 |
if (partsname[imin].compareTo(partsname[j]) > 0) { |
|
747 |
imin = j; |
|
748 |
} |
|
749 |
} |
|
750 |
String tmp = partsname[i]; |
|
751 |
partsname[i] = partsname[imin]; |
|
752 |
partsname[imin] = tmp; |
|
753 |
|
|
754 |
int tmp2 = indexes[i]; |
|
755 |
indexes[i] = indexes[imin]; |
|
756 |
indexes[imin] = tmp2; |
|
757 |
} |
|
758 |
return indexes; |
|
759 |
} catch (StatException e) { |
|
760 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
761 |
} |
|
762 |
return new int[0]; |
|
763 |
} |
|
764 |
|
|
765 |
/** |
|
766 |
* Sets the corpus. |
|
767 |
* |
|
768 |
* @param subcorpus2 the new corpus |
|
769 |
*/ |
|
770 |
public void setCorpus(Subcorpus subcorpus2) { |
|
771 |
this.subcorpus = subcorpus2; |
|
772 |
} |
|
773 |
|
|
774 |
/** |
|
775 |
* Gets the corpus. |
|
776 |
* |
|
777 |
* @return the corpus |
|
778 |
*/ |
|
779 |
public Corpus getCorpus() { |
|
780 |
return subcorpus; |
|
781 |
} |
|
782 |
|
|
783 |
@Override |
|
784 |
public void clean() { |
|
785 |
try { |
|
786 |
this.writer.flush(); |
|
787 |
this.writer.close(); |
|
788 |
} catch (IOException e) { |
|
789 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
790 |
} |
|
791 |
} |
|
792 |
|
|
793 |
/** |
|
794 |
* @return the selectedTypeNames |
|
795 |
*/ |
|
796 |
public String[] getSelectedTypeNames() { |
|
797 |
return selectedTypeNames; |
|
798 |
} |
|
799 |
|
|
800 |
/** |
|
801 |
* @param selectedTypeNames the selectedTypeNames to set |
|
802 |
*/ |
|
803 |
public void setSelectedTypeNames(String[] selectedTypeNames) { |
|
804 |
this.selectedTypeNames = selectedTypeNames; |
|
805 |
} |
|
806 |
|
|
807 |
/** |
|
808 |
* @return the selectedPartNames |
|
809 |
*/ |
|
810 |
public String[] getSelectedPartNames() { |
|
811 |
return selectedPartNames; |
|
812 |
} |
|
813 |
|
|
814 |
/** |
|
815 |
* @param selectedPartNames the selectedPartNames to set |
|
816 |
*/ |
|
817 |
public void setSelectedPartNames(String[] selectedPartNames) { |
|
818 |
this.selectedPartNames = selectedPartNames; |
|
819 |
} |
|
820 |
|
|
821 |
/** |
|
822 |
* @return the selectedSpecificitiesIndex |
|
823 |
*/ |
|
824 |
public double[][] getSelectedSpecificitiesIndex() { |
|
825 |
return selectedSpecificitiesIndex; |
|
826 |
} |
|
827 |
|
|
828 |
/** |
|
829 |
* @param selectedSpecificitiesIndex the selectedSpecificitiesIndex to set |
|
830 |
*/ |
|
831 |
public void setSelectedSpecificitiesIndex(double[][] selectedSpecificitiesIndex) { |
|
832 |
this.selectedSpecificitiesIndex = selectedSpecificitiesIndex; |
|
833 |
} |
|
834 |
|
|
835 |
@Override |
|
836 |
public boolean delete() { |
|
837 |
if (partition != null) return partition.removeResult(this); |
|
838 |
return corpus.removeResult(this); |
|
839 |
} |
|
840 |
} |
|
0 | 841 |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/functions/Specificites.java (revision 73) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate: 2016-11-29 16:47:07 +0100 (Tue, 29 Nov 2016) $ |
|
25 |
// $LastChangedRevision: 3349 $ |
|
26 |
// $LastChangedBy: mdecorde $ |
|
27 |
// |
|
28 |
package org.txm.specificities.core.functions; |
|
29 |
|
|
30 |
import java.io.File; |
|
31 |
import java.util.ArrayList; |
|
32 |
import java.util.Arrays; |
|
33 |
import java.util.HashSet; |
|
34 |
import java.util.List; |
|
35 |
import java.util.Set; |
|
36 |
|
|
37 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
38 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
39 |
import org.txm.searchengine.cqp.corpus.Lexicon; |
|
40 |
import org.txm.searchengine.cqp.corpus.Part; |
|
41 |
import org.txm.searchengine.cqp.corpus.Partition; |
|
42 |
import org.txm.searchengine.cqp.corpus.Property; |
|
43 |
import org.txm.searchengine.cqp.corpus.Subcorpus; |
|
44 |
import org.txm.searchengine.cqp.corpus.query.Focus; |
|
45 |
import org.txm.stat.StatException; |
|
46 |
import org.txm.stat.data.LexicalTable; |
|
47 |
import org.txm.stat.data.Vector; |
|
48 |
import org.txm.stat.engine.r.RWorkspaceException; |
|
49 |
import org.txm.Messages; |
|
50 |
|
|
51 |
// TODO: Auto-generated Javadoc |
|
52 |
/** |
|
53 |
* High level access to the Specificities. |
|
54 |
* |
|
55 |
* The methods in this class access the Specificities function through |
|
56 |
* high-level objects (representation of CQP corpus ({@link Corpus}, |
|
57 |
* {@link Subcorpus}, {@link Partition}), representation of queries ( |
|
58 |
* {@link Focus}, {@link Property}), and lexicon {@link Lexicon})). |
|
59 |
* |
|
60 |
* @author sloiseau |
|
61 |
* |
|
62 |
*/ |
|
63 |
public class Specificites { |
|
64 |
|
|
65 |
/** The specif_counter. */ |
|
66 |
private static int specif_counter = 1; |
|
67 |
private static ArrayList<Integer> selectedColIdx; |
|
68 |
|
|
69 |
/** |
|
70 |
* Compute the specificity index for all the cells of a complete lexical |
|
71 |
* table, as defined by a {@link Partition} and a {@link Property}. |
|
72 |
* |
|
73 |
* This may be used for extracting, for all part, the forms the most or the |
|
74 |
* less specif ic. |
|
75 |
* |
|
76 |
* @param partition the partition |
|
77 |
* @param analysisProperty the analysis property |
|
78 |
* @param formFocus the form focus |
|
79 |
* @param partsFocus the parts focus |
|
80 |
* @param Fmin the fmin |
|
81 |
* @return the specificites result |
|
82 |
* @throws CqiClientException the cqi client exception |
|
83 |
* @throws StatException the stat exception |
|
84 |
*/ |
|
85 |
public static SpecificitesResult specificites(Partition partition, Property analysisProperty, |
|
86 |
List<Part> partsFocus, int Fmin, int maxScore) throws CqiClientException, StatException { |
|
87 |
if (partition.getParts().size() < 2) { |
|
88 |
throw new IllegalArgumentException(Messages.Specificites_0); |
|
89 |
} |
|
90 |
// complication due to Jean-Philippe special interest in usage of Part. |
|
91 |
List<String> partNamesFocus = new ArrayList<String>(); |
|
92 |
|
|
93 |
if (partsFocus != null) { |
|
94 |
partNamesFocus = new ArrayList<String>(); |
|
95 |
for (Part p : partsFocus) { |
|
96 |
partNamesFocus.add(p.getShortName()); |
|
97 |
} |
|
98 |
} |
|
99 |
// System.out.println("get/create lexical table"); |
|
100 |
LexicalTable table = partition.getLexicalTable(analysisProperty, Fmin); |
|
101 |
partition.storeResult(table); |
|
102 |
|
|
103 |
// Create a set of the existing types |
|
104 |
String[] rownames = table.getRowNames().asStringsArray(); |
|
105 |
ArrayList<String> found = new ArrayList<String>(Arrays.asList(rownames)); |
|
106 |
|
|
107 |
String symbol = org.txm.stat.engine.r.function.Specificites.prefixR+(specif_counter++); |
|
108 |
|
|
109 |
//System.out.println("Av specif "); |
|
110 |
double[][] specIndex = org.txm.stat.engine.r.function.Specificites |
|
111 |
.specificites(symbol, table, found.toArray(new String[] {}), partNamesFocus.toArray(new String[] {})); |
|
112 |
//System.out.println("Ap specif "); |
|
113 |
|
|
114 |
return new SpecificitesResult( |
|
115 |
symbol, |
|
116 |
specIndex, |
|
117 |
table, |
|
118 |
found, |
|
119 |
partNamesFocus, |
|
120 |
analysisProperty.getName(), |
|
121 |
maxScore); |
|
122 |
} |
|
123 |
|
|
124 |
/** |
|
125 |
* Specificites. |
|
126 |
* |
|
127 |
* @param table the table |
|
128 |
* @return the specificites result |
|
129 |
* @throws CqiClientException the cqi client exception |
|
130 |
* @throws StatException the stat exception |
|
131 |
*/ |
|
132 |
public static SpecificitesResult specificites(LexicalTable table, int maxScore) |
|
133 |
throws CqiClientException, StatException { |
|
134 |
if (table.getNColumns() < 2) { |
|
135 |
throw new IllegalArgumentException(Messages.Specificites_0); |
|
136 |
} |
|
137 |
|
|
138 |
String[] rownames = table.getRowNames().asStringsArray(); |
|
139 |
String[] colnames = table.getColNames().asStringsArray(); |
|
140 |
String symbol = org.txm.stat.engine.r.function.Specificites.prefixR+(specif_counter++); |
|
141 |
|
|
142 |
double[][] specIndex = org.txm.stat.engine.r.function.Specificites.specificites(symbol, table, rownames, colnames); |
|
143 |
|
|
144 |
if (table.getPartition() != null) { |
|
145 |
return new SpecificitesResult(symbol, specIndex, table, Arrays |
|
146 |
.asList(rownames), Arrays.asList(colnames), table.getProperty().getName(), maxScore); |
|
147 |
} else { |
|
148 |
return new SpecificitesResult(symbol, specIndex, table, Arrays |
|
149 |
.asList(rownames), Arrays.asList(colnames), |
|
150 |
"TLNONAME: " + table.getProperty().getName(), maxScore); //$NON-NLS-1$ |
|
151 |
} |
|
152 |
} |
|
153 |
|
|
154 |
/** |
|
155 |
* Specificites. |
|
156 |
* |
|
157 |
* @param corpus the corpus |
|
158 |
* @param subcorpus the subcorpus |
|
159 |
* @param property the property |
|
160 |
* @return the specificites result |
|
161 |
* @throws CqiClientException the cqi client exception |
|
162 |
* @throws StatException the stat exception |
|
163 |
*/ |
|
164 |
public static SpecificitesResult specificites(Corpus corpus, |
|
165 |
Subcorpus subcorpus, Property property, int maxScore) throws CqiClientException, |
|
166 |
StatException { |
|
167 |
|
|
168 |
Lexicon totalFrequencies = corpus.getLexicon(property); |
|
169 |
Lexicon subFrequencies = subcorpus.getLexicon(property); |
|
170 |
|
|
171 |
//System.out.println("Send corpus vector"); |
|
172 |
Vector totalFSymbol; |
|
173 |
Vector subFSymbol; |
|
174 |
try { |
|
175 |
totalFSymbol = totalFrequencies.asVector(); |
|
176 |
subFSymbol = subFrequencies.asVector(); |
|
177 |
} catch (StatException e) { |
|
178 |
throw new RWorkspaceException(e); |
|
179 |
} |
|
180 |
|
|
181 |
//System.out.println("compute specifs"); |
|
182 |
String symbol = org.txm.stat.engine.r.function.Specificites.prefixR+(specif_counter++); |
|
183 |
double[][] specIndex = org.txm.stat.engine.r.function.Specificites.specificites(symbol, totalFSymbol, subFSymbol); |
|
184 |
|
|
185 |
//System.out.println("build SpecificitesResult"); |
|
186 |
SpecificitesResult specif = new SpecificitesResult(symbol, specIndex, totalFrequencies, |
|
187 |
subFrequencies, property.getName(), maxScore); |
|
188 |
|
|
189 |
specif.setCorpus(subcorpus); |
|
190 |
return specif ; |
|
191 |
} |
|
192 |
|
|
193 |
/** |
|
194 |
* TODO to be replaced by |
|
195 |
* "org.apache.commons.collections.CollectionUtils.subtract" when someone |
|
196 |
* figure out where is the eclipse plugin for apache commons collections. SL |
|
197 |
* |
|
198 |
* @param leftoperande the leftoperande |
|
199 |
* @param rightoperande the rightoperande |
|
200 |
* @return an array of two sets: the first one contains the element of |
|
201 |
* leftoperande not in rightoperande, the second the element of |
|
202 |
* leftoperande present in rightoperande. |
|
203 |
*/ |
|
204 |
private static final Set<String>[] subtract(Set<String> leftoperande, |
|
205 |
Set<String> rightoperande) { |
|
206 |
Set<String>[] returned = new HashSet[] { new HashSet<String>(), |
|
207 |
new HashSet<String>() }; |
|
208 |
for (String s : leftoperande) { |
|
209 |
if (!rightoperande.contains(s)) { |
|
210 |
returned[0].add(s); |
|
211 |
} else { |
|
212 |
returned[1].add(s); |
|
213 |
} |
|
214 |
} |
|
215 |
return returned; |
|
216 |
} |
|
217 |
} |
|
0 | 218 |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/functions/Specificites2.java (revision 73) | ||
---|---|---|
1 |
package org.txm.specificities.core.functions; |
|
2 |
|
|
3 |
import org.txm.functions.contrasts.Contrast; |
|
4 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
5 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
6 |
import org.txm.searchengine.cqp.corpus.Partition; |
|
7 |
import org.txm.searchengine.cqp.corpus.Property; |
|
8 |
import org.txm.searchengine.cqp.corpus.Subcorpus; |
|
9 |
import org.txm.stat.StatException; |
|
10 |
import org.txm.stat.data.LexicalTable; |
|
11 |
import org.txm.stat.engine.r.RWorkspace; |
|
12 |
|
|
13 |
public class Specificites2 extends Contrast { |
|
14 |
|
|
15 |
public Specificites2(Corpus corpus, Subcorpus subcorpus, Property property) |
|
16 |
throws StatException, CqiClientException { |
|
17 |
super(corpus, subcorpus, property); |
|
18 |
// TODO Auto-generated constructor stub |
|
19 |
} |
|
20 |
|
|
21 |
public Specificites2(Partition partition, Property property, int Fmin) |
|
22 |
throws StatException, CqiClientException { |
|
23 |
super(partition, property, Fmin); |
|
24 |
// TODO Auto-generated constructor stub |
|
25 |
} |
|
26 |
|
|
27 |
|
|
28 |
public Specificites2(LexicalTable table) { |
|
29 |
super(table); |
|
30 |
// TODO Auto-generated constructor stub |
|
31 |
} |
|
32 |
|
|
33 |
public boolean compute() throws StatException { |
|
34 |
|
|
35 |
colNames = table.getColNames().asStringsArray(); |
|
36 |
rowNames = table.getRowNames().asStringsArray(); |
|
37 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(table.getSymbol()); |
|
38 |
|
|
39 |
// compute |
|
40 |
indices = org.txm.stat.engine.r.function.Specificites.specificites("SYMBOL", table, null, null); //$NON-NLS-1$ |
|
41 |
|
|
42 |
return true; |
|
43 |
} |
|
44 |
|
|
45 |
@Override |
|
46 |
public String getName() { |
|
47 |
// TODO Auto-generated method stub |
|
48 |
return "Specif2"; //$NON-NLS-1$ |
|
49 |
} |
|
50 |
} |
|
0 | 51 |
Formats disponibles : Unified diff