Révision 619
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/TxmToHSQL.groovy (revision 619) | ||
---|---|---|
27 | 27 |
// |
28 | 28 |
package org.txm.scripts; |
29 | 29 |
|
30 |
import org.txm.lexicon.core.corpusengine.cqp.Lexicon
|
|
30 |
import org.txm.lexicon.core.functions.Lexicon;
|
|
31 | 31 |
import org.txm.searchengine.cqp.corpus.Corpus; |
32 | 32 |
import org.txm.searchengine.cqp.corpus.Property; |
33 | 33 |
import org.txm.searchengine.cqp.corpus.CorpusManager; |
tmp/org.txm.index.core/src/org/txm/index/core/functions/Index.java (revision 619) | ||
---|---|---|
48 | 48 |
import org.txm.index.core.functions.LineComparator.SortMode; |
49 | 49 |
import org.txm.index.core.messages.IndexCoreMessages; |
50 | 50 |
import org.txm.index.core.preferences.IndexPreferences; |
51 |
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
|
|
51 |
import org.txm.lexicon.core.functions.Lexicon;
|
|
52 | 52 |
import org.txm.searchengine.cqp.CQPEngine; |
53 | 53 |
import org.txm.searchengine.cqp.ICqiClient; |
54 | 54 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
tmp/org.txm.cooccurrence.core/src/org/txm/cooccurrence/core/preferences/CooccurrencePreferences.java (revision 619) | ||
---|---|---|
47 | 47 |
// local result preferences |
48 | 48 |
public static final String QUERY = PREFERENCES_PREFIX + "query"; //$NON-NLS-1$ |
49 | 49 |
|
50 |
public static final String COOCQUERYFILTER = PREFERENCES_PREFIX + "cooc_query_filter"; //$NON-NLS-1$
|
|
50 |
public static final String COOC_QUERY_FILTER = PREFERENCES_PREFIX + "cooc_query_filter"; //$NON-NLS-1$
|
|
51 | 51 |
|
52 | 52 |
|
53 | 53 |
/** |
tmp/org.txm.cooccurrence.core/src/org/txm/cooccurrence/core/functions/Cooccurrence.java (revision 619) | ||
---|---|---|
161 | 161 |
/** The P. */ |
162 | 162 |
int P = -1; |
163 | 163 |
|
164 |
@Parameter(key=CooccurrencePreferences.COOCQUERYFILTER) |
|
164 |
/** The reference corpus to use = the R symbol that point to a matrix WordxFreqs. */ |
|
165 |
String referenceCorpus; |
|
166 |
|
|
167 |
/** The scores. */ |
|
168 |
HashMap<String, Double> scores; |
|
169 |
|
|
170 |
/** The symbol. */ |
|
171 |
private String symbol; |
|
172 |
|
|
173 |
/** The writer. */ |
|
174 |
private BufferedWriter writer; |
|
175 |
|
|
176 |
|
|
177 |
|
|
178 |
|
|
179 |
@Parameter(key=CooccurrencePreferences.COOC_QUERY_FILTER) |
|
165 | 180 |
protected String pCooccurentQueryFilter = "[]"; //$NON-NLS-1$ |
181 |
|
|
166 | 182 |
/** The mincof. */ |
167 | 183 |
@Parameter(key=CooccurrencePreferences.MIN_COUNT) |
168 | 184 |
protected Integer pFCoocFilter; |
185 |
|
|
169 | 186 |
/** The minf. */ |
170 | 187 |
@Parameter(key=CooccurrencePreferences.MIN_FREQ) |
171 | 188 |
protected Integer pFminFilter; |
189 |
|
|
172 | 190 |
/** The include xpivot. */ |
173 | 191 |
@Parameter(key=CooccurrencePreferences.INCLUDE_X_PIVOT) |
174 | 192 |
protected Boolean pIncludeXpivot; |
193 |
|
|
175 | 194 |
/** The maxleft. */ |
176 | 195 |
@Parameter(key=CooccurrencePreferences.MAX_LEFT) |
177 | 196 |
protected Integer pMaxLeftContextSize; |
197 |
|
|
178 | 198 |
/** The maxright. */ |
179 | 199 |
@Parameter(key=CooccurrencePreferences.MAX_RIGHT) |
180 | 200 |
protected Integer pMaxRightContextSize; |
201 |
|
|
181 | 202 |
/** The minleft. */ |
182 | 203 |
@Parameter(key=CooccurrencePreferences.MIN_LEFT) |
183 | 204 |
protected Integer pMinLeftContextSize; |
205 |
|
|
184 | 206 |
/** The minright. */ |
185 | 207 |
@Parameter(key=CooccurrencePreferences.MIN_RIGHT) |
186 | 208 |
protected Integer pMinRightContextSize; |
209 |
|
|
187 | 210 |
/** The cooccurents properties to show. */ |
188 | 211 |
@Parameter |
189 | 212 |
protected List<Property> pProperties; |
213 |
|
|
190 | 214 |
/** The keyword query. */ |
191 | 215 |
@Parameter |
192 | 216 |
protected Query pQuery; |
217 |
|
|
193 | 218 |
/** The minscore. */ |
194 | 219 |
@Parameter(key=CooccurrencePreferences.MIN_SCORE) |
195 | 220 |
protected Double pScoreMinFilter; |
221 |
|
|
196 | 222 |
/** The structural unit context limit. */ |
197 | 223 |
@Parameter |
198 | 224 |
protected StructuralUnit pStructuralUnitLimit; |
199 | 225 |
|
200 |
/** The reference corpus to use = the R symbol that point to a matrix WordxFreqs. */ |
|
201 |
String referenceCorpus; |
|
202 | 226 |
|
203 |
/** The scores. */ |
|
204 |
HashMap<String, Double> scores; |
|
205 |
|
|
206 |
/** The symbol. */ |
|
207 |
private String symbol; |
|
208 |
|
|
209 |
/** The writer. */ |
|
210 |
private BufferedWriter writer; |
|
211 |
|
|
227 |
|
|
228 |
|
|
229 |
|
|
230 |
|
|
212 | 231 |
/** |
213 | 232 |
* Creates an empty <link>Cooccurrence</link> object, child of the specified <link>Corpus</link>. |
214 | 233 |
* @param corpus |
... | ... | |
546 | 565 |
|
547 | 566 |
@Override |
548 | 567 |
public String getName() { |
549 |
if (this.getParent() != null) |
|
568 |
if (this.getParent() != null) {
|
|
550 | 569 |
return this.getParent().getSimpleName() + ": " + this.getSimpleName(); //$NON-NLS-1$ |
551 |
else return this.getSimpleName(); |
|
570 |
} |
|
571 |
else { |
|
572 |
return this.getSimpleName(); |
|
573 |
} |
|
552 | 574 |
} |
553 | 575 |
|
554 | 576 |
/** |
... | ... | |
1511 | 1533 |
|
1512 | 1534 |
@Override |
1513 | 1535 |
public boolean loadParameters() throws CqiClientException { |
1514 |
|
|
1515 |
// pCooccurentQueryFilter = this.getStringParameterValue(CooccurrencePreferences.COOCQUERYFILTER); |
|
1516 |
// pFCoocFilter = this.getIntParameterValue(CooccurrencePreferences.MIN_COUNT); |
|
1517 |
// pFminFilter = this.getIntParameterValue(CooccurrencePreferences.MIN_FREQ); |
|
1518 |
// pIncludeXpivot = this.getBooleanParameterValue(CooccurrencePreferences.INCLUDE_X_PIVOT); |
|
1519 |
// pMaxLeftContextSize = this.getIntParameterValue(CooccurrencePreferences.MAX_LEFT); |
|
1520 |
// pMaxRightContextSize = this.getIntParameterValue(CooccurrencePreferences.MAX_RIGHT); |
|
1521 |
// pMinLeftContextSize = this.getIntParameterValue(CooccurrencePreferences.MIN_LEFT); |
|
1522 |
// pMinRightContextSize = this.getIntParameterValue(CooccurrencePreferences.MIN_RIGHT); |
|
1523 |
|
|
1524 | 1536 |
String s = this.getStringParameterValue(CooccurrencePreferences.PROPERTIES); |
1525 | 1537 |
pProperties = WordProperty.fromStringToList(getCorpus(), s); |
1526 | 1538 |
|
1527 | 1539 |
pQuery = new Query(this.getStringParameterValue(CooccurrencePreferences.QUERY)); |
1528 |
// pScoreMinFilter = this.getIntParameterValue(CooccurrencePreferences.MIN_SCORE); |
|
1529 | 1540 |
|
1530 | 1541 |
s = this.getStringParameterValue(CooccurrencePreferences.STRUCTURE_LIMIT); |
1531 | 1542 |
pStructuralUnitLimit = getCorpus().getStructuralUnit(s); |
tmp/org.txm.textsbalance.core/src/org/txm/textsbalance/core/functions/TextsBalance.java (revision 619) | ||
---|---|---|
61 | 61 |
|
62 | 62 |
/** |
63 | 63 |
* |
64 |
* @param corpus |
|
64 | 65 |
*/ |
65 | 66 |
public TextsBalance(Corpus corpus) { |
66 | 67 |
super(corpus); |
tmp/org.txm.index.rcp/plugin.xml (revision 619) | ||
---|---|---|
123 | 123 |
type="org.eclipse.ui.model.IWorkbenchAdapter"> |
124 | 124 |
</adapter> |
125 | 125 |
</factory> |
126 |
<factory |
|
127 |
adaptableType="org.txm.lexicon.core.functions.Lexicon" |
|
128 |
class="org.txm.index.rcp.adapters.LexiconAdapterFactory"> |
|
129 |
<adapter |
|
130 |
type="org.eclipse.ui.model.IWorkbenchAdapter"> |
|
131 |
</adapter> |
|
132 |
</factory> |
|
126 | 133 |
</extension> |
127 | 134 |
<extension |
128 | 135 |
point="org.eclipse.ui.commands"> |
tmp/org.txm.index.rcp/src/org/txm/index/rcp/handlers/ComputeLexicon.java (revision 619) | ||
---|---|---|
8 | 8 |
import org.eclipse.ui.IWorkbenchPage; |
9 | 9 |
import org.txm.index.core.functions.Index; |
10 | 10 |
import org.txm.index.rcp.editors.DictionnaryEditor; |
11 |
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
|
|
11 |
import org.txm.lexicon.core.functions.Lexicon;
|
|
12 | 12 |
import org.txm.rcp.TXMWindows; |
13 | 13 |
import org.txm.rcp.editors.TXMEditorPart; |
14 | 14 |
import org.txm.rcp.editors.TXMResultEditorInput; |
tmp/org.txm.index.rcp/src/org/txm/index/rcp/adapters/LexiconAdapterFactory.java (revision 619) | ||
---|---|---|
5 | 5 |
import org.eclipse.ui.model.IWorkbenchAdapter; |
6 | 6 |
import org.eclipse.ui.plugin.AbstractUIPlugin; |
7 | 7 |
import org.osgi.framework.FrameworkUtil; |
8 |
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
|
|
8 |
import org.txm.lexicon.core.functions.Lexicon;
|
|
9 | 9 |
import org.txm.rcp.adapters.TXMResultAdapter; |
10 | 10 |
import org.txm.rcp.adapters.TXMResultAdapterFactory; |
11 | 11 |
|
tmp/org.txm.searchengine.cqp.core/src/org/txm/functions/summary/Summary.java (revision 619) | ||
---|---|---|
1 | 1 |
package org.txm.functions.summary; |
2 | 2 |
|
3 | 3 |
import java.io.File; |
4 |
import java.io.IOException; |
|
5 | 4 |
import java.util.ArrayList; |
6 | 5 |
import java.util.Arrays; |
7 | 6 |
|
... | ... | |
12 | 11 |
import org.txm.objects.Page; |
13 | 12 |
import org.txm.objects.Text; |
14 | 13 |
import org.txm.searchengine.cqp.CQPEngine; |
15 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
16 | 14 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
17 | 15 |
import org.txm.searchengine.cqp.corpus.Property; |
18 | 16 |
import org.txm.searchengine.cqp.corpus.QueryResult; |
19 | 17 |
import org.txm.searchengine.cqp.corpus.StructuralUnitProperty; |
20 | 18 |
import org.txm.searchengine.cqp.corpus.query.Query; |
21 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
22 | 19 |
|
23 | 20 |
public class Summary extends TXMResult { |
24 | 21 |
|
... | ... | |
31 | 28 |
this.corpus = corpus; |
32 | 29 |
} |
33 | 30 |
|
34 |
public TreeNode getRoot() |
|
35 |
{ |
|
36 |
return treenodes; |
|
31 |
|
|
32 |
@Override |
|
33 |
public boolean canCompute() { |
|
34 |
return corpus != null && properties != null && properties.size() > 0; |
|
37 | 35 |
} |
38 | 36 |
|
39 |
public boolean compute() throws CqiClientException, IOException, CqiServerError |
|
40 |
{ |
|
37 |
@Override |
|
38 |
public boolean setParameters(TXMParameters parameters) { |
|
39 |
// TODO Auto-generated method stub |
|
40 |
System.err.println("Summary.setParameters(): not yet implemented."); |
|
41 |
return true; |
|
42 |
} |
|
43 |
|
|
44 |
@Override |
|
45 |
public boolean loadParameters() { |
|
46 |
// TODO Auto-generated method stub |
|
47 |
System.err.println("Summary.loadParameters(): not yet implemented."); |
|
48 |
return true; |
|
49 |
} |
|
50 |
|
|
51 |
|
|
52 |
@Override |
|
53 |
public boolean saveParameters() { |
|
54 |
// TODO Auto-generated method stub |
|
55 |
System.err.println("Summary.saveParameters(): not yet implemented."); |
|
56 |
return true; |
|
57 |
} |
|
58 |
|
|
59 |
|
|
60 |
@Override |
|
61 |
protected boolean _compute() throws Exception { |
|
41 | 62 |
treenodes.end = corpus.getSize(); |
42 | 63 |
treenodes.id = corpus.getName(); |
43 | 64 |
treenodes.start = 0; |
... | ... | |
78 | 99 |
processNodeList(treenodes); |
79 | 100 |
|
80 | 101 |
//System.out.println("checking result..."+treenodes.size()+" nodes"); |
81 |
if (treenodes.check()) |
|
102 |
if (treenodes.check()) {
|
|
82 | 103 |
return true; |
83 |
else |
|
104 |
} |
|
105 |
else { |
|
84 | 106 |
return false; |
107 |
} |
|
85 | 108 |
} |
86 | 109 |
|
110 |
public TreeNode getRoot() { |
|
111 |
return treenodes; |
|
112 |
} |
|
113 |
|
|
114 |
|
|
115 |
|
|
87 | 116 |
private void processNodeList(TreeNode nodes) |
88 | 117 |
{ |
89 | 118 |
//System.out.println("+processing node list: "+nodes.size()); |
... | ... | |
258 | 287 |
|
259 | 288 |
} |
260 | 289 |
|
261 |
@Override |
|
262 |
public boolean canCompute() { |
|
263 |
return corpus != null && properties != null && properties.size() > 0; |
|
264 |
} |
|
265 | 290 |
|
266 | 291 |
@Override |
267 |
public boolean setParameters(TXMParameters parameters) { |
|
268 |
// TODO Auto-generated method stub |
|
269 |
System.err.println("Summary.setParameters(): not yet implemented."); |
|
270 |
return true; |
|
271 |
} |
|
272 |
|
|
273 |
@Override |
|
274 |
public boolean loadParameters() { |
|
275 |
// TODO Auto-generated method stub |
|
276 |
System.err.println("Summary.loadParameters(): not yet implemented."); |
|
277 |
return true; |
|
278 |
} |
|
279 |
|
|
280 |
|
|
281 |
@Override |
|
282 |
public boolean saveParameters() { |
|
283 |
// TODO Auto-generated method stub |
|
284 |
System.err.println("Summary.saveParameters(): not yet implemented."); |
|
285 |
return true; |
|
286 |
} |
|
287 |
|
|
288 |
|
|
289 |
@Override |
|
290 |
protected boolean _compute() throws Exception { |
|
291 |
System.err.println("Summary.compute() not implemented"); |
|
292 |
return false; |
|
293 |
} |
|
294 |
|
|
295 |
@Override |
|
296 | 292 |
public boolean toTxt(File outfile, String encoding, String colseparator, String txtseparator) throws Exception { |
297 | 293 |
|
298 | 294 |
return false; |
299 | 295 |
} |
300 | 296 |
|
301 |
public void setProperties(ArrayList<StructuralUnitProperty> properties2) {
|
|
297 |
public void setProperties(ArrayList<StructuralUnitProperty> properties) { |
|
302 | 298 |
this.properties = properties; |
303 | 299 |
} |
304 | 300 |
} |
tmp/org.txm.statsengine.r.rcp/src/org/txm/statsengine/r/rcp/views/RVariablesView.java (revision 619) | ||
---|---|---|
67 | 67 |
import org.txm.functions.referencer.Referencer; |
68 | 68 |
import org.txm.index.core.functions.Index; |
69 | 69 |
import org.txm.lexicaltable.core.functions.LexicalTable; |
70 |
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
|
|
70 |
import org.txm.lexicon.core.functions.Lexicon;
|
|
71 | 71 |
import org.txm.objects.Base; |
72 | 72 |
import org.txm.progression.core.functions.Progression; |
73 | 73 |
import org.txm.rcp.IImageKeys; |
tmp/org.txm.lexicaltable.core/src/org/txm/lexicaltable/core/statsengine/r/data/LexicalTableImpl.java (revision 619) | ||
---|---|---|
39 | 39 |
import org.rosuda.REngine.REXPMismatchException; |
40 | 40 |
import org.txm.lexicaltable.core.messages.LexicalTableCoreMessages; |
41 | 41 |
import org.txm.lexicaltable.core.statsengine.data.ILexicalTable; |
42 |
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
|
|
42 |
import org.txm.lexicon.core.functions.Lexicon;
|
|
43 | 43 |
import org.txm.searchengine.cqp.corpus.Property; |
44 | 44 |
import org.txm.statsengine.core.StatException; |
45 | 45 |
import org.txm.statsengine.core.data.Vector; |
tmp/org.txm.lexicaltable.core/src/org/txm/lexicaltable/core/functions/LexicalTable.java (revision 619) | ||
---|---|---|
17 | 17 |
import org.txm.lexicaltable.core.preferences.LexicalTablePreferences; |
18 | 18 |
import org.txm.lexicaltable.core.statsengine.data.ILexicalTable; |
19 | 19 |
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl; |
20 |
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
|
|
20 |
import org.txm.lexicon.core.functions.Lexicon;
|
|
21 | 21 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
22 | 22 |
import org.txm.searchengine.cqp.corpus.Corpus; |
23 | 23 |
import org.txm.searchengine.cqp.corpus.Partition; |
tmp/org.txm.lexicaltable.core/src/org/txm/lexicaltable/core/functions/___LexicalTableFactory.java (revision 619) | ||
---|---|---|
6 | 6 |
import java.util.Map; |
7 | 7 |
|
8 | 8 |
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl; |
9 |
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
|
|
9 |
import org.txm.lexicon.core.functions.Lexicon;
|
|
10 | 10 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
11 | 11 |
import org.txm.searchengine.cqp.corpus.Corpus; |
12 | 12 |
import org.txm.searchengine.cqp.corpus.Partition; |
tmp/org.txm.rcp/src/main/java/org/txm/rcp/views/cmdparameters/TXMResultDebugView.java (revision 619) | ||
---|---|---|
47 | 47 |
*/ |
48 | 48 |
public class TXMResultDebugView extends ViewPart implements ISelectionChangedListener { |
49 | 49 |
|
50 |
|
|
50 |
/** |
|
51 |
* Display area. |
|
52 |
*/ |
|
51 | 53 |
protected StyledText displayArea; |
52 | 54 |
|
53 | 55 |
/** |
... | ... | |
82 | 84 |
TXMResult result = (TXMResult) selection; |
83 | 85 |
|
84 | 86 |
StringBuffer buffer = new StringBuffer(); |
85 |
buffer.append("Result: " + result.toString() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
|
87 |
buffer.append("Result: " + result.getClass() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
|
|
86 | 88 |
buffer.append("UUID: " + result.getUUID() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$ |
87 | 89 |
buffer.append("Simple name: " + result.getSimpleName() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$ |
88 | 90 |
buffer.append("Name: " + result.getName() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$ |
89 | 91 |
buffer.append("Valid filename: " + result.getValidFileName() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$ |
90 | 92 |
buffer.append("Empty name: " + result.getEmptyName() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$ |
93 |
buffer.append("toString(): " + result.toString() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$ |
|
91 | 94 |
buffer.append("Details: " + result.getDetails() + "\n\n"); //$NON-NLS-1$ //$NON-NLS-2$ |
92 | 95 |
|
93 | 96 |
// Command preferences |
... | ... | |
106 | 109 |
buffer.append("Chart object = " + ((ChartResult)result).getChart() + ", chart type = " + ((ChartResult)result).getChartType() + "\n"); |
107 | 110 |
} |
108 | 111 |
buffer.append("Selection full path name = " + result.getFullPathSimpleName() + " - " + result.getName() + "\n"); |
109 |
buffer.append("Direct children count = " + result.getResults().size() + ", direct visible children count = " + result.getChildren(true).size() + ", children count = " + result.getDeepChildren().size()); |
|
112 |
buffer.append("Direct children count = " + result.getResults().size() + ", direct visible children count = " + result.getChildren(true).size() + ", children count = " + result.getDeepChildren().size() + "\n");
|
|
110 | 113 |
buffer.append("Root parent = " + result.getRootParent() + ", main corpus parent = " + Corpus.getParentMainCorpus(result) + ", first parent corpus = " + Corpus.getFirstParentCorpus(result)); |
111 | 114 |
|
112 | 115 |
|
tmp/org.txm.rcp/src/main/java/org/txm/rcp/views/SummaryView.java (revision 619) | ||
---|---|---|
140 | 140 |
} |
141 | 141 |
summary = new Summary(selectedCorpus); |
142 | 142 |
ArrayList<StructuralUnitProperty> properties = new ArrayList<StructuralUnitProperty>(); |
143 |
for (Property p : selectedProps) |
|
144 |
if (p instanceof StructuralUnitProperty) |
|
143 |
for (Property p : selectedProps) {
|
|
144 |
if (p instanceof StructuralUnitProperty) {
|
|
145 | 145 |
properties.add((StructuralUnitProperty) p); |
146 |
} |
|
147 |
} |
|
146 | 148 |
|
147 | 149 |
summary.setProperties(properties); |
148 | 150 |
if (summary.compute()) { |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/functions/Specificities.java (revision 619) | ||
---|---|---|
41 | 41 |
import org.txm.core.results.TXMParameters; |
42 | 42 |
import org.txm.core.results.TXMResult; |
43 | 43 |
import org.txm.lexicaltable.core.functions.LexicalTable; |
44 |
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
|
|
44 |
import org.txm.lexicon.core.functions.Lexicon;
|
|
45 | 45 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
46 | 46 |
import org.txm.searchengine.cqp.corpus.Corpus; |
47 | 47 |
import org.txm.specificities.core.messages.SpecificitiesCoreMessages; |
tmp/org.txm.core/src/java/org/txm/core/preferences/TXMPreferences.java (revision 619) | ||
---|---|---|
919 | 919 |
|
920 | 920 |
IEclipsePreferences preferences = scope.getNode(nodeQualifier); |
921 | 921 |
|
922 |
str.append("Path = " + preferences .absolutePath() + ")\n");
|
|
922 |
str.append("Path = " + preferences .absolutePath() + "\n"); |
|
923 | 923 |
|
924 | 924 |
try { |
925 | 925 |
String[] keys = preferences.keys(); |
tmp/org.txm.core/src/java/org/txm/core/results/TXMResult.java (revision 619) | ||
---|---|---|
42 | 42 |
*/ |
43 | 43 |
protected String uniqueID; |
44 | 44 |
//protected String path; |
45 |
public static final DateFormat ID_TIME_FORMAT = new SimpleDateFormat("YYMMDD"); |
|
45 |
/**
 * Date pattern for identifier timestamps: 4-digit calendar year, 2-digit month, 2-digit day of month.
 * Lower-case {@code y} and {@code d} are required: upper-case {@code Y} is the week year and
 * upper-case {@code D} is the day in year, which produce wrong values (e.g. around New Year).
 * NOTE(review): SimpleDateFormat is not thread-safe; confirm this shared constant is only used from one thread at a time.
 */
public static final DateFormat ID_TIME_FORMAT = new SimpleDateFormat("yyyyMMdd");
|
|
46 | 46 |
/** Editor can use this to test if the result need to be saved */ |
47 | 47 |
protected boolean hasBeenComputedOnce = false; |
48 | 48 |
|
tmp/org.txm.lexicon.core/src/org/txm/lexicon/core/corpusengine/cqp/Lexicon.java (revision 619) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate: 2016-09-19 10:31:00 +0200 (Mon, 19 Sep 2016) $ |
|
25 |
// $LastChangedRevision: 3298 $ |
|
26 |
// $LastChangedBy: mdecorde $ |
|
27 |
// |
|
28 |
package org.txm.lexicon.core.corpusengine.cqp; |
|
29 |
|
|
30 |
import java.io.File; |
|
31 |
import java.io.FileNotFoundException; |
|
32 |
import java.io.FileOutputStream; |
|
33 |
import java.io.IOException; |
|
34 |
import java.io.OutputStreamWriter; |
|
35 |
import java.io.UnsupportedEncodingException; |
|
36 |
import java.util.Arrays; |
|
37 |
import java.util.Map; |
|
38 |
|
|
39 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
40 |
import org.txm.core.messages.TXMCoreMessages; |
|
41 |
import org.txm.core.results.TXMParameters; |
|
42 |
import org.txm.core.results.TXMResult; |
|
43 |
import org.txm.lexicon.core.messages.LexiconCoreMessages; |
|
44 |
import org.txm.searchengine.cqp.ICqiClient; |
|
45 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
46 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
47 |
import org.txm.searchengine.cqp.corpus.CorpusManager; |
|
48 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
49 |
import org.txm.searchengine.cqp.corpus.Property; |
|
50 |
import org.txm.searchengine.cqp.corpus.Subcorpus; |
|
51 |
import org.txm.searchengine.cqp.corpus.query.Query; |
|
52 |
import org.txm.statsengine.core.StatException; |
|
53 |
import org.txm.statsengine.core.data.Vector; |
|
54 |
import org.txm.statsengine.r.core.data.VectorImpl; |
|
55 |
import org.txm.utils.logger.Log; |
|
56 |
|
|
57 |
// TODO: Auto-generated Javadoc |
|
58 |
// TODO should be put into stat.data package ? |
|
59 |
/** |
|
60 |
* Represents a frequency list computed on a {@link Corpus} (or a |
|
61 |
* |
|
62 |
* {@link Subcorpus}) and a {@link Property}. |
|
63 |
* @author sloiseau |
|
64 |
*/ |
|
65 |
public class Lexicon extends TXMResult { |
|
66 |
|
|
67 |
/** The nolex. */ |
|
68 |
protected static int nolex = 1; |
|
69 |
|
|
70 |
/** The prefix r. */ |
|
71 |
protected static String prefixR = "Lexicon_"; //$NON-NLS-1$ |
|
72 |
|
|
73 |
/** The forms. */ |
|
74 |
private String[] forms; |
|
75 |
|
|
76 |
/** The freqs. */ |
|
77 |
private int[] freqs; |
|
78 |
|
|
79 |
/** The ids. */ |
|
80 |
private int[] ids; |
|
81 |
|
|
82 |
/** The number of tokens. */ |
|
83 |
int numberOfTokens = -1; |
|
84 |
|
|
85 |
/** The property. */ |
|
86 |
private Property pProperty; |
|
87 |
|
|
88 |
/** The symbol. */ |
|
89 |
private String symbol; |
|
90 |
|
|
91 |
/** The writer. */ |
|
92 |
private OutputStreamWriter writer; |
|
93 |
|
|
94 |
private Corpus corpus; |
|
95 |
|
|
96 |
// /** |
|
97 |
// * Find or build a lexicon given a Corpus (MainCorpus or SubCorpus). |
|
98 |
// * |
|
99 |
// * @param corpus |
|
100 |
// * @param property |
|
101 |
// * @return a Lexicon. May return null if the lexicon forms or freqs are null. |
|
102 |
// * @throws Exception |
|
103 |
// */ |
|
104 |
// public static Lexicon getLexicon(Corpus corpus, Property property) throws Exception { |
|
105 |
// HashSet<Object> results = corpus.getStoredData(Lexicon.class); |
|
106 |
// for (Object result : results) { |
|
107 |
// Lexicon lex = (Lexicon)result; |
|
108 |
// if (lex.getProperty().equals(property)) { |
|
109 |
// return lex; |
|
110 |
// } |
|
111 |
// } |
|
112 |
// |
|
113 |
// Lexicon lex = new Lexicon(corpus); |
|
114 |
// lex.setParameters(property); |
|
115 |
// if (lex.compute(null) && lex.getForms() != null && lex.getFreq() != null) { |
|
116 |
// corpus.storeData(lex); |
|
117 |
// return lex; |
|
118 |
// } else { |
|
119 |
// return null; |
|
120 |
// } |
|
121 |
// } |
|
122 |
|
|
123 |
public Lexicon(Corpus corpus) { |
|
124 |
super(corpus); |
|
125 |
this.corpus = corpus; |
|
126 |
} |
|
127 |
|
|
128 |
|
|
129 |
@Override |
|
130 |
public boolean saveParameters() throws Exception { |
|
131 |
// TODO Auto-generated method stub |
|
132 |
return true; |
|
133 |
} |
|
134 |
|
|
135 |
@Override |
|
136 |
public boolean loadParameters() throws Exception { |
|
137 |
// TODO Auto-generated method stub |
|
138 |
return true; |
|
139 |
} |
|
140 |
|
|
141 |
@Override |
|
142 |
public void clean() { |
|
143 |
// TODO Auto-generated method stub |
|
144 |
|
|
145 |
} |
|
146 |
|
|
147 |
@Override |
|
148 |
public boolean canCompute() throws Exception { |
|
149 |
return corpus != null && pProperty != null; |
|
150 |
} |
|
151 |
|
|
152 |
@Override |
|
153 |
protected boolean _compute() throws Exception { |
|
154 |
if (corpus instanceof MainCorpus) { |
|
155 |
return computeWithMainCorpus((MainCorpus)corpus, pProperty, monitor); |
|
156 |
} |
|
157 |
else if (corpus instanceof Subcorpus) { |
|
158 |
return computewithSubCorpus((Subcorpus)corpus, pProperty, monitor); |
|
159 |
} |
|
160 |
else { |
|
161 |
System.out.println("Error: Lexicon parent is neither a Maincorpus nor a Subcorpus."); |
|
162 |
return false; |
|
163 |
} |
|
164 |
} |
|
165 |
|
|
166 |
/** |
|
167 |
* Computes the lexicon of a main corpus for a given property. |
|
168 |
* |
|
169 |
* @param property |
|
170 |
* the property |
|
171 |
* |
|
172 |
* @return <code>true</code> once the lexicon has been computed |
|
173 |
* |
|
174 |
* @throws CqiClientException |
|
175 |
* the cqi client exception |
|
176 |
*/ |
|
177 |
protected boolean computeWithMainCorpus(MainCorpus corpus, Property property, IProgressMonitor monitor) throws CqiClientException { |
|
178 |
// System.out.println("in "+this.getCqpId()+" look for cached lexicon "+property); |
|
179 |
// System.out.println("not found"); |
|
180 |
this.subTask("Computing lexicon size..."); |
|
181 |
Log.finest(TXMCoreMessages.LEXICON + corpus.getName()); |
|
182 |
int lexiconSize; |
|
183 |
try { |
|
184 |
lexiconSize = CorpusManager.getCorpusManager().getCqiClient().lexiconSize(property.getQualifiedName()); |
|
185 |
} catch (Exception e) { |
|
186 |
throw new CqiClientException(e); |
|
187 |
} |
|
188 |
|
|
189 |
int[] ids = new int[lexiconSize]; |
|
190 |
for (int i = 0; i < ids.length; i++) { |
|
191 |
ids[i] = i; |
|
192 |
} |
|
193 |
|
|
194 |
int[] freqs; |
|
195 |
try { |
|
196 |
this.subTask("Computing lexicon frequencies..."); |
|
197 |
freqs = CorpusManager.getCorpusManager().getCqiClient().id2Freq(property.getQualifiedName(), ids); |
|
198 |
} catch (Exception e) { |
|
199 |
throw new CqiClientException(e); |
|
200 |
} |
|
201 |
|
|
202 |
init(corpus, property, freqs, ids); |
|
203 |
return true; |
|
204 |
} |
|
205 |
|
|
206 |
/** |
|
207 |
* |
|
208 |
* @param corpus |
|
209 |
* @param property |
|
210 |
* @param monitor |
|
211 |
* @return |
|
212 |
* @throws CqiClientException |
|
213 |
*/ |
|
214 |
protected boolean computewithSubCorpus(Subcorpus corpus, Property property, IProgressMonitor monitor) throws CqiClientException { |
|
215 |
|
|
216 |
//System.out.println("not found"); |
|
217 |
Log.finest(TXMCoreMessages.SUBCORPUS_LEXICON + corpus.getName()); |
|
218 |
long start = System.currentTimeMillis(); |
|
219 |
int[][] fdist = null; |
|
220 |
Subcorpus tmp = null; |
|
221 |
try { |
|
222 |
this.subTask("Computing lexicon frequencies..."); |
|
223 |
tmp = corpus.createSubcorpus(new Query("[]"), "S"+corpus.getNextSubcorpusCounter(), true); //$NON-NLS-1$ |
|
224 |
if (tmp != null) { |
|
225 |
fdist = CorpusManager.getCorpusManager().getCqiClient().fdist1( |
|
226 |
tmp.getQualifiedCqpId(), 0, |
|
227 |
ICqiClient.CQI_CONST_FIELD_MATCH, property.getName()); |
|
228 |
|
|
229 |
corpus.dropSubcorpus(tmp); // drop the subcorpus only if correctly created |
|
230 |
} |
|
231 |
//System.out.println("nb lines: "+fdist.length); |
|
232 |
} catch (Exception e) { |
|
233 |
throw new CqiClientException(e); |
|
234 |
} finally { |
|
235 |
if (tmp != null) { |
|
236 |
try {corpus.dropSubcorpus(tmp);} |
|
237 |
catch (Exception e2) {} |
|
238 |
} |
|
239 |
} |
|
240 |
int lexiconSize = fdist.length; |
|
241 |
|
|
242 |
int[] freqs = new int[lexiconSize]; |
|
243 |
int[] ids = new int[lexiconSize]; |
|
244 |
for (int i = 0; i < fdist.length; i++) { |
|
245 |
ids[i] = fdist[i][0]; |
|
246 |
freqs[i] = fdist[i][1]; |
|
247 |
} |
|
248 |
|
|
249 |
init(corpus, property, freqs, ids); |
|
250 |
return true; |
|
251 |
} |
|
252 |
|
|
253 |
|
|
254 |
/** |
|
255 |
* Convert the Lexicon into a Vector object. |
|
256 |
* |
|
257 |
* @return the vector |
|
258 |
* @throws StatException the stat exception |
|
259 |
*/ |
|
260 |
public Vector asVector() throws StatException { |
|
261 |
String symbol = prefixR + (nolex++); |
|
262 |
VectorImpl v = new VectorImpl(freqs, symbol); |
|
263 |
v.setRNames(getForms()); |
|
264 |
this.symbol = v.getSymbol(); |
|
265 |
return v; |
|
266 |
} |
|
267 |
|
|
268 |
|
|
269 |
/** |
|
270 |
* Computes the number of tokens as the sum of all the frequencies. |
|
271 |
*/ |
|
272 |
private void computeNumberOfTokens() { |
|
273 |
numberOfTokens = 0; |
|
274 |
for (int i = 0; i < freqs.length; i++) { |
|
275 |
numberOfTokens += freqs[i]; |
|
276 |
// System.out.println(numberOfTokens); |
|
277 |
// if (freqs[i] != 1) System.out.println(freqs[i]); |
|
278 |
} |
|
279 |
} |
|
280 |
|
|
281 |
|
|
282 |
|
|
283 |
@Override |
|
284 |
public boolean delete() { |
|
285 |
if (corpus != null) { |
|
286 |
corpus.removeData(this); |
|
287 |
} |
|
288 |
return true; |
|
289 |
} |
|
290 |
|
|
291 |
/** |
|
292 |
* Dump lexicon forms and frequencies in a String. |
|
293 |
* |
|
294 |
* @param col the col |
|
295 |
* @param txt the txt |
|
296 |
* @return the string |
|
297 |
*/ |
|
298 |
public String dump(String col, String txt) { |
|
299 |
StringBuffer buffer = new StringBuffer(); |
|
300 |
getForms(); |
|
301 |
for (int i = 0; i < forms.length; i++) { |
|
302 |
buffer.append(txt+ forms[i].replace(txt, txt+txt) + txt + col + freqs[i] + "\n"); //$NON-NLS-1$ |
|
303 |
} |
|
304 |
return buffer.toString(); |
|
305 |
} |
|
306 |
|
|
307 |
/* (non-Javadoc) |
|
308 |
* @see java.lang.Object#equals(java.lang.Object) |
|
309 |
*/ |
|
310 |
@Override |
|
311 |
public boolean equals(Object obj) { |
|
312 |
if (!(obj instanceof Lexicon)) { |
|
313 |
return false; |
|
314 |
} |
|
315 |
Lexicon other = (Lexicon) obj; |
|
316 |
|
|
317 |
if (other.nbrOfType() != this.nbrOfType()) { |
|
318 |
return false; |
|
319 |
} |
|
320 |
return (Arrays.equals(freqs, other.getFreq()) && Arrays.equals(getForms(), other.getForms())); |
|
321 |
} |
|
322 |
|
|
323 |
/** |
|
324 |
* The corpus or subcorpus this lexicon is build on. |
|
325 |
* |
|
326 |
* @return the corpus |
|
327 |
*/ |
|
328 |
public Corpus getCorpus() { |
|
329 |
return corpus; |
|
330 |
} |
|
331 |
|
|
332 |
public String getDetails() { |
|
333 |
return this.corpus.getName() + " " + this.pProperty.getName(); //$NON-NLS-1$ |
|
334 |
} |
|
335 |
|
|
336 |
//TODO: move this into a Lexicon chart renderer |
|
337 |
// /** |
|
338 |
// * Draw a pareto graphic with this frequency list and record it into the |
|
339 |
// * provided filename into svg format. |
|
340 |
// * |
|
341 |
// * @param file where to save the pareto graphic. |
|
342 |
// * @return the pareto graphic |
|
343 |
// * @throws StatException if anything goes wrong. |
|
344 |
// */ |
|
345 |
// public void getParetoGraphic(File file) throws StatException { |
|
346 |
// String rName = asVector().getSymbol(); |
|
347 |
// String expr = "pareto(" + rName + ")"; //$NON-NLS-1$ //$NON-NLS-2$ |
|
348 |
// try { |
|
349 |
// RWorkspace.getRWorkspaceInstance().plot(file, expr, RDevice.SVG); |
|
350 |
// } catch (Exception e) { |
|
351 |
// throw new StatException(e); |
|
352 |
// } |
|
353 |
// } |
|
354 |
|
|
355 |
/** |
|
356 |
* The dif ferent types in the lexicon, the type at the index <code>j</code> |
|
357 |
* of this array have the frequency at index <code>j</code> in the array |
|
358 |
* returned by {@link #getFreq()}. |
|
359 |
* |
|
360 |
* @return types as an array of <code>String</code> |
|
361 |
*/ |
|
362 |
public String[] getForms() { |
|
363 |
if (forms == null) { |
|
364 |
if(ids == null) { |
|
365 |
return new String[0]; |
|
366 |
} |
|
367 |
try { |
|
368 |
forms = CorpusManager.getCorpusManager().getCqiClient().id2Str(pProperty.getQualifiedName(), ids); |
|
369 |
} catch (Exception e) { |
|
370 |
// TODO Auto-generated catch block |
|
371 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
372 |
} |
|
373 |
} |
|
374 |
return forms; |
|
375 |
} |
|
376 |
|
|
377 |
/** |
|
378 |
* The dif ferent types in the lexicon, the type at the index <code>j</code> |
|
379 |
* of this array have the frequency at index <code>j</code> in the array |
|
380 |
* returned by {@link #getFreq()}. |
|
381 |
* |
|
382 |
* @param number the number |
|
383 |
* @return types as an array of <code>String</code> |
|
384 |
*/ |
|
385 |
public String[] getForms(int number) { |
|
386 |
//System.out.println("Lexicon("+this.property+" get forms. number="+number+", ids len="+ids.length); |
|
387 |
if (forms == null) { |
|
388 |
try { |
|
389 |
number = Math.min(number, ids.length); |
|
390 |
if (number <= 0) { |
|
391 |
return new String[0]; |
|
392 |
} |
|
393 |
int[] subpositions = new int[number]; |
|
394 |
System.arraycopy(ids, 0, subpositions, 0, number); |
|
395 |
return CorpusManager.getCorpusManager().getCqiClient().id2Str(pProperty.getQualifiedName(), subpositions); |
|
396 |
} catch (Exception e) { |
|
397 |
// TODO Auto-generated catch block |
|
398 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
399 |
return null; |
|
400 |
} |
|
401 |
} else { |
|
402 |
number = Math.min(number, ids.length); |
|
403 |
if (number <= 0) { |
|
404 |
return new String[0]; |
|
405 |
} |
|
406 |
String[] subforms = new String[number]; |
|
407 |
System.arraycopy(ids, 0, subforms, 0, number); |
|
408 |
return subforms; |
|
409 |
} |
|
410 |
} |
|
411 |
|
|
412 |
/** |
|
413 |
* The dif ferent frequencies in the lexicon. See {@link #getForms()}. |
|
414 |
* |
|
415 |
* @return frequencies as an array of <code>int</code> |
|
416 |
*/ |
|
417 |
public int[] getFreq() { |
|
418 |
return freqs; |
|
419 |
} |
|
420 |
|
|
421 |
/** |
|
422 |
* return the ids of the entries. |
|
423 |
* |
|
424 |
* @return types as an array of <code>String</code> |
|
425 |
*/ |
|
426 |
public int[] getIds() { |
|
427 |
return ids; |
|
428 |
} |
|
429 |
|
|
430 |
public String getName() { |
|
431 |
try { |
|
432 |
return LexiconCoreMessages.RESULT_TYPE + ": " + this.corpus.getSimpleName() + ": " + this.getSimpleName(); |
|
433 |
} |
|
434 |
catch(Exception e) { |
|
435 |
} |
|
436 |
return ""; //$NON-NLS-1$ |
|
437 |
} |
|
438 |
|
|
439 |
/** |
|
440 |
* The property this lexicon is build on. |
|
441 |
* |
|
442 |
* @return the property |
|
443 |
*/ |
|
444 |
public Property getProperty() { |
|
445 |
return pProperty; |
|
446 |
} |
|
447 |
|
|
448 |
public String getSimpleName() { |
|
449 |
try { |
|
450 |
return this.getProperty().getName(); |
|
451 |
} |
|
452 |
catch(Exception e) { |
|
453 |
} |
|
454 |
return ""; |
|
455 |
} |
|
456 |
|
|
457 |
/** |
|
458 |
* Gets the symbol. |
|
459 |
* |
|
460 |
* @return the symbol |
|
461 |
*/ |
|
462 |
public String getSymbol() { |
|
463 |
return this.symbol; |
|
464 |
} |
|
465 |
|
|
466 |
/** |
|
467 |
* Hack frequencies using a map to set forms and frequencies |
|
468 |
* |
|
469 |
* @param corpus the corpus |
|
470 |
* @param pProperty the property |
|
471 |
* @param map the map |
|
472 |
* {@link Corpus#getLexicon(Property)} or |
|
473 |
* {@link Subcorpus#getLexicon(Property)}. |
|
474 |
*/ |
|
475 |
public boolean hack(Map<String, Integer> map) { |
|
476 |
if (map.size() != forms.length) return false; |
|
477 |
|
|
478 |
//super(corpus); |
|
479 |
int size = map.size(); |
|
480 |
int[] freqs = new int[size]; |
|
481 |
String[] forms = map.keySet().toArray(new String[] {}); |
|
482 |
for (int i = 0; i < forms.length; i++) { |
|
483 |
freqs[i] = map.get(forms[i]); |
|
484 |
} |
|
485 |
|
|
486 |
this.freqs = freqs; |
|
487 |
return true; |
|
488 |
} |
|
489 |
|
|
490 |
/** |
|
491 |
* Protected on purpose: should be accessed through others initializer. |
|
492 |
* |
|
493 |
* @param corpus the corpus |
|
494 |
* @param property the property |
|
495 |
* @param freq the freq |
|
496 |
* @param ids the ids |
|
497 |
* {@link Corpus#getLexicon(Property)} or |
|
498 |
* {@link Subcorpus#getLexicon(Property)}. |
|
499 |
*/ |
|
500 |
protected void init(TXMResult corpus, Property property, int[] freq, int[] ids) { |
|
501 |
if (freq.length != ids.length) { |
|
502 |
throw new IllegalArgumentException(LexiconCoreMessages.Lexicon_0); |
|
503 |
} |
|
504 |
this.freqs = freq; |
|
505 |
this.ids = ids; |
|
506 |
this.forms = null; |
|
507 |
this.pProperty = property; |
|
508 |
this.corpus = (Corpus) corpus; |
|
509 |
} |
|
510 |
|
|
511 |
|
|
512 |
/** |
|
513 |
* Number of tokens (sum of all the frequencies) in the corpus. |
|
514 |
* |
|
515 |
* @return the size of the corpus or subcorpus. |
|
516 |
*/ |
|
517 |
public int nbrOfToken() { |
|
518 |
if (numberOfTokens <= 0) { |
|
519 |
computeNumberOfTokens(); |
|
520 |
} |
|
521 |
return numberOfTokens; |
|
522 |
} |
|
523 |
|
|
524 |
|
|
525 |
/** |
|
526 |
* Number of dif ferent types in the frequency list. |
|
527 |
* |
|
528 |
* @return number of types in the corpus or subcorpus. |
|
529 |
*/ |
|
530 |
public int nbrOfType() { |
|
531 |
return freqs.length; |
|
532 |
} |
|
533 |
|
|
534 |
public void setParameters(Property property) { |
|
535 |
this.pProperty = property; |
|
536 |
} |
|
537 |
|
|
538 |
@Override |
|
539 |
public boolean setParameters(TXMParameters parameters) { |
|
540 |
try { |
|
541 |
Property p = (Property) parameters.get("properties"); |
|
542 |
this.setParameters(p); |
|
543 |
} catch (Exception e) { |
|
544 |
Log.printStackTrace(e); |
|
545 |
return false; |
|
546 |
} |
|
547 |
return true; |
|
548 |
} |
|
549 |
|
|
550 |
/** |
|
551 |
* Sets the symbol. |
|
552 |
* |
|
553 |
* @param symbol the new symbol |
|
554 |
*/ |
|
555 |
public void setSymbol(String symbol) { |
|
556 |
this.symbol = symbol; |
|
557 |
} |
|
558 |
|
|
559 |
|
|
560 |
@Override |
|
561 |
public String toString() { |
|
562 |
return LexiconCoreMessages.Lexicon_3 + getName(); |
|
563 |
} |
|
564 |
|
|
565 |
/** |
|
566 |
* To txt. |
|
567 |
* |
|
568 |
* @param outfile the outfile |
|
569 |
* @param encoding the encoding |
|
570 |
* @param colseparator the colseparator |
|
571 |
* @param txtseparator the txtseparator |
|
572 |
* @return true, if successful |
|
573 |
*/ |
|
574 |
@Deprecated |
|
575 |
public boolean toTxt(File outfile, String encoding, String colseparator, String txtseparator) { |
|
576 |
// NK: writer declared as class attribute to perform a clean if the operation is interrupted |
|
577 |
// OutputStreamWriter writer; |
|
578 |
try { |
|
579 |
this.writer = new OutputStreamWriter(new FileOutputStream(outfile), |
|
580 |
encoding); |
|
581 |
} catch (UnsupportedEncodingException e1) { |
|
582 |
org.txm.utils.logger.Log.printStackTrace(e1); |
|
583 |
return false; |
|
584 |
} catch (FileNotFoundException e1) { |
|
585 |
org.txm.utils.logger.Log.printStackTrace(e1); |
|
586 |
return false; |
|
587 |
} |
|
588 |
|
|
589 |
try { |
|
590 |
writer.write(this.dump(colseparator, txtseparator)); |
|
591 |
writer.close(); |
|
592 |
} catch (IOException e) { |
|
593 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
594 |
return false; |
|
595 |
} |
|
596 |
|
|
597 |
return true; |
|
598 |
} |
|
599 |
|
|
600 |
public void setProperty(Property property) { |
|
601 |
this.pProperty = property; |
|
602 |
} |
|
603 |
|
|
604 |
|
|
605 |
} |
tmp/org.txm.lexicon.core/src/org/txm/lexicon/core/functions/Lexicon.java (revision 619) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate: 2016-09-19 10:31:00 +0200 (Mon, 19 Sep 2016) $ |
|
25 |
// $LastChangedRevision: 3298 $ |
|
26 |
// $LastChangedBy: mdecorde $ |
|
27 |
// |
|
28 |
package org.txm.lexicon.core.functions; |
|
29 |
|
|
30 |
import java.io.File; |
|
31 |
import java.io.FileNotFoundException; |
|
32 |
import java.io.FileOutputStream; |
|
33 |
import java.io.IOException; |
|
34 |
import java.io.OutputStreamWriter; |
|
35 |
import java.io.UnsupportedEncodingException; |
|
36 |
import java.util.Arrays; |
|
37 |
import java.util.Map; |
|
38 |
|
|
39 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
40 |
import org.txm.core.messages.TXMCoreMessages; |
|
41 |
import org.txm.core.results.TXMParameters; |
|
42 |
import org.txm.core.results.TXMResult; |
|
43 |
import org.txm.lexicon.core.messages.LexiconCoreMessages; |
|
44 |
import org.txm.searchengine.cqp.ICqiClient; |
|
45 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
46 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
47 |
import org.txm.searchengine.cqp.corpus.CorpusManager; |
|
48 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
49 |
import org.txm.searchengine.cqp.corpus.Property; |
|
50 |
import org.txm.searchengine.cqp.corpus.Subcorpus; |
|
51 |
import org.txm.searchengine.cqp.corpus.query.Query; |
|
52 |
import org.txm.statsengine.core.StatException; |
|
53 |
import org.txm.statsengine.core.data.Vector; |
|
54 |
import org.txm.statsengine.r.core.data.VectorImpl; |
|
55 |
import org.txm.utils.logger.Log; |
|
56 |
|
|
57 |
// TODO: Auto-generated Javadoc |
|
58 |
// TODO should be put into stat.data package ? |
|
59 |
/** |
|
60 |
* Represents a frequency list computed on a {@link Corpus} (or a |

61 |
* {@link Subcorpus}) for a given {@link Property}. |

62 |
* |
|
63 |
* @author sloiseau |
|
64 |
*/ |
|
65 |
public class Lexicon extends TXMResult { |
|
66 |
|
|
67 |
/** Counter appended to {@link #prefixR} to build the symbol of each vector created by asVector(); incremented on each call. */ |

68 |
protected static int nolex = 1; |

69 |


70 |
/** Prefix of the symbols built by asVector(). */ |

71 |
protected static String prefixR = "Lexicon_"; //$NON-NLS-1$ |

72 |


73 |
/** The forms; stays null until getForms() resolves them from the ids. */ |

74 |
private String[] forms; |

75 |


76 |
/** The frequencies, set by init(). */ |

77 |
private int[] freqs; |

78 |


79 |
/** The ids of the entries, set by init(). */ |

80 |
private int[] ids; |

81 |


82 |
/** The sum of the frequencies; starts at -1 and is filled by computeNumberOfTokens(). */ |

83 |
int numberOfTokens = -1; |

84 |


85 |
/** The property this lexicon is built on. */ |

86 |
private Property pProperty; |

87 |


88 |
/** The symbol of the last vector created by asVector(). */ |

89 |
private String symbol; |

90 |


91 |
/** The writer; NOTE(review): presumably used by the text export, which is not visible here - confirm. */ |

92 |
private OutputStreamWriter writer; |

93 |


94 |
private Corpus corpus; |
|
95 |
|
|
96 |
// /** |
|
97 |
// * Find or build a lexicon given a Corpus (MainCorpus or SubCorpus). |
|
98 |
// * |
|
99 |
// * @param corpus |
|
100 |
// * @param property |
|
101 |
// * @return a Lexicon. May return null if the lexicon forms or freqs are null. |
|
102 |
// * @throws Exception |
|
103 |
// */ |
|
104 |
// public static Lexicon getLexicon(Corpus corpus, Property property) throws Exception { |
|
105 |
// HashSet<Object> results = corpus.getStoredData(Lexicon.class); |
|
106 |
// for (Object result : results) { |
|
107 |
// Lexicon lex = (Lexicon)result; |
|
108 |
// if (lex.getProperty().equals(property)) { |
|
109 |
// return lex; |
|
110 |
// } |
|
111 |
// } |
|
112 |
// |
|
113 |
// Lexicon lex = new Lexicon(corpus); |
|
114 |
// lex.setParameters(property); |
|
115 |
// if (lex.compute(null) && lex.getForms() != null && lex.getFreq() != null) { |
|
116 |
// corpus.storeData(lex); |
|
117 |
// return lex; |
|
118 |
// } else { |
|
119 |
// return null; |
|
120 |
// } |
|
121 |
// } |
|
122 |
|
|
123 |
/**
 * Creates a lexicon attached to the given corpus.
 *
 * @param corpus the corpus, passed to the parent constructor and kept in this lexicon
 */
public Lexicon(Corpus corpus) {
	super(corpus);
	this.corpus = corpus;
}
|
127 |
|
|
128 |
|
|
129 |
@Override |
|
130 |
public boolean saveParameters() throws Exception { |
|
131 |
// TODO Auto-generated method stub |
|
132 |
return true; |
|
133 |
} |
|
134 |
|
|
135 |
@Override |
|
136 |
public boolean loadParameters() throws Exception { |
|
137 |
// TODO Auto-generated method stub |
|
138 |
return true; |
|
139 |
} |
|
140 |
|
|
141 |
@Override |
|
142 |
public void clean() { |
|
143 |
// TODO Auto-generated method stub |
|
144 |
|
|
145 |
} |
|
146 |
|
|
147 |
@Override |
|
148 |
public boolean canCompute() throws Exception { |
|
149 |
return corpus != null && pProperty != null; |
|
150 |
} |
|
151 |
|
|
152 |
@Override |
|
153 |
protected boolean _compute() throws Exception { |
|
154 |
if (corpus instanceof MainCorpus) { |
|
155 |
return computeWithMainCorpus((MainCorpus)corpus, pProperty, monitor); |
|
156 |
} |
|
157 |
else if (corpus instanceof Subcorpus) { |
|
158 |
return computewithSubCorpus((Subcorpus)corpus, pProperty, monitor); |
|
159 |
} |
|
160 |
else { |
|
161 |
System.out.println("Error: Lexicon parent is neither a Maincorpus nor a Subcorpus."); |
|
162 |
return false; |
|
163 |
} |
|
164 |
} |
|
165 |
|
|
166 |
/** |
|
167 |
* Gets the lexicon relative to a given property. |
|
168 |
* |
|
169 |
* @param property |
|
170 |
* the property |
|
171 |
* |
|
172 |
* @return the lexicon |
|
173 |
* |
|
174 |
* @throws CqiClientException |
|
175 |
* the cqi client exception |
|
176 |
*/ |
|
177 |
protected boolean computeWithMainCorpus(MainCorpus corpus, Property property, IProgressMonitor monitor) throws CqiClientException { |
|
178 |
// System.out.println("in "+this.getCqpId()+" look for cached lexicon "+property); |
|
179 |
// System.out.println("not found"); |
|
180 |
this.subTask("Computing lexicon size..."); |
|
181 |
Log.finest(TXMCoreMessages.LEXICON + corpus.getName()); |
|
182 |
int lexiconSize; |
|
183 |
try { |
|
184 |
lexiconSize = CorpusManager.getCorpusManager().getCqiClient().lexiconSize(property.getQualifiedName()); |
|
185 |
} catch (Exception e) { |
|
186 |
throw new CqiClientException(e); |
|
187 |
} |
|
188 |
|
|
189 |
int[] ids = new int[lexiconSize]; |
|
190 |
for (int i = 0; i < ids.length; i++) { |
|
191 |
ids[i] = i; |
|
192 |
} |
|
193 |
|
|
194 |
int[] freqs; |
|
195 |
try { |
|
196 |
this.subTask("Computing lexicon frequencies..."); |
|
197 |
freqs = CorpusManager.getCorpusManager().getCqiClient().id2Freq(property.getQualifiedName(), ids); |
|
198 |
} catch (Exception e) { |
|
199 |
throw new CqiClientException(e); |
|
200 |
} |
|
201 |
|
|
202 |
init(corpus, property, freqs, ids); |
|
203 |
return true; |
|
204 |
} |
|
205 |
|
|
206 |
/** |
|
207 |
* |
|
208 |
* @param corpus |
|
209 |
* @param property |
|
210 |
* @param monitor |
|
211 |
* @return |
|
212 |
* @throws CqiClientException |
|
213 |
*/ |
|
214 |
protected boolean computewithSubCorpus(Subcorpus corpus, Property property, IProgressMonitor monitor) throws CqiClientException { |
|
215 |
|
|
216 |
//System.out.println("not found"); |
|
217 |
Log.finest(TXMCoreMessages.SUBCORPUS_LEXICON + corpus.getName()); |
|
218 |
long start = System.currentTimeMillis(); |
|
219 |
int[][] fdist = null; |
|
220 |
Subcorpus tmp = null; |
|
221 |
try { |
|
222 |
this.subTask("Computing lexicon frequencies..."); |
|
223 |
tmp = corpus.createSubcorpus(new Query("[]"), "S"+corpus.getNextSubcorpusCounter(), true); //$NON-NLS-1$ |
|
224 |
if (tmp != null) { |
|
225 |
fdist = CorpusManager.getCorpusManager().getCqiClient().fdist1( |
|
226 |
tmp.getQualifiedCqpId(), 0, |
|
227 |
ICqiClient.CQI_CONST_FIELD_MATCH, property.getName()); |
|
228 |
|
|
229 |
corpus.dropSubcorpus(tmp); // drop the subcorpus only if correctly created |
|
230 |
} |
|
231 |
//System.out.println("nb lines: "+fdist.length); |
|
232 |
} catch (Exception e) { |
|
233 |
throw new CqiClientException(e); |
|
234 |
} finally { |
|
235 |
if (tmp != null) { |
|
236 |
try {corpus.dropSubcorpus(tmp);} |
|
237 |
catch (Exception e2) {} |
|
238 |
} |
|
239 |
} |
|
240 |
int lexiconSize = fdist.length; |
|
241 |
|
|
242 |
int[] freqs = new int[lexiconSize]; |
|
243 |
int[] ids = new int[lexiconSize]; |
|
244 |
for (int i = 0; i < fdist.length; i++) { |
|
245 |
ids[i] = fdist[i][0]; |
|
246 |
freqs[i] = fdist[i][1]; |
|
247 |
} |
|
248 |
|
|
249 |
init(corpus, property, freqs, ids); |
|
250 |
return true; |
|
251 |
} |
|
252 |
|
|
253 |
|
|
254 |
/** |
|
255 |
* Convert the Lexicon into a Vector object. |
|
256 |
* |
|
257 |
* @return the vector |
|
258 |
* @throws StatException the stat exception |
|
259 |
*/ |
|
260 |
public Vector asVector() throws StatException { |
|
261 |
String symbol = prefixR + (nolex++); |
|
262 |
VectorImpl v = new VectorImpl(freqs, symbol); |
|
263 |
v.setRNames(getForms()); |
|
264 |
this.symbol = v.getSymbol(); |
|
265 |
return v; |
|
266 |
} |
|
267 |
|
|
268 |
|
|
269 |
/** |
|
270 |
* Compute number of tokens. / this.nbr |
|
271 |
*/ |
|
272 |
private void computeNumberOfTokens() { |
|
273 |
numberOfTokens = 0; |
|
274 |
for (int i = 0; i < freqs.length; i++) { |
|
275 |
numberOfTokens += freqs[i]; |
|
276 |
// System.out.println(numberOfTokens); |
|
277 |
// if (freqs[i] != 1) System.out.println(freqs[i]); |
|
278 |
} |
|
279 |
} |
|
280 |
|
|
281 |
|
|
282 |
|
|
283 |
@Override |
|
284 |
public boolean delete() { |
|
285 |
if (corpus != null) { |
|
286 |
corpus.removeData(this); |
|
287 |
} |
|
288 |
return true; |
|
289 |
} |
|
290 |
|
|
291 |
/** |
|
292 |
* Dump lexicon forms and frequencies in a String. |
|
293 |
* |
|
294 |
* @param col the col |
|
295 |
* @param txt the txt |
|
296 |
* @return the string |
|
297 |
*/ |
|
298 |
public String dump(String col, String txt) { |
|
299 |
StringBuffer buffer = new StringBuffer(); |
|
300 |
getForms(); |
|
301 |
for (int i = 0; i < forms.length; i++) { |
|
302 |
buffer.append(txt+ forms[i].replace(txt, txt+txt) + txt + col + freqs[i] + "\n"); //$NON-NLS-1$ |
|
303 |
} |
|
304 |
return buffer.toString(); |
|
305 |
} |
|
306 |
|
|
307 |
/* (non-Javadoc) |
|
308 |
* @see java.lang.Object#equals(java.lang.Object) |
|
309 |
*/ |
|
310 |
@Override |
|
311 |
public boolean equals(Object obj) { |
|
312 |
if (!(obj instanceof Lexicon)) { |
|
313 |
return false; |
|
314 |
} |
|
315 |
Lexicon other = (Lexicon) obj; |
|
316 |
|
|
317 |
if (other.nbrOfType() != this.nbrOfType()) { |
|
318 |
return false; |
|
319 |
} |
|
320 |
return (Arrays.equals(freqs, other.getFreq()) && Arrays.equals(getForms(), other.getForms())); |
|
321 |
} |
|
322 |
|
|
323 |
/** |
|
324 |
* The corpus or subcorpus this lexicon is build on. |
|
325 |
* |
|
326 |
* @return the corpus |
|
327 |
*/ |
|
328 |
public Corpus getCorpus() { |
|
329 |
return corpus; |
|
330 |
} |
|
331 |
|
|
332 |
public String getDetails() { |
|
333 |
return this.corpus.getName() + " " + this.pProperty.getName(); //$NON-NLS-1$ |
|
334 |
} |
|
335 |
|
|
336 |
//TODO: move this into a Lexicon chart renderer |
|
337 |
// /** |
|
338 |
// * Draw a pareto graphic with this frequency list and record it into the |
|
339 |
// * provided filename into svg format. |
|
340 |
// * |
|
341 |
// * @param file where to save the pareto graphic. |
|
342 |
// * @return the pareto graphic |
|
343 |
// * @throws StatException if anything goes wrong. |
|
344 |
// */ |
|
345 |
// public void getParetoGraphic(File file) throws StatException { |
|
346 |
// String rName = asVector().getSymbol(); |
|
347 |
// String expr = "pareto(" + rName + ")"; //$NON-NLS-1$ //$NON-NLS-2$ |
|
348 |
// try { |
|
349 |
// RWorkspace.getRWorkspaceInstance().plot(file, expr, RDevice.SVG); |
|
350 |
// } catch (Exception e) { |
|
351 |
// throw new StatException(e); |
|
352 |
// } |
|
353 |
// } |
|
354 |
|
|
355 |
/** |
|
356 |
* The dif ferent types in the lexicon, the type at the index <code>j</code> |
|
357 |
* of this array have the frequency at index <code>j</code> in the array |
|
358 |
* returned by {@link #getFreq()}. |
|
359 |
* |
|
360 |
* @return types as an array of <code>String</code> |
|
361 |
*/ |
|
362 |
public String[] getForms() { |
|
363 |
if (forms == null) { |
|
364 |
if(ids == null) { |
|
365 |
return new String[0]; |
|
366 |
} |
|
367 |
try { |
|
368 |
forms = CorpusManager.getCorpusManager().getCqiClient().id2Str(pProperty.getQualifiedName(), ids); |
|
369 |
} catch (Exception e) { |
|
370 |
// TODO Auto-generated catch block |
|
371 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
372 |
} |
|
373 |
} |
|
374 |
return forms; |
|
375 |
} |
Formats disponibles : Unified diff