Révision 619

tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/TxmToHSQL.groovy (revision 619)
27 27
//
28 28
package org.txm.scripts;
29 29

  
30
import org.txm.lexicon.core.corpusengine.cqp.Lexicon
30
import org.txm.lexicon.core.functions.Lexicon;
31 31
import org.txm.searchengine.cqp.corpus.Corpus;
32 32
import org.txm.searchengine.cqp.corpus.Property;
33 33
import org.txm.searchengine.cqp.corpus.CorpusManager;
tmp/org.txm.index.core/src/org/txm/index/core/functions/Index.java (revision 619)
48 48
import org.txm.index.core.functions.LineComparator.SortMode;
49 49
import org.txm.index.core.messages.IndexCoreMessages;
50 50
import org.txm.index.core.preferences.IndexPreferences;
51
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
51
import org.txm.lexicon.core.functions.Lexicon;
52 52
import org.txm.searchengine.cqp.CQPEngine;
53 53
import org.txm.searchengine.cqp.ICqiClient;
54 54
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
tmp/org.txm.cooccurrence.core/src/org/txm/cooccurrence/core/preferences/CooccurrencePreferences.java (revision 619)
47 47
	// local result preferences
48 48
	public static final String QUERY = PREFERENCES_PREFIX + "query"; //$NON-NLS-1$
49 49

  
50
	public static final String COOCQUERYFILTER = PREFERENCES_PREFIX + "cooc_query_filter"; //$NON-NLS-1$
50
	public static final String COOC_QUERY_FILTER = PREFERENCES_PREFIX + "cooc_query_filter"; //$NON-NLS-1$
51 51

  
52 52
	
53 53
	/**
tmp/org.txm.cooccurrence.core/src/org/txm/cooccurrence/core/functions/Cooccurrence.java (revision 619)
161 161
	/** The P. */
162 162
	int P = -1;
163 163

  
164
	@Parameter(key=CooccurrencePreferences.COOCQUERYFILTER)
164
	/** The reference corpus to use = the R symbol that point to a matrix WordxFreqs. */
165
	String referenceCorpus;
166

  
167
	/** The scores. */
168
	HashMap<String, Double> scores;
169

  
170
	/** The symbol. */
171
	private String symbol;
172

  
173
	/** The writer. */
174
	private BufferedWriter writer;
175

  
176
	
177
	
178
	
179
	@Parameter(key=CooccurrencePreferences.COOC_QUERY_FILTER)
165 180
	protected String pCooccurentQueryFilter = "[]"; //$NON-NLS-1$
181
	
166 182
	/** The mincof. */
167 183
	@Parameter(key=CooccurrencePreferences.MIN_COUNT)
168 184
	protected Integer pFCoocFilter;
185
	
169 186
	/** The minf. */
170 187
	@Parameter(key=CooccurrencePreferences.MIN_FREQ)
171 188
	protected Integer pFminFilter;
189
	
172 190
	/** The include xpivot. */
173 191
	@Parameter(key=CooccurrencePreferences.INCLUDE_X_PIVOT)
174 192
	protected Boolean pIncludeXpivot;
193
	
175 194
	/** The maxleft. */
176 195
	@Parameter(key=CooccurrencePreferences.MAX_LEFT)
177 196
	protected Integer pMaxLeftContextSize;
197
	
178 198
	/** The maxright. */
179 199
	@Parameter(key=CooccurrencePreferences.MAX_RIGHT)
180 200
	protected Integer pMaxRightContextSize;
201
	
181 202
	/** The minleft. */
182 203
	@Parameter(key=CooccurrencePreferences.MIN_LEFT)
183 204
	protected Integer pMinLeftContextSize;
205
	
184 206
	/** The minright. */
185 207
	@Parameter(key=CooccurrencePreferences.MIN_RIGHT)
186 208
	protected Integer pMinRightContextSize;
209
	
187 210
	/** The cooccurents properties to show. */
188 211
	@Parameter
189 212
	protected List<Property> pProperties;
213
	
190 214
	/** The keyword query. */
191 215
	@Parameter
192 216
	protected Query pQuery;
217
	
193 218
	/** The minscore. */
194 219
	@Parameter(key=CooccurrencePreferences.MIN_SCORE)
195 220
	protected Double pScoreMinFilter;
221
	
196 222
	/** The structural unit context limit. */
197 223
	@Parameter
198 224
	protected StructuralUnit pStructuralUnitLimit;
199 225

  
200
	/** The reference corpus to use = the R symbol that point to a matrix WordxFreqs. */
201
	String referenceCorpus;
202 226

  
203
	/** The scores. */
204
	HashMap<String, Double> scores;
205

  
206
	/** The symbol. */
207
	private String symbol;
208

  
209
	/** The writer. */
210
	private BufferedWriter writer;
211

  
227
	
228
	
229
	
230
	
212 231
	/**
213 232
	 * Creates an empty <link>Cooccurrence</link> object, child of the specified <link>Corpus</link>.
214 233
	 * @param corpus
......
546 565

  
547 566
	@Override
548 567
	public String getName() {
549
		if (this.getParent() != null)
568
		if (this.getParent() != null) {
550 569
			return this.getParent().getSimpleName() + ": " + this.getSimpleName(); //$NON-NLS-1$
551
		else return this.getSimpleName();
570
		}
571
		else {
572
			return this.getSimpleName();
573
		}
552 574
	}
553 575

  
554 576
	/**
......
1511 1533
	
1512 1534
	@Override
1513 1535
	public boolean loadParameters() throws CqiClientException {
1514
		
1515
//		pCooccurentQueryFilter = this.getStringParameterValue(CooccurrencePreferences.COOCQUERYFILTER);
1516
//		pFCoocFilter = this.getIntParameterValue(CooccurrencePreferences.MIN_COUNT);
1517
//		pFminFilter = this.getIntParameterValue(CooccurrencePreferences.MIN_FREQ);
1518
//		pIncludeXpivot  = this.getBooleanParameterValue(CooccurrencePreferences.INCLUDE_X_PIVOT);
1519
//		pMaxLeftContextSize = this.getIntParameterValue(CooccurrencePreferences.MAX_LEFT);
1520
//		pMaxRightContextSize = this.getIntParameterValue(CooccurrencePreferences.MAX_RIGHT);
1521
//		pMinLeftContextSize = this.getIntParameterValue(CooccurrencePreferences.MIN_LEFT);
1522
//		pMinRightContextSize = this.getIntParameterValue(CooccurrencePreferences.MIN_RIGHT);
1523
		
1524 1536
		String s = this.getStringParameterValue(CooccurrencePreferences.PROPERTIES);
1525 1537
		pProperties = WordProperty.fromStringToList(getCorpus(), s);
1526 1538
		
1527 1539
		pQuery = new Query(this.getStringParameterValue(CooccurrencePreferences.QUERY));
1528
//		pScoreMinFilter = this.getIntParameterValue(CooccurrencePreferences.MIN_SCORE);
1529 1540
		
1530 1541
		s = this.getStringParameterValue(CooccurrencePreferences.STRUCTURE_LIMIT);
1531 1542
		pStructuralUnitLimit = getCorpus().getStructuralUnit(s);
tmp/org.txm.textsbalance.core/src/org/txm/textsbalance/core/functions/TextsBalance.java (revision 619)
61 61
	
62 62
	/**
63 63
	 * 
64
	 * @param corpus
64 65
	 */
65 66
	public TextsBalance(Corpus corpus) {
66 67
		super(corpus);
tmp/org.txm.index.rcp/plugin.xml (revision 619)
123 123
               type="org.eclipse.ui.model.IWorkbenchAdapter">
124 124
         </adapter>
125 125
      </factory>
126
      <factory
127
            adaptableType="org.txm.lexicon.core.functions.Lexicon"
128
            class="org.txm.index.rcp.adapters.LexiconAdapterFactory">
129
         <adapter
130
               type="org.eclipse.ui.model.IWorkbenchAdapter">
131
         </adapter>
132
      </factory>
126 133
   </extension>
127 134
   <extension
128 135
         point="org.eclipse.ui.commands">
tmp/org.txm.index.rcp/src/org/txm/index/rcp/handlers/ComputeLexicon.java (revision 619)
8 8
import org.eclipse.ui.IWorkbenchPage;
9 9
import org.txm.index.core.functions.Index;
10 10
import org.txm.index.rcp.editors.DictionnaryEditor;
11
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
11
import org.txm.lexicon.core.functions.Lexicon;
12 12
import org.txm.rcp.TXMWindows;
13 13
import org.txm.rcp.editors.TXMEditorPart;
14 14
import org.txm.rcp.editors.TXMResultEditorInput;
tmp/org.txm.index.rcp/src/org/txm/index/rcp/adapters/LexiconAdapterFactory.java (revision 619)
5 5
import org.eclipse.ui.model.IWorkbenchAdapter;
6 6
import org.eclipse.ui.plugin.AbstractUIPlugin;
7 7
import org.osgi.framework.FrameworkUtil;
8
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
8
import org.txm.lexicon.core.functions.Lexicon;
9 9
import org.txm.rcp.adapters.TXMResultAdapter;
10 10
import org.txm.rcp.adapters.TXMResultAdapterFactory;
11 11

  
tmp/org.txm.searchengine.cqp.core/src/org/txm/functions/summary/Summary.java (revision 619)
1 1
package org.txm.functions.summary;
2 2

  
3 3
import java.io.File;
4
import java.io.IOException;
5 4
import java.util.ArrayList;
6 5
import java.util.Arrays;
7 6

  
......
12 11
import org.txm.objects.Page;
13 12
import org.txm.objects.Text;
14 13
import org.txm.searchengine.cqp.CQPEngine;
15
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
16 14
import org.txm.searchengine.cqp.corpus.MainCorpus;
17 15
import org.txm.searchengine.cqp.corpus.Property;
18 16
import org.txm.searchengine.cqp.corpus.QueryResult;
19 17
import org.txm.searchengine.cqp.corpus.StructuralUnitProperty;
20 18
import org.txm.searchengine.cqp.corpus.query.Query;
21
import org.txm.searchengine.cqp.serverException.CqiServerError;
22 19

  
23 20
public class Summary extends TXMResult {
24 21
	
......
31 28
		this.corpus = corpus;
32 29
	}
33 30

  
34
	public TreeNode getRoot()
35
	{
36
		return treenodes;
31
	
32
	@Override
33
	public boolean canCompute() {
34
		return corpus != null && properties != null && properties.size() > 0;
37 35
	}
38 36

  
39
	public boolean compute() throws CqiClientException, IOException, CqiServerError
40
	{
37
	@Override
38
	public boolean setParameters(TXMParameters parameters) {
39
		// TODO Auto-generated method stub
40
		System.err.println("Summary.setParameters(): not yet implemented.");
41
		return true;
42
	}
43

  
44
	@Override
45
	public boolean loadParameters() {
46
		// TODO Auto-generated method stub
47
		System.err.println("Summary.loadParameters(): not yet implemented.");
48
		return true;
49
	}
50

  
51

  
52
	@Override
53
	public boolean saveParameters() {
54
		// TODO Auto-generated method stub
55
		System.err.println("Summary.saveParameters(): not yet implemented.");
56
		return true;
57
	}
58

  
59
	
60
	@Override
61
	protected boolean _compute() throws Exception {
41 62
		treenodes.end = corpus.getSize();
42 63
		treenodes.id = corpus.getName();
43 64
		treenodes.start = 0;
......
78 99
		processNodeList(treenodes);
79 100

  
80 101
		//System.out.println("checking result..."+treenodes.size()+" nodes");
81
		if (treenodes.check())
102
		if (treenodes.check()) {
82 103
			return true;
83
		else
104
		}
105
		else {
84 106
			return false;
107
		}
85 108
	}
86 109

  
110
	public TreeNode getRoot() {
111
		return treenodes;
112
	}
113

  
114

  
115

  
87 116
	private void processNodeList(TreeNode nodes)
88 117
	{
89 118
		//System.out.println("+processing node list: "+nodes.size());
......
258 287
		
259 288
	}
260 289

  
261
	@Override
262
	public boolean canCompute() {
263
		return corpus != null && properties != null && properties.size() > 0;
264
	}
265 290

  
266 291
	@Override
267
	public boolean setParameters(TXMParameters parameters) {
268
		// TODO Auto-generated method stub
269
		System.err.println("Summary.setParameters(): not yet implemented.");
270
		return true;
271
	}
272

  
273
	@Override
274
	public boolean loadParameters() {
275
		// TODO Auto-generated method stub
276
		System.err.println("Summary.loadParameters(): not yet implemented.");
277
		return true;
278
	}
279

  
280

  
281
	@Override
282
	public boolean saveParameters() {
283
		// TODO Auto-generated method stub
284
		System.err.println("Summary.saveParameters(): not yet implemented.");
285
		return true;
286
	}
287

  
288
	
289
	@Override
290
	protected boolean _compute() throws Exception {
291
		System.err.println("Summary.compute() not implemented");
292
		return false;
293
	}
294

  
295
	@Override
296 292
	public boolean toTxt(File outfile, String encoding, String colseparator, String txtseparator) throws Exception {
297 293
		
298 294
		return false;
299 295
	}
300 296

  
301
	public void setProperties(ArrayList<StructuralUnitProperty> properties2) {
297
	public void setProperties(ArrayList<StructuralUnitProperty> properties) {
302 298
		this.properties = properties;
303 299
	}
304 300
}
tmp/org.txm.statsengine.r.rcp/src/org/txm/statsengine/r/rcp/views/RVariablesView.java (revision 619)
67 67
import org.txm.functions.referencer.Referencer;
68 68
import org.txm.index.core.functions.Index;
69 69
import org.txm.lexicaltable.core.functions.LexicalTable;
70
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
70
import org.txm.lexicon.core.functions.Lexicon;
71 71
import org.txm.objects.Base;
72 72
import org.txm.progression.core.functions.Progression;
73 73
import org.txm.rcp.IImageKeys;
tmp/org.txm.lexicaltable.core/src/org/txm/lexicaltable/core/statsengine/r/data/LexicalTableImpl.java (revision 619)
39 39
import org.rosuda.REngine.REXPMismatchException;
40 40
import org.txm.lexicaltable.core.messages.LexicalTableCoreMessages;
41 41
import org.txm.lexicaltable.core.statsengine.data.ILexicalTable;
42
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
42
import org.txm.lexicon.core.functions.Lexicon;
43 43
import org.txm.searchengine.cqp.corpus.Property;
44 44
import org.txm.statsengine.core.StatException;
45 45
import org.txm.statsengine.core.data.Vector;
tmp/org.txm.lexicaltable.core/src/org/txm/lexicaltable/core/functions/LexicalTable.java (revision 619)
17 17
import org.txm.lexicaltable.core.preferences.LexicalTablePreferences;
18 18
import org.txm.lexicaltable.core.statsengine.data.ILexicalTable;
19 19
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl;
20
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
20
import org.txm.lexicon.core.functions.Lexicon;
21 21
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
22 22
import org.txm.searchengine.cqp.corpus.Corpus;
23 23
import org.txm.searchengine.cqp.corpus.Partition;
tmp/org.txm.lexicaltable.core/src/org/txm/lexicaltable/core/functions/___LexicalTableFactory.java (revision 619)
6 6
import java.util.Map;
7 7

  
8 8
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl;
9
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
9
import org.txm.lexicon.core.functions.Lexicon;
10 10
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
11 11
import org.txm.searchengine.cqp.corpus.Corpus;
12 12
import org.txm.searchengine.cqp.corpus.Partition;
tmp/org.txm.rcp/src/main/java/org/txm/rcp/views/cmdparameters/TXMResultDebugView.java (revision 619)
47 47
 */
48 48
public class TXMResultDebugView extends ViewPart implements ISelectionChangedListener {
49 49

  
50
	
50
	/**
51
	 * Display area.
52
	 */
51 53
	protected StyledText displayArea;
52 54

  
53 55
	/**
......
82 84
			TXMResult result = (TXMResult) selection;
83 85
			
84 86
			StringBuffer buffer = new StringBuffer();
85
			buffer.append("Result: " + result.toString() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
87
			buffer.append("Result: " + result.getClass() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
86 88
			buffer.append("UUID: " + result.getUUID() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
87 89
			buffer.append("Simple name: " + result.getSimpleName() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
88 90
			buffer.append("Name: " + result.getName() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
89 91
			buffer.append("Valid filename: " + result.getValidFileName() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
90 92
			buffer.append("Empty name: " + result.getEmptyName() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
93
			buffer.append("toString(): " + result.toString() + "\n"); //$NON-NLS-1$ //$NON-NLS-2$
91 94
			buffer.append("Details: " + result.getDetails() + "\n\n"); //$NON-NLS-1$ //$NON-NLS-2$
92 95

  
93 96
			// Command preferences
......
106 109
				buffer.append("Chart object = " + ((ChartResult)result).getChart() + ", chart type = " + ((ChartResult)result).getChartType() + "\n");
107 110
			}
108 111
			buffer.append("Selection full path name = " + result.getFullPathSimpleName() + " - " + result.getName() + "\n");
109
			buffer.append("Direct children count = " + result.getResults().size() + ", direct visible children count = " + result.getChildren(true).size() + ", children count = " + result.getDeepChildren().size());
112
			buffer.append("Direct children count = " + result.getResults().size() + ", direct visible children count = " + result.getChildren(true).size() + ", children count = " + result.getDeepChildren().size() + "\n");
110 113
			buffer.append("Root parent = " + result.getRootParent() + ", main corpus parent = " + Corpus.getParentMainCorpus(result) + ", first parent corpus = " + Corpus.getFirstParentCorpus(result));
111 114

  
112 115
			
tmp/org.txm.rcp/src/main/java/org/txm/rcp/views/SummaryView.java (revision 619)
140 140
			}
141 141
			summary = new Summary(selectedCorpus);
142 142
			ArrayList<StructuralUnitProperty> properties = new ArrayList<StructuralUnitProperty>();
143
			for (Property p : selectedProps)
144
				if (p instanceof StructuralUnitProperty)
143
			for (Property p : selectedProps) {
144
				if (p instanceof StructuralUnitProperty) {
145 145
					properties.add((StructuralUnitProperty) p);
146
				}
147
			}
146 148

  
147 149
			summary.setProperties(properties);
148 150
			if (summary.compute()) {
tmp/org.txm.specificities.core/src/org/txm/specificities/core/functions/Specificities.java (revision 619)
41 41
import org.txm.core.results.TXMParameters;
42 42
import org.txm.core.results.TXMResult;
43 43
import org.txm.lexicaltable.core.functions.LexicalTable;
44
import org.txm.lexicon.core.corpusengine.cqp.Lexicon;
44
import org.txm.lexicon.core.functions.Lexicon;
45 45
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
46 46
import org.txm.searchengine.cqp.corpus.Corpus;
47 47
import org.txm.specificities.core.messages.SpecificitiesCoreMessages;
tmp/org.txm.core/src/java/org/txm/core/preferences/TXMPreferences.java (revision 619)
919 919
		
920 920
		IEclipsePreferences preferences = scope.getNode(nodeQualifier);
921 921
		
922
		str.append("Path = " +  preferences .absolutePath() + ")\n");
922
		str.append("Path = " +  preferences .absolutePath() + "\n");
923 923
		
924 924
		try {
925 925
			String[] keys = preferences.keys();
tmp/org.txm.core/src/java/org/txm/core/results/TXMResult.java (revision 619)
42 42
	 */
43 43
	protected String uniqueID;
44 44
	//protected String path;
45
	public static final DateFormat ID_TIME_FORMAT = new SimpleDateFormat("YYMMDD");
45
	public static final DateFormat ID_TIME_FORMAT = new SimpleDateFormat("YYYYMMDD");
46 46
	/** Editor can use this to test if the result need to be saved */
47 47
	protected boolean hasBeenComputedOnce = false;
48 48

  
tmp/org.txm.lexicon.core/src/org/txm/lexicon/core/corpusengine/cqp/Lexicon.java (revision 619)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2016-09-19 10:31:00 +0200 (Mon, 19 Sep 2016) $
25
// $LastChangedRevision: 3298 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.lexicon.core.corpusengine.cqp;
29

  
30
import java.io.File;
31
import java.io.FileNotFoundException;
32
import java.io.FileOutputStream;
33
import java.io.IOException;
34
import java.io.OutputStreamWriter;
35
import java.io.UnsupportedEncodingException;
36
import java.util.Arrays;
37
import java.util.Map;
38

  
39
import org.eclipse.core.runtime.IProgressMonitor;
40
import org.txm.core.messages.TXMCoreMessages;
41
import org.txm.core.results.TXMParameters;
42
import org.txm.core.results.TXMResult;
43
import org.txm.lexicon.core.messages.LexiconCoreMessages;
44
import org.txm.searchengine.cqp.ICqiClient;
45
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
46
import org.txm.searchengine.cqp.corpus.Corpus;
47
import org.txm.searchengine.cqp.corpus.CorpusManager;
48
import org.txm.searchengine.cqp.corpus.MainCorpus;
49
import org.txm.searchengine.cqp.corpus.Property;
50
import org.txm.searchengine.cqp.corpus.Subcorpus;
51
import org.txm.searchengine.cqp.corpus.query.Query;
52
import org.txm.statsengine.core.StatException;
53
import org.txm.statsengine.core.data.Vector;
54
import org.txm.statsengine.r.core.data.VectorImpl;
55
import org.txm.utils.logger.Log;
56

  
57
// TODO: Auto-generated Javadoc
58
//  TODO should be put into stat.data package ?
59
/**
60
 * Represents a frequency list built from a {@link Corpus} (or a
61
 * {@link Subcorpus}) and a {@link Property}.
62
 *
63
 * @author sloiseau
64
 */
65
public class Lexicon extends TXMResult {
66

  
67
	/** Counter appended to {@link #prefixR} by asVector() to name each created R vector; incremented on every call. */
68
	protected static int nolex = 1;
69

  
70
	/** Prefix of the R symbol names built in asVector(). */
71
	protected static String prefixR = "Lexicon_"; //$NON-NLS-1$
72

  
73
	/** Forms (strings) of the entries; reset to null by init() and fetched on demand by getForms(). */
74
	private String[] forms;
75

  
76
	/** Frequency of each entry, index-aligned with {@link #ids} (init() rejects arrays of different lengths). */
77
	private int[] freqs;
78

  
79
	/** Identifiers of the entries as given to init(); used to look up the forms. */
80
	private int[] ids;
81

  
82
	/** Sum of the frequencies as computed by computeNumberOfTokens(); -1 initially. */
83
	int numberOfTokens = -1;
84

  
85
	/** The property the lexicon is computed on; set by init(). */
86
	private Property pProperty;
87

  
88
	/** R symbol of the vector created by the last asVector() call. */
89
	private String symbol;
90

  
91
	/** Output writer; presumably used by an export method outside the visible part of the class - TODO confirm. */
92
	private OutputStreamWriter writer;
93

  
94
	/** Corpus or subcorpus the lexicon is built on; set by the constructor and by init(). */
	private Corpus corpus;
95

  
96
//	/**
97
//	 * Find or build a lexicon given a Corpus (MainCorpus or SubCorpus).
98
//	 * 
99
//	 * @param corpus
100
//	 * @param property
101
//	 * @return a Lexicon. May return null if the lexicon forms or freqs are null.
102
//	 * @throws Exception 
103
//	 */
104
//	public static Lexicon getLexicon(Corpus corpus, Property property) throws Exception {
105
//		HashSet<Object> results = corpus.getStoredData(Lexicon.class);
106
//		for (Object result : results) {
107
//			Lexicon lex = (Lexicon)result;
108
//			if (lex.getProperty().equals(property)) {
109
//				return lex;
110
//			}
111
//		}
112
//
113
//		Lexicon lex = new Lexicon(corpus);
114
//		lex.setParameters(property);
115
//		if (lex.compute(null) && lex.getForms() != null && lex.getFreq() != null) {
116
//			corpus.storeData(lex);
117
//			return lex;
118
//		} else {
119
//			return null;
120
//		}
121
//	}
122

  
123
	/**
	 * Creates a lexicon attached to the given corpus. Nothing is computed here:
	 * the corpus is only handed to the parent constructor and remembered.
	 *
	 * @param corpus the corpus (or subcorpus) the lexicon will be built on
	 */
	public Lexicon(Corpus corpus) {
		super(corpus);
		this.corpus = corpus;
	}
127

  
128
	
129
	@Override
130
	public boolean saveParameters() throws Exception {
131
		// TODO Auto-generated method stub
132
		return true;
133
	}
134

  
135
	@Override
136
	public boolean loadParameters() throws Exception {
137
		// TODO Auto-generated method stub
138
		return true;
139
	}
140

  
141
	@Override
142
	public void clean() {
143
		// TODO Auto-generated method stub
144
		
145
	}
146

  
147
	@Override
148
	public boolean canCompute() throws Exception {
149
		return corpus != null && pProperty != null;
150
	}
151

  
152
	@Override
153
	protected boolean _compute() throws Exception {
154
		if (corpus instanceof MainCorpus) {
155
			return computeWithMainCorpus((MainCorpus)corpus, pProperty, monitor);
156
		}
157
		else if (corpus instanceof Subcorpus) {
158
			return computewithSubCorpus((Subcorpus)corpus, pProperty, monitor);
159
		}
160
		else {
161
			System.out.println("Error: Lexicon parent is neither a Maincorpus nor a Subcorpus.");
162
			return false;
163
		}
164
	}
165
	
166
	/**
167
	 * Gets the lexicon relative to a given property.
168
	 * 
169
	 * @param property
170
	 *            the property
171
	 * 
172
	 * @return the lexicon
173
	 * 
174
	 * @throws CqiClientException
175
	 *             the cqi client exception
176
	 */
177
	protected boolean computeWithMainCorpus(MainCorpus corpus, Property property, IProgressMonitor monitor) throws CqiClientException {
178
		// System.out.println("in "+this.getCqpId()+" look for cached lexicon "+property);
179
		// System.out.println("not found");
180
		this.subTask("Computing lexicon size...");
181
		Log.finest(TXMCoreMessages.LEXICON + corpus.getName());
182
		int lexiconSize;
183
		try {
184
			lexiconSize = CorpusManager.getCorpusManager().getCqiClient().lexiconSize(property.getQualifiedName());
185
		} catch (Exception e) {
186
			throw new CqiClientException(e);
187
		}
188

  
189
		int[] ids = new int[lexiconSize];
190
		for (int i = 0; i < ids.length; i++) {
191
			ids[i] = i;
192
		}
193

  
194
		int[] freqs;
195
		try {
196
			this.subTask("Computing lexicon frequencies...");
197
			freqs = CorpusManager.getCorpusManager().getCqiClient().id2Freq(property.getQualifiedName(), ids);
198
		} catch (Exception e) {
199
			throw new CqiClientException(e);
200
		}
201

  
202
		init(corpus, property, freqs, ids);
203
		return true;
204
	}
205

  
206
	/**
207
	 * 
208
	 * @param corpus
209
	 * @param property
210
	 * @param monitor
211
	 * @return
212
	 * @throws CqiClientException
213
	 */
214
	protected boolean computewithSubCorpus(Subcorpus corpus, Property property, IProgressMonitor monitor) throws CqiClientException {
215

  
216
		//System.out.println("not found");
217
		Log.finest(TXMCoreMessages.SUBCORPUS_LEXICON + corpus.getName());
218
		long start = System.currentTimeMillis();
219
		int[][] fdist = null;
220
		Subcorpus tmp = null;
221
		try {
222
			this.subTask("Computing lexicon frequencies...");
223
			tmp = corpus.createSubcorpus(new Query("[]"), "S"+corpus.getNextSubcorpusCounter(), true); //$NON-NLS-1$
224
			if (tmp != null) {
225
				fdist = CorpusManager.getCorpusManager().getCqiClient().fdist1(
226
						tmp.getQualifiedCqpId(), 0,
227
						ICqiClient.CQI_CONST_FIELD_MATCH, property.getName());
228

  
229
				corpus.dropSubcorpus(tmp); // drop the subcorpus only if correctly created
230
			}
231
			//System.out.println("nb lines: "+fdist.length);
232
		} catch (Exception e) {
233
			throw new CqiClientException(e);
234
		} finally {
235
			if (tmp != null) {
236
				try {corpus.dropSubcorpus(tmp);}
237
				catch (Exception e2) {}
238
			}
239
		}
240
		int lexiconSize = fdist.length;
241

  
242
		int[] freqs = new int[lexiconSize];
243
		int[] ids = new int[lexiconSize];
244
		for (int i = 0; i < fdist.length; i++) {
245
			ids[i] = fdist[i][0];
246
			freqs[i] = fdist[i][1];
247
		}
248

  
249
		init(corpus, property, freqs, ids);
250
		return true;
251
	}
252
	
253
	
254
	/**
255
	 * Convert the Lexicon into a Vector object.
256
	 *
257
	 * @return the vector
258
	 * @throws StatException the stat exception
259
	 */
260
	public Vector asVector() throws StatException {
261
		String symbol = prefixR + (nolex++);
262
		VectorImpl v = new VectorImpl(freqs, symbol);
263
		v.setRNames(getForms());
264
		this.symbol = v.getSymbol();
265
		return v;
266
	}
267

  
268

  
269
	/**
270
	 * Compute number of tokens. / this.nbr
271
	 */
272
	private void computeNumberOfTokens() {
273
		numberOfTokens = 0;
274
		for (int i = 0; i < freqs.length; i++) {
275
			numberOfTokens += freqs[i];
276
			// System.out.println(numberOfTokens);
277
			// if (freqs[i] != 1) System.out.println(freqs[i]);
278
		}
279
	}
280

  
281

  
282

  
283
	@Override
284
	public boolean delete() {
285
		if (corpus != null) {
286
			corpus.removeData(this);
287
		}
288
		return true;
289
	}
290

  
291
	/**
292
	 * Dump lexicon forms and frequencies in a String.
293
	 *
294
	 * @param col the col
295
	 * @param txt the txt
296
	 * @return the string
297
	 */
298
	public String dump(String col, String txt) {
299
		StringBuffer buffer = new StringBuffer();
300
		getForms();
301
		for (int i = 0; i < forms.length; i++) {
302
			buffer.append(txt+ forms[i].replace(txt, txt+txt) + txt + col + freqs[i] + "\n"); //$NON-NLS-1$ 
303
		}
304
		return buffer.toString();
305
	}
306

  
307
	/* (non-Javadoc)
308
	 * @see java.lang.Object#equals(java.lang.Object)
309
	 */
310
	@Override
311
	public boolean equals(Object obj) {
312
		if (!(obj instanceof Lexicon)) {
313
			return false;
314
		}
315
		Lexicon other = (Lexicon) obj;
316

  
317
		if (other.nbrOfType() != this.nbrOfType()) {
318
			return false;
319
		}
320
		return (Arrays.equals(freqs, other.getFreq()) && Arrays.equals(getForms(), other.getForms()));
321
	}
322

  
323
	/**
324
	 * The corpus or subcorpus this lexicon is build on.
325
	 * 
326
	 * @return the corpus
327
	 */
328
	public Corpus getCorpus() {
329
		return corpus;
330
	}
331

  
332
	public String getDetails() {
333
		return  this.corpus.getName() + " " + this.pProperty.getName(); //$NON-NLS-1$
334
	}
335

  
336
	//TODO: move this into a Lexicon chart renderer
337
	//	/**
338
	//	 * Draw a pareto graphic with this frequency list and record it into the
339
	//	 * provided filename into svg format.
340
	//	 *
341
	//	 * @param file where to save the pareto graphic.
342
	//	 * @return the pareto graphic
343
	//	 * @throws StatException if anything goes wrong.
344
	//	 */
345
	//	public void getParetoGraphic(File file) throws StatException {
346
	//		String rName = asVector().getSymbol();
347
	//		String expr = "pareto(" + rName + ")"; //$NON-NLS-1$ //$NON-NLS-2$
348
	//		try {
349
	//			RWorkspace.getRWorkspaceInstance().plot(file, expr, RDevice.SVG);
350
	//		} catch (Exception e) {
351
	//			throw new StatException(e);
352
	//		}
353
	//	}
354

  
355
	/**
356
	 * The dif ferent types in the lexicon, the type at the index <code>j</code>
357
	 * of this array have the frequency at index <code>j</code> in the array
358
	 * returned by {@link #getFreq()}.
359
	 * 
360
	 * @return types as an array of <code>String</code>
361
	 */
362
	public String[] getForms() {
363
		if (forms == null) {
364
			if(ids == null) {
365
				return new String[0];
366
			}
367
			try {
368
				forms = CorpusManager.getCorpusManager().getCqiClient().id2Str(pProperty.getQualifiedName(), ids);
369
			} catch (Exception e) {
370
				// TODO Auto-generated catch block
371
				org.txm.utils.logger.Log.printStackTrace(e);
372
			}
373
		}
374
		return forms;
375
	}
376

  
377
	/**
378
	 * The dif ferent types in the lexicon, the type at the index <code>j</code>
379
	 * of this array have the frequency at index <code>j</code> in the array
380
	 * returned by {@link #getFreq()}.
381
	 *
382
	 * @param number the number
383
	 * @return types as an array of <code>String</code>
384
	 */
385
	public String[] getForms(int number) {
386
		//System.out.println("Lexicon("+this.property+" get forms. number="+number+", ids len="+ids.length);
387
		if (forms == null) {
388
			try {
389
				number = Math.min(number, ids.length);
390
				if (number <= 0) {
391
					return new String[0];
392
				}
393
				int[] subpositions = new int[number];
394
				System.arraycopy(ids, 0, subpositions, 0, number);
395
				return CorpusManager.getCorpusManager().getCqiClient().id2Str(pProperty.getQualifiedName(), subpositions);
396
			} catch (Exception e) {
397
				// TODO Auto-generated catch block
398
				org.txm.utils.logger.Log.printStackTrace(e);
399
				return null;
400
			}
401
		} else {
402
			number = Math.min(number, ids.length);
403
			if (number <= 0) {
404
				return new String[0];
405
			}
406
			String[] subforms = new String[number];
407
			System.arraycopy(ids, 0, subforms, 0, number);
408
			return subforms;
409
		}
410
	}
411

  
412
	/**
413
	 * The dif ferent frequencies in the lexicon. See {@link #getForms()}.
414
	 * 
415
	 * @return frequencies as an array of <code>int</code>
416
	 */
417
	public int[] getFreq() {
418
		return freqs;
419
	}
420

  
421
	/**
422
	 * return the ids of the entries.
423
	 *
424
	 * @return types as an array of <code>String</code>
425
	 */
426
	public int[] getIds() {
427
		return ids;
428
	}
429

  
430
	public String getName() {
431
		try {
432
			return LexiconCoreMessages.RESULT_TYPE + ": " + this.corpus.getSimpleName() + ": " + this.getSimpleName();	
433
		}
434
		catch(Exception e) {
435
		}
436
		return ""; //$NON-NLS-1$
437
	}
438

  
439
	/**
440
	 * The property this lexicon is build on.
441
	 * 
442
	 * @return the property
443
	 */
444
	public Property getProperty() {
445
		return pProperty;
446
	}
447

  
448
	public String getSimpleName() {
449
		try {
450
			return this.getProperty().getName();
451
		}
452
		catch(Exception e) {
453
		}
454
		return "";
455
	}
456

  
457
	/**
458
	 * Gets the symbol.
459
	 *
460
	 * @return the symbol
461
	 */
462
	public String getSymbol() {
463
		return this.symbol;
464
	}
465

  
466
	/**
467
	 * Hack frequencies using a map to set forms and frequencies
468
	 *
469
	 * @param corpus the corpus
470
	 * @param pProperty the property
471
	 * @param map the map
472
	 * {@link Corpus#getLexicon(Property)} or
473
	 * {@link Subcorpus#getLexicon(Property)}.
474
	 */
475
	public boolean hack(Map<String, Integer> map) {
476
		if (map.size() != forms.length) return false;
477

  
478
		//super(corpus);
479
		int size = map.size();
480
		int[] freqs = new int[size];
481
		String[] forms = map.keySet().toArray(new String[] {});
482
		for (int i = 0; i < forms.length; i++) {
483
			freqs[i] = map.get(forms[i]);
484
		}
485

  
486
		this.freqs = freqs;
487
		return true;
488
	}
489

  
490
	/**
491
	 * Protected on purpose: should be accessed through others initializer.
492
	 *
493
	 * @param corpus the corpus
494
	 * @param property the property
495
	 * @param freq the freq
496
	 * @param ids the ids
497
	 * {@link Corpus#getLexicon(Property)} or
498
	 * {@link Subcorpus#getLexicon(Property)}.
499
	 */
500
	protected void init(TXMResult corpus, Property property, int[] freq, int[] ids) {
501
		if (freq.length != ids.length) {
502
			throw new IllegalArgumentException(LexiconCoreMessages.Lexicon_0);
503
		}
504
		this.freqs = freq;
505
		this.ids = ids;
506
		this.forms = null;
507
		this.pProperty = property;
508
		this.corpus = (Corpus) corpus;
509
	}
510

  
511

  
512
	/**
513
	 * Number of tokens (sum of all the frequencies) in the corpus.
514
	 * 
515
	 * @return the size of the corpus or subcorpus.
516
	 */
517
	public int nbrOfToken() {
518
		if (numberOfTokens <= 0) {
519
			computeNumberOfTokens();
520
		}
521
		return numberOfTokens;
522
	}
523

  
524

  
525
	/**
526
	 * Number of dif ferent types in the frequency list.
527
	 * 
528
	 * @return number of types in the corpus or subcorpus.
529
	 */
530
	public int nbrOfType() {
531
		return freqs.length;
532
	}
533

  
534
	public void setParameters(Property property) {
535
		this.pProperty = property;
536
	}
537

  
538
	@Override
539
	public boolean setParameters(TXMParameters parameters) {
540
		try {
541
			Property p = (Property) parameters.get("properties");
542
			this.setParameters(p);
543
		} catch (Exception e) {
544
			Log.printStackTrace(e);
545
			return false;
546
		}
547
		return true;
548
	}
549

  
550
	/**
551
	 * Sets the symbol.
552
	 *
553
	 * @param symbol the new symbol
554
	 */
555
	public void setSymbol(String symbol) {
556
		this.symbol = symbol;
557
	}
558

  
559

  
560
	@Override
561
	public String toString() {
562
		return LexiconCoreMessages.Lexicon_3 + getName();
563
	}
564

  
565
	/**
566
	 * To txt.
567
	 *
568
	 * @param outfile the outfile
569
	 * @param encoding the encoding
570
	 * @param colseparator the colseparator
571
	 * @param txtseparator the txtseparator
572
	 * @return true, if successful
573
	 */
574
	@Deprecated
575
	public boolean toTxt(File outfile, String encoding, String colseparator, String txtseparator) {
576
		// NK: writer declared as class attribute to perform a clean if the operation is interrupted
577
		// OutputStreamWriter writer;
578
		try {
579
			this.writer = new OutputStreamWriter(new FileOutputStream(outfile),
580
					encoding); 
581
		} catch (UnsupportedEncodingException e1) {
582
			org.txm.utils.logger.Log.printStackTrace(e1);
583
			return false;
584
		} catch (FileNotFoundException e1) {
585
			org.txm.utils.logger.Log.printStackTrace(e1);
586
			return false;
587
		}
588

  
589
		try {
590
			writer.write(this.dump(colseparator, txtseparator));
591
			writer.close();
592
		} catch (IOException e) {
593
			org.txm.utils.logger.Log.printStackTrace(e);
594
			return false;
595
		}
596

  
597
		return true;
598
	}
599

  
600
	public void setProperty(Property property) {
601
		this.pProperty = property;
602
	}
603

  
604

  
605
}
tmp/org.txm.lexicon.core/src/org/txm/lexicon/core/functions/Lexicon.java (revision 619)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2016-09-19 10:31:00 +0200 (Mon, 19 Sep 2016) $
25
// $LastChangedRevision: 3298 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.lexicon.core.functions;
29

  
30
import java.io.File;
31
import java.io.FileNotFoundException;
32
import java.io.FileOutputStream;
33
import java.io.IOException;
34
import java.io.OutputStreamWriter;
35
import java.io.UnsupportedEncodingException;
36
import java.util.Arrays;
37
import java.util.Map;
38

  
39
import org.eclipse.core.runtime.IProgressMonitor;
40
import org.txm.core.messages.TXMCoreMessages;
41
import org.txm.core.results.TXMParameters;
42
import org.txm.core.results.TXMResult;
43
import org.txm.lexicon.core.messages.LexiconCoreMessages;
44
import org.txm.searchengine.cqp.ICqiClient;
45
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
46
import org.txm.searchengine.cqp.corpus.Corpus;
47
import org.txm.searchengine.cqp.corpus.CorpusManager;
48
import org.txm.searchengine.cqp.corpus.MainCorpus;
49
import org.txm.searchengine.cqp.corpus.Property;
50
import org.txm.searchengine.cqp.corpus.Subcorpus;
51
import org.txm.searchengine.cqp.corpus.query.Query;
52
import org.txm.statsengine.core.StatException;
53
import org.txm.statsengine.core.data.Vector;
54
import org.txm.statsengine.r.core.data.VectorImpl;
55
import org.txm.utils.logger.Log;
56

  
57
// TODO: Auto-generated Javadoc
58
//  TODO should be put into stat.data package ?
59
/**
60
 * Represents a frequency list computed from a {@link Corpus} (or a
61
 * {@link Subcorpus}) and a {@link Property}.
62
 *
63
 * @author sloiseau
64
 */
65
public class Lexicon extends TXMResult {
66

  
67
	/** Counter appended to {@link #prefixR} to build the R symbol names; incremented by {@link #asVector()}. */
68
	protected static int nolex = 1;
69

  
70
	/** Prefix of the R symbol names built by {@link #asVector()}. */
71
	protected static String prefixR = "Lexicon_"; //$NON-NLS-1$
72

  
73
	/** The forms (string values) of the entries; null until resolved from the ids by {@link #getForms()}. */
74
	private String[] forms;
75

  
76
	/** The frequencies of the entries, aligned by index with the ids. */
77
	private int[] freqs;
78

  
79
	/** The identifiers of the entries, used to query their string values from the search engine. */
80
	private int[] ids;
81

  
82
	/** The sum of the frequencies, computed on demand by {@link #nbrOfToken()}; -1 until computed. */
83
	int numberOfTokens = -1;
84

  
85
	/** The property this lexicon is built on. */
86
	private Property pProperty;
87

  
88
	/** The symbol of the last vector created by {@link #asVector()}, or the value given to {@link #setSymbol(String)}. */
89
	private String symbol;
90

  
91
	/** The writer used by the text export; kept as a field so it can be cleaned if the export is interrupted. */
92
	private OutputStreamWriter writer;
93

  
94
	/** The corpus this lexicon is built on. */
	private Corpus corpus;
95

  
96
//	/**
97
//	 * Find or build a lexicon given a Corpus (MainCorpus or SubCorpus).
98
//	 * 
99
//	 * @param corpus
100
//	 * @param property
101
//	 * @return a Lexicon. May return null if the lexicon forms or freqs are null.
102
//	 * @throws Exception 
103
//	 */
104
//	public static Lexicon getLexicon(Corpus corpus, Property property) throws Exception {
105
//		HashSet<Object> results = corpus.getStoredData(Lexicon.class);
106
//		for (Object result : results) {
107
//			Lexicon lex = (Lexicon)result;
108
//			if (lex.getProperty().equals(property)) {
109
//				return lex;
110
//			}
111
//		}
112
//
113
//		Lexicon lex = new Lexicon(corpus);
114
//		lex.setParameters(property);
115
//		if (lex.compute(null) && lex.getForms() != null && lex.getFreq() != null) {
116
//			corpus.storeData(lex);
117
//			return lex;
118
//		} else {
119
//			return null;
120
//		}
121
//	}
122

  
123
	/**
	 * Creates a lexicon attached to a corpus. The property must be set before
	 * the lexicon can be computed, see {@link #canCompute()}.
	 *
	 * @param corpus the corpus the lexicon is built on; also passed to the parent constructor
	 */
	public Lexicon(Corpus corpus) {
		super(corpus);
		this.corpus = corpus;
	}
127

  
128
	
129
	@Override
130
	public boolean saveParameters() throws Exception {
131
		// TODO Auto-generated method stub
132
		return true;
133
	}
134

  
135
	@Override
136
	public boolean loadParameters() throws Exception {
137
		// TODO Auto-generated method stub
138
		return true;
139
	}
140

  
141
	@Override
142
	public void clean() {
143
		// TODO Auto-generated method stub
144
		
145
	}
146

  
147
	@Override
148
	public boolean canCompute() throws Exception {
149
		return corpus != null && pProperty != null;
150
	}
151

  
152
	@Override
153
	protected boolean _compute() throws Exception {
154
		if (corpus instanceof MainCorpus) {
155
			return computeWithMainCorpus((MainCorpus)corpus, pProperty, monitor);
156
		}
157
		else if (corpus instanceof Subcorpus) {
158
			return computewithSubCorpus((Subcorpus)corpus, pProperty, monitor);
159
		}
160
		else {
161
			System.out.println("Error: Lexicon parent is neither a Maincorpus nor a Subcorpus.");
162
			return false;
163
		}
164
	}
165
	
166
	/**
167
	 * Gets the lexicon relative to a given property.
168
	 * 
169
	 * @param property
170
	 *            the property
171
	 * 
172
	 * @return the lexicon
173
	 * 
174
	 * @throws CqiClientException
175
	 *             the cqi client exception
176
	 */
177
	protected boolean computeWithMainCorpus(MainCorpus corpus, Property property, IProgressMonitor monitor) throws CqiClientException {
178
		// System.out.println("in "+this.getCqpId()+" look for cached lexicon "+property);
179
		// System.out.println("not found");
180
		this.subTask("Computing lexicon size...");
181
		Log.finest(TXMCoreMessages.LEXICON + corpus.getName());
182
		int lexiconSize;
183
		try {
184
			lexiconSize = CorpusManager.getCorpusManager().getCqiClient().lexiconSize(property.getQualifiedName());
185
		} catch (Exception e) {
186
			throw new CqiClientException(e);
187
		}
188

  
189
		int[] ids = new int[lexiconSize];
190
		for (int i = 0; i < ids.length; i++) {
191
			ids[i] = i;
192
		}
193

  
194
		int[] freqs;
195
		try {
196
			this.subTask("Computing lexicon frequencies...");
197
			freqs = CorpusManager.getCorpusManager().getCqiClient().id2Freq(property.getQualifiedName(), ids);
198
		} catch (Exception e) {
199
			throw new CqiClientException(e);
200
		}
201

  
202
		init(corpus, property, freqs, ids);
203
		return true;
204
	}
205

  
206
	/**
207
	 * 
208
	 * @param corpus
209
	 * @param property
210
	 * @param monitor
211
	 * @return
212
	 * @throws CqiClientException
213
	 */
214
	protected boolean computewithSubCorpus(Subcorpus corpus, Property property, IProgressMonitor monitor) throws CqiClientException {
215

  
216
		//System.out.println("not found");
217
		Log.finest(TXMCoreMessages.SUBCORPUS_LEXICON + corpus.getName());
218
		long start = System.currentTimeMillis();
219
		int[][] fdist = null;
220
		Subcorpus tmp = null;
221
		try {
222
			this.subTask("Computing lexicon frequencies...");
223
			tmp = corpus.createSubcorpus(new Query("[]"), "S"+corpus.getNextSubcorpusCounter(), true); //$NON-NLS-1$
224
			if (tmp != null) {
225
				fdist = CorpusManager.getCorpusManager().getCqiClient().fdist1(
226
						tmp.getQualifiedCqpId(), 0,
227
						ICqiClient.CQI_CONST_FIELD_MATCH, property.getName());
228

  
229
				corpus.dropSubcorpus(tmp); // drop the subcorpus only if correctly created
230
			}
231
			//System.out.println("nb lines: "+fdist.length);
232
		} catch (Exception e) {
233
			throw new CqiClientException(e);
234
		} finally {
235
			if (tmp != null) {
236
				try {corpus.dropSubcorpus(tmp);}
237
				catch (Exception e2) {}
238
			}
239
		}
240
		int lexiconSize = fdist.length;
241

  
242
		int[] freqs = new int[lexiconSize];
243
		int[] ids = new int[lexiconSize];
244
		for (int i = 0; i < fdist.length; i++) {
245
			ids[i] = fdist[i][0];
246
			freqs[i] = fdist[i][1];
247
		}
248

  
249
		init(corpus, property, freqs, ids);
250
		return true;
251
	}
252
	
253
	
254
	/**
255
	 * Convert the Lexicon into a Vector object.
256
	 *
257
	 * @return the vector
258
	 * @throws StatException the stat exception
259
	 */
260
	public Vector asVector() throws StatException {
261
		String symbol = prefixR + (nolex++);
262
		VectorImpl v = new VectorImpl(freqs, symbol);
263
		v.setRNames(getForms());
264
		this.symbol = v.getSymbol();
265
		return v;
266
	}
267

  
268

  
269
	/**
270
	 * Compute number of tokens. / this.nbr
271
	 */
272
	private void computeNumberOfTokens() {
273
		numberOfTokens = 0;
274
		for (int i = 0; i < freqs.length; i++) {
275
			numberOfTokens += freqs[i];
276
			// System.out.println(numberOfTokens);
277
			// if (freqs[i] != 1) System.out.println(freqs[i]);
278
		}
279
	}
280

  
281

  
282

  
283
	@Override
284
	public boolean delete() {
285
		if (corpus != null) {
286
			corpus.removeData(this);
287
		}
288
		return true;
289
	}
290

  
291
	/**
292
	 * Dump lexicon forms and frequencies in a String.
293
	 *
294
	 * @param col the col
295
	 * @param txt the txt
296
	 * @return the string
297
	 */
298
	public String dump(String col, String txt) {
299
		StringBuffer buffer = new StringBuffer();
300
		getForms();
301
		for (int i = 0; i < forms.length; i++) {
302
			buffer.append(txt+ forms[i].replace(txt, txt+txt) + txt + col + freqs[i] + "\n"); //$NON-NLS-1$ 
303
		}
304
		return buffer.toString();
305
	}
306

  
307
	/* (non-Javadoc)
308
	 * @see java.lang.Object#equals(java.lang.Object)
309
	 */
310
	@Override
311
	public boolean equals(Object obj) {
312
		if (!(obj instanceof Lexicon)) {
313
			return false;
314
		}
315
		Lexicon other = (Lexicon) obj;
316

  
317
		if (other.nbrOfType() != this.nbrOfType()) {
318
			return false;
319
		}
320
		return (Arrays.equals(freqs, other.getFreq()) && Arrays.equals(getForms(), other.getForms()));
321
	}
322

  
323
	/**
324
	 * The corpus or subcorpus this lexicon is build on.
325
	 * 
326
	 * @return the corpus
327
	 */
328
	public Corpus getCorpus() {
329
		return corpus;
330
	}
331

  
332
	public String getDetails() {
333
		return  this.corpus.getName() + " " + this.pProperty.getName(); //$NON-NLS-1$
334
	}
335

  
336
	//TODO: move this into a Lexicon chart renderer
337
	//	/**
338
	//	 * Draw a pareto graphic with this frequency list and record it into the
339
	//	 * provided filename into svg format.
340
	//	 *
341
	//	 * @param file where to save the pareto graphic.
342
	//	 * @return the pareto graphic
343
	//	 * @throws StatException if anything goes wrong.
344
	//	 */
345
	//	public void getParetoGraphic(File file) throws StatException {
346
	//		String rName = asVector().getSymbol();
347
	//		String expr = "pareto(" + rName + ")"; //$NON-NLS-1$ //$NON-NLS-2$
348
	//		try {
349
	//			RWorkspace.getRWorkspaceInstance().plot(file, expr, RDevice.SVG);
350
	//		} catch (Exception e) {
351
	//			throw new StatException(e);
352
	//		}
353
	//	}
354

  
355
	/**
356
	 * The dif ferent types in the lexicon, the type at the index <code>j</code>
357
	 * of this array have the frequency at index <code>j</code> in the array
358
	 * returned by {@link #getFreq()}.
359
	 * 
360
	 * @return types as an array of <code>String</code>
361
	 */
362
	public String[] getForms() {
363
		if (forms == null) {
364
			if(ids == null) {
365
				return new String[0];
366
			}
367
			try {
368
				forms = CorpusManager.getCorpusManager().getCqiClient().id2Str(pProperty.getQualifiedName(), ids);
369
			} catch (Exception e) {
370
				// TODO Auto-generated catch block
371
				org.txm.utils.logger.Log.printStackTrace(e);
372
			}
373
		}
374
		return forms;
375
	}
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff