Revision 143

tmp/org.txm.lexicaltable.core/src/org/txm/functions/intertextualdistance/package.html (revision 143)
1
<html>
2
<body>
3
<p>Prototype of Intertextual distance</p>
4
</body>
5
</html>
0 6

  
tmp/org.txm.lexicaltable.core/src/org/txm/functions/intertextualdistance/InterTextDistance.java (revision 143)
1
package org.txm.functions.intertextualdistance;
2
/**
3
 * @author mdecorde lvanni
4
 */
5

  
6
import java.io.File;
7

  
8
import org.rosuda.REngine.REXP;
9
import org.rosuda.REngine.REXPMismatchException;
10
import org.txm.HasResults;
11
import org.txm.functions.Function;
12
import org.txm.functions.TXMResult;
13
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
14
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException;
15
import org.txm.stat.StatException;
16
import org.txm.stat.data.LexicalTable;
17
import org.txm.stat.engine.r.RWorkspace;
18

  
19
public class InterTextDistance extends Function implements TXMResult {
20
	
21
	LexicalTable table;
22
	String method = "euclidian"; //$NON-NLS-1$
23
	
24
	public InterTextDistance(LexicalTable table) {
25
		this.table = table;
26
	}
27
	public boolean compute() throws CqiClientException, InvalidCqpIdException, REXPMismatchException, StatException {
28
//		Corpus corpus = CorpusManager.getCorpusManager().getCorpus("VOEUX");
29
//		String method = "euclidean"; // "euclidean"’, ‘"maximum"’, ‘"manhattan"’, ‘"canberra"’, ‘"binary"’ or ‘"minkowski"’
30
//
31
//		StructuralUnit text_su = corpus.getStructuralUnit("text");
32
//		StructuralUnitProperty text_prop = text_su.getProperty("loc");
33
//		Partition textes = corpus.createPartition(text_su, text_prop);
34
//
35
//		Property prop = corpus.getProperty("word") ;
36
		//LexicalTable table = textes.getLexicalTable(prop, 2);
37

  
38
		System.out.println(table.getSymbol()); // LexicalTable41
39

  
40
		RWorkspace r = RWorkspace.getRWorkspaceInstance() ;	// get the R connection
41
		REXP rresult = r.eval("as.matrix(dist(t("+table.getSymbol()+"), method=\"euclidian\"))"); //$NON-NLS-1$ //$NON-NLS-2$
42
		double[][] result = rresult.asDoubleMatrix();
43
		
44
		for (int i = 0 ; i < result.length ; i++) {
45
			for (int j = 0; j < result[i].length ; j++) {
46
				System.out.print(" "+result[i][j]); //$NON-NLS-1$
47
			}
48
			System.out.println();
49
		}
50
		return true;
51
	}
52
	
53
	public LexicalTable getTable() {
54
		return table;
55
	}
56
	
57
	public String getLabel() {
58
		return method;
59
	}
60
	@Override
61
	public void clean() {
62
		// TODO Auto-generated method stub
63
		
64
	}
65
	@Override
66
	public boolean toTxt(File outfile, String encoding, String colseparator,
67
			String txtseparator) throws Exception {
68
		// TODO Auto-generated method stub
69
		return false;
70
	}
71

  
72
	@Override
73
	public boolean delete() {
74
		return false;
75
	}
76
	@Override
77
	public HasResults getParent() {
78
		return table;
79
	}
80
}
0 81

  
tmp/org.txm.lexicaltable.core/src/org/txm/lexicaltable/core/Activator.java (revision 143)
1
package org.txm.lexicaltable.core;
2

  
3
import org.eclipse.ui.plugin.AbstractUIPlugin;
4
import org.osgi.framework.BundleContext;
5

  
6
/**
7
 * The activator class controls the plug-in life cycle
8
 */
9
public class Activator extends AbstractUIPlugin {
10

  
11
	// The plug-in ID
12
	public static final String PLUGIN_ID = "org.txm.lexicaltable.core"; //$NON-NLS-1$
13

  
14
	// The shared instance
15
	private static Activator plugin;
16
	
17
	/**
18
	 * The constructor
19
	 */
20
	public Activator() {
21
	}
22

  
23
	/*
24
	 * (non-Javadoc)
25
	 * @see org.eclipse.ui.plugin.AbstractUIPlugin#start(org.osgi.framework.BundleContext)
26
	 */
27
	public void start(BundleContext context) throws Exception {
28
		super.start(context);
29
		plugin = this;
30
	}
31

  
32
	/*
33
	 * (non-Javadoc)
34
	 * @see org.eclipse.ui.plugin.AbstractUIPlugin#stop(org.osgi.framework.BundleContext)
35
	 */
36
	public void stop(BundleContext context) throws Exception {
37
		plugin = null;
38
		super.stop(context);
39
	}
40

  
41
	/**
42
	 * Returns the shared instance
43
	 *
44
	 * @return the shared instance
45
	 */
46
	public static Activator getDefault() {
47
		return plugin;
48
	}
49

  
50
}
0 51

  
tmp/org.txm.lexicaltable.core/src/org/txm/lexicaltable/functions/LexicalTableBuilder.java (revision 143)
1
package org.txm.lexicaltable.functions;
2

  
3
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
4
import org.txm.searchengine.cqp.corpus.Partition;
5
import org.txm.searchengine.cqp.corpus.Property;
6
import org.txm.stat.StatException;
7
import org.txm.stat.data.LexicalTable;
8
import org.txm.stat.engine.r.data.LexicalTableImpl;
9

  
10
public class LexicalTableBuilder {
11
	/**
12
	 * Create a lexical table for this partition given an analysis property.
13
	 * Lexical table are cached and recycled. In order to drop from memory a
14
	 * lexical table, use {@link #dropLexicalTable(Property)}.
15
	 *
16
	 * @param analysisProperty the analysis property
17
	 * @param Fmin the fmin
18
	 * @return the lexical table
19
	 * @throws StatException the stat exception
20
	 * @throws CqiClientException the cqi client exception
21
	 * @author sloiseau
22
	 */
23
	public static LexicalTable getLexicalTable(Partition partition, Property analysisProperty, int Fmin)
24
	throws StatException, CqiClientException {
25
		// System.out.println("CL : test si déja existant ds cache");
26
		/*if (lexicalTables.containsKey(analysisProperty.getName())) {
27
			if (lexicalTables.get(analysisProperty.getName()).getConstructorFmin() == Fmin)
28
				return lexicalTables.get(analysisProperty.getName());
29
		}*/
30
		// System.out.println("CL : fait un new lexique");
31
		LexicalTable t = LexicalTableImpl.getLexicalTable(partition,
32
				analysisProperty, Fmin);
33

  
34
		// System.out.println("CL : put results ds le cache");
35
		partition.storeResult(t);
36
		return t;
37
	}
38
}
0 39

  
tmp/org.txm.lexicaltable.core/src/org/txm/stat/data/LexicalTable.java (revision 143)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2016-09-19 10:31:00 +0200 (Mon, 19 Sep 2016) $
25
// $LastChangedRevision: 3298 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.stat.data;
29

  
30
import java.io.File;
31
import java.util.List;
32

  
33
import org.rosuda.REngine.REXPMismatchException;
34
import org.txm.HasResults;
35
import org.txm.searchengine.cqp.corpus.Corpus;
36
import org.txm.searchengine.cqp.corpus.Partition;
37
import org.txm.searchengine.cqp.corpus.Property;
38
import org.txm.stat.StatException;
39
import org.txm.stat.engine.r.RWorkspaceException;
40

  
41
// TODO: Auto-generated Javadoc
42
/**
43
 * A LexicalTable is a special kind of {@link ContingencyTable} extracted from a
44
 * corpora given a {@link Partition} (the columns) and a {@link Property} (the
45
 * rows).
46
 * 
47
 * Can be edited, rows can be deleted cols can be deleted
48
 * 
49
 * can be exported of imported from/to a file
50
 * 
51
 * @author sloiseau
52
 */
53
public interface LexicalTable extends ContingencyTable, HasResults {
54

  
55
	/**
56
	 * Get the partition this lexical table is bound to.
57
	 * 
58
	 * @return the partition
59
	 */
60
	abstract Partition getPartition();
61

  
62
	/**
63
	 * Get the property this lexical table is bound to.
64
	 * 
65
	 * @return the property.
66
	 */
67
	abstract Property getProperty();
68

  
69
	/**
70
	 * Insert a column representing the reference corpus
71
	 * @param symbol
72
	 */
73
	abstract void setReference(String symbol);
74
	
75
	/**
76
	 * The name of the lexical table, conventionned.
77
	 * 
78
	 * @return the name of the LexicalTable.
79
	 */
80
	abstract String getName();
81

  
82
	/**
83
	 * Gets the copy.
84
	 *
85
	 * @return the copy
86
	 */
87
	abstract LexicalTable getCopy();
88

  
89
	/**
90
	 * Sort.
91
	 *
92
	 * @param colindex the colindex
93
	 * @param reverse the reverse
94
	 */
95
	abstract void sort(int colindex, Boolean reverse);
96

  
97
	/**
98
	 * Sort row names.
99
	 *
100
	 * @param reverse the reverse
101
	 */
102
	public void sortRowNames(Boolean reverse);
103

  
104
	/**
105
	 * Removes the col.
106
	 *
107
	 * @param col the col
108
	 * @param checkEmptyLines the check empty lines
109
	 */
110
	abstract void removeCol(int col, boolean checkEmptyLines);
111

  
112
	/**
113
	 * Removes the cols.
114
	 *
115
	 * @param cols the cols
116
	 */
117
	abstract void removeCols(List<Integer> cols);
118

  
119
	/**
120
	 * Removes the cols.
121
	 *
122
	 * @param coltodelete the coltodelete
123
	 */
124
	abstract void removeCols(int[] coltodelete);
125

  
126
	/**
127
	 * Removes the row.
128
	 *
129
	 * @param row the row
130
	 */
131
	abstract void removeRow(int row);
132

  
133
	/**
134
	 * Removes the rows.
135
	 *
136
	 * @param row the row
137
	 */
138
	abstract void removeRows(List<Integer> row);
139

  
140
	/**
141
	 * Removes the rows.
142
	 *
143
	 * @param selectionIndices the selection indices
144
	 */
145
	abstract void removeRows(int[] selectionIndices);
146

  
147
	/**
148
	 * Sets the.
149
	 *
150
	 * @param row the row
151
	 * @param col the col
152
	 * @param value the value
153
	 */
154
	abstract void set(int row, int col, double value);
155

  
156
	/**
157
	 * Gets the fmin.
158
	 *
159
	 * @return the fmin
160
	 */
161
	abstract int getFmin();
162
	
163
	/**
164
	 * Gets the constructor fmin.
165
	 *
166
	 * @return the constructor fmin
167
	 */
168
	abstract int getConstructorFmin();
169

  
170
	/**
171
	 * Gets the fmax.
172
	 *
173
	 * @return the fmax
174
	 */
175
	abstract int getFmax();
176

  
177
	/**
178
	 * Removes the rows.
179
	 *
180
	 * @param i the i
181
	 * @param j the j
182
	 */
183
	abstract void removeRows(int i, int j);
184

  
185
	/**
186
	 * Cut.
187
	 * TODO: must be done after calling copy(), check why.
188
	 *
189
	 * @param nlines the number of lines to keep
190
	 */
191
	abstract void cut(int nlines);
192

  
193
	/**
194
	 * Filter.
195
	 *
196
	 * @param freqs the freqs
197
	 * @param nlines the nlines
198
	 * @param fmin the fmin
199
	 */
200
	abstract void filter(int nlines, int fmin) throws Exception;
201

  
202
	/**
203
	 * Sets the order.
204
	 *
205
	 * @param neworder the neworder
206
	 * @param reverse the reverse
207
	 */
208
	abstract public void setOrder(List<Integer> neworder, Boolean reverse);
209

  
210
	/**
211
	 * Gets the freqs.
212
	 *
213
	 * @return the freqs
214
	 */
215
	abstract List<Integer> getFreqs();
216

  
217
	/**
218
	 * Export data.
219
	 *
220
	 * @param file the file
221
	 * @param colseparator the colseparator
222
	 * @param txtseparator the txtseparator
223
	 */
224
	abstract void exportData(File file, String colseparator, String txtseparator);
225
	abstract void toTxt(File outfile, String encoding, String colseparator, String txtseparator);
226
	
227
	/**
228
	 * Import data.
229
	 *
230
	 * @param file the file
231
	 */
232
	abstract boolean importData(File file) throws RWorkspaceException, REXPMismatchException;
233

  
234
	/**
235
	 * Gets the corpus.
236
	 *
237
	 * @return the corpus
238
	 */
239
	abstract Corpus getCorpus();
240

  
241
	/**
242
	 * Sets the corpus.
243
	 *
244
	 * @param corpus the new corpus
245
	 */
246
	abstract void setCorpus(Corpus corpus);
247

  
248
	abstract void sortByFreqs(boolean reverse) throws Exception;
249

  
250
	@Override
251
	abstract Vector getRowMarginsVector() throws StatException;
252
	
253
	@Override
254
	abstract Vector getColMarginsVector() throws StatException;
255
	
256
	@Override
257
	abstract int[] getRowMargins() throws Exception;
258
	
259
	@Override
260
	abstract int[] getColMargins() throws Exception;
261
	
262
	
263
	/**
264
	 * Gets the number of rows.
265
	 * @return the number of rows
266
	 */
267
	public abstract int getRowsCount();
268

  
269
	
270
	/**
271
	 * Gets the number of columns.
272
	 * @return the number of columns
273
	 */
274
	public abstract int getColumnsCount();
275

  
276
	abstract HasResults getParent();
277
	
278
	/**
279
	 * When the Lexical table is computed with nor partition nor corpus
280
	 * @param i
281
	 */
282
	public abstract void setParent(HasResults i);
283

  
284
}
0 285

  
tmp/org.txm.lexicaltable.core/src/org/txm/stat/engine/r/data/LexicalTableImpl.java (revision 143)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2016-09-19 10:31:00 +0200 (Mon, 19 Sep 2016) $
25
// $LastChangedRevision: 3298 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.stat.engine.r.data;
29

  
30
import java.io.BufferedReader;
31
import java.io.File;
32
import java.io.FileInputStream;
33
import java.io.FileReader;
34
import java.io.IOException;
35
import java.io.InputStreamReader;
36
import java.util.ArrayList;
37
import java.util.Collection;
38
import java.util.HashMap;
39
import java.util.List;
40
import java.util.Map;
41

  
42
import org.txm.HasResults;
43
import org.txm.Messages;
44
import org.txm.functions.index.Index;
45
//import org.txm.functions.queryindex.*;
46
import org.txm.functions.index.Line;
47
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
48
import org.txm.searchengine.cqp.corpus.Corpus;
49
import org.txm.searchengine.cqp.corpus.Lexicon;
50
import org.txm.searchengine.cqp.corpus.Partition;
51
import org.txm.searchengine.cqp.corpus.Property;
52
import org.txm.stat.StatException;
53
import org.txm.stat.data.LexicalTable;
54
import org.txm.stat.data.QuantitativeDataStructure;
55
import org.txm.stat.data.Vector;
56
import org.txm.stat.engine.r.RException;
57
import org.txm.stat.engine.r.RWorkspace;
58
import org.txm.stat.engine.r.RWorkspaceException;
59

  
60
import cern.colt.matrix.DoubleFactory2D;
61
import cern.colt.matrix.DoubleMatrix2D;
62

  
63
// TODO: Auto-generated Javadoc
64
/**
65
 * Implementation of the {@link LexicalTable} interface, wrapping a R matrix.
66
 * 
67
 * A lexical table is a contingency table representing the frequencies of
68
 * linguistic types accross several sub-parts of a corpus.
69
 * 
70
 * Each column of the lexical table stand for a parts. Each row stand for a
71
 * linguistic type. Each cell give the frequency of the corresponding unit into
72
 * the corresponding part.
73
 * 
74
 * @author Sylvain Loiseau &lt;sloiseau@ens-lsh.fr&gt;
75
 * 
76
 */
77
public class LexicalTableImpl extends ContingencyTableImpl implements
78
LexicalTable {
79

  
80
	HasResults parent;
81
	
82
	/** The partition. */
83
	private Partition partition;
84

  
85
	/** The corpus. */
86
	private Corpus corpus;
87

  
88
	/** The property. */
89
	private Property property;
90

  
91
	/** The sortedindex. */
92
	private int[] sortedindex;
93

  
94
	/** The constructor_fmin. */
95
	private int constructor_fmin;
96

  
97

  
98
	/**
	 * Creates a lexical table from a Colt matrix, bound to a partition and a
	 * property.
	 *
	 * @param matrix the matrix of values, given to the parent constructor
	 * @param partition the partition the table is bound to
	 * @param property the property the table is bound to
	 * @param formNames the form names, given to the parent constructor
	 * @param partNames the part names, given to the parent constructor
	 * @throws RWorkspaceException the r workspace exception
	 */
	private LexicalTableImpl(DoubleMatrix2D matrix, Partition partition,
			Property property, String[] formNames, String[] partNames)
					throws RWorkspaceException {
		super(matrix, formNames, partNames);

		this.property = property;
		this.partition = partition;
		this.initSortedIndex();
	}
117

  
118
	/**
	 * Creates a lexical table from a Colt matrix, bound to a property only
	 * (the partition is left unset).
	 *
	 * @param matrix the matrix of values, given to the parent constructor
	 * @param property the property the table is bound to
	 * @param formNames the form names, given to the parent constructor
	 * @param partNames the part names, given to the parent constructor
	 * @throws RWorkspaceException the r workspace exception
	 */
	private LexicalTableImpl(DoubleMatrix2D matrix, Property property,
			String[] formNames, String[] partNames) throws RWorkspaceException {
		super(matrix, formNames, partNames);
		this.property = property;
		this.initSortedIndex();
	}
135

  
136
	/**
	 * Creates a lexical table from an integer matrix, bound to a partition and
	 * a property. Delegates to the (matrix, property, names) constructor, then
	 * records the partition.
	 *
	 * @param mat the matrix of values
	 * @param partition the partition the table is bound to
	 * @param property the property the table is bound to
	 * @param array the form names
	 * @param array2 the part names
	 * @throws RWorkspaceException the r workspace exception
	 */
	public LexicalTableImpl(int[][] mat, Partition partition,
			Property property, String[] array, String[] array2) throws RWorkspaceException {
		this(mat, property, array, array2);

		// the delegate constructor does not know the partition
		this.partition = partition;
	}
151

  
152
	/**
	 * Creates a lexical table from an integer matrix, bound to a property only
	 * (the partition is left unset).
	 *
	 * @param matrix the matrix of values, given to the parent constructor
	 * @param property the property the table is bound to
	 * @param formNames the form names, given to the parent constructor
	 * @param partNames the part names, given to the parent constructor
	 * @throws RWorkspaceException the r workspace exception
	 */
	private LexicalTableImpl(int[][] matrix, Property property,
			String[] formNames, String[] partNames) throws RWorkspaceException {
		super(matrix, formNames, partNames);
		this.property = property;
		this.initSortedIndex();
	}
169

  
170
	/**
	 * Creates a lexical table from a symbol, bound to the partition and the
	 * property of another table.
	 *
	 * @param table the table whose partition and property are reused
	 * @param symbol the symbol given to the parent constructor
	 *            (presumably the name of an existing R object; confirm in ContingencyTableImpl)
	 * @throws RWorkspaceException the r workspace exception
	 */
	public LexicalTableImpl(LexicalTableImpl table, String symbol)
			throws RWorkspaceException {
		super(symbol);

		this.partition = table.getPartition();
		this.property = table.getProperty();

		this.initSortedIndex();
	}
184

  
185
	
186
	/**
	 * Creates a lexical table from a symbol, bound to no partition and no
	 * property. Note that initSortedIndex() is not called by this constructor.
	 *
	 * @param symbol the symbol given to the parent constructor
	 *            (presumably the name of an existing R object; confirm in ContingencyTableImpl)
	 * @throws RWorkspaceException the r workspace exception
	 */
	public LexicalTableImpl(String symbol)
			throws RWorkspaceException {
		super(symbol);

		// explicitly unbound
		this.partition = null;
		this.property = null;
	}
199

  
200
	public LexicalTableImpl(String symbol, Lexicon corpusLexicon, Lexicon subcorpusLexicon) throws StatException {
201
		super(symbol);
202
		RWorkspace rw = RWorkspace.getRWorkspaceInstance();
203
		Vector corpusLexiconV = corpusLexicon.asVector();
204
		Vector subcorpusLexiconV = subcorpusLexicon.asVector();
205
		
206
		rw.callFunction("lexicons2LexicalTable", new QuantitativeDataStructure[] { corpusLexiconV, subcorpusLexiconV }, symbol); //$NON-NLS-1$
207
		
208
		this.property = corpusLexicon.getProperty();
209
		this.corpus = corpusLexicon.getCorpus();
210
	}
211

  
212
	/**
213
	 * Inits the sorted index.
214
	 */
215
	public void initSortedIndex() {
216
		int ncol = this.getNColumns();
217
		sortedindex = new int[ncol];
218
		for (int i = 0; i < ncol; i++)
219
			sortedindex[i] = i;
220
	}
221

  
222
	/* (non-Javadoc)
223
	 * @see org.txm.stat.data.LexicalTable#getPartition()
224
	 */
225
	@Override
226
	public Partition getPartition() {
227
		return partition;
228
	}
229

  
230
	/* (non-Javadoc)
231
	 * @see org.txm.stat.data.LexicalTable#getProperty()
232
	 */
233
	@Override
234
	public Property getProperty() {
235
		return property;
236
	}
237

  
238
	/**
239
	 * Create a Complete Lexical Table according to a Partition and a Property.
240
	 *
241
	 * @param partition the partition
242
	 * @param analysisProperty the analysis property
243
	 * @param Fmin the fmin
244
	 * @return the lexical table
245
	 * @throws CqiClientException the cqi client exception
246
	 * @throws RWorkspaceException the r workspace exception
247
	 */
248
	public static final LexicalTable getLexicalTable(Partition partition,
249
			Property analysisProperty, int Fmin) throws CqiClientException,
250
			RWorkspaceException {
251
		//long time = System.currentTimeMillis();
252
		List<Lexicon> lexicons = new ArrayList<Lexicon>();
253
		// Set<String> allLexiconEntry = new HashSet<String>();
254
		for (int i = 0; i < partition.getNPart(); i++) {
255
			Lexicon l = partition.getParts().get(i)
256
					.getLexicon(analysisProperty);
257
			lexicons.add(l);
258
			// allLexiconEntry.addAll(Arrays.asList(l.getForms()));
259
		}
260
		//System.out.println("time lexicon build "+(System.currentTimeMillis()-time));
261
		//time = System.currentTimeMillis();
262
		// String[] entries = allLexiconEntry.toArray(new String[]{});
263
		Corpus c = partition.getCorpus();
264
		Lexicon ll = c.getLexicon(analysisProperty);
265

  
266
		ArrayList<String> filteredForms = new ArrayList<String>();
267
		//create a copy and filter line with Fmin;
268
		for (int i = 0 ; i < ll.getFreq().length ; i++) {
269
			if (ll.getFreq()[i] >= Fmin) {
270
				filteredForms.add(ll.getForms()[i]);
271
			}
272
		}
273
		//System.out.println("remove freq too low "+(System.currentTimeMillis()-time));
274
		//time = System.currentTimeMillis();
275
		Map<String, Integer> entries2index = new HashMap<String, Integer>();
276
		for (int i = 0; i < filteredForms.size(); i++) {
277
			entries2index.put(filteredForms.get(i), i);
278
		}
279

  
280
		//System.out.println("entries2index "+(System.currentTimeMillis()-time));
281
		//time = System.currentTimeMillis();
282
		int[][] mat = new int[filteredForms.size()][lexicons.size()];//DoubleFactory2D.sparse.make(filteredForms.size(), lexicons.size(), 0);
283

  
284

  
285
		Integer id= null;
286
		for (int i = 0; i < lexicons.size(); i++) {
287
			Lexicon l = lexicons.get(i);
288
			String[] ents = l.getForms();
289
			int[] freqs = l.getFreq();
290
			for (int j = 0; j < freqs.length; j++) {
291
				id = entries2index.get(ents[j]);
292
				// if (entriesFreqs[id] >= 2)
293
				if (id != null)
294
					mat[id][i] = freqs[j]; //mat.setQuick(id, i, freqs[j]);
295
			}
296
		}
297
		//System.out.println("time build matrix "+(System.currentTimeMillis()-time));
298
		//time = System.currentTimeMillis();
299
		//System.out.println("Entries size " + filteredForms.size());
300
		//System.out.println("mat size " + mat.rows());
301
		//System.out.println("mat columns " + mat.columns());
302

  
303
		LexicalTableImpl table = new LexicalTableImpl(mat, partition,
304
				analysisProperty, filteredForms.toArray(new String[]{}), partition.getPartShortNames()
305
				.toArray(new String[] {}));
306
		table.constructor_fmin = Fmin;
307
		//System.out.println("time build table lexical "+(System.currentTimeMillis()-time));
308
		return table;
309
	}
310

  
311
	/**
312
	 * Creates the lexical table impl.
313
	 *
314
	 * @param partindex the partindex
315
	 * @param symbol the symbol
316
	 * @return the lexical table
317
	 * @throws RWorkspaceException the r workspace exception
318
	 */
319
	static public LexicalTable createLexicalTableImpl(Index partindex,
320
			String symbol) throws RWorkspaceException {
321
		List<Index> vocabularies = new ArrayList<Index>();
322
		vocabularies.add(partindex);
323
		
324
		LexicalTable lt = createLexicalTableImpl(vocabularies, symbol, false);
325
		lt.setParent(partindex);
326
		return lt;
327
	}
328

  
329
	private static int getNline(File f) {
330
		try {
331
			BufferedReader reader = new BufferedReader(new FileReader(f));
332
			int i = 0;
333

  
334
			while(reader.readLine() != null) i++;
335
			return i;
336
		} catch (Exception e) { return 0; }
337

  
338
	}
339
	
340
//	/**
341
//	 * Creates the lexical table impl.
342
//	 *
343
//	 * @param partindex the partindex
344
//	 * @param symbol the symbol
345
//	 * @return the lexical table
346
//	 * @throws RWorkspaceException the r workspace exception
347
//	 */
348
//	static public LexicalTable createLexicalTableImpl(QueryIndex partindex,
349
//			String symbol) throws RWorkspaceException {
350
//		List<QueryIndex> qindexes = new ArrayList<QueryIndex>();
351
//		qindexes.add(partindex);
352
//
353
//		return createLexicalTableImpl(qindexes, symbol);
354
//	}
355
	
356
//	/**
357
//	 * Creates the lexical table impl.
358
//	 *
359
//	 * @param query indexes 
360
//	 * @param symbol the symbol
361
//	 * @param useAllOccurrences 
362
//	 * @return the lexical table
363
//	 * @throws RWorkspaceException the r workspace exception
364
//	 */
365
//	static public LexicalTable createLexicalTableImpl(
366
//		List<QueryIndex> qindexes, String symbol)
367
//					throws RWorkspaceException {
368
//
369
//		System.out.println(Messages.LexicalTableImpl_1 + qindexes);
370
//		QueryIndex partindex = qindexes.get(0);// FRIGO
371
//		if (!partindex.isComputedWithPartition())
372
//			return null;
373
//
374
//		Partition partition = partindex.getPartition();
375
//		Property property = null;
376
//		try {
377
//			property = partindex.getCorpus().getProperties().get(0);
378
//		} catch (CqiClientException e) {
379
//			// TODO Auto-generated catch block
380
//			org.txm.utils.logger.Log.printStackTrace(e);
381
//		}
382
//
383
//		HashMap<String, QueryIndexLine> alllines = new HashMap<String, QueryIndexLine>();
384
//		// merge lines of all indexes
385
//		for (QueryIndex voc : qindexes) {
386
//			for (QueryIndexLine l : voc.getLines()) {
387
//				alllines.put(l.getName(), l);
388
//			}
389
//		}
390
//
391
//		List<String> colnames = partindex.getPartnames();
392
//
393
//		Collection<QueryIndexLine> lines = alllines.values();
394
//		List<String> rownames = new ArrayList<String>(lines.size());
395
//		for (QueryIndexLine l : lines) {
396
//			rownames.add(l.getName());
397
//		}
398
//
399
//		String[] entries = new String[alllines.size()];
400
//
401
//		int[][] mat = new int[rownames.size()][colnames.size()];
402
//		int[] margins = new int[colnames.size()]; // compute margins
403
//		int i = 0;
404
//		for (QueryIndexLine l : lines) {
405
//			for (int j = 0; j < colnames.size(); j++) {
406
//				mat[i][j] = l.getFrequency(j);
407
//				margins[j] += l.getFrequency(j);
408
//			}
409
//			entries[i++] = l.toString();
410
//		}
411
//
412
//		//System.out.println("mat size : ["+(rownames.size() + extra)+"]["+colnames.size()+"]");
413
//		//System.out.println("rownames size : "+rownames.size());
414
//		//System.out.println("colnames size : "+colnames.size());
415
//		LexicalTableImpl table = new LexicalTableImpl(mat, partition, property,
416
//				rownames.toArray(new String[] {}), colnames
417
//				.toArray(new String[] {}));
418
//		table.constructor_fmin = qindexes.get(0).getFmin();
419
//		return table;
420
//	}
421
	
422

  
423
	static public LexicalTable createLexicalTableImpl(File tsvFile) throws IOException, RWorkspaceException {
424
		
425
		BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(tsvFile) , "UTF-8")); //$NON-NLS-1$
426
		String line = reader.readLine();
427
		String[] split = line.split("\t"); //$NON-NLS-1$
428
		int ncol = split.length;
429
		if (ncol <= 1) {
430
			throw new IOException(Messages.LexicalTableImpl_5+ncol+")"); //$NON-NLS-1$
431
		}
432
		
433
		int nlines = getNline(tsvFile);
434
		String[] forms = new String[nlines];
435
		int[][] freqs = new int[nlines][ncol-1];
436
		
437
		int i = 0;
438
		while(line != null) {
439
			split = line.split("\t"); //$NON-NLS-1$
440
			if (split.length == ncol) {
441
				forms[i] = split[0];
442
				for( int j = 1 ; j < ncol ; j++)
443
					freqs[i][j-1] = Integer.parseInt(split[j]);
444
			} else {
445
				throw new IOException(Messages.LexicalTableImpl_8+i+Messages.LexicalTableImpl_9+ncol+Messages.LexicalTableImpl_10);
446
			}
447
			i++;
448
			line = reader.readLine();
449
		}
450
		
451
		String[] colnames = new String[ncol-1];
452
		for( int j = 0 ; j < ncol - 1 ; j++) colnames[j] = "forms"+(j+1); //$NON-NLS-1$
453
		
454
		LexicalTable lt = LexicalTableImpl.createLexicalTable(freqs, null, forms, colnames, 1);
455
		
456
		return lt;
457
	}
458

  
459
	/**
460
	 * Creates the lexical table impl.
461
	 *
462
	 * @param vocabularies the vocabularies
463
	 * @param symbol the symbol
464
	 * @param useAllOccurrences 
465
	 * @return the lexical table
466
	 * @throws RWorkspaceException the r workspace exception
467
	 */
468
	static public LexicalTable createLexicalTableImpl(
469
			List<Index> vocabularies, String symbol, boolean useAllOccurrences)
470
					throws RWorkspaceException {
471

  
472
		System.out.println(Messages.LexicalTableImpl_1 + vocabularies);
473
		Index partindex = vocabularies.get(0);// FRIGO
474
		if (!partindex.isComputedWithPartition())
475
			return null;
476

  
477
		Partition partition = partindex.getPartition();
478
		Property property = partindex.getProperties().get(0);
479

  
480
		HashMap<String, Line> alllines = new HashMap<String, Line>();
481
		// merge lines of all indexes
482
		for (Index voc : vocabularies) {
483
			for (Line l : voc.getAllLines()) {
484
				if (alllines.containsKey(l.getSignature())) {
485
					Line ll = alllines.get(l.getSignature());
486
					int[] c1 = ll.getFrequencies();
487
					int[] c2 = l.getFrequencies();
488
					for (int i = 0; i < c1.length; i++)
489
						c2[i] += c1[i];
490
					ll.setCounts(c2, 0.0f);
491
				} else
492
					alllines.put(l.toString(), l);
493
			}
494
		}
495

  
496
		List<String> colnames = partindex.getPartnames();
497

  
498
		Collection<Line> lines = alllines.values();
499
		List<String> rownames = new ArrayList<String>(lines.size());
500
		for (Line l : lines) {
501
			rownames.add(l.toString());
502
		}
503
		int extra = 0;
504
		if(useAllOccurrences)
505
			extra = 1;
506

  
507
		String[] entries = new String[alllines.size() + extra];
508

  
509

  
510
		int[][] mat = new int[rownames.size() + extra][colnames.size()];
511
		int[] margins = new int[colnames.size()]; // compute margins
512
		int i = 0;
513
		for (Line l : lines) {
514
			for (int j = 0; j < colnames.size(); j++) {
515
				mat[i][j] = l.getFrequency(j);
516
				margins[j] += l.getFrequency(j);
517
			}
518
			entries[i++] = l.toString();
519
		}
520

  
521
		if (useAllOccurrences) {
522
			try {
523
				int[] partitionSizes = partition.getPartSizes();
524
				int[] reste = new int[partitionSizes.length];
525

  
526
				//System.out.println("margins : "+Arrays.toString(margins));
527
				//System.out.println("partsizes : "+Arrays.toString(partitionSizes));
528

  
529
				for(i = 0 ; i < reste.length ; i++)
530
				{
531
					reste[i] = partitionSizes[i] - margins[i];
532
					if(reste[i] < 0)
533
					{
534
						System.out.println(Messages.LexicalTableImpl_12+i+Messages.LexicalTableImpl_13);
535
						return null;
536
					}
537
					mat[lines.size()][i] = reste[i];
538
				}
539
				entries[lines.size()] = "#RESTE#"; //$NON-NLS-1$
540
				rownames.add("#RESTE#"); //$NON-NLS-1$
541
				//System.out.println("rownames: "+rownames);
542
				//System.out.println("reste : "+Arrays.toString(reste));
543
			} catch (CqiClientException e) {
544
				// TODO Auto-generated catch block
545
				org.txm.utils.logger.Log.printStackTrace(e);
546
			}
547

  
548
		}
549
		//System.out.println("mat size : ["+(rownames.size() + extra)+"]["+colnames.size()+"]");
550
		//System.out.println("rownames size : "+rownames.size());
551
		//System.out.println("colnames size : "+colnames.size());
552
		LexicalTableImpl table = new LexicalTableImpl(mat, partition, property,
553
				rownames.toArray(new String[] {}), colnames
554
				.toArray(new String[] {}));
555
		table.constructor_fmin = vocabularies.get(0).getFmin();
556
		return table;
557
	}
558

  
559
	/**
560
	 * Creates the lexical table.
561
	 *
562
	 * @param freqs the freqs
563
	 * @param prop the prop
564
	 * @param rownames the rownames
565
	 * @param colnames the colnames
566
	 * @param Fmin the fmin
567
	 * @return the lexical table
568
	 * @throws RWorkspaceException the r workspace exception
569
	 */
570
	public static LexicalTable createLexicalTable(int[][] freqs, Property prop,
571
			String[] rownames, String[] colnames, int Fmin) throws RWorkspaceException {
572

  
573
		ArrayList<Integer> idx = new ArrayList<Integer>();
574
		for (int j = 0; j < rownames.length; j++) 
575
		{
576
			int sum = 0;
577
			for (int i = 0; i < colnames.length; i++) 
578
			{
579
				sum += freqs[j][i];
580
			}
581
			if (sum >= Fmin)
582
				idx.add(j);
583
		}
584

  
585
		DoubleMatrix2D mat = DoubleFactory2D.sparse.make(idx.size(),
586
				colnames.length, 0);
587

  
588
		int countline = 0;
589
		for (int j : idx) 
590
		{
591
			for (int i = 0; i < colnames.length; i++) 
592
			{
593
				mat.setQuick(countline, i, freqs[j][i]);
594
			}
595
			countline++;
596
		}
597

  
598
		String[] filteredrownames = new String[idx.size()];
599
		for(int i = 0 ; i < idx.size() ; i++)
600
			filteredrownames[i] = rownames[idx.get(i)];
601

  
602
		LexicalTableImpl table = new LexicalTableImpl(mat, prop, filteredrownames, colnames);
603
		table.constructor_fmin = Fmin;
604
		return table;
605
	}
606

  
607
	/* (non-Javadoc)
608
	 * @see org.txm.stat.data.LexicalTable#getName()
609
	 */
610
	@Override
611
	public String getName() {
612
		if (partition != null)
613
			return property.getName(); 
614
		else
615
			return property.getName();
616
	}
617

  
618
	/* (non-Javadoc)
619
	 * @see org.txm.stat.data.LexicalTable#getCopy()
620
	 */
621
	@Override
622
	/**
623
	 * return a copy of a Lexical table builded on a partition
624
	 */
625
	public LexicalTable getCopy() {
626
		try {
627
			if (partition != null)
628
				return getLexicalTable(this.partition, this.property,this.constructor_fmin);
629
			else
630
				return null;
631
		} catch (Exception e) {
632
			org.txm.utils.logger.Log.printStackTrace(e);
633
		}
634
		return null;
635
	}
636

  
637
	/* (non-Javadoc)
638
	 * @see org.txm.stat.data.LexicalTable#getFmax()
639
	 */
640
	@Override
641
	public int getFmax() {
642
		List<Integer> freqs = getFreqs();
643
		int max = 0;
644
		for (int i : freqs)
645
			if (max < i)
646
				max = i;
647
		return max;
648
	}
649

  
650
	/* (non-Javadoc)
651
	 * @see org.txm.stat.data.LexicalTable#getFmin()
652
	 */
653
	@Override
654
	public int getFmin() {
655
		List<Integer> freqs = getFreqs();
656
		int min = 999999;
657
		for (int i : freqs)
658
			if (min > i)
659
				min = i;
660
		return min;
661
	}
662

  
663
	/* (non-Javadoc)
664
	 * 
665
	 * TODO: = rowmargins ?
666
	 * @see org.txm.stat.data.LexicalTable#getFreqs()
667
	 */
668
	@Override
669
	public List<Integer> getFreqs() {
670
		ArrayList<Integer> freqs = new ArrayList<Integer>();
671
		ArrayList<double[]> cols = new ArrayList<double[]>();
672
		int Nrows = this.getNRows();
673
		int Ncols = this.getNColumns();
674
		for (int i = 0; i < Ncols; i++)
675
			try {
676
				cols.add(this.getCol(i).asDoubleArray());
677
			} catch (RException e) {
678
				// TODO Auto-generated catch block
679
				org.txm.utils.logger.Log.printStackTrace(e);
680
			} catch (RWorkspaceException e) {
681
				// TODO Auto-generated catch block
682
				org.txm.utils.logger.Log.printStackTrace(e);
683
			} catch (StatException e) {
684
				// TODO Auto-generated catch block
685
				org.txm.utils.logger.Log.printStackTrace(e);
686
			}
687
		int sum = 0;
688

  
689
		for (int i = 0; i < Nrows; i++) {
690
			sum = 0;
691
			for (int j = 0; j < Ncols; j++)
692
				sum += (int) cols.get(j)[i];
693
			freqs.add(sum);
694
		}
695
		return freqs;
696
	}
697

  
698
	public void setReference(String refSymbol) {
699
		try {
700
			RWorkspace rw = RWorkspace.getRWorkspaceInstance();
701
			rw.voidEval("missingrownames <- rownames("+symbol+")[!rownames("+symbol+")%in%rownames("+refSymbol+")]"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
702
			rw.voidEval("missingrows <- matrix(ncol=1, nrow=length(missingrownames))"); //$NON-NLS-1$
703
			rw.voidEval("missingrows[,] <- 0"); //$NON-NLS-1$
704
			rw.voidEval("rownames(missingrows) <- missingrownames"); //$NON-NLS-1$
705
			rw.voidEval(refSymbol+" <- t(t(rbind("+refSymbol+", t(t(missingrows)))))"); //$NON-NLS-1$ //$NON-NLS-2$
706
			rw.voidEval("refLines <- t(t("+refSymbol+"[rownames("+symbol+"),]))"); // only keep the same lines  //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
707
			rw.voidEval("rmargins <-  t(t(margin.table("+symbol+", 1)))"); //  //$NON-NLS-1$ //$NON-NLS-2$
708
			rw.voidEval("refmargin <- margin.table(refLines)"); //$NON-NLS-1$
709
			rw.voidEval(symbol+" <- cbind("+symbol+", abs(refLines - rmargins))"); // use abs if refLine does not contains a line from 'symbol' //$NON-NLS-1$ //$NON-NLS-2$
710
			rw.voidEval("colnames("+symbol+")[length(colnames("+symbol+"))] <- \"##RESTE##\""); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
711
		} catch (Exception e) {
712
			// TODO Auto-generated catch block
713
			org.txm.utils.logger.Log.printStackTrace(e);
714
		}
715
	}
716

  
717
	/* (non-Javadoc)
718
	 * @see org.txm.stat.data.LexicalTable#getConstructorFmin()
719
	 */
720
	@Override
721
	public int getConstructorFmin() {
722
		return constructor_fmin;
723
	}
724

  
725
	/* (non-Javadoc)
726
	 * @see org.txm.stat.data.LexicalTable#getCorpus()
727
	 */
728
	@Override
729
	public Corpus getCorpus() {
730
		if (getPartition() != null)
731
			return getPartition().getCorpus();
732
		else
733
			return this.corpus;
734
	}
735

  
736
	/* (non-Javadoc)
737
	 * @see org.txm.stat.data.LexicalTable#setCorpus(org.txm.searchengine.cqp.corpus.Corpus)
738
	 */
739
	@Override
740
	public void setCorpus(Corpus corpus)
741
	{
742
		this.corpus = corpus;
743
	}
744

  
745
	@Override
746
	public int getRowsCount() {
747
		return this.getNRows();
748
	}
749

  
750
	@Override
751
	public int getColumnsCount() {
752
		return this.getNColumns();
753
	}
754

  
755
	ArrayList<Object> results = new ArrayList<Object>();
756
	@Override
757
	public List<Object> getResults() {
758
		// TODO Auto-generated method stub
759
		return results;
760
	}
761

  
762
	@Override
763
	public boolean removeResult(Object result) {
764
		return results.remove(result);
765
	}
766

  
767
	@Override
768
	public List<HasResults> getSubHasResults() {
769
		return new ArrayList<HasResults>();
770
	}
771

  
772
	@Override
773
	public void storeResult(Object result) {
774
		results.add(result);
775
	}
776
	
777
	/**
778
	 * When the Lexical table is computed with nor partition nor corpus
779
	 * @param i
780
	 */
781
	public void setParent(HasResults i) {
782
		parent = i;
783
	}
784
	
785
	public HasResults getParent() {
786
		if (parent != null) return parent;
787
		if (partition != null) return partition;
788
		else return corpus;
789
	}
790
}
0 791

  
tmp/org.txm.lexicaltable.core/build.properties (revision 143)
1
source.. = src/
2
output.. = bin/
3
bin.includes = META-INF/,\
4
               .
0 5

  
tmp/org.txm.lexicaltable.core/.settings/org.eclipse.jdt.core.prefs (revision 143)
1
eclipse.preferences.version=1
2
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
4
org.eclipse.jdt.core.compiler.compliance=1.6
5
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
6
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
7
org.eclipse.jdt.core.compiler.source=1.6
0 8

  
tmp/org.txm.lexicaltable.core/.classpath (revision 143)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<classpath>
3
	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
4
	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
5
	<classpathentry kind="src" path="src"/>
6
	<classpathentry kind="output" path="bin"/>
7
</classpath>
0 8

  
tmp/org.txm.lexicaltable.core/META-INF/MANIFEST.MF (revision 143)
1
Manifest-Version: 1.0
2
Bundle-ManifestVersion: 2
3
Bundle-Name: LexicalTable Core
4
Bundle-SymbolicName: org.txm.lexicaltable.core
5
Bundle-Version: 1.0.0.qualifier
6
Bundle-Activator: org.txm.lexicaltable.core.Activator
7
Require-Bundle: org.txm.core;bundle-version="0.7.0",
8
 org.eclipse.ui,
9
 org.eclipse.core.runtime,
10
 org.txm.r;bundle-version="1.0.0",
11
 org.txm.index.core;bundle-version="1.0.0"
12
Bundle-RequiredExecutionEnvironment: JavaSE-1.6
13
Bundle-ActivationPolicy: lazy
14
Export-Package: org.txm.functions.intertextualdistance,
15
 org.txm.lexicaltable.core,
16
 org.txm.lexicaltable.functions,
17
 org.txm.stat.data,
18
 org.txm.stat.engine.r.data
0 19

  
tmp/org.txm.lexicaltable.core/.project (revision 143)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<projectDescription>
3
	<name>org.txm.lexicaltable.core</name>
4
	<comment></comment>
5
	<projects>
6
	</projects>
7
	<buildSpec>
8
		<buildCommand>
9
			<name>org.eclipse.jdt.core.javabuilder</name>
10
			<arguments>
11
			</arguments>
12
		</buildCommand>
13
		<buildCommand>
14
			<name>org.eclipse.pde.ManifestBuilder</name>
15
			<arguments>
16
			</arguments>
17
		</buildCommand>
18
		<buildCommand>
19
			<name>org.eclipse.pde.SchemaBuilder</name>
20
			<arguments>
21
			</arguments>
22
		</buildCommand>
23
	</buildSpec>
24
	<natures>
25
		<nature>org.eclipse.pde.PluginNature</nature>
26
		<nature>org.eclipse.jdt.core.javanature</nature>
27
	</natures>
28
</projectDescription>
0 29

  

Also available in: Unified diff