Revision 479 tmp/org.txm.groovy.core/src/groovy/org/txm/test/CQPBenchmark.groovy

CQPBenchmark.groovy (revision 479)
1
//package org.txm.test
1
package org.txm.test
2

  
3

  
4
/**
5
 * Main.
6
 *
7
 * @param args the args
8
 */
9
// Copyright © 2010-2013 ENS de Lyon.
10
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
11
// Lyon 2, University of Franche-Comté, University of Nice
12
// Sophia Antipolis, University of Paris 3.
13
// 
14
// The TXM platform is free software: you can redistribute it
15
// and/or modify it under the terms of the GNU General Public
16
// License as published by the Free Software Foundation,
17
// either version 2 of the License, or (at your option) any
18
// later version.
19
// 
20
// The TXM platform is distributed in the hope that it will be
21
// useful, but WITHOUT ANY WARRANTY; without even the implied
22
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
23
// PURPOSE. See the GNU General Public License for more
24
// details.
25
// 
26
// You should have received a copy of the GNU General
27
// Public License along with the TXM platform. If not, see
28
// http://www.gnu.org/licenses.
2 29
//
3
//import org.txm.*
4
//import org.txm.functions.*
5
////import org.txm.functions.ca.*
6
////import org.txm.functions.classification.CAH
7
//import org.txm.concordance.core.functions.*
8
//import org.txm.concordance.core.functions.comparators.*
9
//import org.txm.functions.cooccurrences.*
10
//import org.txm.functions.diagnostic.*
11
//import org.txm.functions.index.*
12
//import org.txm.functions.referencer.Referencer
13
//import org.txm.functions.specificities.*
14
//import org.txm.searchengine.cqp.corpus.*
15
//import org.txm.searchengine.cqp.corpus.query.*
16
//import org.txm.stat.data.LexicalTable
17
//import org.txm.utils.DeleteDir
18 30
//
19 31
//
20
//testDir = new File(System.getProperty("user.home"), "TXM/testrelease");
21
//DeleteDir.deleteDirectory testDir;
22
//testDir.mkdir()
32
// $LastChangedDate: 2011-11-25 11:30:11 +0100 (ven., 25 nov. 2011) $
33
// $LastChangedRevision: 2069 $
34
// $LastChangedBy: mdecorde $
23 35
//
24
////get corpus
25
//csv = [];
26
//firstExecution = 0;
27
//
28
//def process(String CORPUSNAME, String QUERY1, String QUERY2) {
29
//	
30
//	String ENCODING = "UTF-8"
31
//	String COLSEPARATOR = "\t"
32
//	String TXTSEPARATOR = ""
33
//	int i = 0;
34
//	def corpora = CorpusManager.getCorpusManager().getCorpora()
35
//	MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus(CORPUSNAME)
36
//	//println "CORPUS: "+corpus
37
//
38
//	Query query = new Query(QUERY1) //"\"..............*\"", "\"..........*\""
39
//	Query query2 = new Query(QUERY2)
40
//
41
//	File exporttestdir = new File(testDir, corpus.getName());
42
//	File reportFile = new File(exporttestdir, "report.csv")
43
//	DeleteDir.deleteDirectory exporttestdir;
44
//	exporttestdir.mkdir()
45
//	//println "Results are saved in dir: "+ exporttestdir
46
//
47
//	// word properties
48
//	def word_property = corpus.getProperty("word")
49
//
50
//	// structure properties
51
//	StructuralUnit text_su = corpus.getStructuralUnit("text")
52
//	StructuralUnit s_su = corpus.getStructuralUnit("s")
53
//	Property text_id_property = text_su.getProperty("id")
54
//	ReferencePattern referencePattern = new ReferencePattern().addProperty(text_id_property)
55
//
56
//	long time;
57
//	// START START START START
58
//	if (firstExecution == 0)
59
//		csv << ["object", "size", "nPart", "command", "query", "query freq", "$CORPUSNAME $QUERY1 $QUERY2 mode "+Toolbox.getParam(Toolbox.CQI_NETWORK_MODE)]
60
//	else
61
//		csv[i++] << "$CORPUSNAME $QUERY1 $QUERY2 mode "+Toolbox.getParam(Toolbox.CQI_NETWORK_MODE)
62
//	
63
//	// INFORMATIONS
64
//	print " INFO"
36

  
37

  
38
import java.util.ArrayList;
39

  
40
import org.txm.utils.DeleteDir;
41
import org.txm.*;
42
import org.txm.lexicaltable.core.functions.LexicalTable
43
import org.txm.objects.Base;
44
import org.txm.ca.core.functions.CA
45
import org.txm.cah.core.functions.CAH
46
import org.txm.concordance.core.functions.Concordance
47
import org.txm.cooccurrence.core.functions.Cooccurrence
48
import org.txm.functions.*;
49
import org.txm.functions.diagnostic.*;
50
import org.txm.functions.ca.*;
51
import org.txm.functions.index.*;
52
import org.txm.functions.referencer.Referencer;
53
import org.txm.functions.specificities.*;
54
import org.txm.functions.concordances.*;
55
import org.txm.functions.concordances.comparators.*;
56
import org.txm.searchengine.cqp.ReferencePattern
57
import org.txm.searchengine.cqp.corpus.*;
58
import org.txm.searchengine.cqp.corpus.query.*;
59

  
60

  
61
// Export root for all benchmark results, recreated from scratch on every run.
// Declared without 'def' so it lives in the script binding and is visible
// inside process().
testDir = new File(System.getProperty("user.home"), "TXM/testrelease")
DeleteDir.deleteDirectory(testDir)
testDir.mkdir()

// Result table (one row per benchmarked command, one timing column per run)
// and run counter — both binding-scoped so process() can read and update them.
csv = []
firstExecution = 0
68

  
69
/**
 * Runs the full benchmark suite (diagnostics, lexicon, index, referencer,
 * concordance, cooccurrences, subcorpus, partitions, lexical table, CA, CAH)
 * on one corpus and records per-command timings in the binding variable 'csv'.
 *
 * On the first call (firstExecution == 0) each command appends a complete
 * description row; on subsequent calls only the elapsed time is appended to
 * the matching existing row, so the table grows one column per run.
 *
 * @param CORPUSNAME name of the main corpus registered in the CorpusManager
 * @param QUERY1 first CQL query, used for the first command round
 * @param QUERY2 second CQL query, used for the second command round
 */
def process(String CORPUSNAME, String QUERY1, String QUERY2) {

	String ENCODING = "UTF-8"
	String COLSEPARATOR = "\t"
	String TXTSEPARATOR = ""
	int i = 0
	def corpora = CorpusManager.getCorpusManager().getCorpora() // kept for its side effects, if any — otherwise unused
	MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus(CORPUSNAME)

	Query query = new Query(QUERY1)
	Query query2 = new Query(QUERY2)

	// Per-corpus export directory, recreated from scratch each run.
	File exporttestdir = new File(testDir, corpus.getName())
	File reportFile = new File(exporttestdir, "report.csv") // NOTE(review): never written to — confirm intent
	DeleteDir.deleteDirectory exporttestdir
	exporttestdir.mkdir()

	// word properties
	def word_property = corpus.getProperty("word")

	// structure properties
	StructuralUnit text_su = corpus.getStructuralUnit("text")
	StructuralUnit s_su = corpus.getStructuralUnit("s")
	Property text_id_property = text_su.getProperty("id")
	ReferencePattern referencePattern = new ReferencePattern().addProperty(text_id_property)

	long time = 0

	// Seconds elapsed since 'time' was last reset (Groovy '/' may yield a BigDecimal).
	def elapsed = { (System.currentTimeMillis() - time) / 1000 }

	// First run: store the full description row. Later runs: append only the
	// row's last cell (the new measurement) to the row created by the first run.
	def record = { List row ->
		if (firstExecution == 0) {
			csv << row
		} else {
			csv[i++] << row.last()
		}
	}

	// Header row: 6 metadata labels plus a label describing this run's settings.
	String runLabel = "$CORPUSNAME $QUERY1 $QUERY2 mode " + Toolbox.getParam(Toolbox.CQI_NETWORK_MODE)
	record(["object", "size", "nPart", "command", "query", "query freq", runLabel])

	// INFORMATIONS
	print " INFO"
	time = System.currentTimeMillis()
	Diagnostic diag = new Diagnostic(corpus, 20)
	diag.stepGeneralInfos()
	diag.stepLexicalProperties()
	diag.stepStructuralUnits()
	diag.toHTML(new File(exporttestdir, "diag"))
	record([corpus.getName(), corpus.getSize(), 1, "Informations", "no query", "no freq", elapsed()])

	// LEXICON
	print " LEX"
	time = System.currentTimeMillis()
	corpus.getLexicon(word_property).toTxt(new File(exporttestdir, "lexpos"), ENCODING, COLSEPARATOR, TXTSEPARATOR)
	record([corpus.getName(), corpus.getSize(), 1, "Lexicon", "no query", "no freq", elapsed()])

	// INDEX (QUERY1)
	print " INDEX"
	time = System.currentTimeMillis()
	IndexSample index = new IndexSample(corpus, query, [word_property])
	index.toTxt(new File(exporttestdir, "indexlemmafuncj"), ENCODING, COLSEPARATOR, TXTSEPARATOR)
	record([corpus.getName(), corpus.getSize(), 1, "Index", query, index.getT(), elapsed()])

	// REFERENCER (QUERY1)
	print " REF"
	time = System.currentTimeMillis()
	Referencer referencer = new Referencer(corpus, query, word_property, [text_id_property], true)
	referencer.getQueryMatches()
	referencer.getQueryindexes()
	referencer.groupPositionsbyId()
	referencer.toTxt(new File(exporttestdir, "referencer"), ENCODING)
	record([corpus.getName(), corpus.getSize(), 1, "Referencer", query, index.getT(), elapsed()])

	// CONCORDANCE (QUERY1)
	print " CONC"
	time = System.currentTimeMillis()
	Concordance concordance = new Concordance(corpus, query, word_property, [word_property, word_property], referencePattern, referencePattern, 15, 15)
	concordance.toTxt(new File(exporttestdir, "concj"), Concordance.Format.CONCORDANCE)
	record([corpus.getName(), corpus.getSize(), 1, "Concordances", query, index.getT(), elapsed()])

	// COOCCURRENCE, WORD WINDOW (QUERY1)
	print " COOC"
	time = System.currentTimeMillis()
	Cooccurrence cooc = new Cooccurrence(corpus, query, [word_property], null, 21, 1, 1, 11, 2, 3, 1, false)
	cooc.process()
	cooc.toTxt(new File(exporttestdir, "cooc_wordwindow"), ENCODING)
	record([corpus.getName(), corpus.getSize(), 1, "Cooccurrences words", query, index.getT(), elapsed()])

	// COOCCURRENCE, SENTENCE WINDOW (QUERY1)
	print " COOC"
	time = System.currentTimeMillis()
	Cooccurrence cooc2 = new Cooccurrence(corpus, query, [word_property], s_su, 2, 1, 1, 1, 2, 3, 1, false)
	cooc2.process()
	cooc2.toTxt(new File(exporttestdir, "cooc_swindow"), ENCODING)
	record([corpus.getName(), corpus.getSize(), 1, "Cooccurrences structures", query, index.getT(), elapsed()])

	// INDEX (QUERY2)
	print " INDEX"
	time = System.currentTimeMillis()
	index = new IndexSample(corpus, query2, [word_property])
	index.toTxt(new File(exporttestdir, "indexlemmafuncj"), ENCODING, COLSEPARATOR, TXTSEPARATOR)
	record([corpus.getName(), corpus.getSize(), 1, "Index", query2, index.getT(), elapsed()])

	// REFERENCER (QUERY2)
	print " REF"
	time = System.currentTimeMillis()
	referencer = new Referencer(corpus, query2, word_property, [text_id_property], true)
	referencer.getQueryMatches()
	referencer.getQueryindexes()
	referencer.groupPositionsbyId()
	referencer.toTxt(new File(exporttestdir, "referencer"), ENCODING)
	// FIX: this round benchmarks query2; the row previously reported 'query'.
	record([corpus.getName(), corpus.getSize(), 1, "Referencer", query2, index.getT(), elapsed()])

	// CONCORDANCE (QUERY2)
	print " CONC"
	time = System.currentTimeMillis()
	concordance = new Concordance(corpus, query2, word_property, [word_property, word_property], referencePattern, referencePattern, 15, 15)
	concordance.toTxt(new File(exporttestdir, "concj"), Concordance.Format.CONCORDANCE)
	// FIX: was 'query' (copy-paste from the first round).
	record([corpus.getName(), corpus.getSize(), 1, "Concordances", query2, index.getT(), elapsed()])

	// COOCCURRENCE, WORD WINDOW (QUERY2)
	print " COOC"
	time = System.currentTimeMillis()
	cooc = new Cooccurrence(corpus, query2, [word_property], null, 21, 1, 1, 11, 2, 3, 1, false)
	cooc.process()
	cooc.toTxt(new File(exporttestdir, "cooc_wordwindow"), ENCODING)
	// FIX: was 'query' (copy-paste from the first round).
	record([corpus.getName(), corpus.getSize(), 1, "Cooccurrences words", query2, index.getT(), elapsed()])

	// COOCCURRENCE, SENTENCE WINDOW (QUERY2)
	print " COOC"
	time = System.currentTimeMillis()
	cooc2 = new Cooccurrence(corpus, query2, [word_property], s_su, 2, 1, 1, 1, 2, 3, 1, false)
	cooc2.process()
	cooc2.toTxt(new File(exporttestdir, "cooc_swindow"), ENCODING)
	// FIX: was 'query' (copy-paste from the first round).
	record([corpus.getName(), corpus.getSize(), 1, "Cooccurrences structures", query2, index.getT(), elapsed()])

	// SUBCORPORA
	print " SUBCORPUS"
	time = System.currentTimeMillis()
	Corpus DGcorpus = corpus.createSubcorpus(text_su, text_id_property, "01_DeGaulle", "dgsubcorpus")
	record([corpus.getName(), corpus.getSize(), 1, "Subcorpus", "no query", "no freqs", elapsed()])

	// PARTITIONS
	print " PARTITIONS"
	time = System.currentTimeMillis()
	Partition discours_types = corpus.createPartition(text_su, text_id_property)
	Partition discours_dates = corpus.createPartition(text_su, text_id_property)
	record([corpus.getName(), corpus.getSize(), 1, "Partition 2x", "no query", "no freqs", elapsed()])

	// LEXICAL TABLE
	print " LT"
	time = System.currentTimeMillis()
	LexicalTable table = discours_types.getLexicalTable(word_property, 2)
	table.exportData(new File(exporttestdir, "type_LT"), COLSEPARATOR, TXTSEPARATOR)
	record([discours_types.getName(), corpus.getSize(), discours_types.getNPart(), "LT part", "no query", "no freqs", elapsed()])

	// AFC (CA) ON PARTITION
	print " AFC"
	time = System.currentTimeMillis()
	CA ca = new CA(discours_dates, word_property, 0, 9999999)
	ca.stepLexicalTable()
	ca.stepSortTableLexical()
	ca.stepCompute()
	ca.toSVGFactorialMap(new File(exporttestdir, "cadates"), true, true)
	ca.toSVGSingularValues(new File(exporttestdir, "cadates_singularvalues"))
	record([discours_dates.getName(), corpus.getSize(), discours_dates.getNPart(), "AFC part", "no query", "no freqs", elapsed()])

	// AFC (CA) ON LEXICAL TABLE
	print " AFC"
	time = System.currentTimeMillis()
	CA ca2 = new CA(table)
	ca2.stepCompute()
	ca2.toSVGFactorialMap(new File(exporttestdir, "cadates"), true, true)
	ca2.toSVGSingularValues(new File(exporttestdir, "cadates_singularvalues"))
	// FIX: command label was "specif LT" (copy-pasted from the specificities block);
	// this row actually times the CA on the lexical table.
	record([table.getName(), corpus.getSize(), table.getNColumns(), "AFC LT", "no query", "no freqs", elapsed()])

	// CAH
	print " CAH"
	time = System.currentTimeMillis()
	CAH cah = new CAH(ca, true, CAH.getDefaultMethod(), CAH.getDefaultMetric(), 4, false)
	cah.stepCompute()
	cah.toSVG(new File(exporttestdir, "dates_cah"), RDevice.SVG)
	record([discours_dates.getName(), corpus.getSize(), discours_dates.getNPart(), "CAH ca table", "no query", "no freqs", elapsed()])

	firstExecution++
	println ""
}
326

  
327

  
328

  
329
// CQL queries used by every run (byte-identical to the per-call literals they replace).
String Q1 = "\"..............*\""
String Q2 = "\"..........*\""

// Two runs in memory mode (the first acts as a warm-up), then two in network
// mode; the toolbox is restarted before each run.
println "restarting TBX..."
Toolbox.setParam(Toolbox.CQI_NETWORK_MODE, "false")
println "MEMORY MODE: " + Toolbox.restart()
process("DISCOURS", Q1, Q2) // first time
println "MEMORY MODE: " + Toolbox.restart()
process("DISCOURS", Q1, Q2)

println "restarting TBX..."
Toolbox.setParam(Toolbox.CQI_NETWORK_MODE, "true")
println "NETWORK MODE: " + Toolbox.restart()
process("DISCOURS", Q1, Q2)
println "NETWORK MODE: " + Toolbox.restart()
process("DISCOURS", Q1, Q2)
342

  
343

  
344

  
345

  
346
// add TOTAL line: sum each timing column across all command rows.
// FIX: 'totaux' was declared int[] yet initialized with strings and grown with
// '<<' — neither works on a Java array; it must be a List.
// FIX: the column bound was csv.size() (a ROW count); after N runs each row has
// 6 metadata cells + N timing cells, so valid timing columns end at 6 + firstExecution.
// Column 6 (the warm-up run) is deliberately skipped, matching the 7 placeholder cells.
def totaux = ["", "", "", "", "", "", ""]
for (int col = 7; col < 6 + firstExecution; col++) {
	def total = 0
	// FIX: skip csv[0], the header row — its cells are labels, not numbers.
	for (def line : csv.tail()) {
		total += line[col]
	}
	totaux << total
}
csv << totaux

// WRITE ALL RESULTS as tab-separated values, one table row per output line.
// FIX: the original inner loop stopped at 'firstExecution' columns and so
// truncated every row; print each row in full instead.
for (def line : csv) {
	println line.join("\t")
}

Also available in: Unified diff