Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / test / CQPBenchmark.groovy @ 148

History | View | Annotate | Download (13.1 kB)

1
//package org.txm.test
2
//
3
//import org.txm.*
4
//import org.txm.functions.*
5
////import org.txm.functions.ca.*
6
////import org.txm.functions.classification.CAH
7
//import org.txm.functions.concordances.*
8
//import org.txm.functions.concordances.comparators.*
9
//import org.txm.functions.cooccurrences.*
10
//import org.txm.functions.diagnostic.*
11
//import org.txm.functions.index.*
12
//import org.txm.functions.referencer.Referencer
13
//import org.txm.functions.specificities.*
14
//import org.txm.searchengine.cqp.corpus.*
15
//import org.txm.searchengine.cqp.corpus.query.*
16
//import org.txm.stat.data.LexicalTable
17
//import org.txm.utils.DeleteDir
18
//
19
//
20
//testDir = new File(System.getProperty("user.home"), "TXM/testrelease");
21
//DeleteDir.deleteDirectory testDir;
22
//testDir.mkdir()
23
//
24
//// benchmark state shared across runs: result rows (csv) and the run counter (firstExecution)
25
//csv = [];
26
//firstExecution = 0;
27
//
28
//def process(String CORPUSNAME, String QUERY1, String QUERY2) {
29
//        
30
//        String ENCODING = "UTF-8"
31
//        String COLSEPARATOR = "\t"
32
//        String TXTSEPARATOR = ""
33
//        int i = 0;
34
//        def corpora = CorpusManager.getCorpusManager().getCorpora()
35
//        MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus(CORPUSNAME)
36
//        //println "CORPUS: "+corpus
37
//
38
//        Query query = new Query(QUERY1) //"\"..............*\"", "\"..........*\""
39
//        Query query2 = new Query(QUERY2)
40
//
41
//        File exporttestdir = new File(testDir, corpus.getName());
42
//        File reportFile = new File(exporttestdir, "report.csv")
43
//        DeleteDir.deleteDirectory exporttestdir;
44
//        exporttestdir.mkdir()
45
//        //println "Results are saved in dir: "+ exporttestdir
46
//
47
//        // word properties
48
//        def word_property = corpus.getProperty("word")
49
//
50
//        // structure properties
51
//        StructuralUnit text_su = corpus.getStructuralUnit("text")
52
//        StructuralUnit s_su = corpus.getStructuralUnit("s")
53
//        Property text_id_property = text_su.getProperty("id")
54
//        ReferencePattern referencePattern = new ReferencePattern().addProperty(text_id_property)
55
//
56
//        long time;
57
//        // CSV HEADER: first run adds the header row; later runs append one more column label to it
58
//        if (firstExecution == 0)
59
//                csv << ["object", "size", "nPart", "command", "query", "query freq", "$CORPUSNAME $QUERY1 $QUERY2 mode "+Toolbox.getParam(Toolbox.CQI_NETWORK_MODE)]
60
//        else
61
//                csv[i++] << "$CORPUSNAME $QUERY1 $QUERY2 mode "+Toolbox.getParam(Toolbox.CQI_NETWORK_MODE)
62
//        
63
//        // INFORMATION (corpus diagnostic report)
64
//        print " INFO"
65
//        time = System.currentTimeMillis();
66
//        Diagnostic diag = new Diagnostic(corpus, 20)
67
//        diag.stepGeneralInfos();
68
//        diag.stepLexicalProperties();
69
//        diag.stepStructuralUnits();
70
//        diag.toHTML(new File(exporttestdir, "diag"))
71
//        if (firstExecution == 0)
72
//                csv << [corpus.getName(), corpus.getSize(), 1, "Informations", "no query", "no freq", (System.currentTimeMillis()-time)/1000]
73
//        else
74
//                csv[i++] << (System.currentTimeMillis()-time)/1000
75
//
76
//        // LEXICON
77
//        print " LEX"
78
//        time = System.currentTimeMillis();
79
//        corpus.getLexicon(word_property).toTxt(new File(exporttestdir, "lexpos"), ENCODING, COLSEPARATOR, TXTSEPARATOR);
80
//        if (firstExecution == 0)
81
//                csv << [corpus.getName(), corpus.getSize(), 1, "Lexicon", "no query", "no freq", (System.currentTimeMillis()-time)/1000]
82
//        else
83
//                csv[i++] << (System.currentTimeMillis()-time)/1000
84
//                
85
//        // INDEX
86
//        print " INDEX"
87
//        time = System.currentTimeMillis();
88
//        IndexSample index = new IndexSample(corpus, query, [word_property])
89
//        index.toTxt(new File(exporttestdir, "indexlemmafuncj"), ENCODING, COLSEPARATOR, TXTSEPARATOR)
90
//        if (firstExecution == 0)
91
//                csv << [corpus.getName(), corpus.getSize(), 1, "Index", query, index.getT(), (System.currentTimeMillis()-time)/1000]
92
//        else
93
//                csv[i++] << (System.currentTimeMillis()-time)/1000
94
//
95
//        // REFERENCER
96
//        print " REF"
97
//        time = System.currentTimeMillis();
98
//        Referencer referencer = new Referencer(corpus, query, word_property, [text_id_property], true);
99
//        referencer.getQueryMatches()
100
//        referencer.getQueryindexes()
101
//        referencer.groupPositionsbyId()
102
//        referencer.toTxt(new File(exporttestdir, "referencer"), ENCODING)
103
//        if (firstExecution == 0)
104
//                csv << [corpus.getName(), corpus.getSize(), 1, "Referencer", query, index.getT(), (System.currentTimeMillis()-time)/1000]
105
//        else
106
//                csv[i++] << (System.currentTimeMillis()-time)/1000
107
//
108
//        // CONCORDANCE
109
//        print " CONC"
110
//        time = System.currentTimeMillis();
111
//        Concordance concordance = new Concordance(corpus, query, word_property, [word_property, word_property], referencePattern, referencePattern, 15, 15)
112
//        concordance.toTxt(new File(exporttestdir,"concj"), Concordance.Format.CONCORDANCE)
113
//        if (firstExecution == 0)
114
//                csv << [corpus.getName(), corpus.getSize(), 1, "Concordances", query, index.getT(), (System.currentTimeMillis()-time)/1000]
115
//        else
116
//                csv[i++] << (System.currentTimeMillis()-time)/1000
117
//
118
//        // COOCCURRENCE WORD WINDOW
119
//        print " COOC"
120
//        time = System.currentTimeMillis();
121
//        Cooccurrence cooc = new Cooccurrence(corpus, query, [word_property], null, 21, 1, 1, 11, 2, 3, 1, false);
122
//        cooc.process();
123
//        cooc.toTxt(new File(exporttestdir, "cooc_wordwindow"), ENCODING)
124
//        if (firstExecution == 0)
125
//                csv << [corpus.getName(), corpus.getSize(), 1, "Cooccurrences words", query, index.getT(), (System.currentTimeMillis()-time)/1000]
126
//        else
127
//                csv[i++] << (System.currentTimeMillis()-time)/1000
128
//
129
//        // COOCCURRENCE SENTENCE WINDOW
130
//        print " COOC"
131
//        time = System.currentTimeMillis();
132
//        Cooccurrence cooc2 = new Cooccurrence(corpus, query, [word_property], s_su,2, 1, 1, 1, 2, 3,1, false);
133
//        cooc2.process();
134
//        cooc2.toTxt(new File(exporttestdir, "cooc_swindow"), ENCODING)
135
//        if (firstExecution == 0)
136
//                csv << [corpus.getName(), corpus.getSize(), 1, "Cooccurrences structures", query, index.getT(), (System.currentTimeMillis()-time)/1000]
137
//        else
138
//                csv[i++] << (System.currentTimeMillis()-time)/1000
139
//
140
//        // INDEX (second query: query2)
141
//        print " INDEX"
142
//        time = System.currentTimeMillis();
143
//        index = new IndexSample(corpus, query2, [word_property])
144
//        index.toTxt(new File(exporttestdir, "indexlemmafuncj"), ENCODING, COLSEPARATOR, TXTSEPARATOR)
145
//        if (firstExecution == 0)
146
//                csv << [corpus.getName(), corpus.getSize(), 1, "Index", query2, index.getT(), (System.currentTimeMillis()-time)/1000]
147
//        else
148
//                csv[i++] << (System.currentTimeMillis()-time)/1000
149
//
150
//        // REFERENCER (second query: query2)
151
//        print " REF"
152
//        time = System.currentTimeMillis();
153
//        referencer = new Referencer(corpus, query2, word_property, [text_id_property], true);
154
//        referencer.getQueryMatches()
155
//        referencer.getQueryindexes()
156
//        referencer.groupPositionsbyId()
157
//        referencer.toTxt(new File(exporttestdir, "referencer"), ENCODING)
158
//        if (firstExecution == 0)
159
//                csv << [corpus.getName(), corpus.getSize(), 1, "Referencer", query, index.getT(), (System.currentTimeMillis()-time)/1000]
160
//        else
161
//                csv[i++] << (System.currentTimeMillis()-time)/1000
162
//
163
//        // CONCORDANCE (second query: query2)
164
//        print " CONC"
165
//        time = System.currentTimeMillis();
166
//        concordance = new Concordance(corpus, query2, word_property, [word_property, word_property], referencePattern, referencePattern, 15, 15)
167
//        concordance.toTxt(new File(exporttestdir,"concj"), Concordance.Format.CONCORDANCE)
168
//        if (firstExecution == 0)
169
//                csv << [corpus.getName(), corpus.getSize(), 1, "Concordances", query, index.getT(), (System.currentTimeMillis()-time)/1000]
170
//        else
171
//                csv[i++] << (System.currentTimeMillis()-time)/1000
172
//
173
//        // COOCCURRENCE WORD WINDOW (second query: query2)
174
//        print " COOC"
175
//        time = System.currentTimeMillis();
176
//        cooc = new Cooccurrence(corpus, query2, [word_property], null, 21, 1, 1, 11, 2, 3, 1, false);
177
//        cooc.process();
178
//        cooc.toTxt(new File(exporttestdir, "cooc_wordwindow"), ENCODING)
179
//        if (firstExecution == 0)
180
//                csv << [corpus.getName(), corpus.getSize(), 1, "Cooccurrences words", query, index.getT(), (System.currentTimeMillis()-time)/1000]
181
//        else
182
//                csv[i++] << (System.currentTimeMillis()-time)/1000
183
//
184
//        // COOCCURRENCE SENTENCE WINDOW (second query: query2)
185
//        print " COOC"
186
//        time = System.currentTimeMillis();
187
//        cooc2 = new Cooccurrence(corpus, query2, [word_property], s_su,2, 1, 1, 1, 2, 3,1, false);
188
//        cooc2.process();
189
//        cooc2.toTxt(new File(exporttestdir, "cooc_swindow"), ENCODING)
190
//        if (firstExecution == 0)
191
//                csv << [corpus.getName(), corpus.getSize(), 1, "Cooccurrences structures", query, index.getT(), (System.currentTimeMillis()-time)/1000]
192
//        else
193
//                csv[i++] << (System.currentTimeMillis()-time)/1000
194
//
195
//        // SUBCORPORA
196
//        print " SUBCORPUS"
197
//        time = System.currentTimeMillis();
198
//        Corpus DGcorpus = corpus.createSubcorpus(text_su, text_id_property, "01_DeGaulle", "dgsubcorpus")
199
//        if (firstExecution == 0)
200
//                csv << [corpus.getName(), corpus.getSize(), 1, "Subcorpus", "no query", "no freqs", (System.currentTimeMillis()-time)/1000]
201
//        else
202
//                csv[i++] << (System.currentTimeMillis()-time)/1000
203
//
204
//        // PARTITIONS
205
//        print " PARTITIONS"
206
//        time = System.currentTimeMillis();
207
//        Partition discours_types = corpus.createPartition(text_su, text_id_property)
208
//        Partition discours_dates = corpus.createPartition(text_su, text_id_property)
209
//        if (firstExecution == 0)
210
//                csv << [corpus.getName(), corpus.getSize(), 1, "Partition 2x", "no query", "no freqs", (System.currentTimeMillis()-time)/1000]
211
//        else
212
//                csv[i++] << (System.currentTimeMillis()-time)/1000
213
//
214
//        // LEXICAL TABLE
215
//        print " LT"
216
//        time = System.currentTimeMillis();
217
//        LexicalTable table = discours_types.getLexicalTable(word_property, 2);
218
//        table.exportData(new File(exporttestdir, "type_LT"), COLSEPARATOR, TXTSEPARATOR);
219
//        if (firstExecution == 0)
220
//                csv << [discours_types.getName(), corpus.getSize(), discours_types.getNPart(), "LT part", "no query", "no freqs", (System.currentTimeMillis()-time)/1000]
221
//        else
222
//                csv[i++] << (System.currentTimeMillis()-time)/1000
223
//
224
////        // SPECIF SUBCORPUS
225
////        time = System.currentTimeMillis();
226
////        print " SPECIF"
227
////        SpecificitesResult specifresult2 = org.txm.functions.specificities.Specificites.specificites(DGcorpus.getParent(), DGcorpus, word_property)
228
////        specifresult2.toTxt(new File(exporttestdir,"dgsub_specifloc"), ENCODING, COLSEPARATOR, TXTSEPARATOR)
229
////        if (firstExecution == 0)
230
////                csv << [DGcorpus.getName(), corpus.getSize(), 1, "Specif sub", "no query", "no freqs", (System.currentTimeMillis()-time)/1000]
231
////        else
232
////                csv[i++] << (System.currentTimeMillis()-time)/1000
233
////
234
////        // SPECIF LEXICAL TABLE
235
////        print " SPECIF"
236
////        time = System.currentTimeMillis();
237
////        SpecificitesResult specifresult3 = org.txm.functions.specificities.Specificites.specificites(table);
238
////        specifresult3.toTxt(new File(exporttestdir,"speciftype"), ENCODING, COLSEPARATOR, TXTSEPARATOR)
239
////        if (firstExecution == 0)
240
////                csv << [table.getName(), corpus.getSize(), table.getNColumns(), "specif LT", "no query", "no freqs", (System.currentTimeMillis()-time)/1000]
241
////        else
242
////                csv[i++] << (System.currentTimeMillis()-time)/1000
243
////
244
////        // AFC PARTITION
245
////        print " AFC"
246
////        time = System.currentTimeMillis();
247
////        CA ca = new CA(discours_dates, word_property, 0 ,9999999)
248
////        ca.stepLexicalTable();
249
////        ca.stepSortTableLexical();
250
////        ca.stepCompute()
251
////        ca.toSVGFactorialMap(new File(exporttestdir,"cadates"), true, true)
252
////        ca.toSVGSingularValues(new File(exporttestdir,"cadates_singularvalues"))
253
////        if (firstExecution == 0)
254
////                csv << [discours_dates.getName(), corpus.getSize(), discours_dates.getNPart(), "AFC part", "no query", "no freqs", (System.currentTimeMillis()-time)/1000]
255
////        else
256
////                csv[i++] << (System.currentTimeMillis()-time)/1000
257
////
258
////        // AFC LEXICAL TABLE
259
////        print " AFC"
260
////        time = System.currentTimeMillis();
261
////        CA ca2 = new CA(table);
262
////        ca2.stepCompute()
263
////        ca2.toSVGFactorialMap(new File(exporttestdir,"cadates"), true, true)
264
////        ca2.toSVGSingularValues(new File(exporttestdir,"cadates_singularvalues"))
265
////        if (firstExecution == 0)
266
////                csv << [table.getName(), corpus.getSize(), table.getNColumns(), "specif LT", "no query", "no freqs", (System.currentTimeMillis()-time)/1000]
267
////        else
268
////                csv[i++] << (System.currentTimeMillis()-time)/1000
269
////
270
////        // CAH
271
////        print " CAH"
272
////        time = System.currentTimeMillis();
273
////        CAH cah = new CAH(ca, true, CAH.getDefaultMethod(), CAH.getDefaultMetric(), 4, false)
274
////        cah.stepCompute()
275
////        cah.toSVG(new File(exporttestdir, "dates_cah"),__RDevice.SVG);
276
////        if (firstExecution == 0)
277
////                csv << [discours_dates.getName(), corpus.getSize(), discours_dates.getNPart(), "CAH ca table", "no query", "no freqs", (System.currentTimeMillis()-time)/1000]
278
////        else
279
////                csv[i++] << (System.currentTimeMillis()-time)/1000
280
////
281
////        firstExecution++;
282
////        println ""
283
//
284
//}
285
//
286
//
287
//
288
//println "restarting TBX..."
289
//Toolbox.setParam(Toolbox.CQI_NETWORK_MODE, "false")
290
//println "MEMORY MODE: "+Toolbox.restart();
291
//process("DISCOURS", "\"..............*\"", "\"..........*\"") // first time
292
//println "MEMORY MODE: "+Toolbox.restart();
293
//process("DISCOURS", "\"..............*\"", "\"..........*\"")
294
//
295
//println "restarting TBX..."
296
//Toolbox.setParam(Toolbox.CQI_NETWORK_MODE, "true")
297
//println "NETWORK MODE: "+Toolbox.restart();
298
//process("DISCOURS", "\"..............*\"", "\"..........*\"")
299
//println "NETWORK MODE: "+Toolbox.restart();
300
//process("DISCOURS", "\"..............*\"", "\"..........*\"")
301
//
302
//
303
//
304
//
305
//// add TOTAL line
306
//int[] totaux = ["","","","","","",""]
307
//for (int i = 7 ; i < csv.size() ; i++) {
308
//        int total = 0;
309
//        for (def line : csv) {
310
//                total += line[i]
311
//        }
312
//        totaux << total
313
//}
314
//csv << totaux;
315
//
316
//// WRITE ALL RESULTS
317
//for (def line : csv) {
318
//        for (int i = 0 ; i < firstExecution ; i++) {
319
//                def item = line[i]
320
//                if (i > 0) print "\t"+item
321
//                else print item
322
//        }
323
//        println ""
324
//}