root / tmp / org.txm.analec.rcp / src / org / txm / macro / analec / exploit / SchemasSummaryMacro.groovy @ 758
History | View | Annotate | Download (3.3 kB)
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
|
---|---|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
|
3 |
// @author mdecorde
|
4 |
// @author sheiden
|
5 |
// STANDARD DECLARATIONS
|
6 |
package org.txm.macro.analec.exploit
|
7 |
|
8 |
import org.apache.commons.lang.StringUtils |
9 |
import org.kohsuke.args4j.* |
10 |
|
11 |
import groovy.transform.Field |
12 |
|
13 |
import org.txm.Toolbox |
14 |
import org.txm.macro.analec.AnalecUtils; |
15 |
import org.txm.rcp.swt.widget.parameters.* |
16 |
import org.txm.analec.* |
17 |
import org.txm.searchengine.cqp.AbstractCqiClient |
18 |
import org.txm.searchengine.cqp.corpus.* |
19 |
|
20 |
import visuAnalec.donnees.Structure |
21 |
import visuAnalec.elements.* |
22 |
|
23 |
// Simple class name of this script, used to prefix console messages
def scriptName = this.class.getSimpleName()

// Build the list of corpora to process from the Corpus view selection:
// a Corpus is kept as is, a Partition contributes its parts, anything else is ignored
def selection = []
for (item in corpusViewSelections) {
	switch (item) {
		case Corpus:
			selection.add(item)
			break
		case Partition:
			selection.addAll(item.getParts())
			break
	}
}

// Stop the macro when the selection yields nothing to process
if (selection.isEmpty()) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}
35 |
|
36 |
// BEGINNING OF PARAMETERS
|
37 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

// Show the parameters dialog; leave the macro when it reports a false value
if (!ParametersDialog.open(this)) return

// Replace the textual debug level by its rank (OFF=0, ON=1, ALL=2, REALLY ALL=3);
// any other value is left untouched
def debugRank = ["OFF", "ON", "ALL", "REALLY ALL"].findIndexOf { debug == it }
if (debugRank >= 0) {
	debug = debugRank
}
47 |
|
48 |
// CQP client handle (not used by the rest of this script)
def CQI = CQPEngine.getCqiClient()

// Selected schemas of each corpus, keyed by corpus, in selection order
def allresults = new LinkedHashMap()
// Unit properties collected from the Analec structure of every visited corpus
def props = new HashSet()
for (corpus in selection) {

	// The Analec corpus is fetched using the name of the main corpus
	mainCorpus = corpus.getMainCorpus()
	def word = mainCorpus.getWordProperty()
	def analecCorpus = AnalecCorpora.getCorpus(mainCorpus.getName())

	// Accumulate the properties declared for each unit type
	def structure = analecCorpus.getStructure()
	for (type in structure.getUnites()) {
		props.addAll(structure.getUniteProperties(type))
	}

	// Keep only the corpora that have at least one selected schema; schemas are ordered by getUnite0()
	def selectedSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size)
	if (selectedSchemas.size() > 0) {
		allresults.put(corpus, selectedSchemas.sort { it.getUnite0() })
	}
}
65 |
|
66 |
// tabulate summary: one column per corpus that has at least one selected schema
def keys = allresults.keySet().sort()
println "Statistiques des schémas de "+corpusViewSelections.join(",")
println "\t"+keys.join("\t")

// number of schemas: one tab-prefixed count per corpus, emitted as a single row
def schemaCounts = keys.collect { "\t"+allresults[it].size() }
println "Schemas"+schemaCounts.join("")
|
77 |
|
78 |
// Average length: total of the `contenu` sizes of the schemas of a corpus divided by their count
print "Average len"
keys.each { k ->
	def schemas = allresults[k]
	int total = schemas.inject(0) { acc, Schema s -> acc + s.contenu.size() }
	print "\t"+total / schemas.size()
}
println ""
|
87 |
|
88 |
// median length: middle value of the schema sizes (size of `contenu`) of each corpus.
// The schema lists are ordered by getUnite0() when they are collected, not by size,
// so the sizes are sorted here before the middle one is picked.
// With an even number of schemas the upper of the two middle values is printed.
print "Median len"
for (def k : keys) {
	def sizes = allresults[k].collect { it.contenu.size() }.sort()
	print "\t"+sizes[sizes.size().intdiv(2)]
}
println ""
|
94 |
|
95 |
// number of properties values: one row per collected property, one column per corpus,
// each cell being the number of distinct values returned by getProp() over the corpus schemas
for (def p : props) {
	print "$p"
	for (def k : keys) {
		def propsvalues = allresults[k].collect { it.getProp(p) } as HashSet
		// debug trace: name the corpus and the property the values belong to
		// (the message previously printed the literal text "k @p")
		if (debug) println "$k @$p values="+propsvalues
		print "\t"+propsvalues.size()
	}
	println ""
}
108 |
|