root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / SchemasSummaryMacro.groovy @ 2166
History | View | Annotate | Download (3.7 kB)
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
|
---|---|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
|
3 |
// @author mdecorde
|
4 |
// @author sheiden
|
5 |
// STANDARD DECLARATIONS
|
6 |
package org.txm.macro.urs.exploit
|
7 |
|
8 |
import org.apache.commons.lang.StringUtils |
9 |
import org.kohsuke.args4j.* |
10 |
|
11 |
import groovy.transform.Field |
12 |
|
13 |
import org.txm.Toolbox |
14 |
import org.txm.rcp.swt.widget.parameters.* |
15 |
import org.txm.annotation.urs.* |
16 |
import org.txm.macro.urs.AnalecUtils |
17 |
import org.txm.searchengine.cqp.AbstractCqiClient |
18 |
import org.txm.searchengine.cqp.corpus.* |
19 |
import org.txm.searchengine.cqp.CQPSearchEngine |
20 |
|
21 |
import visuAnalec.donnees.Structure |
22 |
import visuAnalec.elements.* |
23 |
|
24 |
// Simple class name of this script, used to prefix the messages shown to the user.
def scriptName = this.class.getSimpleName()

// Collect the corpora to analyse from the Corpus view selection:
// a CQPCorpus is taken as is, a Partition contributes all of its parts.
def selection = []
for (def selected : corpusViewSelections) {
	if (selected instanceof CQPCorpus) {
		selection << selected
	} else if (selected instanceof Partition) {
		selection.addAll(selected.getParts())
	}
}

// Nothing usable was selected: tell the user and stop the macro.
if (selection.isEmpty()) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// Call compute(false) on every selected corpus before querying it.
for (def selectedCorpus : selection) {
	selectedCorpus.compute(false)
}
38 |
|
39 |
// BEGINNING OF PARAMETERS

// URSQL expression (TYPE@PROP=VALUE) used to select the schemas.
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

// Lower size bound handed to the schema selection.
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size

// Upper size bound handed to the schema selection.
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size

// Whether units must be strictly included into the corpus matches.
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion

// Verbosity choice; untyped because it is replaced by an integer level below.
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

// Stop the macro when the parameters dialog does not report success.
if (!ParametersDialog.open(this)) return

// Translate the chosen debug label into a numeric level (0..3);
// any other value is left untouched.
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}
52 |
|
53 |
// allresults: corpus -> selected schemas (insertion-ordered map).
// props: union of the schema property names declared in the URS structures of the corpora.
def allresults = new LinkedHashMap()
def props = new HashSet()
for (def corpus : selection) {

	// NOTE(review): the value of 'word' is not used below in this script.
	def word = corpus.getWordProperty()
	def analecCorpus = URSCorpora.getCorpus(corpus)

	// Abort the whole macro when the URSQL expression cannot be evaluated on this corpus.
	def undefinedTypes = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
	if (undefinedTypes.size() > 0) {
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $undefinedTypes."
		return
	}

	// Accumulate the property names of every schema type of this corpus.
	for (def schemaType : analecCorpus.getStructure().getSchemas()) {
		props.addAll(analecCorpus.getStructure().getSchemaProperties(schemaType))
	}

	def matchingSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion)

	// Only corpora with at least one selected schema get an entry;
	// their schemas are stored sorted by the value of getUnite0().
	if (matchingSchemas.size() > 0) {
		allresults[corpus] = matchingSchemas.sort { schema -> schema.getUnite0() }
	}
}
73 |
|
74 |
// tabulate summary: one tab-separated column per corpus that has results
def keys = allresults.keySet().sort()
def selectionLabel = corpusViewSelections.join(",")
println "Statistiques des schémas de "+selectionLabel
def headerRow = "\t"+keys.join("\t")
println headerRow

// number of schemas: one count per corpus column
print "Schemas"
for (def corpusKey : keys) {
	def schemaCount = allresults[corpusKey].size()
	print "\t"+schemaCount
}
println ""
|
85 |
|
86 |
// Average length: mean number of elements in 'contenu' over the schemas of each corpus
print "Average len"
for (def corpusKey : keys) {
	def corpusSchemas = allresults[corpusKey]

	// Sum the content sizes of all schemas of this corpus.
	int lengthSum = 0
	for (Schema schema : corpusSchemas) {
		lengthSum += schema.contenu.size()
	}

	// The list is never empty here: only corpora with at least one schema were stored.
	def averageLength = lengthSum / corpusSchemas.size()
	print "\t"+averageLength
}
println ""
|
95 |
|
96 |
// median length: middle value of the schema lengths of each corpus
print "Median len"
for (def k : keys) {
	// The stored schemas are ordered by getUnite0(), not by length, so the
	// lengths must be sorted on their own before picking the middle one.
	// collect() builds a new list: the order of allresults[k] is left untouched.
	def lengths = allresults[k].collect { it.contenu.size() }.sort()

	// Same index convention as before: for an even number of schemas the
	// upper of the two middle values is reported.
	print "\t"+lengths[lengths.size().intdiv(2)]
}
println ""
|
102 |
|
103 |
// number of properties values: for each schema property, one row giving,
// per corpus column, how many distinct values the selected schemas carry
for (def p : props) {
	print "$p"
	for (def k : keys) {
		// Distinct values of property p among the schemas of corpus k.
		def propsvalues = new HashSet()
		for (def schema : allresults[k]) {
			propsvalues.add(schema.getProp(p))
		}
		// Debug trace: name the corpus and the property actually being reported
		// (the message previously printed the literal text "k @p").
		if (debug) println "$k @$p values="+propsvalues
		print "\t"+propsvalues.size()
	}
	println ""
}
116 |
|