root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / SchemasSummaryMacro.groovy @ 2167
History | View | Annotate | Download (3.7 kB)
1 | 671 | mdecorde | // Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
|
---|---|---|---|
2 | 671 | mdecorde | // Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
|
3 | 671 | mdecorde | // @author mdecorde
|
4 | 671 | mdecorde | // @author sheiden
|
5 | 671 | mdecorde | // STANDARD DECLARATIONS
|
6 | 1217 | mdecorde | package org.txm.macro.urs.exploit
|
7 | 671 | mdecorde | |
8 | 671 | mdecorde | import org.apache.commons.lang.StringUtils |
9 | 671 | mdecorde | import org.kohsuke.args4j.* |
10 | 671 | mdecorde | |
11 | 671 | mdecorde | import groovy.transform.Field |
12 | 671 | mdecorde | |
13 | 671 | mdecorde | import org.txm.Toolbox |
14 | 671 | mdecorde | import org.txm.rcp.swt.widget.parameters.* |
15 | 1217 | mdecorde | import org.txm.annotation.urs.* |
16 | 1217 | mdecorde | import org.txm.macro.urs.AnalecUtils |
17 | 671 | mdecorde | import org.txm.searchengine.cqp.AbstractCqiClient |
18 | 671 | mdecorde | import org.txm.searchengine.cqp.corpus.* |
19 | 1979 | mdecorde | import org.txm.searchengine.cqp.CQPSearchEngine |
20 | 671 | mdecorde | |
21 | 671 | mdecorde | import visuAnalec.donnees.Structure |
22 | 671 | mdecorde | import visuAnalec.elements.* |
23 | 671 | mdecorde | |
// Simple class name of this script, used to prefix the user-facing message below
def scriptName = this.class.getSimpleName()

// Build the list of corpora to process from the Corpus view selection:
// a CQPCorpus is taken as is, a Partition contributes all of its parts,
// anything else is ignored.
def selection = []
for (def selected : corpusViewSelections) {
	if (selected instanceof CQPCorpus) {
		selection.add(selected)
	} else if (selected instanceof Partition) {
		selection.addAll(selected.getParts())
	}
}

// Nothing usable selected: tell the user and stop the macro
if (selection.size() == 0) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// Call compute(false) on every selected corpus before it is queried
for (def corpusToCompute : selection) {
	corpusToCompute.compute(false)
}
38 | 671 | mdecorde | |
// BEGINNING OF PARAMETERS

// Schema query passed to AnalecUtils (usage hint: TYPE@PROP=VALUE)
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

// Lower size bound handed to the schema selection
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size

// Upper size bound handed to the schema selection
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size

// Whether units must be strictly included in the corpus matches
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion

// Debug verbosity label chosen in the dialog; converted to a number below.
// NOTE(review): the metaVar values appear space-separated in this copy — confirm the separator against the repository file.
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

// Stop the macro when ParametersDialog.open(this) returns a false value
if (!ParametersDialog.open(this)) return

// Translate the debug label into a numeric level; any other value is left unchanged
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}
52 | 671 | mdecorde | |
// allresults: corpus -> selected schemas (insertion-ordered map)
// props: union of the schema property names declared in the visited corpora
def allresults = new LinkedHashMap()
def props = new HashSet()
for (def corpus : selection) {

	// NOTE(review): 'word' is not used later in this script; the call is kept as in the original
	def word = corpus.getWordProperty()
	def analecCorpus = URSCorpora.getCorpus(corpus)

	// Validate the query against this corpus; the first failure stops the whole macro
	def problems = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
	if (problems.size() > 0) {
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $problems."
		return
	}

	// Accumulate the properties of every schema type declared in the corpus structure
	for (def schemaType : analecCorpus.getStructure().getSchemas()) {
		props.addAll(analecCorpus.getStructure().getSchemaProperties(schemaType))
	}

	// Select the schemas matching the query and the size/inclusion parameters
	def matches = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion)

	// Only corpora with at least one schema get an entry; schemas are ordered by getUnite0()
	if (matches.size() > 0) {
		allresults.put(corpus, matches.sort { schema -> schema.getUnite0() })
	}
}
73 | 671 | mdecorde | |
// tabulate summary
// Column order: the corpora that produced results, sorted
def keys = allresults.keySet().sort()

// Title line, then a header row with one tab-separated column per corpus
println "Statistiques des schémas de "+corpusViewSelections.join(", ")
println "\t"+keys.join("\t")

// number of schemas
// Row label followed by one tab-prefixed count per corpus
def schemaCounts = keys.collect { k -> "\t"+allresults[k].size() }
println "Schemas"+schemaCounts.join("")
85 | 671 | mdecorde | |
// Average length
// For each corpus column: sum of the schema content sizes divided by the number of schemas
print "Average len"
for (def k : keys) {
	def schemas = allresults[k]
	int total = 0
	schemas.each { Schema s -> total += s.contenu.size() }

	// Dividing two integers with '/' in Groovy yields a decimal, not a truncated integer
	print "\t"+(total / schemas.size())
}
println ""
95 | 671 | mdecorde | |
// median length
// For each corpus column: the middle value of the schema content sizes.
// Fix: the schema lists in allresults are ordered by getUnite0(), not by length,
// so the lengths are extracted and sorted here before the middle element is taken.
// For an even count the upper of the two middle values is reported (index size/2),
// which is the index the previous code used.
print "Median len"
for (def k : keys) {
	// collect() builds a new list, so sorting it leaves allresults[k] untouched
	def lengths = allresults[k].collect { Schema s -> s.contenu.size() }.sort()
	print "\t"+lengths[(int)(lengths.size() / 2)]
}
println ""
102 | 671 | mdecorde | |
// number of properties values
// One row per schema property; each corpus column holds the number of distinct
// values returned by getProp(p) over the schemas kept for that corpus.
for (def p : props) {
	print "$p"
	for (def k : keys) {
		def propsvalues = new HashSet()
		for (def schema : allresults[k]) {
			propsvalues.add(schema.getProp(p))
		}
		// Fix: the debug message used the literal text "k @p values=", which named neither
		// the corpus nor the property; both are now interpolated.
		if (debug) println "$k @$p values="+propsvalues
		print "\t"+propsvalues.size()
	}
	println ""
}