Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / SchemasSummaryMacro.groovy @ 2166

History | View | Annotate | Download (3.7 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

    
8
import org.apache.commons.lang.StringUtils
9
import org.kohsuke.args4j.*
10

    
11
import groovy.transform.Field
12

    
13
import org.txm.Toolbox
14
import org.txm.rcp.swt.widget.parameters.*
15
import org.txm.annotation.urs.*
16
import org.txm.macro.urs.AnalecUtils
17
import org.txm.searchengine.cqp.AbstractCqiClient
18
import org.txm.searchengine.cqp.corpus.*
19
import org.txm.searchengine.cqp.CQPSearchEngine
20

    
21
import visuAnalec.donnees.Structure
22
import visuAnalec.elements.*
23

    
24
// Simple class name of this script, used as a prefix in console messages.
def scriptName = this.class.getSimpleName()

// Flatten the Corpus view selection: a corpus is kept as is, a partition is
// replaced by its parts, any other kind of selected object is ignored.
def selection = []
for (item in corpusViewSelections) {
	if (item instanceof CQPCorpus) {
		selection.add(item)
	} else if (item instanceof Partition) {
		selection.addAll(item.getParts())
	}
}

// Nothing usable was selected: tell the user and stop the macro.
if (selection.isEmpty()) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// Call compute(false) on every retained corpus before querying it.
selection.each { it.compute(false) }
38

    
39
// BEGINNING OF PARAMETERS
// Each @Field/@Option pair below declares one macro parameter; the parameters
// are filled in through ParametersDialog.open(this).

// URSQL expression selecting the schemas (TYPE@PROP=VALUE)
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
		String schema_ursql
// Lower bound on schema size
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
		int minimum_schema_size
// Upper bound on schema size
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
		int maximum_schema_size
// Whether units must be strictly included into the corpus matches
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
		boolean strict_inclusion
// Debug verbosity, chosen among the labels listed in metaVar
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
		debug

// Stop here when the parameters dialog does not report success.
if (!ParametersDialog.open(this)) return

// Replace the debug label by its numeric level: OFF=0, ON=1, ALL=2, REALLY ALL=3.
// Any other value is left untouched.
def debugLevel = ["OFF", "ON", "ALL", "REALLY ALL"].findIndexOf { label -> debug == label }
if (debugLevel >= 0) debug = debugLevel
52

    
53
// allresults: corpus -> schemas selected in that corpus, in selection order.
// props: every schema property name declared in the visited corpus structures.
def allresults = new LinkedHashMap()
def props = new HashSet()
for (def corpus : selection) {

	// NOTE(review): 'word' is not used anywhere below in this script.
	def word = corpus.getWordProperty()
	def ursCorpus = URSCorpora.getCorpus(corpus)

	// Abort the whole macro when the URSQL expression does not fit this corpus.
	def problems = AnalecUtils.isPropertyDefined(Schema.class, ursCorpus, schema_ursql)
	if (problems.size() > 0) {
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: ${problems}."
		return
	}

	// Collect the property names of every schema type of the structure.
	def structure = ursCorpus.getStructure()
	for (def schemaType : structure.getSchemas()) {
		props.addAll(structure.getSchemaProperties(schemaType))
	}

	// Keep only the corpora in which at least one schema was selected;
	// their schemas are stored sorted on getUnite0().
	def matched = AnalecUtils.selectSchemasInCorpus(debug, ursCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion)
	if (matched.size() > 0) {
		allresults.put(corpus, matched.sort { it.getUnite0() })
	}
}
73

    
74
// tabulate summary: one column per corpus having at least one selected schema,
// one row per statistic. Cells are separated by tabulations.
def keys = allresults.keySet().sort();
println "Statistiques des schémas de "+corpusViewSelections.join(",")
println "\t"+keys.join("\t")

// number of schemas
print "Schemas"
for (def k : keys) {
	print "\t"+allresults[k].size();
}
println ""

// Average length (number of elements in 'contenu' per schema)
print "Average len"
for (def k : keys) {
	int total = 0;
	for (Schema s : allresults[k]) total += s.contenu.size();

	// only non-empty schema lists are stored in allresults, so the divisor is never 0
	print "\t"+total / allresults[k].size()
}
println ""

// median length
print "Median len"
for (def k : keys) {
	// The stored schemas are ordered on getUnite0(), not on their length: the
	// lengths must be sorted before reading the middle one, otherwise the value
	// printed is the length of an arbitrary schema.
	def lengths = allresults[k].collect { it.contenu.size() }.sort()
	print "\t"+lengths[lengths.size().intdiv(2)];
}
println ""

// number of distinct values of each schema property
for (def p : props) {
	print "$p"
	for (def k : keys) {
		def propsvalues = new HashSet()
		for (def schema : allresults[k]) {
			propsvalues.add(schema.getProp(p))
		}
		// name the corpus and the property in the debug trace (they were printed
		// as the literal text "k @p" before)
		if (debug) println "$k @$p values="+propsvalues
		print "\t"+propsvalues.size();
	}
	println ""
}
116