Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / UnitsInterdistanceMacro.groovy @ 2105

History | View | Annotate | Download (4.6 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

    
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.macro.urs.AnalecUtils
12
import visuAnalec.elements.*
13
import org.txm.rcp.swt.widget.parameters.*
14
import org.txm.annotation.urs.*
15
import org.txm.searchengine.cqp.corpus.*
16
import org.apache.commons.lang.StringUtils;
17

    
18
// Gather the corpora to process from the Corpus view selection:
// a selected corpus is taken as is, a selected partition contributes all its parts.
// Any other kind of selected object is ignored.
def selection = []
for (def selected in corpusViewSelections) {
        if (selected instanceof CQPCorpus) {
                selection.add(selected)
        } else if (selected instanceof Partition) {
                selection.addAll(selected.getParts())
        }
}

// Nothing usable is selected: report it and stop the macro.
if (selection.isEmpty()) {
        println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
        return false
}

// Call compute(false) on every retained corpus before it is used below.
selection.each { it.compute(false) }
30

    
31
// BEGINNING OF PARAMETERS
32
// URSQL query selecting the schemas to process (format given by the usage text: TYPE@PROP=REGEX).
@Field
@Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

// Lower bound passed to the schema selection.
@Field
@Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// Upper bound passed to the schema selection.
@Field
@Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size

// Declared as a parameter but not read anywhere in the visible part of this macro.
@Field
@Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
String schema_display_property_name

// URSQL query filtering the units of each selected schema (same TYPE@PROP=REGEX format).
@Field
@Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql

// Debug label chosen in the dialog; it is converted to a numeric level once the dialog is closed.
@Field
@Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
debug
47

    
48
// Show the parameters dialog; stop the macro when open() returns a false value.
if (!ParametersDialog.open(this)) {
        return
}

// Convert the debug label into its numeric level (OFF=0, ON=1, ALL=2, REALLY ALL=3).
// A value matching none of the labels is left untouched.
def debugLabels = ["OFF", "ON", "ALL", "REALLY ALL"]
int debugLevel = debugLabels.findIndexOf { label -> debug == label }
if (debugLevel >= 0) {
        debug = debugLevel
}
50

    
51
// For each selected corpus: select the schemas matching schema_ursql, keep the units of each
// schema matching unit_ursql, measure the gaps between consecutive units and print the mean,
// the median and the quartiles of these gaps ("distances") and of the per-unit smallest
// neighbouring gap ("cadences").
for (def corpus : selection) {
        def analecCorpus = URSCorpora.getCorpus(corpus)

        // Stop the whole macro when the schema query is reported as not defined for this corpus.
        def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
        if (errors.size() > 0) {
                println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
                return;
        }

        // Same check for the unit query.
        errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
        if (errors.size() > 0) {
                println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
                return;
        }

        def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size);
        def distances = []
        def cadences = []
        for (def schema : schemas) {
                def allUnites = schema.getUnitesSousjacentesNonTriees()
                def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)

                // The gaps below are measured between neighbours in the sorted order.
                Collections.sort(units)

                int lastIndex = units.size() - 1
                for (int i = 0 ; i <= lastIndex ; i++) {
                        // d1: gap between the end of this unit and the start of the next one.
                        // d2: gap between the end of the previous unit and the start of this one.
                        // A missing neighbour gives 0; a negative gap is clamped to 0.
                        // NOTE(review): the first and last units of a schema therefore always
                        // contribute a 0 to the statistics - confirm this is intended.
                        int d1 = (i < lastIndex) ? Math.max(0, units[i+1].getDeb() - units[i].getFin()) : 0
                        int d2 = (i > 0) ? Math.max(0, units[i].getDeb() - units[i-1].getFin()) : 0

                        distances << d1
                        cadences << Math.min(d1, d2)
                }
        }

        // One distance and one cadence are recorded per unit.
        int nDistances = distances.size()

        // Without any unit, sum() returns null and the means, medians and quartiles are undefined:
        // report it and go on with the next corpus instead of failing.
        if (nDistances == 0) {
                println "** $corpus: no unit found with schema URSQL '$schema_ursql' and unit URSQL '$unit_ursql', no distance to compute."
                continue
        }

        distances = distances.sort()
        cadences = cadences.sort()

        int distances_total = distances.sum()
        int cadences_total = cadences.sum()
        // Left undeclared, as in the original, so both values are stored in the script binding.
        coef = (distances_total / nDistances)
        cadence = (cadences_total / nDistances)

        // Value found at the middle position of a sorted list.
        def median = { List sorted -> sorted[(int)(sorted.size() / 2)] }

        // Values found at the first, 1/4, 1/2, 3/4 and last positions of a sorted list, space separated.
        def quartiles = { List sorted ->
                int n = sorted.size()
                return [sorted[0], sorted[(int)(n / 4)], sorted[(int)(n / 2)], sorted[(int)(3*n / 4)], sorted[n - 1]].join(" ")
        }

        println "$corpus distances:"
        println "distance moyenne inter-mayonnaise : $distances_total / $nDistances = $coef"
        println "distance medianne inter-mayonnaise : "+median(distances)
        println "distance quartils : "+quartiles(distances)
        println "cadence moyenne : $cadences_total / $nDistances = $cadence"
        println "cadence medianne : "+median(cadences)
        println "cadence quartils : "+quartiles(cadences)

        //return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]
}