Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / UnitsInterdistanceMacro.groovy @ 2134

History | View | Annotate | Download (4.6 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

    
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.macro.urs.AnalecUtils
12
import visuAnalec.elements.*
13
import org.txm.rcp.swt.widget.parameters.*
14
import org.txm.annotation.urs.*
15
import org.txm.searchengine.cqp.corpus.*
16
import org.apache.commons.lang.StringUtils;
17

    
18
// Collect the corpora to process from the Corpus view selection:
// a CQPCorpus is taken as is, a Partition contributes all of its parts,
// any other kind of selected object is ignored.
def selection = []
for (item in corpusViewSelections) {
        if (item instanceof CQPCorpus) {
                selection.add(item)
        } else if (item instanceof Partition) {
                selection.addAll(item.getParts())
        }
}

// Nothing usable selected: report it and stop the macro.
if (selection.isEmpty()) {
        println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
        return false
}

// Call compute(false) on every selected corpus before it is used below.
for (selected in selection) {
        selected.compute(false)
}
30

    
31
// BEGINNING OF PARAMETERS
// Each parameter is a script field annotated with @Option; the fields are
// filled in when ParametersDialog.open(this) is called further down.

// URSQL expression (TYPE@PROP=REGEX) used to select the schemas
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

// Size bounds passed to the schema selection
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
String schema_display_property_name

// URSQL expression (TYPE@PROP=REGEX) used to filter the units of each schema
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql

// Verbosity, chosen as a label and converted to a number below
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
debug

// Stop the macro when the parameters dialog does not return true.
if (!ParametersDialog.open(this)) {
        return
}

// Turn the debug label into its numeric level (OFF=0 ... REALLY ALL=3);
// any other value is left untouched.
switch (debug) {
        case "OFF":
                debug = 0
                break
        case "ON":
                debug = 1
                break
        case "ALL":
                debug = 2
                break
        case "REALLY ALL":
                debug = 3
                break
}
50

    
51
// Formats the values found at indexes 0, n/4, n/2, 3n/4 and n-1 of an
// already sorted, non-empty list, separated by single spaces.
def quartileSummary = { List sorted ->
        int n = sorted.size()
        return [sorted[0], sorted[n.intdiv(4)], sorted[n.intdiv(2)], sorted[(3 * n).intdiv(4)], sorted[n - 1]].join(" ")
}

// For each selected corpus: select the schemas, filter and sort their units,
// then print statistics about
//  - the "distances": gap between each unit and the previous one of the same schema,
//  - the "cadences": for each unit, the smallest gap to one of its two neighbours.
for (def corpus : selection) {
        def analecCorpus = URSCorpora.getCorpus(corpus)

        // Validate both URSQL expressions against the corpus structure.
        // Note: `return` ends the whole macro, so the remaining corpora are not processed.
        def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
        if (errors.size() > 0) {
                println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
                return
        }

        errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
        if (errors.size() > 0) {
                println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
                return
        }

        def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size)
        def distances = []
        def cadences = []

        for (def schema : schemas) {
                def units = AnalecUtils.filterElements(debug, schema.getUnitesSousjacentesNonTriees(), unit_ursql)
                Collections.sort(units)

                // A schema with fewer than two units has no inter-unit gap; without this
                // guard its single unit would add a meaningless placeholder to the cadences.
                if (units.size() < 2) continue

                int lastIndex = units.size() - 1
                for (int i = 0 ; i <= lastIndex ; i++) {
                        // Placeholders for a missing neighbour; at least one of the two
                        // is always overwritten since the schema has two units or more.
                        int gapToNext = Integer.MAX_VALUE
                        int gapToPrevious = Integer.MAX_VALUE

                        if (i < lastIndex) {
                                // overlapping units give a negative gap, counted as 0
                                gapToNext = Math.max(0, units[i+1].getDeb() - units[i].getFin() - 1)
                        }
                        if (i > 0) {
                                int rawGap = units[i].getDeb() - units[i-1].getFin() - 1
                                // NOTE(review): as in the original, the distance is stored before
                                // clamping, so overlapping units contribute a negative distance.
                                distances << rawGap
                                gapToPrevious = Math.max(0, rawGap)
                        }

                        cadences << Math.min(gapToNext, gapToPrevious)
                }
        }

        // Nothing was measured: sum() would return null and the means would divide by zero.
        if (distances.isEmpty()) {
                println "** $corpus: no distance to compute (no schema with at least two selected units)."
                continue
        }

        distances = distances.sort()
        cadences = cadences.sort()

        int distances_total = distances.sum()
        int cadences_total = cadences.sum()
        // coef and cadence are deliberately left undeclared, as in the original,
        // so they are stored in the script binding.
        coef = (distances_total / distances.size())
        // The mean cadence is divided by the number of cadences, as the printed formula states.
        cadence = (cadences_total / cadences.size())

        println "$corpus distances:"
        if (debug > 0) println "distances $distances"
        println "distance moyenne : $distances_total / ${distances.size()} = $coef"
        println "distance medianne : "+distances[distances.size().intdiv(2)]
        println "distance quartils : "+quartileSummary(distances)
        if (debug > 0) println "cadences $cadences"
        println "cadence moyenne : $cadences_total / ${cadences.size()} = $cadence"
        println "cadence medianne : "+cadences[cadences.size().intdiv(2)]
        println "cadence quartils : "+quartileSummary(cadences)

        //return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":distances.size(), "cadences":cadences]]
}