Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / mesures1 / InterDistanceMacro.groovy @ 1962

History | View | Annotate | Download (3.6 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit.mesures1
7

    
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.apache.commons.lang.StringUtils;
15

    
16
// BEGINNING OF PARAMETERS
// Each @Field/@Option pair below declares one macro parameter: its name, the
// usage text, the widget type, whether it is required and its default value.

// URSQL expression (TYPE@PROP=REGEX) used to select the schemas to measure.
@Field
@Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

// Minimal size given to the schema selection call.
@Field
@Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// NOTE(review): this parameter is declared but never read in this script -- confirm it is still needed.
@Field
@Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
String schema_display_property_name

// URSQL expression (TYPE@PROP=REGEX) used to filter the units of each selected schema.
@Field
@Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql
28

    
29
// The macro only runs when the current selection is a CQPCorpus.
boolean selectionIsCorpus = corpusViewSelection instanceof CQPCorpus
if (!selectionIsCorpus) {
    println "Corpora selection is not a Corpus"
    return
}

// Open the parameters dialog; stop here when open() returns a false value.
if (!ParametersDialog.open(this)) {
    return
}

// Corpus selected by the user and the URS corpus object obtained from it.
CQPCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// Stop early when the schema URSQL is reported as not defined for this corpus.
boolean schemaQueryDefined = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
if (!schemaQueryDefined) {
    println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
    return
}

// Same check for the unit URSQL.
boolean unitQueryDefined = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
if (!unitQueryDefined) {
    println "** $unit_ursql unit URSQL cannot be computed in the corpus."
    return
}
48

    
49
// Select the schemas matching schema_ursql (minimum_schema_size and 999999 are
// passed as the two size arguments of the selection call).
// NOTE(review): `debug` is not declared in this script; presumably it is provided
// by the macro binding -- confirm.
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999)

def distances = []  // one entry per unit: clamped gap to the next unit (0 for the last unit of a schema)
def nDistances = 0  // number of units visited, all schemas together
def cadences = []   // one entry per unit: the smaller of the gaps to the next and to the previous unit

for (def schema : schemas) {
    // Keep only the units of the schema that match unit_ursql, then sort them.
    def members = AnalecUtils.filterElements(false, schema.getUnitesSousjacentesNonTriees(), unit_ursql)
    Collections.sort(members)

    int lastPos = members.size() - 1
    for (int pos = 0; pos <= lastPos; pos++) {
        // Gap between the end of this unit and the start of the next one;
        // stays 0 for the last unit, and negative gaps are clamped to 0.
        int gapToNext = (pos < lastPos) ? members[pos + 1].getDeb() - members[pos].getFin() : 0
        gapToNext = Math.max(0, gapToNext)

        // Gap between the end of the previous unit and the start of this one;
        // stays 0 for the first unit, and negative gaps are clamped to 0.
        int gapFromPrevious = (pos > 0) ? members[pos].getDeb() - members[pos - 1].getFin() : 0
        gapFromPrevious = Math.max(0, gapFromPrevious)

        distances << gapToNext
        cadences << Math.min(gapToNext, gapFromPrevious)
        nDistances++
    }
}
81
// Without any collected unit there is nothing to average: sum() on an empty list
// returns null (which cannot be assigned to an int) and the means below would
// divide by zero. Report it and stop, like the other guards of this script.
if (nDistances == 0) {
    println "** No unit found in the selected schemas: distances cannot be computed."
    return
}

distances = distances.sort()
cadences = cadences.sort()

int distances_total = distances.sum()
int cadences_total = cadences.sum()

// Means over the number of visited units. Both names are assigned without a
// declaration, exactly as before, so they are stored in the script binding.
coef = (distances_total / nDistances)
cadence = (cadences_total / nDistances)

// Element at the middle index of a sorted, non-empty list.
def median = { values -> values[(int)(values.size() / 2)] }

// First element, elements at 1/4, 1/2 and 3/4 of the size, and last element
// of a sorted, non-empty list, separated by single spaces.
def quartiles = { values ->
    int n = values.size()
    return [values[0], values[(int)(n / 4)], values[(int)(n / 2)], values[(int)(3 * n / 4)], values[n - 1]].join(" ")
}

println "distances $distances"
println "distance moyenne inter-mayonnaise : $distances_total / $nDistances = $coef"
println "distance medianne inter-mayonnaise : "+median(distances)
println "distance quartils : "+quartiles(distances)
println "cadences $cadences"
println "cadence moyenne : $cadences_total / $nDistances = $cadence"
println "cadence medianne : "+median(cadences)
println "cadence quartils : "+quartiles(cadences)

// result: mean distance, result2: mean cadence, data: the sorted lists and the unit count.
return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]