Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / analec / exploit / mesures1 / DistanceInterMaillonnaireMacro.groovy @ 1166

History | View | Annotate | Download (5 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.analec.exploit.mesures1
7

    
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.analec.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.apache.commons.lang.StringUtils;
15

    
16
// BEGINNING OF PARAMETERS
17
// Type of the schemas to measure; it must be declared in the corpus annotation structure.
@Field
@Option(name = "schema_type", usage = "", widget = "String", required = true, def = "CHAINE")
String schema_type

// Schemas whose number of underlying units is below this threshold are skipped.
@Field
@Option(name = "minimum_schema_size", usage = "", widget = "Integer", required = true, def = "3")
int minimum_schema_size

// Optional schema filter: name of the schema property to test (no filtering when empty).
@Field
@Option(name = "schema_property_name", usage = "", widget = "String", required = false, def = "")
String schema_property_name

// Optional schema filter: regular expression the schema property value must match entirely.
@Field
@Option(name = "schema_property_value", usage = "", widget = "String", required = false, def = ".*")
String schema_property_value

// Type of the units kept in each schema (compared with the unit's own type).
@Field
@Option(name = "unit_type", usage = "", widget = "String", required = false, def = "MENTION")
String unit_type

// Optional unit filter: name of the unit property to test (no filtering when empty).
@Field
@Option(name = "unit_property_name", usage = "", widget = "String", required = false, def = "")
String unit_property_name

// Optional unit filter: regular expression the unit property value must match entirely.
@Field
@Option(name = "unit_property_value", usage = "", widget = "String", required = false, def = ".*")
String unit_property_value
37

    
38
// The macro only runs when the current selection is a MainCorpus.
boolean selectionIsMainCorpus = corpusViewSelection instanceof MainCorpus
if (!selectionIsMainCorpus) {
	println "Corpora selection is not a Corpus"
	return
}

// Open the parameters dialog; stop here when it does not report success.
boolean parametersAccepted = ParametersDialog.open(this)
if (!parametersAccepted) {
	return
}

// Analec annotations attached to the selected corpus.
MainCorpus corpus = corpusViewSelection
def analecCorpus = AnalecCorpora.getCorpus(corpus)
47

    
48
// Validate the parameters against the annotation structure of the corpus before measuring.
// Every failed check prints a message and ends the macro.
def structure = analecCorpus.getStructure()

// check Schema parameters
if (!structure.getSchemas().contains(schema_type)) {
	println "No schema with name=$schema_type"
	return
}
if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
	// test property existence
	def schemaProps = structure.getSchemaProperties(schema_type)
	if (!schemaProps.contains(schema_property_name)) {
		println "Schema $schema_type has no property named $schema_property_name"
		return
	}
}

// check unit parameters
// unit_type is optional and the unit loop below treats an empty value as "any unit type",
// so the type (and the properties declared for that type) is only checked when a type is given.
if (unit_type.length() > 0) {
	if (!structure.getUnites().contains(unit_type)) {
		println "No unit with name=$unit_type"
		return
	}
	if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
		// test property existence
		def unitProps = structure.getUniteProperties(unit_type)
		if (!unitProps.contains(unit_property_name)) {
			println "Unit $unit_type has no property named $unit_property_name"
			return
		}
	}
}
77

    
78
// Walk every schema of the requested type, keep the units that pass the filters, and record
// for each kept unit its gap to the next kept unit (distances) and the smaller of the gaps
// to its previous and next kept units (cadences).
def schemas = analecCorpus.getSchemas(schema_type)
def distances = []
def nDistances = 0 // number of values pushed to distances (cadences receives as many)
def cadences = []

// The property filters do not depend on the element being visited: decide once whether they
// are active and compile each regular expression once instead of once per schema/unit.
boolean filterSchemas = schema_property_name.length() > 0 && schema_property_value.length() > 0
boolean filterUnits = unit_property_name.length() > 0 && unit_property_value.length() > 0
def schemaValuePattern = filterSchemas ? java.util.regex.Pattern.compile(schema_property_value) : null
def unitValuePattern = filterUnits ? java.util.regex.Pattern.compile(unit_property_value) : null

for (def schema : schemas) {

	if (filterSchemas) {
		def schemaValue = schema.getProp(schema_property_name)
		// a null value cannot match: ignore the schema instead of failing with a NullPointerException
		if (schemaValue == null || !schemaValuePattern.matcher(schemaValue).matches()) {
			continue
		}
	}

	def allUnites = schema.getUnitesSousjacentesNonTriees()
	int nUnites = allUnites.size()
	// the size threshold is tested on the unfiltered unit count
	if (nUnites < minimum_schema_size) continue

	def units = []
	for (def unit : allUnites) {

		// an empty unit_type keeps units of every type
		if (unit_type.length() > 0 && !unit.getType().equals(unit_type)) {
			continue
		}

		if (filterUnits) {
			def unitValue = unit.getProp(unit_property_name)
			// ignoring this unit (a null value is treated as non-matching)
			if (unitValue == null || !unitValuePattern.matcher(unitValue).matches()) {
				continue
			}
		}

		units << unit
	}

	// relies on the natural ordering of the unit objects -- presumably text position; TODO confirm
	Collections.sort(units)

	// NOTE(review): the last unit of a schema always adds a 0 to distances, and the first and
	// last units always add a 0 to cadences (their missing neighbour counts as a 0 gap).
	// Kept as in the original; confirm this is the intended measure.
	for (int i = 0 ; i < units.size() ; i++) {
		int d1 = 0 // gap between the end of this unit and the start of the next one
		int d2 = 0 // gap between the end of the previous unit and the start of this one
		if (i < units.size() - 1) d1 = units[i+1].getDeb() - units[i].getFin()
		if (d1 < 0) {
			d1 = 0 // negative gap (this unit ends after the next one starts): counted as 0
		}
		if (i > 0) d2 = units[i].getDeb() - units[i-1].getFin()
		if (d2 < 0) {
			d2 = 0 // negative gap (the previous unit ends after this one starts): counted as 0
		}
		distances << d1

		cadences << Math.min(d1, d2)

		nDistances++
	}
}
136
// Sort the collected values, print mean / median / quartiles and return them to the caller.
// Returns a map: "result" = mean distance, "result2" = mean cadence, "data" = the sorted value
// lists and their count. Both means are null when no unit was retained.
distances = distances.sort()
cadences = cadences.sort()

// Nothing was measured: sum() of an empty list is null, which cannot be assigned to a number,
// and the means would divide by zero. Report the situation explicitly instead.
if (nDistances == 0) {
	println "No distance measured: no unit matched the parameters"
	coef = null
	cadence = null
	return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]
}

// accumulate as long so that large totals cannot silently wrap around the int range
long distances_total = distances.sum(0L)
long cadences_total = cadences.sum(0L)
// coef and cadence are intentionally left undeclared, as in the original script
coef = (distances_total / nDistances)
cadence = (cadences_total / nDistances)

// quartiles are read directly at the rounded-down positions size/4, size/2 and 3*size/4 of the sorted lists
int distancesSize = distances.size()
int cadencesSize = cadences.size()

println "distances $distances"
println "distance moyenne inter-mayonnaise : $distances_total / $nDistances = $coef"
println "distance medianne inter-mayonnaise : "+distances[(int)(distancesSize / 2)]
println "distance quartils : "+distances[0]+" "+distances[(int)(distancesSize / 4)] + " "+distances[(int)(distancesSize / 2)]+" "+distances[(int)(3*distancesSize / 4)]+" "+distances[(int)(distancesSize -1)]
println "cadences $cadences"
println "cadence moyenne : $cadences_total / $nDistances = $cadence"
println "cadence medianne : "+cadences[(int)(cadencesSize / 2)]
println "cadence quartils : "+cadences[0]+" "+cadences[(int)(cadencesSize / 4)] + " "+cadences[(int)(cadencesSize / 2)]+" "+cadences[(int)(3*cadencesSize / 4)]+" "+cadences[(int)(cadencesSize -1)]

return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]