Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / mesures1 / InterDistanceMacro.groovy @ 1962

History | View | Annotate | Download (3.6 kB)

1 671 mdecorde
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2 671 mdecorde
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3 671 mdecorde
// @author mdecorde
4 671 mdecorde
// @author sheiden
5 671 mdecorde
// STANDARD DECLARATIONS
6 1217 mdecorde
package org.txm.macro.urs.exploit.mesures1
7 671 mdecorde
8 671 mdecorde
import org.kohsuke.args4j.*
9 671 mdecorde
import groovy.transform.Field
10 671 mdecorde
import org.txm.*
11 671 mdecorde
import org.txm.rcp.swt.widget.parameters.*
12 1217 mdecorde
import org.txm.annotation.urs.*
13 671 mdecorde
import org.txm.searchengine.cqp.corpus.*
14 671 mdecorde
import org.apache.commons.lang.StringUtils;
15 671 mdecorde
16 671 mdecorde
// BEGINNING OF PARAMETERS
// Macro parameters: each one is a script field (@Field) annotated with @Option.
// Presumably they are filled in by ParametersDialog.open(this) further down — confirm.
17 1962 mdecorde
// URSQL selector (format TYPE@PROP=REGEX) of the schemas to measure.
// Checked with AnalecUtils.isPropertyDefined and passed to AnalecUtils.selectSchemasInCorpus.
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
18 1962 mdecorde
String schema_ursql
19 671 mdecorde
20 1962 mdecorde
// Minimal schema size, passed to AnalecUtils.selectSchemasInCorpus.
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
21 671 mdecorde
int minimum_schema_size
22 671 mdecorde
23 1962 mdecorde
// NOTE(review): this parameter is declared but never read in the visible part of the script.
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
24 1962 mdecorde
String schema_display_property_name
25 671 mdecorde
26 1962 mdecorde
// URSQL selector (format TYPE@PROP=REGEX) applied to the units of each selected schema
// through AnalecUtils.filterElements.
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
27 1962 mdecorde
String unit_ursql
28 671 mdecorde
29 1962 mdecorde
// The macro only works on a CQPCorpus selection.
// NOTE(review): corpusViewSelection is not declared in this script; presumably it is
// provided by the macro binding — confirm.
if (!(corpusViewSelection instanceof CQPCorpus)) {
30 671 mdecorde
        println "Corpora selection is not a Corpus"
31 671 mdecorde
        return;
32 671 mdecorde
}
33 671 mdecorde
34 671 mdecorde
// Stop silently when the parameters dialog reports failure (presumably a user cancel — confirm).
if (!ParametersDialog.open(this)) return;
35 671 mdecorde
36 1962 mdecorde
CQPCorpus corpus = corpusViewSelection
37 1217 mdecorde
// URS (Analec) annotation corpus associated with the selected CQP corpus.
def analecCorpus = URSCorpora.getCorpus(corpus)
38 671 mdecorde
39 1962 mdecorde
// Abort when the schema selector cannot be used with this corpus.
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
40 1962 mdecorde
        println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
41 671 mdecorde
        return;
42 671 mdecorde
}
43 671 mdecorde
44 1962 mdecorde
// Abort when the unit selector cannot be used with this corpus.
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
45 1962 mdecorde
        println "** $unit_ursql unit URSQL cannot be computed in the corpus."
46 671 mdecorde
        return;
47 671 mdecorde
}
48 671 mdecorde
49 1962 mdecorde
// Select the schemas to measure.
// NOTE(review): `debug` is not declared in the visible script; presumably it comes from the
// macro binding — confirm, otherwise this line fails with a missing property.
// NOTE(review): 999999 is presumably the maximal schema size (i.e. "no upper bound") — confirm.
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
50 671 mdecorde
// One distance value per measured unit (gap to the following unit of the same schema).
def distances = [];
51 671 mdecorde
// Number of measured units; used as the divisor of both means.
def nDistances = 0
52 671 mdecorde
// One cadence value per measured unit (the smaller of its two neighbouring gaps).
def cadences = [];
53 671 mdecorde
// For each selected schema, measure the gaps between consecutive filtered units.
// NOTE(review): getDeb()/getFin() are presumably the start and end positions of a unit — confirm.
for (def schema : schemas) {
54 671 mdecorde
55 671 mdecorde
        def allUnites = schema.getUnitesSousjacentesNonTriees()
56 671 mdecorde
57 1962 mdecorde
        // Keep only the units selected by unit_ursql.
        def units = AnalecUtils.filterElements(false, allUnites, unit_ursql)
58 671 mdecorde
59 671 mdecorde
        // Sort in the units' natural order (presumably by position in the text — confirm),
        // so that units[i-1] and units[i+1] are the neighbours of units[i].
        Collections.sort(units)
60 671 mdecorde
61 671 mdecorde
        for (int i = 0 ; i < units.size() ; i++) {
62 671 mdecorde
                // d1: gap between this unit and the next one; stays 0 for the last unit.
                int d1 = 0;
63 671 mdecorde
                // d2: gap between the previous unit and this one; stays 0 for the first unit.
                int d2 = 0;
64 671 mdecorde
                if (i < units.size()-1) d1 = units[i+1].getDeb() - units[i].getFin();
65 671 mdecorde
                if (d1 < 0) {
66 671 mdecorde
                        //println "D1 "+units[i+1].getDeb()+" - "+units[i].getFin()+" = "+d1
67 671 mdecorde
                        d1 = 0; // negative gap: the next unit begins before this one ends, clamp to 0
68 671 mdecorde
                }
69 671 mdecorde
                if (i > 0) d2 = units[i].getDeb() - units[i-1].getFin();
70 671 mdecorde
                if (d2 < 0) {
71 671 mdecorde
                        //println "D2 "+units[i].getDeb()+" - "+units[i-1].getFin()+" = "+d2
72 671 mdecorde
                        d2 = 0; // negative gap: this unit begins before the previous one ends, clamp to 0
73 671 mdecorde
                }
74 671 mdecorde
                // NOTE(review): the last unit of each schema contributes a 0 distance, and the first
                // and last units always contribute a 0 cadence; this lowers both means — confirm intended.
                distances << d1
75 671 mdecorde
76 671 mdecorde
                // The cadence of a unit is the smaller of its two neighbouring gaps.
                if (d1 < d2) cadences << d1 else cadences << d2
77 671 mdecorde
78 671 mdecorde
                nDistances++
79 671 mdecorde
        }
80 671 mdecorde
}
81 671 mdecorde
// Nothing was measured (no schema selected, or no unit matched the unit selector):
// sum() of an empty list is null and the means would divide by zero, so stop here
// with a message, like the other early exits of this script.
if (nDistances == 0) {
        println "** No unit matching $unit_ursql was found in the selected $schema_ursql schemas: no distance to compute."
        return;
}
// Sort both series in ascending order so that the median and quartiles can be read by index.
distances = distances.sort()
82 671 mdecorde
cadences = cadences.sort()
83 671 mdecorde
84 671 mdecorde
int distances_total = distances.sum()
85 671 mdecorde
int cadences_total = cadences.sum()
86 671 mdecorde
// Means over the number of measured units. coef and cadence are not declared,
// so they are stored as script binding variables.
coef = (distances_total / nDistances)
87 671 mdecorde
cadence = (cadences_total / nDistances)
88 671 mdecorde
// Report for the distances: sorted values, mean, median (element at index size/2),
// then minimum, first quartile, median, third quartile and maximum, all read by index.
println "distances $distances"
89 671 mdecorde
println "distance moyenne inter-mayonnaise : $distances_total / $nDistances = $coef"
90 671 mdecorde
println "distance medianne inter-mayonnaise : "+distances[(int)(distances.size() / 2)]
91 671 mdecorde
println "distance quartils : "+distances[0]+" "+distances[(int)(distances.size() / 4)] + " "+distances[(int)(distances.size() / 2)]+" "+distances[(int)(3*distances.size() / 4)]+" "+distances[(int)(distances.size() -1)]
92 671 mdecorde
// Same report for the cadences.
println "cadences $cadences"
93 671 mdecorde
println "cadence moyenne : $cadences_total / $nDistances = $cadence"
94 671 mdecorde
println "cadence medianne : "+cadences[(int)(cadences.size() / 2)]
95 671 mdecorde
println "cadence quartils : "+cadences[0]+" "+cadences[(int)(cadences.size() / 4)] + " "+cadences[(int)(cadences.size() / 2)]+" "+cadences[(int)(3*cadences.size() / 4)]+" "+cadences[(int)(cadences.size() -1)]
96 671 mdecorde
97 671 mdecorde
// Macro result: mean distance, mean cadence, and the underlying sorted series.
return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]