Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / UnitsInterdistanceMacro.groovy @ 2143

History | View | Annotate | Download (4.6 kB)

1 671 mdecorde
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2 671 mdecorde
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3 671 mdecorde
// @author mdecorde
4 671 mdecorde
// @author sheiden
5 671 mdecorde
// STANDARD DECLARATIONS
6 2082 mdecorde
package org.txm.macro.urs.exploit
7 671 mdecorde
8 671 mdecorde
import org.kohsuke.args4j.*
9 671 mdecorde
import groovy.transform.Field
10 671 mdecorde
import org.txm.*
11 1968 mdecorde
import org.txm.macro.urs.AnalecUtils
12 1968 mdecorde
import visuAnalec.elements.*
13 671 mdecorde
import org.txm.rcp.swt.widget.parameters.*
14 1217 mdecorde
import org.txm.annotation.urs.*
15 671 mdecorde
import org.txm.searchengine.cqp.corpus.*
16 671 mdecorde
import org.apache.commons.lang.StringUtils;
17 671 mdecorde
18 2105 mdecorde
// Build the list of corpora to process from the Corpus view selection:
// a corpus is taken as is, a partition contributes each of its parts.
// Any other kind of selected object is ignored.
def selection = []
for (def selected : corpusViewSelections) {
        if (selected instanceof CQPCorpus) {
                selection.add(selected)
        } else if (selected instanceof Partition) {
                selection.addAll(selected.getParts())
        }
}

// Nothing usable was selected: tell the user and stop the macro here.
if (selection.isEmpty()) {
        println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
        return false
}

// compute(false) is invoked on every retained corpus before it is queried below.
selection.each { it.compute(false) }
30 2105 mdecorde
31 671 mdecorde
// BEGINNING OF PARAMETERS
32 1962 mdecorde
// URSQL query (TYPE@PROP=REGEX) selecting the schemas to analyse;
// handed to AnalecUtils.selectSchemasInCorpus and AnalecUtils.isPropertyDefined.
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

// Lower size bound handed to AnalecUtils.selectSchemasInCorpus.
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// Upper size bound handed to AnalecUtils.selectSchemasInCorpus.
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size

// NOTE(review): declared as a parameter but not referenced anywhere in the visible part of this macro.
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
String schema_display_property_name

// URSQL query (TYPE@PROP=REGEX) used to filter the units of each schema;
// handed to AnalecUtils.filterElements and AnalecUtils.isPropertyDefined.
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql

// Verbosity level. Left untyped on purpose: it holds the dialog's String value first
// and is converted to an integer level just below.
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
debug

// Ask for the parameter values; stop when ParametersDialog.open reports failure.
if (!ParametersDialog.open(this)) return;

// Convert the textual level into an integer (OFF=0, ON=1, ALL=2, REALLY ALL=3).
// A value that is already numeric is kept unchanged. Anything else (null, unknown label)
// falls back to 0 so that the numeric "debug > 0" tests further down never receive a String.
if (debug == "OFF") debug = 0
else if (debug == "ON") debug = 1
else if (debug == "ALL") debug = 2
else if (debug == "REALLY ALL") debug = 3
else if (!(debug instanceof Number)) debug = 0
50 671 mdecorde
51 2105 mdecorde
// For each selected corpus:
//  1. check that the schema and unit URSQL queries can be evaluated on its annotations,
//  2. for every selected schema, sort its filtered units and measure the gap between
//     each unit and its neighbours (getDeb() of the later unit - getFin() of the earlier unit - 1;
//     presumably token positions, so two adjacent units are at distance 0 -- TODO confirm),
//  3. print mean, median and quartiles of
//       - "distances": the gap between each unit and the previous one,
//       - "cadences" : for each unit, the smaller of the gaps to its previous and next unit.
for (def corpus : selection) {
        def analecCorpus = URSCorpora.getCorpus(corpus)

        // A non-empty result is treated as the list of problems preventing the query from being used.
        // NOTE(review): "return" ends the whole macro, so the remaining corpora are not processed.
        def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
        if (errors.size() > 0) {
                println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
                return;
        }

        errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
        if (errors.size() > 0) {
                println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
                return;
        }

        def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size)
        def distances = []
        def nDistances = 0
        def cadences = []
        for (def schema : schemas) {

                def allUnites = schema.getUnitesSousjacentesNonTriees()
                def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
                Collections.sort(units)

                // A schema left with fewer than two units has no unit pair: it yields no distance,
                // and its lone unit has no neighbour to derive a cadence from. Skipping it keeps the
                // "no neighbour" placeholder out of the cadence statistics.
                if (units.size() < 2) continue

                int lastIndex = units.size() - 1
                for (int i = 0 ; i <= lastIndex ; i++) {
                        // Placeholders for "no neighbour on that side"; since there are at least two
                        // units, at least one side is always replaced by a real gap.
                        int d1 = Integer.MAX_VALUE // gap to the next unit
                        int d2 = Integer.MAX_VALUE // gap to the previous unit

                        if (i < lastIndex) {
                                d1 = units[i+1].getDeb() - units[i].getFin() - 1
                                if (d1 < 0) d1 = 0 // the unit runs past the start of the next one
                        }

                        if (i > 0) {
                                d2 = units[i].getDeb() - units[i-1].getFin() - 1
                                // NOTE(review): the raw value is recorded, so "distances" may contain negative
                                // numbers when units overlap; only the cadence uses the clamped value. Kept as
                                // in the original -- confirm whether distances should be clamped as well.
                                distances << d2
                                nDistances++
                                if (d2 < 0) d2 = 0 // the previous unit runs past the start of this one
                        }

                        cadences << Math.min(d1, d2)
                }
        }

        // Without any unit pair, sum() would return null and the means would divide by zero.
        if (nDistances == 0) {
                println "** $corpus: no pair of consecutive units found with schema '$schema_ursql' and unit '$unit_ursql': no distance to compute."
                continue
        }

        distances = distances.sort()
        cadences = cadences.sort()

        int distances_total = distances.sum()
        int cadences_total = cadences.sum()
        // coef and cadence are deliberately left undeclared, as in the original: Groovy stores
        // undeclared script variables in the script binding.
        coef = (distances_total / nDistances)
        // One cadence is recorded per unit, not per unit pair: the mean divides by the number of
        // cadences, which is also the denominator shown in the report line below.
        cadence = (cadences_total / cadences.size())

        println "$corpus distances:"
        if (debug > 0) println "distances $distances"
        println "distance moyenne : $distances_total / ${distances.size()} = $coef"
        println "distance medianne : "+distances[(int)(distances.size() / 2)]
        println "distance quartils : "+distances[0]+" "+distances[(int)(distances.size() / 4)] + " "+distances[(int)(distances.size() / 2)]+" "+distances[(int)(3*distances.size() / 4)]+" "+distances[(int)(distances.size() -1)]
        if (debug > 0) println "cadences $cadences"
        println "cadence moyenne : $cadences_total / ${cadences.size()} = $cadence"
        println "cadence medianne : "+cadences[(int)(cadences.size() / 2)]
        println "cadence quartils : "+cadences[0]+" "+cadences[(int)(cadences.size() / 4)] + " "+cadences[(int)(cadences.size() / 2)]+" "+cadences[(int)(3*cadences.size() / 4)]+" "+cadences[(int)(cadences.size() -1)]

        //return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]
}