Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / DistanceInterMaillonnaireMacro.groovy @ 1217

History | View | Annotate | Download (3.7 kB)

1 481 mdecorde
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2 481 mdecorde
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3 481 mdecorde
// @author mdecorde
4 481 mdecorde
// @author sheiden
5 481 mdecorde
// STANDARD DECLARATIONS
6 1217 mdecorde
package org.txm.macro.urs
7 481 mdecorde
8 481 mdecorde
import org.kohsuke.args4j.*
9 481 mdecorde
import groovy.transform.Field
10 481 mdecorde
import org.txm.*
11 671 mdecorde
import org.txm.rcp.swt.widget.parameters.*
12 1217 mdecorde
import org.txm.annotation.urs.*
13 481 mdecorde
import org.txm.searchengine.cqp.corpus.*
14 481 mdecorde
import org.apache.commons.lang.StringUtils;
15 481 mdecorde
16 481 mdecorde
// BEGINNING OF PARAMETERS
17 481 mdecorde
// Type of the schemas to measure; the macro stops when the corpus structure does not declare it.
@Field
@Option(name="schema_type", usage="", widget="String", required=true, def="Coréférence")
String schema_type

// Schemas holding fewer underlying units than this (counted before any unit filtering) are skipped.
@Field
@Option(name="minimum_schema_size", usage="", widget="Integer", required=true, def="3")
int minimum_schema_size

// Name of the schema property used to filter schemas; the filter is off when this is empty.
@Field
@Option(name="schema_property_name", usage="", widget="String", required=false, def="")
String schema_property_name

// Pattern tested with matches() against the schema property value; the filter is off when this is empty.
@Field
@Option(name="schema_property_value", usage="", widget="String", required=false, def=".*")
String schema_property_value

// Type of the units kept inside each schema; the macro stops when the corpus structure does not declare it.
@Field
@Option(name="unit_type", usage="", widget="String", required=false, def="Maillon")
String unit_type

// Name of the unit property used to filter units; the filter is off when this is empty.
@Field
@Option(name="unit_property_name", usage="", widget="String", required=false, def="")
String unit_property_name

// Pattern tested with matches() against the unit property value; the filter is off when this is empty.
@Field
@Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
String unit_property_value
37 481 mdecorde
38 481 mdecorde
// The macro only runs when the current selection is a MainCorpus.
boolean selectionIsMainCorpus = corpusViewSelection instanceof MainCorpus
if (!selectionIsMainCorpus) {
	println "Corpora selection is not a Corpus"
	return
}

// Open the parameters dialog for the fields declared above and stop when it reports failure
// (presumably when the user cancels it -- confirm against ParametersDialog).
boolean parametersConfirmed = ParametersDialog.open(this)
if (!parametersConfirmed) {
	return
}

// URS annotations attached to the selected corpus.
MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)
47 481 mdecorde
48 481 mdecorde
// Check the schema parameters: the schema type must be declared in the corpus structure.
def declaredSchemaTypes = analecCorpus.getStructure().getSchemas()
if (!declaredSchemaTypes.contains(schema_type)) {
	println "No schema with name=$schema_type"
	return
}

// When a schema filter is set (both name and value non-empty), the property must exist for this schema type.
if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
	def schemaPropertyNames = analecCorpus.getStructure().getSchemaProperties(schema_type)
	if (!schemaPropertyNames.contains(schema_property_name)) {
		println "Schema $schema_type has no property named $schema_property_name"
		return
	}
}
62 481 mdecorde
63 481 mdecorde
// Check the unit parameters: the unit type must be declared in the corpus structure.
def declaredUnitTypes = analecCorpus.getStructure().getUnites()
if (!declaredUnitTypes.contains(unit_type)) {
	println "No unit with name=$unit_type"
	return
}

// When a unit filter is set (both name and value non-empty), the property must exist for this unit type.
if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
	def unitPropertyNames = analecCorpus.getStructure().getUniteProperties(unit_type)
	if (!unitPropertyNames.contains(unit_property_name)) {
		println "Unit $unit_type has no property named $unit_property_name"
		return
	}
}
77 481 mdecorde
78 481 mdecorde
// Accumulate, over every selected schema, the gaps between consecutive kept units.
def schemas = analecCorpus.getSchemas(schema_type)
def distances = 0
def nDistances = 0

// A filter is only active when both its property name and its pattern are non-empty.
boolean schemaFilterActive = schema_property_name.length() > 0 && schema_property_value.length() > 0
boolean unitFilterActive = unit_property_name.length() > 0 && unit_property_value.length() > 0

for (def schema : schemas) {

	// ignoring schemas whose property value does not match the pattern
	if (schemaFilterActive && !schema.getProp(schema_property_name).matches(schema_property_value)) {
		continue
	}

	// the size threshold applies to all underlying units, before the unit filters below
	def underlyingUnits = schema.getUnitesSousjacentesNonTriees()
	int underlyingCount = underlyingUnits.size()
	if (underlyingCount < minimum_schema_size) {
		continue
	}

	def keptUnits = []
	for (def candidate : underlyingUnits) {
		// the type test comes first so the property is only read on units of the requested type
		boolean keep = (unit_type.length() == 0 || candidate.getType().equals(unit_type)) &&
				(!unitFilterActive || candidate.getProp(unit_property_name).matches(unit_property_value))
		if (keep) {
			keptUnits << candidate
		}
		// otherwise: ignoring this unit
	}

	// order the kept units by their getDeb() value
	keptUnits.sort() { left, right -> left.getDeb() <=> right.getDeb() }

	// one distance per pair of consecutive units: getDeb() of the later one minus getFin() of the earlier one
	for (int next = 1 ; next < keptUnits.size() ; next++) {
		distances += keptUnits[next].getDeb() - keptUnits[next - 1].getFin()
		nDistances++
	}
}
120 481 mdecorde
121 481 mdecorde
// Mean distance between consecutive units over all measured pairs.
// nDistances stays at 0 when no schema passed the filters or none kept at least two units;
// dividing would then throw an ArithmeticException, so that case is reported with a mean of 0.
// coef is deliberately left undeclared (script binding variable), as before.
if (nDistances == 0) {
	println "No distance measured: no selected schema contains at least two matching units"
	coef = 0
} else {
	coef = (distances / nDistances)
}
println "distance moyenne inter-mayonnaise : $distances / $nDistances = $coef"

// "result" is the mean; "data" exposes the raw sum and count so callers can detect the empty case.
return ["result":coef, "data":["distances":distances, "nDistances":nDistances]]