Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / analec / DistanceInterMaillonnaireMacro.groovy @ 1166

History | View | Annotate | Download (3.7 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.analec
7

    
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.analec.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.apache.commons.lang.StringUtils;
15

    
16
// BEGINNING OF PARAMETERS
// Each @Field below is a script-level parameter; the values are expected to be
// set through ParametersDialog.open(this) before the computation starts.

// Schema type to measure; the script stops if the corpus structure has no schema of this type.
@Field
@Option(name="schema_type", usage="", widget="String", required=true, def="Coréférence")
String schema_type

// Schemas with fewer underlying units than this value are skipped.
@Field
@Option(name="minimum_schema_size", usage="", widget="Integer", required=true, def="3")
int minimum_schema_size

// Optional schema filter: property name (an empty name disables the filter).
@Field
@Option(name="schema_property_name", usage="", widget="String", required=false, def="")
String schema_property_name

// Optional schema filter: regular expression the property value must match entirely.
@Field
@Option(name="schema_property_value", usage="", widget="String", required=false, def=".*")
String schema_property_value

// Unit type to keep; the script stops if the corpus structure has no unit of this type.
@Field
@Option(name="unit_type", usage="", widget="String", required=false, def="Maillon")
String unit_type

// Optional unit filter: property name (an empty name disables the filter).
@Field
@Option(name="unit_property_name", usage="", widget="String", required=false, def="")
String unit_property_name

// Optional unit filter: regular expression the property value must match entirely.
@Field
@Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
String unit_property_value
37

    
38
// The macro needs a MainCorpus as the current selection; anything else aborts the run.
boolean selectionIsMainCorpus = corpusViewSelection instanceof MainCorpus
if (!selectionIsMainCorpus) {
        println "Corpora selection is not a Corpus"
        return
}

// Stop silently when the parameters dialog does not return true
// (presumably cancelled by the user -- confirm against ParametersDialog).
if (!ParametersDialog.open(this)) {
        return
}

// Analec annotations attached to the selected corpus.
MainCorpus corpus = corpusViewSelection
def analecCorpus = AnalecCorpora.getCorpus(corpus)
47

    
48
// check Schema parameters: the schema type must be declared in the annotation structure
if (!analecCorpus.getStructure().getSchemas().contains(schema_type)) {
        println "No schema with name=$schema_type"
        return
}

// test property existence, only when both a property name and a value pattern were given
boolean schemaPropertyRequested = !(schema_property_name.length() == 0 || schema_property_value.length() == 0)
if (schemaPropertyRequested
                && !analecCorpus.getStructure().getSchemaProperties(schema_type).contains(schema_property_name)) {
        println "Schema $schema_type has no property named $schema_property_name"
        return
}
62

    
63
// check unit parameters: the unit type must be declared in the annotation structure
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
        println "No unit with name=$unit_type"
        return
}

// test property existence, only when both a property name and a value pattern were given
boolean unitPropertyRequested = !(unit_property_name.length() == 0 || unit_property_value.length() == 0)
if (unitPropertyRequested
                && !analecCorpus.getStructure().getUniteProperties(unit_type).contains(unit_property_name)) {
        println "Unit $unit_type has no property named $unit_property_name"
        return
}
77

    
78
// Mean distance between consecutive units of the selected schemas.
// For every retained schema the retained units are ordered by start position
// (getDeb()); the distance between two neighbours is the start of the next unit
// minus the end (getFin()) of the previous one. All distances of all schemas are
// summed and divided by their count.
// Returns a map: "result" -> the mean (null when nothing could be measured),
// "data" -> the raw sum ("distances") and count ("nDistances").
def schemas = analecCorpus.getSchemas(schema_type)

// A property filter is active only when both its name and its value pattern are non-empty.
boolean filterSchemas = schema_property_name.length() > 0 && schema_property_value.length() > 0
boolean filterUnits = unit_property_name.length() > 0 && unit_property_value.length() > 0

// Compile each regular expression once; String.matches() would recompile it on every call.
def schemaValuePattern = filterSchemas ? java.util.regex.Pattern.compile(schema_property_value) : null
def unitValuePattern = filterUnits ? java.util.regex.Pattern.compile(unit_property_value) : null

def distances = 0
def nDistances = 0
for (def schema : schemas) {

        if (filterSchemas && !schemaValuePattern.matcher(schema.getProp(schema_property_name)).matches()) {
                // property value does not match: ignoring this schema
                continue
        }

        def allUnites = schema.getUnitesSousjacentesNonTriees()
        // the size threshold applies to all underlying units, before any unit filtering
        if (allUnites.size() < minimum_schema_size) continue

        def units = []
        for (def unit : allUnites) {

                if (unit_type.length() > 0 && !unit.getType().equals(unit_type)) {
                        // not the requested unit type: ignoring this unit
                        continue
                }

                if (filterUnits && !unitValuePattern.matcher(unit.getProp(unit_property_name)).matches()) {
                        // property value does not match: ignoring this unit
                        continue
                }

                units << unit
        }

        // order the retained units by start position so that neighbours are consecutive
        units.sort() { u1, u2 -> u1.getDeb() <=> u2.getDeb() }

        for (int i = 0 ; i < units.size() - 1 ; i++) {
                distances += units[i + 1].getDeb() - units[i].getFin()
                nDistances++
        }
}

// No pair of consecutive units was found (no schema retained, or none with at least
// two retained units): the mean is undefined and dividing would throw ArithmeticException.
if (nDistances == 0) {
        println "No distance measured: no selected schema has at least two matching units"
        return ["result":null, "data":["distances":distances, "nDistances":nDistances]]
}

// declared locally so that the value does not leak into the script binding
def coef = (distances / nDistances)
println "distance moyenne inter-mayonnaise : $distances / $nDistances = $coef"

return ["result":coef, "data":["distances":distances, "nDistances":nDistances]]