Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / mesures1 / CoefficientStabiliteMacro.groovy @ 1217

History | View | Annotate | Download (4.8 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit.mesures1
7

    
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.apache.commons.lang.StringUtils;
15

    
16
// BEGINNING OF PARAMETERS

// Schema type to analyse; the script stops if the annotation structure does not declare it.
@Field
@Option(name="schema_type", usage="", widget="String", required=true, def="CHAINE")
String schema_type

// Schemas with fewer underlying units than this value are skipped.
@Field
@Option(name="minimum_schema_size", usage="", widget="Integer", required=true, def="3")
int minimum_schema_size

// Schema filter: property name. The filter is active only when both the name and the value are non-empty.
@Field
@Option(name="schema_property_name", usage="", widget="String", required=false, def="")
String schema_property_name

// Schema property whose value is printed as the label of each per-schema report line.
@Field
@Option(name="schema_display_property_name", usage="", widget="String", required=false, def="REF")
String schema_display_property_name

// Schema filter: regular expression that the whole property value must match (String.matches).
@Field
@Option(name="schema_property_value", usage="", widget="String", required=false, def=".*")
String schema_property_value

// Unit type to count; the script stops if the annotation structure does not declare it.
@Field
@Option(name="unit_type", usage="", widget="String", required=false, def="MENTION")
String unit_type

// Unit filter: property name. The filter is active only when both the name and the value are non-empty.
@Field
@Option(name="unit_property_name", usage="", widget="String", required=false, def="CATEGORIE")
String unit_property_name

// Unit filter: regular expression that the whole property value must match (String.matches).
@Field
@Option(name="unit_property_value", usage="", widget="String", required=false, def="GN Défini|GN Démonstratif|Nom Propre")
String unit_property_value

// Source of the form of a unit: a property of the corpus when the corpus has one with this name,
// otherwise a property of the annotation unit itself.
@Field
@Option(name="word_property", usage="", widget="String", required=false, def="word")
String word_property
43

    
44
// The macro only accepts a MainCorpus as selection.
boolean selectionIsMainCorpus = corpusViewSelection instanceof MainCorpus
if (!selectionIsMainCorpus) {
    println "Corpora selection is not a Corpus"
    return
}

// Open the parameters input dialog box; stop when open(...) reports false.
boolean parametersAccepted = ParametersDialog.open(this)
if (!parametersAccepted) {
    return
}
// END OF PARAMETERS

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)
55

    
56
// Validate the parameters against the annotation structure; each failed check prints a message and stops.
def structure = analecCorpus.getStructure()

// Schema parameters: the schema type must exist...
if (!structure.getSchemas().contains(schema_type)) {
    println "No schema with name=$schema_type"
    return
}
// ...and, when a schema filter is requested (name and value both non-empty), so must the property.
boolean schemaFilterRequested = schema_property_name.length() > 0 && schema_property_value.length() > 0
if (schemaFilterRequested && !structure.getSchemaProperties(schema_type).contains(schema_property_name)) {
    println "Schema $schema_type has no property named $schema_property_name"
    return
}

// Unit parameters: the unit type must exist...
if (!structure.getUnites().contains(unit_type)) {
    println "No unit with name=$unit_type"
    return
}
// ...and, when a unit filter is requested (name and value both non-empty), so must the property.
boolean unitFilterRequested = unit_property_name.length() > 0 && unit_property_value.length() > 0
if (unitFilterRequested && !structure.getUniteProperties(unit_type).contains(unit_property_name)) {
    println "Unit $unit_type has no property named $unit_property_name"
    return
}
85

    
86
// For each retained schema, count the units that pass the filters and the distinct forms they take,
// then report coef = selected units / distinct forms (-1 when the schema yields no form).
def CQI = CQPSearchEngine.getCqiClient()

// Corpus property named by word_property; null means word_property names an annotation unit property instead.
def prop = corpus.getProperty(word_property)

def schemas = analecCorpus.getSchemas(schema_type)

// NOTE: allFormesSet, nUnitesGrandTotal, nUnitesTotal and coef are assigned without a declaration, so they
// live in the script binding. This is kept as is in case a caller reads them back after the run.
allFormesSet = new HashSet() // distinct forms over all retained schemas
nUnitesGrandTotal = 0 // selected units over all retained schemas
def coefs = [] // one coefficient per retained schema, in iteration order
int n = 1 // rank of the retained schema, used by the fallback label

// The parameters do not change while iterating: decide once which filters are active.
boolean filterSchemas = schema_property_name.length() > 0 && schema_property_value.length() > 0
boolean filterUnitType = unit_type.length() > 0
boolean filterUnits = unit_property_name.length() > 0 && unit_property_value.length() > 0
// Same emptiness convention as the other optional parameters: an emptied field selects the fallback label.
boolean hasDisplayProperty = schema_display_property_name != null && schema_display_property_name.length() > 0

for (def schema : schemas) {
    def formesSet = new HashSet() // all the forms of the current schema
    nUnitesTotal = 0 // units of the current schema that pass the unit filters

    if (filterSchemas && !schema.getProp(schema_property_name).matches(schema_property_value)) {
        continue // ignoring this schema
    }

    def units = schema.getUnitesSousjacentesNonTriees()
    def nUnites = units.size()
    if (nUnites < minimum_schema_size) {
        continue // schema too small
    }

    for (def unit : units) {
        if (filterUnitType && !unit.getType().equals(unit_type)) {
            continue // not the requested unit type
        }
        if (filterUnits && !unit.getProp(unit_property_name).matches(unit_property_value)) {
            continue // ignoring this unit
        }

        String forme = null
        if (prop == null) { // word_property is the analec unit property to use
            forme = unit.getProp(word_property)
        } else {
            // deb..fin is a one-element range when deb == fin, so single-position units need no special case
            int[] pos = unit.getDeb()..unit.getFin()
            forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // original note: ids is enough
        }

        formesSet.add(forme)
        nUnitesTotal++
    }

    if (formesSet.size() == 0) {
        coef = -1 // sentinel: no form collected, the ratio is undefined
    } else {
        coef = (nUnitesTotal/formesSet.size())
    }
    coefs << coef
    nUnitesGrandTotal += nUnitesTotal
    allFormesSet.addAll(formesSet)

    if (hasDisplayProperty) {
        print schema.getProp(schema_display_property_name)
    } else {
        print schema_type+"-"+n+" : "
    }

    println " ($nUnites units) : $nUnitesTotal selected units / ${formesSet.size()} forms = $coef"
    n++
}
158

    
159
// Overall coefficient = selected units / distinct forms over all retained schemas.
// When no form was collected at all the ratio is undefined and the division would throw an
// ArithmeticException, so -1 is returned instead, the same sentinel used for a schema without forms.
def globalCoef = (allFormesSet.size() == 0) ? -1 : (nUnitesGrandTotal/allFormesSet.size())
return ["result":coefs, "data":["nUnitesTotal":nUnitesGrandTotal, "allFormesSet":allFormesSet], "coef":globalCoef]