Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / UnitsReferentialStabilityMacro.groovy @ 2144

History | View | Annotate | Download (5.3 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

    
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.apache.commons.lang.StringUtils;
15
import org.txm.macro.urs.AnalecUtils
16
import visuAnalec.elements.*
17
import org.txm.searchengine.cqp.CQPSearchEngine
18

    
19
// Collect the corpora to process from the Corpus view selection:
// a corpus is taken as is, a partition contributes each of its parts.
def selection = []
for (item in corpusViewSelections) {
    if (item instanceof CQPCorpus) {
        selection.add(item)
    } else if (item instanceof Partition) {
        selection.addAll(item.getParts())
    }
}

// Nothing usable in the selection: tell the user and stop the macro.
if (selection.isEmpty()) {
    println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
    return false
}

// Call compute(false) on every selected corpus before it is used below.
selection.each { it.compute(false) }
31

    
32
// MACRO PARAMETERS (edited through the parameters dialog opened below)

// URSQL query selecting the schemas to process.
@Field
@Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

// Lower size bound passed to the schema selection.
@Field
@Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// Upper size bound passed to the schema selection.
@Field
@Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size

// Schema property printed as the label of each schema; when empty, a
// "<schema_ursql>-<rank>" label is printed instead.
@Field
@Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
String schema_display_property_name

// URSQL query filtering the units of each schema.
@Field
@Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql

// Passed to the schema selection.
@Field
@Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion

// Property giving the form of a unit: a CQP property of the corpus if one has
// this name, otherwise a property of the units themselves.
@Field
@Option(name="word_property", usage="", widget="String", required=false, def="word")
String word_property

// When true, the set of forms of each schema is printed as well.
@Field
@Option(name="show_values", usage="", widget="Boolean", required=false, def="false")
boolean show_values

// Debug label chosen in the dialog; replaced by a numeric level once the dialog is closed.
@Field
@Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
debug
54

    
55
// Open the parameters dialog; stop the macro when ParametersDialog.open returns false.
if (!ParametersDialog.open(this)) {
    return
}

// Replace the debug label by its rank: OFF -> 0, ON -> 1, ALL -> 2, REALLY ALL -> 3.
// Any other value is left untouched.
def debugRank = ["OFF", "ON", "ALL", "REALLY ALL"].findIndexOf { debug == it }
if (debugRank >= 0) {
    debug = debugRank
}
57

    
58

    
59
// For each selected corpus, print one line per schema selected by schema_ursql:
// the number of units matching unit_ursql, the number of distinct forms
// (word_property values) of those units, and their ratio (the coefficient).
for (def corpus : selection) {
    def analecCorpus = URSCorpora.getCorpus(corpus)

    // Check both URSQL queries against the corpus; any reported error stops the macro.
    def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
    if (errors.size() > 0) {
        println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
        return;
    }

    errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
    if (errors.size() > 0) {
        println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
        return;
    }
    def CQI = CQPSearchEngine.getCqiClient()

    // word_property is first looked up as a CQP property of the corpus; when there is
    // none with that name, it has to be a property of the units selected by unit_ursql.
    def prop = corpus.getProperty(word_property)
    if (prop == null) {
        errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, AnalecUtils.getFilterParameters(unit_ursql)[0], word_property)
        if (errors.size() > 0) {
            println "** $word_property unit property cannot be computed in the corpus with types: $errors."
            return;
        }
    }

    def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion)

    // NOTE: allFormesSet and coef are intentionally left undeclared (script binding
    // variables), as before, so anything reading them after the macro still finds them.
    allFormesSet = new HashSet() // distinct forms over all the schemas of this corpus
    def coefs = []
    int n = 1 // rank of the current schema, used in the fallback label

    int nUnitesAllSchemas = 0
    int nUnitesTotalSchemas = 0

    println "** $corpus schemas: "
    for (def schema : schemas) {
        def formesSet = new HashSet() // all the distinct forms of the current schema

        def allUnites = schema.getUnitesSousjacentesNonTriees()
        def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
        def nUnites = schema.getUnitesSousjacentes().size()
        def nUnitesTotal = units.size()

        for (def unit : units) {
            String forme = null
            if (prop == null) {
                // word_property is the unit property to use
                forme = unit.getProp(word_property)
            } else {
                // Join the CQP values of every position from getDeb() to getFin();
                // a unit with getDeb() == getFin() gives a one-position range.
                int[] pos = unit.getDeb()..unit.getFin()
                forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ")
            }
            formesSet.add(forme)
        }

        // The ratio is undefined without units or forms: report "NA" instead.
        if (formesSet.size() == 0 || nUnitesTotal == 0) {
            coef = "NA"
        } else {
            coef = (nUnitesTotal/formesSet.size())
        }
        coefs << coef
        allFormesSet.addAll(formesSet)

        // Schema label: the requested schema property, or "<query>-<rank>" when no property name is given.
        if (schema_display_property_name != null && schema_display_property_name.length() > 0) {
            print schema.getProp(schema_display_property_name)
        } else {
            print schema_ursql+"-"+n+" : "
        }

        println " ($nUnites units) : $nUnitesTotal selected units / ${formesSet.size()} ${word_property}s = $coef"
        if (show_values) {
            println "\t${word_property}s="+formesSet
        }
        n++

        nUnitesAllSchemas += nUnites
        nUnitesTotalSchemas += nUnitesTotal
    }

    // Overall coefficient of the corpus. When no form was collected (no schema selected
    // or no unit matched) the ratio is undefined: use "NA", like the per-schema case,
    // instead of failing on a division by zero.
    coef = allFormesSet.isEmpty() ? "NA" : nUnitesTotalSchemas/allFormesSet.size()
    //println "ALL : ($nUnitesAllSchemas units) : $nUnitesTotalSchemas selected units / ${allFormesSet.size()} ${word_property}s = $coef"

//    return ["result":coefs, "data":["nUnitesTotal":nUnitesTotalSchemas, "allFormesSet":allFormesSet], "coef":(coef)]
}