Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / mesures1 / StabilityScoreMacro.groovy @ 1962

History | View | Annotate | Download (3.3 kB)

1 671 mdecorde
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2 671 mdecorde
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3 671 mdecorde
// @author mdecorde
4 671 mdecorde
// @author sheiden
5 671 mdecorde
// STANDARD DECLARATIONS
6 1217 mdecorde
package org.txm.macro.urs.exploit.mesures1
7 671 mdecorde
8 671 mdecorde
import org.kohsuke.args4j.*
9 671 mdecorde
import groovy.transform.Field
10 671 mdecorde
import org.txm.*
11 671 mdecorde
import org.txm.rcp.swt.widget.parameters.*
12 1217 mdecorde
import org.txm.annotation.urs.*
13 671 mdecorde
import org.txm.searchengine.cqp.corpus.*
14 671 mdecorde
import org.apache.commons.lang.StringUtils;
15 671 mdecorde
16 1962 mdecorde
// MACRO PARAMETERS
// Each @Field/@Option pair declares one parameter of the macro.

// URSQL query (TYPE@PROP=REGEX) used to select the schemas
@Field
@Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

// Minimal schema size handed to the schema selection
@Field
@Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// Schema property printed as the label of each result line
@Field
@Option(name="schema_display_property_name", usage="", widget="String", required=false, def="REF")
String schema_display_property_name

// URSQL query (TYPE@PROP=REGEX) used to filter the units of each schema
@Field
@Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql

// Name of the corpus property giving the form of a unit; when the corpus has
// no such property the name is looked up on the unit itself
@Field
@Option(name="word_property", usage="", widget="String", required=false, def="word")
String word_property
30 671 mdecorde
31 1962 mdecorde
// The macro only runs when the current selection is a CQP corpus
boolean corpusIsSelected = (corpusViewSelection instanceof CQPCorpus)
if (!corpusIsSelected) {
        println "Corpora selection is not a Corpus"
        return
}

// Open the parameters input dialog box; stop unless it reports success
boolean parametersAccepted = ParametersDialog.open(this)
if (!parametersAccepted) {
        return
}
// END OF PARAMETERS
39 671 mdecorde
40 1962 mdecorde
// Selected corpus and the URS (Analec) corpus associated with it
def corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// The schema query is validated first; the unit query is only checked afterwards
boolean schemaQueryUsable = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
if (!schemaQueryUsable) {
        println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
        return
}

boolean unitQueryUsable = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
if (!unitQueryUsable) {
        println "** $unit_ursql unit URSQL cannot be computed in the corpus."
        return
}
52 671 mdecorde
53 788 mdecorde
// CQi client used to read word forms from corpus positions
def CQI = CQPSearchEngine.getCqiClient()

// Corpus property named by word_property; the main loop treats null as
// "word_property is a unit property instead"
def prop = corpus.getProperty(word_property)

// `debug` is not declared anywhere in this script, so reading it raises
// MissingPropertyException unless the host binding supplies it. Resolve it
// through the normal lookup and fall back to `false`, the value this script
// already passes to AnalecUtils.filterElements.
// NOTE(review): confirm the debug type expected by selectSchemasInCorpus.
def debugLevel
try {
        debugLevel = debug
} catch (MissingPropertyException ignored) {
        debugLevel = false
}

// Select the schemas matching schema_ursql; 999999 is passed as the upper size bound
def schemas = AnalecUtils.selectSchemasInCorpus(debugLevel, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999)

// Accumulators over all schemas. They are deliberately left undeclared, as in
// the original, so they remain script-binding variables.
allFormesSet = new HashSet()   // every distinct form seen in any schema
nUnitesGrandTotal = 0          // number of selected units over all schemas
def coefs = []                 // one coefficient per schema, in iteration order
int n = 1                      // 1-based schema counter used for fallback labels
62 671 mdecorde
// For each selected schema: collect the distinct forms of its filtered units,
// compute coef = selected units / distinct forms (-1 when there is no form),
// add the figures to the global accumulators and print one summary line.
for (def schema : schemas) {
        def formesSet = new HashSet() // all the distinct forms of the current schema
        nUnitesTotal = 0 // left undeclared as in the original (script-binding variable)

        def allUnites = schema.getUnitesSousjacentesNonTriees()
        def units = AnalecUtils.filterElements(false, allUnites, unit_ursql)

        for (def unit : units) {
                String forme
                if (prop == null) { // word_property is the analec unit property to use
                        forme = unit.getProp(word_property)
                } else {
                        // a..a is a one-element range, so single-position units need no special case
                        int[] pos = (unit.getDeb()..unit.getFin()) as int[]
                        // join the values of the positions into one space-separated form
                        // (original author's note: ids is enough)
                        forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ")
                }

                formesSet.add(forme)
                nUnitesTotal++
        }

        // -1 flags a schema for which no form was collected
        coef = formesSet.isEmpty() ? -1 : (nUnitesTotal / formesSet.size())

        coefs << coef
        nUnitesGrandTotal += nUnitesTotal
        allFormesSet.addAll(formesSet)

        // Label: the display property when one is configured (the parameter is
        // optional, so null and empty both mean "not configured"), else "<query>-<n>"
        if (schema_display_property_name) {
                print schema.getProp(schema_display_property_name)
        } else {
                print schema_ursql+"-"+n+" : "
        }

        // The original interpolated an undefined `nUnites`; the figure reported next to
        // the selected count is the number of units under the schema before filtering
        println " (${allUnites.size()} units) : $nUnitesTotal selected units / ${formesSet.size()} forms = $coef"
        n++
}
105 671 mdecorde
106 671 mdecorde
// Macro result: per-schema coefficients ("result"), the global counters ("data")
// and the overall coefficient ("coef") = all selected units / all distinct forms.
// When no form was collected (e.g. no schema selected) the division would throw
// ArithmeticException; use -1 instead, the same marker the per-schema loop uses.
def overallCoef = allFormesSet.isEmpty() ? -1 : (nUnitesGrandTotal / allFormesSet.size())
return ["result":coefs, "data":["nUnitesTotal":nUnitesGrandTotal, "allFormesSet":allFormesSet], "coef":overallCoef]