Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / mesures1 / NatureOfTheFirstUnitMacro.groovy @ 1962

History | View | Annotate | Download (2.9 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit.mesures1
7

    
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.apache.commons.lang.StringUtils;
15

    
16
// BEGINNING OF PARAMETERS

// URSQL selector of the schemas to process (checked against Schema elements below).
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

// Minimal schema size handed to the schema selection.
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// URSQL selector of the units to keep inside each selected schema.
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql

// Name of the property that describes the unit. It is first looked up as a word
// property of the CQP corpus; when the corpus has no such word property, the URS
// unit property of the same name is read instead.
@Field @Option(name="word_property", usage="word property, or URS unit property if the corpus has no such word property", widget="String", required=false, def="CATEGORIE")
String word_property

// corpusViewSelection is not declared in this script; presumably it is bound by
// the macro runner to the current selection -- TODO confirm.
if (!(corpusViewSelection instanceof CQPCorpus)) {
    println "Corpora selection is not a Corpus"
    return;
}

// Open the parameters input dialog box; stop when it does not return true
if (!ParametersDialog.open(this)) return;
// END OF PARAMETERS
37

    
38
// Resolve the CQP corpus and its URS (Analec) annotation corpus.
CQPCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// Stop early when either URSQL selector cannot be evaluated on the annotations.
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
    println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
    return;
}

if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
    println "** $unit_ursql unit URSQL cannot be computed in the corpus."
    return;
}

def CQI = CQPSearchEngine.getCqiClient()

// null when the corpus has no word property named word_property; the counting
// code then falls back to the URS unit property of the same name.
def prop = corpus.getProperty(word_property)

// 'debug' is not declared anywhere in this script, so a bare reference only works
// when the caller put it in the script binding and throws MissingPropertyException
// otherwise. Read it from the binding when present, else use false (the value the
// unit filtering of this macro already passes).
def debugLevel = binding.hasVariable("debug") ? binding.getVariable("debug") : false

// 999999 is passed as the last argument; presumably the maximal schema size,
// i.e. "no upper bound" -- TODO confirm against AnalecUtils.selectSchemasInCorpus.
def schemas = AnalecUtils.selectSchemasInCorpus(debugLevel, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);

// forme -> number of schemas whose first unit has that forme
def freqs = [:]
57

    
58
// For every selected schema, take its first matching unit and count the value
// ("forme") that describes it.
for (def schema : schemas) {

    // getUnitesSousjacentesNonTriees() gives the schema units without ordering.
    def allUnites = schema.getUnitesSousjacentesNonTriees()
    def units = AnalecUtils.filterElements(false, allUnites, unit_ursql)

    // Schemas without any matching unit do not contribute to the index.
    if (units.size() == 0) continue;

    // The units are not sorted, so units[0] is not guaranteed to be the first one
    // in the text: pick the unit with the smallest start position instead. When
    // the list happens to be ordered by start position this selects units[0].
    def unit = units.min { it.getDeb() }

    String forme = null;
    if (prop == null) { // word_property is the analec unit property to use
        forme = unit.getProp(word_property)
    } else {
        // All corpus positions covered by the unit (a single position when
        // getDeb() == getFin()), then the word property values joined with spaces.
        int[] pos = unit.getDeb()..unit.getFin()
        forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ")
    }

    // Missing key counts as 0
    freqs[forme] = (freqs[forme] ?: 0) + 1
}
87

    
88
// Print the index by decreasing frequency and remember the most frequent entry.
println "Index des natures de premier maillon :"

// Formes ordered from the most to the least frequent.
def rankedFormes = freqs.keySet().sort() { it -> -freqs[it] }

int max = 0;
def result = "";
rankedFormes.each { forme ->
    def count = freqs[forme]
    println "$forme\t"+count
    // Strict comparison: on equal counts the entry met first is kept.
    if (count > max) {
        max = count
        result = "$forme: "+count
    }
}

// Value returned by the script: the top entry and the whole frequency table.
["result": result, "data": freqs]