Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / mesures1 / GrammaticalCategoryMacro.groovy @ 1962

History | View | Annotate | Download (3.6 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit.mesures1
7

    
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.searchengine.cqp.*
14
import org.txm.searchengine.cqp.corpus.*
15
import org.apache.commons.lang.StringUtils;
16

    
17
// BEGINNING OF PARAMETERS
18

    
19
// Macro parameters. Each @Field/@Option pair declares one entry of the
// parameters dialog opened later by ParametersDialog.open(this).

// URSQL query (TYPE@PROP=VALUE) selecting the schemas to index.
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

// Minimal schema size, forwarded to AnalecUtils.selectSchemasInCorpus.
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// Schema property whose value is printed in the title of each per-schema index.
@Field @Option(name="schema_display_property_name", usage="schema property whose value labels each schema index in the output", widget="String", required=false, def="REF")
String schema_display_property_name

// URSQL query (TYPE@PROP=VALUE) filtering the units of each schema.
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql

// Name of the property to count: looked up first as a property of the corpus
// (corpus.getProperty); when the corpus has none, read on each unit (unit.getProp).
@Field @Option(name="property", usage="corpus word property or, if the corpus has no such property, unit property whose values are counted", widget="String", required=false, def="CATEGORIE")
String property
34

    
35
// The macro only runs on a corpus selected in the Corpora view.
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Corpora selection is not a Corpus"
	return
}

// The selection is assigned to a MainCorpus-typed variable further down the
// script; rejecting other kinds of CQPCorpus here gives a readable message
// instead of a cast failure after the user has filled in the dialog.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "** The selected corpus is not a main corpus: $corpusViewSelection"
	return
}

// Open the parameters input dialog box; stop silently if the user cancels.
if (!ParametersDialog.open(this)) return
// END OF PARAMETERS
43

    
44
// Resolve the URS (Analec) corpus associated with the selected corpus.
MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// Validate both URSQL queries before doing any work; the schema query is
// checked first and the script stops at the first one that fails.
boolean schemaQueryUsable = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
if (!schemaQueryUsable) {
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
	return
}

boolean unitQueryUsable = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
if (!unitQueryUsable) {
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
	return
}
56

    
57
// ---------------------------------------------------------------------------
// Per-schema index: for every selected schema, count the values of `property`
// over its units and print them by decreasing frequency. The same counts are
// accumulated in `allFreqs`, which the rest of the script reads afterwards.
// ---------------------------------------------------------------------------
def cqiClient = CQPSearchEngine.getCqiClient()

// If the corpus has a property named `property`, values are read from the CQP
// index over each unit's span; otherwise (null) they are read on the unit itself.
def wordProperty = corpus.getProperty(property)
if (wordProperty == null) {
	// NOTE(review): the returned structure is discarded; this looks like a
	// leftover. Confirm getStructure() has no side effect before removing it.
	analecCorpus.getStructure()
}

// NOTE(review): `debug` is not declared in the visible part of this script;
// presumably it is provided by the macro binding. Confirm.
def selectedSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999)

def allFreqs = [:] // value -> number of units, over all selected schemas
int schemaRank = 0 // 1-based rank of the current schema, used as fallback label

for (def schema : selectedSchemas) {
	schemaRank++

	def schemaFreqs = [:] // value -> number of units, for this schema only

	def candidateUnits = schema.getUnitesSousjacentesNonTriees()
	def schemaUnits = AnalecUtils.filterElements(false, candidateUnits, unit_ursql)

	for (def unit : schemaUnits) { // only counting, so unit order is irrelevant
		String value = null
		if (wordProperty == null) {
			value = unit.getProp(property)
		} else {
			// A range with equal bounds holds exactly one position, so a
			// one-word unit needs no special case.
			int[] positions = unit.getDeb()..unit.getFin()
			value = StringUtils.join(cqiClient.cpos2Str(wordProperty.getQualifiedName(), positions), " ")
		}

		schemaFreqs[value] = (schemaFreqs[value] ?: 0) + 1
		allFreqs[value] = (allFreqs[value] ?: 0) + 1
	}

	// Title: use the display property when one is given; a null or empty
	// name falls back to the schema query plus the schema rank.
	if (schema_display_property_name) {
		println "Index des natures de $unit_ursql de '"+schema.getProp(schema_display_property_name)+"' : "
	} else {
		println "Index des natures de $schema_ursql - $schemaRank : "
	}

	// Decreasing frequency, ties broken by ascending value.
	def rankedEntries = schemaFreqs.sort { a, b -> b.value <=> a.value ?: a.key <=> b.key }
	for (def entry : rankedEntries) {
		println entry.key+"\t"+entry.value
	}
}
107

    
108
// ---------------------------------------------------------------------------
// Global index: print the counts accumulated over all schemas by decreasing
// frequency, and return a map with
//   "result": "<most frequent value>: <its count>" (empty string if no counts)
//   "data":   the value -> count map itself
// ---------------------------------------------------------------------------
int max = 0
def result = ""

println "Index des natures de $schema_ursql : "

// Decreasing frequency, ties broken by ascending value.
def rankedEntries = allFreqs.sort { a, b -> b.value <=> a.value ?: a.key <=> b.key }
for (def forme : rankedEntries) {
	println forme.key+"\t"+forme.value
	if (max < forme.value) {
		max = forme.value
		// Interpolate the key only: "$forme" would render the whole
		// Map.Entry as "key=value", duplicating the count in the summary.
		result = "${forme.key}: "+forme.value
	}
}

return ["result":result, "data":allFreqs]