Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / CategorieGrammaticaleMacro.groovy @ 1217

History | View | Annotate | Download (5.1 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs
7

    
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.apache.commons.lang.StringUtils;
15

    
16
// BEGINNING OF PARAMETERS

// Schema type to process; the macro stops if the annotation structure has no such schema.
@Field @Option(name="schema_type", usage="", widget="String", required=true, def="Coréférence")
String schema_type

// Schemas with fewer underlying units than this are skipped.
@Field @Option(name="minimum_schema_size", usage="", widget="Integer", required=true, def="3")
int minimum_schema_size

// Optional schema filter: used only when both name and value are non-empty.
@Field @Option(name="schema_property_name", usage="", widget="String", required=false, def="")
String schema_property_name

// Regular expression the schema property value must match (tested with String.matches).
@Field @Option(name="schema_property_value", usage="", widget="String", required=false, def=".*")
String schema_property_value

// Schema property whose value labels the index printed for each schema.
@Field @Option(name="schema_display_property_name", usage="", widget="String", required=false, def="Nom du référent")
String schema_display_property_name

// Unit type to count; when non-empty, units of any other type are skipped.
@Field @Option(name="unit_type", usage="", widget="String", required=false, def="Maillon")
String unit_type

// Optional unit filter: used only when both name and value are non-empty.
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="")
String unit_property_name

// Regular expression the unit property value must match (tested with String.matches).
@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
String unit_property_value

// Property whose values are counted: a corpus word property when the corpus has one
// with this name, otherwise a property of the unit itself.
// NOTE(review): the metaVar separators look like tabs expanded to spaces by the page
// extraction - confirm against the repository file before relying on them.
@Field @Option(name="word_property", usage="", widget="StringArray", metaVar="Catégorie        CATEGORIE        pos        fropos        frpos        word        lemma        frlemma        frolemma        #forme#", required=false, def="Catégorie")
String word_property
44

    
45
// The macro only runs on a MainCorpus selection.
if (!(corpusViewSelection instanceof MainCorpus)) {
    println "Corpora selection is not a Corpus"
    return
}

// Open the parameters input dialog box; stop when open() does not return true.
if (!ParametersDialog.open(this)) {
    return
}
// END OF PARAMETERS

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// The requested schema type must be declared in the annotation structure.
if (!analecCorpus.getStructure().getSchemas().contains(schema_type)) {
    println "No schema with name=$schema_type"
    return
}

// When a schema filter is set, test that the property exists for this schema type.
if (!schema_property_name.isEmpty() && !schema_property_value.isEmpty()) {
    def schemaProps = analecCorpus.getStructure().getSchemaProperties(schema_type)
    if (!schemaProps.contains(schema_property_name)) {
        println "Schema $schema_type has no property named $schema_property_name"
        return
    }
}

// The requested unit type must be declared in the annotation structure.
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
    println "No unit with name=$unit_type"
    return
}

// When a unit filter is set, test that the property exists for this unit type.
if (!unit_property_name.isEmpty() && !unit_property_value.isEmpty()) {
    def unitProps = analecCorpus.getStructure().getUniteProperties(unit_type)
    if (!unitProps.contains(unit_property_name)) {
        println "Unit $unit_type has no property named $unit_property_name"
        return
    }
}
84

    
85
def CQI = CQPSearchEngine.getCqiClient()

// When the corpus has a property named word_property, the counted value of a unit is
// built from that property through CQP; when it has none (prop == null),
// word_property is read as a property of the unit itself (see the loop below).
def prop = corpus.getProperty(word_property)

def schemas = analecCorpus.getSchemas(schema_type)
def allFreqs = [:] // counted value -> occurrences over all retained schemas
def n = 0          // rank of the current schema, skipped schemas included

// Filter switches do not change during the loops, so they are evaluated once.
boolean filterSchemas = schema_property_name.length() > 0 && schema_property_value.length() > 0
boolean filterUnitType = unit_type.length() > 0
boolean filterUnitProp = unit_property_name.length() > 0 && unit_property_value.length() > 0
// An empty display property name falls back to the numbered label, like a null one.
boolean hasDisplayProperty = schema_display_property_name != null && schema_display_property_name.length() > 0

for (def schema : schemas) {
    n++

    if (filterSchemas && !schema.getProp(schema_property_name).matches(schema_property_value)) {
        continue // ignoring this schema: its property does not match the filter
    }

    def allUnites = schema.getUnitesSousjacentesNonTriees()
    // The size threshold is tested on all underlying units, before any unit filtering.
    if (allUnites.size() < minimum_schema_size) continue

    def freqs = [:] // counted value -> occurrences in this schema

    for (def unit : allUnites) { // no need to sort units for counting
        if (filterUnitType && !unit.getType().equals(unit_type)) {
            continue
        }

        if (filterUnitProp && !unit.getProp(unit_property_name).matches(unit_property_value)) {
            continue // ignoring this unit: its property does not match the filter
        }

        String forme
        if (prop == null) { // word_property is the unit property to use
            forme = unit.getProp(word_property)
        } else {
            // Positions from getDeb() to getFin(); a one-position unit gives a one-element range.
            int[] pos = (unit.getDeb()..unit.getFin()) as int[]
            forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ")
        }

        freqs[forme] = (freqs[forme] ?: 0) + 1
        allFreqs[forme] = (allFreqs[forme] ?: 0) + 1
    }

    if (hasDisplayProperty) {
        println "index des natures de $unit_type de '"+schema.getProp(schema_display_property_name)+"' : "
    } else {
        println "index des natures de $schema_type - $n : "
    }

    // Decreasing frequency, ties broken by key order.
    for (def forme : freqs.sort() { a, b -> b.value <=> a.value ?: a.key <=> b.key }) {
        println forme.key+"\t"+forme.value
    }
}
156

    
157
// Highest frequency seen so far and its "value: frequency" label, returned as "result".
int max = 0
def result = ""

println "index des natures de $schema_type : "
// Decreasing frequency, ties broken by key order.
for (def forme : allFreqs.sort() { a, b -> b.value <=> a.value ?: a.key <=> b.key }) {
    println forme.key+"\t"+forme.value
    if (max < forme.value) {
        max = forme.value
        // Use the entry key only: interpolating the whole entry renders "key=value".
        result = "${forme.key}: ${forme.value}"
    }
}

// "result" is the most frequent value with its count; "data" is the full frequency map.
return ["result":result, "data":allFreqs]