Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / analec / exploit / mesures1 / NatureDuPremierMaillonMacro.groovy @ 1166

History | View | Annotate | Download (4.4 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.analec.exploit.mesures1
7

    
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.analec.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.apache.commons.lang.StringUtils;
15

    
16
// BEGINNING OF PARAMETERS
17
// Type of the Analec schemas to inspect; the macro stops with a message when
// the corpus structure does not declare this schema type.
@Field @Option(name="schema_type", usage="", widget="String", required=true, def="CHAINE")
String schema_type

// Schemas with fewer underlying units than this value are skipped
// (the count is taken before any unit filtering).
@Field @Option(name="minimum_schema_size", usage="", widget="Integer", required=true, def="3")
int minimum_schema_size

// Optional schema filter: name of the schema property to test.
// The filter is active only when both the name and the value are non-empty.
@Field @Option(name="schema_property_name", usage="", widget="String", required=false, def="")
String schema_property_name

// Optional schema filter: pattern passed to matches() on the schema property value.
@Field @Option(name="schema_property_value", usage="", widget="String", required=false, def=".*")
String schema_property_value

// Type of the units to keep inside each schema; the macro stops with a message
// when the corpus structure does not declare this unit type.
@Field @Option(name="unit_type", usage="", widget="String", required=false, def="MENTION")
String unit_type

// Optional unit filter: name of the unit property to test.
// The filter is active only when both the name and the value are non-empty.
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="")
String unit_property_name

// Optional unit filter: pattern passed to matches() on the unit property value.
@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
String unit_property_value

// Property giving the "nature" of the first unit: looked up first as a corpus
// property; when the corpus returns null for it, it is read from the Analec unit.
@Field @Option(name="word_property", usage="", widget="String", required=false, def="CATEGORIE")
String word_property
40

    
41
// The macro only accepts a MainCorpus as the current selection.
boolean mainCorpusSelected = corpusViewSelection instanceof MainCorpus
if (!mainCorpusSelected) {
    println "Corpora selection is not a Corpus"
    return
}

// Open the parameters input dialog box; stop when open() yields a false value.
if (!ParametersDialog.open(this)) {
    return
}
// END OF PARAMETERS

// Typed handle on the selection and its Analec annotation corpus.
MainCorpus corpus = corpusViewSelection
def analecCorpus = AnalecCorpora.getCorpus(corpus)
52

    
53
// Stop early when the Analec structure does not declare the requested schema type.
boolean schemaTypeDeclared = analecCorpus.getStructure().getSchemas().contains(schema_type)
if (!schemaTypeDeclared) {
    println "No schema with name=$schema_type"
    return
}

// The schema property filter is only requested when both its name and its
// value are non-empty; in that case the property must exist for the schema type.
boolean schemaPropertyGiven = schema_property_name.length() > 0 && schema_property_value.length() > 0
if (schemaPropertyGiven) {
    // test property existence
    def declaredSchemaProps = analecCorpus.getStructure().getSchemaProperties(schema_type)
    if (!declaredSchemaProps.contains(schema_property_name)) {
        println "Schema $schema_type has no property named $schema_property_name"
        return
    }
}
66

    
67
// Stop early when the Analec structure does not declare the requested unit type.
boolean unitTypeDeclared = analecCorpus.getStructure().getUnites().contains(unit_type)
if (!unitTypeDeclared) {
    println "No unit with name=$unit_type"
    return
}

// The unit property filter is only requested when both its name and its
// value are non-empty; in that case the property must exist for the unit type.
boolean unitPropertyGiven = unit_property_name.length() > 0 && unit_property_value.length() > 0
if (unitPropertyGiven) {
    // test property existence
    def declaredUnitProps = analecCorpus.getStructure().getUniteProperties(unit_type)
    if (!declaredUnitProps.contains(unit_property_name)) {
        println "Unit $unit_type has no property named $unit_property_name"
        return
    }
}
80

    
81
// CQi client used further down to read property values at corpus positions.
def CQI = CQPSearchEngine.getCqiClient()

// Corpus property named by word_property; the script tests it against null
// later and falls back to the Analec unit property of the same name.
def prop = corpus.getProperty(word_property)

// All schemas of the requested type.
def schemas = analecCorpus.getSchemas(schema_type)

// Frequency table: nature of a schema's first unit -> number of schemas.
def freqs = [:]
90
// The filters depend only on the macro parameters, so whether each one is
// active is decided once instead of on every iteration.
boolean schemaFilterEnabled = schema_property_name.length() > 0 && schema_property_value.length() > 0
boolean unitTypeFilterEnabled = unit_type.length() > 0
boolean unitFilterEnabled = unit_property_name.length() > 0 && unit_property_value.length() > 0

// For every retained schema, find its first unit (smallest start position
// among the retained units) and count the nature of that unit in freqs.
for (def schema : schemas) {

    if (schemaFilterEnabled && !schema.getProp(schema_property_name).matches(schema_property_value)) {
        // ignoring this schema
        continue
    }

    // The minimum size is tested on all underlying units, before unit filtering.
    def allUnites = schema.getUnitesSousjacentesNonTriees()
    if (allUnites.size() < minimum_schema_size) continue

    def units = []
    for (def candidate : allUnites) {
        if (unitTypeFilterEnabled && !candidate.getType().equals(unit_type)) {
            // ignoring this unit: wrong type
            continue
        }
        if (unitFilterEnabled && !candidate.getProp(unit_property_name).matches(unit_property_value)) {
            // ignoring this unit: property value does not match
            continue
        }
        units << candidate
    }
    if (units.size() == 0) continue

    // Order by start position; <=> compares without relying on int subtraction.
    units.sort { u1, u2 -> u1.getDeb() <=> u2.getDeb() }
    def firstUnit = units[0]

    String forme
    if (prop == null) {
        // word_property is the Analec unit property to use
        forme = firstUnit.getProp(word_property)
    } else {
        // Join the property values of every position covered by the unit;
        // a one-position unit simply yields a one-element range.
        int[] positions = (firstUnit.getDeb()..firstUnit.getFin()) as int[]
        forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), positions), " ")
    }

    freqs[forme] = (freqs[forme] ?: 0) + 1
}
144

    
145
println "index des natures de premier maillon :"

// Natures ordered by decreasing frequency.
def ranked = freqs.keySet().sort { nature -> -freqs[nature] }

// Print one "nature<TAB>count" line per entry and remember the entry with the
// strictly highest count as the summary result.
int highest = 0
def result = ""
ranked.each { nature ->
    def count = freqs[nature]
    println "$nature\t"+count
    if (count > highest) {
        highest = count
        result = "$nature: "+count
    }
}

// Value of the script: summary string plus the full frequency table.
["result": result, "data": freqs]