Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / DensiteReferentielleMacro.groovy @ 1217

History | View | Annotate | Download (2.2 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs
7

    
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import org.txm.searchengine.cqp.corpus.*
13

    
14
// The macro only works on a main corpus: stop right away for any other selection.
boolean selectionIsMainCorpus = corpusViewSelection instanceof MainCorpus
if (!selectionIsMainCorpus) {
	println "Corpora selection is not a Corpus"
	return
}
18

    
19
// Type of the units to count; the dialog proposes "Maillon" by default.
@Field
@Option(name="unit_type", usage="", widget="String", required=false, def="Maillon")
String unit_type

// Name of the unit property used to filter the units; empty by default.
@Field
@Option(name="unit_property_name", usage="", widget="String", required=false, def="")
String unit_property_name

// Regular expression the property value is matched against; ".*" by default.
@Field
@Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
String unit_property_value

// Ask for the parameter values; stop when the dialog does not report success.
if (!ParametersDialog.open(this)) {
	return
}
29
// END OF PARAMETERS
30

    
31
MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// Validate the unit parameters against the annotation structure before counting.
def structure = analecCorpus.getStructure()
if (!structure.getUnites().contains(unit_type)) {
	println "No unit with name=$unit_type"
	return
}

// The property is only checked when both a property name and a value pattern are given.
boolean propertyFilterRequested = !unit_property_name.isEmpty() && !unit_property_value.isEmpty()
if (propertyFilterRequested) {
	// the property must be declared for this unit type
	def declaredProperties = structure.getUniteProperties(unit_type)
	if (!declaredProperties.contains(unit_property_name)) {
		println "Unit $unit_type has no property named $unit_property_name"
		return
	}
}
48

    
49
// Size of the corpus, reported below as the number of words; denominator of the density.
int nMots = corpus.getSize()

// The filters do not depend on the unit, so they are resolved once before the loop.
boolean filterByType = unit_type.length() > 0
boolean filterByProperty = unit_property_name.length() > 0 && unit_property_value.length() > 0
// Compile the regular expression a single time instead of once per unit.
def propertyPattern = filterByProperty ? (~unit_property_value) : null

// Count the units that pass the type filter and, when requested, the property filter.
int nUnites = 0
for (def unit : analecCorpus.getToutesUnites()) {
	if (filterByType && !unit.getType().equals(unit_type)) {
		continue
	}

	if (filterByProperty) {
		def propertyValue = unit.getProp(unit_property_name)
		// A unit with no value for the property cannot match: skip it instead of failing on null.
		if (propertyValue == null || !propertyPattern.matcher(propertyValue).matches()) {
			// ignoring this unit
			continue
		}
	}

	nUnites++
}

// The density is undefined for an empty corpus; dividing by zero would throw an ArithmeticException.
if (nMots == 0) {
	coef = 0
	println "densité référentielle : undefined, the corpus contains no word (nUnites = $nUnites)"
	return ["result":coef, "data":["nUnites":nUnites, "nMots":nMots]]
}

// Groovy divides two integers as BigDecimal, so the ratio is not truncated to 0.
// coef is deliberately left undeclared so that it stays a script binding variable.
coef = (nUnites / nMots)
println "densité référentielle : nUnites/nMots = $nUnites/$nMots = $coef = ${coef*100}%"
if (nUnites >= nMots) {
	// At least one unit per word is suspicious; the message states the exact condition tested.
	println "WARNING: possible encoding error. Number of units ($nUnites) is greater than or equal to the number of words ($nMots)"
}
// Result map: the density itself plus the two counts it was computed from.
return ["result":coef, "data":["nUnites":nUnites, "nMots":nMots]]