Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / mesures1 / LongueurMoyenneMacro.groovy @ 1217

History | View | Annotate | Download (5.3 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5

    
6
// STANDARD DECLARATIONS
7
package org.txm.macro.urs.exploit.mesures1
8

    
9
import org.kohsuke.args4j.*
10
import groovy.transform.Field
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.txm.Toolbox
15
import org.txm.rcp.commands.*
16

    
17

    
18
// BEGINNING OF PARAMETERS
19
// Type of the schemas to measure; the macro stops if the annotation structure does not declare it.
@Field
@Option(name="schema_type", usage="", widget="String", required=true, def="CHAINE")
String schema_type

// Schemas with fewer selected units than this value are left out of the statistics.
@Field
@Option(name="minimum_schema_size", usage="", widget="Integer", required=true, def="3")
int minimum_schema_size

// Optional schema filter: name of the schema property to test.
// Its value is also what gets listed for each schema in the printed index.
@Field
@Option(name="schema_property_name", usage="", widget="String", required=false, def="")
String schema_property_name

// Optional schema filter: pattern the property value is tested against with matches(...).
// The filter is applied only when both the name and this pattern are non-empty.
@Field
@Option(name="schema_property_value", usage="", widget="String", required=false, def=".*")
String schema_property_value

// Type of the units counted inside each schema; units of any other type are skipped.
@Field
@Option(name="unit_type", usage="", widget="String", required=false, def="MENTION")
String unit_type

// Optional unit filter: name of the unit property to test.
@Field
@Option(name="unit_property_name", usage="", widget="String", required=false, def="")
String unit_property_name

// Optional unit filter: pattern the property value is tested against with matches(...).
// The filter is applied only when both the name and this pattern are non-empty.
@Field
@Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
String unit_property_value
39

    
40
// The macro only handles a MainCorpus selection: report and stop for anything else.
boolean selectionIsMainCorpus = corpusViewSelection instanceof MainCorpus
if (!selectionIsMainCorpus) {
	println "Corpora selection is not a Corpus"
	return
}

// Open the parameters dialog; stop when it reports a false result
// (presumably the user cancelled it -- to be confirmed against ParametersDialog).
boolean parametersAccepted = ParametersDialog.open(this)
if (!parametersAccepted) {
	return
}
46
// END OF PARAMETERS
47

    
48
MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// check Schema parameters: the schema type must be declared in the annotation structure
boolean schemaTypeDeclared = analecCorpus.getStructure().getSchemas().contains(schema_type)
if (!schemaTypeDeclared) {
	println "No schema with name=$schema_type"
	return
}

// test schema property existence, only when both the property name and its value pattern are set
boolean schemaFilterRequested = !schema_property_name.isEmpty() && !schema_property_value.isEmpty()
if (schemaFilterRequested) {
	def declaredSchemaProperties = analecCorpus.getStructure().getSchemaProperties(schema_type)
	if (!declaredSchemaProperties.contains(schema_property_name)) {
		println "Schema $schema_type has no property named $schema_property_name"
		return
	}
}

// check unit parameters: the unit type must be declared in the annotation structure
boolean unitTypeDeclared = analecCorpus.getStructure().getUnites().contains(unit_type)
if (!unitTypeDeclared) {
	println "No unit with name=$unit_type"
	return
}

// test unit property existence, only when both the property name and its value pattern are set
boolean unitFilterRequested = !unit_property_name.isEmpty() && !unit_property_value.isEmpty()
if (unitFilterRequested) {
	def declaredUnitProperties = analecCorpus.getStructure().getUniteProperties(unit_type)
	if (!declaredUnitProperties.contains(unit_property_name)) {
		println "Unit $unit_type has no property named $unit_property_name"
		return
	}
}
80

    
81
def schemas = analecCorpus.getSchemas(schema_type)

int nSchemas = 0;

// lens:      number of selected units -> number of schemas containing that many selected units
// lensnames: number of selected units -> schema_property_name values of those schemas
def lens = [:]
def lensnames = [:]

// A property filter is active only when both its property name and its value pattern are non-empty.
boolean schemaFilterActive = !schema_property_name.isEmpty() && !schema_property_value.isEmpty()
boolean unitFilterActive = !unit_property_name.isEmpty() && !unit_property_value.isEmpty()
boolean unitTypeFilterActive = !unit_type.isEmpty()

for (def schema : schemas) {

	// ignore the schemas whose property value does not match the requested pattern
	if (schemaFilterActive && !schema.getProp(schema_property_name).matches(schema_property_value)) {
		continue
	}

	// count the units of this schema that pass the type filter and the property filter
	int nUnites = 0;
	for (def unit : schema.getUnitesSousjacentesNonTriees()) {
		boolean typeAccepted = !unitTypeFilterActive || unit.getType().equals(unit_type)
		// the property is read only for units of the right type (short-circuit), as before
		boolean unitSelected = typeAccepted &&
				(!unitFilterActive || unit.getProp(unit_property_name).matches(unit_property_value))
		if (unitSelected) {
			nUnites++
		}
	}

	// schemas that are too short do not take part in the statistics
	if (nUnites >= minimum_schema_size) {
		if (!lens.containsKey(nUnites)) {
			lens[nUnites] = 0;
			lensnames[nUnites] = [];
		}

		lens[nUnites] += 1
		lensnames[nUnites] << schema.getProp(schema_property_name)
		nSchemas++;
	}
}
125

    
126
//println "nSchemas=$nSchemas"
127
// Totals over the length distribution built above (lens: length -> number of schemas):
//   t = sum of (length * number of schemas of that length), i.e. the total number of selected units
//   n = number of retained schemas
// The iteration order is irrelevant for these sums, so the keys are not sorted.
int t = 0;
int n = 0;
for (def entry : lens.entrySet()) {
	t += entry.key * entry.value
	n += entry.value
}

// No schema passed the filters: t/n would throw an ArithmeticException (integer division
// is done with BigDecimal in Groovy) and there is nothing to plot, so stop here
// like the other parameter guards of this script do.
if (n == 0) {
	println "No schema of type $schema_type matches the selection (minimum size=$minimum_schema_size): nothing to measure"
	return;
}

// mean schema length; left undeclared as in the original, so it is stored in the script binding
coef = (t/n)

// lengths ordered by decreasing frequency, then by decreasing length
def slens = lens.sort { a, b -> -a.value <=> -b.value ?: -a.key <=> -b.key }

// one entry per retained schema (each length repeated as many times as its frequency)
def flens = []
slens.each { key, value -> value.times { flens << key } }
def nbins = flens.size()*2

// print the mean, then one row per length: length, frequency, cumulated frequency, schema names
def cfreq = 0
println "longueur moyenne des chaînes de référence : $t/$n = "+coef
println "index hiérarchique des longueurs de chaînes :\nlen\tfreq\tcfreq"
slens.each { println it.key+"        "+it.value+"        "+(cfreq+=it.value)+"        "+lensnames[it.key] }
147

    
148
// same distribution, ordered from the longest length to the shortest
def slens2 = slens.sort { a, b -> b.key <=> a.key }

def r = RWorkspace.getRWorkspaceInstance()

// send the data to the R workspace under the names used by the R script
int[] lengthValues = slens2.keySet() as int[]
r.addVectorToWorkspace("len", lengthValues)
int[] lengthFrequencies = slens2.values() as int[]
r.addVectorToWorkspace("freq", lengthFrequencies)
int[] lengthPerSchema = flens as int[]
r.addVectorToWorkspace("flen", lengthPerSchema)

def corpusName = corpus.getID()

// the graphics are written as temporary files in the "results" directory of the TXM home
File resultsDirectory = new File(Toolbox.getTxmHomePath(), "results")

def PNGFile = File.createTempFile("txm", ".png", resultsDirectory)
def PNGFilePath = PNGFile.getAbsolutePath()
println "PNG file: ${PNGFilePath}"

def SVGFile = File.createTempFile("txm", ".svg", resultsDirectory)
def SVGFilePath = SVGFile.getAbsolutePath()
println "SVG file: ${SVGFilePath}"
165

    
166
/// BEGINNING OF R SCRIPT
167
def histogramScript ="""
hist(flen, xaxt='n', col="gray", xlab="Length", breaks=$nbins, main="$corpusName Longueur des chaînes ($nbins bins)")
axis(side=1, at=len)
dev.off()
"""
/// END OF R SCRIPT

// execute the R script twice, once per output device (PNG then SVG)
def pngCommands = "png(file = \"${PNGFilePath}\"); "+histogramScript
r.plot(PNGFile, pngCommands)
def svgCommands = "svg(file = \"${SVGFilePath}\"); "+histogramScript
r.plot(SVGFile, svgCommands)

// display the SVG result graphic through the monitor's syncExec
def graphTitle = corpusName+" Longueur des chaînes"
monitor.syncExec({ OpenSVGGraph.OpenSVGFile(SVGFilePath, graphTitle) } as Runnable)

// result: the mean length; data: the length -> frequency distribution
def macroResult = ["result":coef, "data":lens]
return macroResult