Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / mesures1 / MeanDistanceMacro.groovy @ 1962

History | View | Annotate | Download (4 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5

    
6
// STANDARD DECLARATIONS
7
package org.txm.macro.urs.exploit.mesures1
8

    
9
import org.kohsuke.args4j.*
10
import groovy.transform.Field
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.txm.Toolbox
15
import org.txm.rcp.commands.*
16
import org.txm.macro.urs.AnalecUtils
17

    
18
// Macro parameters. They are filled in by ParametersDialog.open(this) further down.

// URSQL query (TYPE@PROP=REGEX) used to select the schemas to measure.
@Field
@Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

// Minimal schema size, forwarded to the schema selection.
@Field
@Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// Name of the schema property printed next to each length in the report.
@Field
@Option(name="schema_property_display", usage="schema property to show", widget="String", required=true, def="REF")
String schema_property_display

// URSQL query (TYPE@PROP=REGEX) used to filter the units of each schema before counting them.
@Field
@Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql
29

    
30
// Stop right away unless the current selection is a CQPCorpus.
def selectionIsCorpus = (corpusViewSelection instanceof CQPCorpus)
if (!selectionIsCorpus) {
        println "Corpora selection is not a Corpus"
        return
}

// Open the parameters dialog for this script; stop when open() reports false.
def dialogAccepted = ParametersDialog.open(this)
if (!dialogAccepted) {
        return
}
36
// END OF PARAMETERS
37

    
38
// Resolve the annotation corpus attached to the selected corpus.
def corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// Check the schema query first; the unit query is only checked when the schema query passes.
def schemaQueryUsable = AnalecUtils.isPropertyDefined(Schema, analecCorpus, schema_ursql)
if (!schemaQueryUsable) {
        println "** The ${schema_ursql} schema URSQL cannot be computed in the corpus."
        return
}

def unitQueryUsable = AnalecUtils.isPropertyDefined(Unite, analecCorpus, unit_ursql)
if (!unitQueryUsable) {
        println "** ${unit_ursql} unit URSQL cannot be computed in the corpus."
        return
}
50

    
51
// Select the schemas matching schema_ursql with at least minimum_schema_size elements.
// NOTE(review): 999999 is presumably the maximum schema size - confirm against AnalecUtils.
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999)

// Number of schemas visited by the loop below.
int nSchemas = 0

// lens:      number of filtered units -> number of schemas having that many units
// lensnames: number of filtered units -> schema_property_display values of those schemas
def lens = [:]
def lensnames = [:]
for (schema in schemas) {
        // Keep only the units of this schema that match unit_ursql, then count them.
        def candidateUnits = schema.getUnitesSousjacentesNonTriees()
        def keptUnits = AnalecUtils.filterElements(false, candidateUnits, unit_ursql)
        int nUnites = keptUnits.size()

        // First schema seen with this length: create its list of names.
        if (!lensnames.containsKey(nUnites)) {
                lensnames[nUnites] = []
        }

        // A missing counter reads as null, which the elvis operator turns into 0.
        lens[nUnites] = (lens[nUnites] ?: 0) + 1
        lensnames[nUnites] << schema.getProp(schema_property_display)
        nSchemas++
}
74

    
75
//println "nSchemas=$nSchemas"
76
// Distinct lengths in ascending order.
// sort() on a key set returns a new sorted list instead of sorting in place, so its result must be kept.
def freqs = lens.keySet().sort()

// t: total number of units over all schemas, n: number of schemas.
// lens maps a length to the number of schemas having that length.
int t = 0
int n = 0
for (def f : freqs) {
        t += f * lens[f]
        n += lens[f]
}

// With no schema to measure, t/n below would throw an ArithmeticException (division by zero).
// Report the situation and stop, like the other early exits of this script.
if (n == 0) {
        println "** No schema to measure: the mean length cannot be computed."
        return
}

// Mean number of units per schema. Dividing two integers with '/' gives a decimal result in Groovy.
// Left undeclared as in the original, so the value stays a variable of the script binding.
coef = (t/n)

// Lengths ordered by decreasing frequency; equal frequencies are ordered by decreasing length.
def slens = lens.sort { a, b -> -a.value <=> -b.value ?: -a.key <=> -b.key }

// One entry per schema: each length repeated as many times as it was observed.
def flens = []
slens.each { key, value -> value.times { flens << key } }

// Number of histogram bins: twice the number of schemas.
def nbins = flens.size()*2
91

    
92
// Print the mean length, then one line per length: length, frequency, cumulated frequency, schema names.
def cfreq = 0
def columnGap = "        "
println "Longueur moyenne des chaînes de référence : $t/$n = "+coef
println "Index hiérarchique des longueurs de chaînes :\nlen\tfreq\tcfreq"
slens.each { length, count ->
        // The cumulated frequency includes the current line.
        cfreq += count
        println length+columnGap+count+columnGap+cfreq+columnGap+lensnames[length]
}
96

    
97
// Same distribution, reordered by decreasing length.
def slens2 = slens.sort { a, b -> -a.key <=> -b.key }

def r = RWorkspace.getRWorkspaceInstance()

// Send the three vectors read by the R script: distinct lengths, their frequencies,
// and the expanded list holding one length per schema.
int[] lengthVector = slens2.keySet() as int[]
r.addVectorToWorkspace("len", lengthVector)
int[] frequencyVector = slens2.values() as int[]
r.addVectorToWorkspace("freq", frequencyVector)
int[] expandedLengths = flens as int[]
r.addVectorToWorkspace("flen", expandedLengths)
104

    
105
// Corpus identifier, used in the graphic titles.
def corpusName = corpus.getID()

// Both graphics are created as temporary files in the "results" folder of the TXM home directory.
def PNGFile = File.createTempFile("txm", ".png", new File(Toolbox.getTxmHomePath(), "results"))
def PNGFilePath = PNGFile.absolutePath
println "PNG file: ${PNGFilePath}"

def SVGFile = File.createTempFile("txm", ".svg", new File(Toolbox.getTxmHomePath(), "results"))
def SVGFilePath = SVGFile.absolutePath
println "SVG file: ${SVGFilePath}"
114

    
115
/// BEGINNING OF R SCRIPT
// Histogram of the expanded lengths (flen) with one x-axis tick per distinct length (len).
def rScript ="""
hist(flen, xaxt='n', col="gray", xlab="Length", breaks=${nbins}, main="${corpusName} Longueur des chaînes (${nbins} bins)")
axis(side=1, at=len)
dev.off()
"""
/// END OF R SCRIPT

// Run the script twice, each time prefixed with the R command opening the matching graphic device.
// NOTE(review): the file paths are inserted unescaped into R string literals - confirm this is safe for paths containing backslashes.
def openPngDevice = "png(file = \"${PNGFilePath}\"); "
def openSvgDevice = "svg(file = \"${SVGFilePath}\"); "
r.plot(PNGFile, openPngDevice+rScript)
r.plot(SVGFile, openSvgDevice+rScript)
126

    
127
// Open the SVG graphic through monitor.syncExec, then hand back the mean and the raw distribution.
def graphicTitle = corpusName+" Longueur des chaînes"
monitor.syncExec({ OpenSVGGraph.OpenSVGFile(SVGFilePath, graphicTitle) } as Runnable)

// result: mean length, data: map from length to number of schemas.
return [result: coef, data: lens]