Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / mesures1 / MeanDistanceMacro.groovy @ 1962

History | View | Annotate | Download (4 kB)

1 671 mdecorde
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2 671 mdecorde
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3 671 mdecorde
// @author mdecorde
4 671 mdecorde
// @author sheiden
5 671 mdecorde
6 671 mdecorde
// STANDARD DECLARATIONS
7 1217 mdecorde
package org.txm.macro.urs.exploit.mesures1
8 671 mdecorde
9 671 mdecorde
import org.kohsuke.args4j.*
10 671 mdecorde
import groovy.transform.Field
11 671 mdecorde
import org.txm.rcp.swt.widget.parameters.*
12 1217 mdecorde
import org.txm.annotation.urs.*
13 671 mdecorde
import org.txm.searchengine.cqp.corpus.*
14 671 mdecorde
import org.txm.Toolbox
15 671 mdecorde
import org.txm.rcp.commands.*
16 1962 mdecorde
import org.txm.macro.urs.AnalecUtils
17 671 mdecorde
18 1962 mdecorde
// MACRO PARAMETERS
// Script fields annotated with @Option; ParametersDialog.open(this) is called
// further down before any of them is used (presumably that call fills them in).

// URSQL selector of the schemas to measure (TYPE@PROP=REGEX)
@Field
@Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

// Size bound handed to AnalecUtils.selectSchemasInCorpus when selecting schemas
@Field
@Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// Schema property read with schema.getProp(...) to label schemas in the printed index
@Field
@Option(name="schema_property_display", usage="schema property to show", widget="String", required=true, def="REF")
String schema_property_display

// URSQL selector (TYPE@PROP=REGEX) used to filter the units of each schema
@Field
@Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql
29 671 mdecorde
30 1962 mdecorde
// PRECONDITIONS
// Each failed check prints a message (when there is one) and ends the macro.

// 1. the current selection must be a CQP corpus
boolean selectionIsCorpus = corpusViewSelection instanceof CQPCorpus
if (!selectionIsCorpus) {
        println "Corpora selection is not a Corpus"
        return
}

// 2. the parameters dialog must succeed
if (!ParametersDialog.open(this)) {
        return
}
// END OF PARAMETERS

def corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// 3. the schema URSQL must be resolvable in the annotation corpus
def schemaQueryIsValid = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
if (!schemaQueryIsValid) {
        println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
        return
}

// 4. the unit URSQL must be resolvable in the annotation corpus
def unitQueryIsValid = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
if (!unitQueryIsValid) {
        println "** $unit_ursql unit URSQL cannot be computed in the corpus."
        return
}

// Select the schemas to measure.
// NOTE(review): 999999 is presumably an "unbounded" maximum schema size - confirm against AnalecUtils.
def schemas = AnalecUtils.selectSchemasInCorpus(
        debug,
        analecCorpus,
        corpus,
        schema_ursql,
        minimum_schema_size,
        999999)
52 671 mdecorde
53 671 mdecorde
// LENGTH DISTRIBUTION
// lens      : number of filtered units -> how many schemas have that many units
// lensnames : number of filtered units -> display property of each of those schemas
int nSchemas = 0

def lens = [:]
def lensnames = [:]
for (schema in schemas) {
        // keep only the units of the schema that match the unit URSQL
        def candidateUnits = schema.getUnitesSousjacentesNonTriees()
        int nUnites = AnalecUtils.filterElements(false, candidateUnits, unit_ursql).size()

        // Map.get(key, default) stores the default on first access, so both maps
        // receive their entry for a new length at this point
        lens[nUnites] = lens.get(nUnites, 0) + 1
        lensnames.get(nUnites, []) << schema.getProp(schema_property_display)

        nSchemas++
}
74 671 mdecorde
75 671 mdecorde
// MEAN LENGTH
// t = total number of units over all schemas, n = number of schemas.
// sort() on a Set returns a new sorted List instead of sorting in place,
// so its result is what gets iterated (the order does not affect the sums).
def freqs = lens.keySet().sort()
int t = 0
int n = 0
for (def f : freqs) {
        t += f * lens[f]
        n += lens[f]
}

// No schema was selected: the mean is undefined and t/n would throw an
// ArithmeticException. Stop here like the other failed preconditions do.
if (n == 0) {
        println "** No schema selected: the mean length cannot be computed."
        return
}

// coef is deliberately left undeclared, as in the original script
coef = (t/n)
87 671 mdecorde
// HIERARCHICAL INDEX
// slens: the distribution ordered by decreasing frequency, ties broken by decreasing length
def slens = lens.sort { a, b -> b.value <=> a.value ?: b.key <=> a.key }

// flens: one occurrence of the length per schema, in slens order (input of the histogram)
def flens = []
for (entry in slens) {
        flens.addAll([entry.key] * entry.value)
}
def nbins = 2 * flens.size()

// Console report: mean first, then one row per length with its cumulated frequency
def cfreq = 0
println "Longueur moyenne des chaînes de référence : $t/$n = "+coef
println "Index hiérarchique des longueurs de chaînes :\nlen\tfreq\tcfreq"
slens.each { len, count ->
        println len+"        "+count+"        "+(cfreq+=count)+"        "+lensnames[len]
}

// slens2: the same distribution ordered by decreasing length only
def slens2 = slens.sort { a, b -> b.key <=> a.key }
98 671 mdecorde
99 671 mdecorde
// R EXPORT
def r = RWorkspace.getRWorkspaceInstance()

// Push the three int vectors read by the R script: len, freq and flen
[len: slens2.keySet(), freq: slens2.values(), flen: flens].each { vectorName, vectorValues ->
        r.addVectorToWorkspace(vectorName, vectorValues as int[])
}

def corpusName = corpus.getID()

// Both graphics are created as temporary files in the "results" directory of the TXM home
def resultsDirectory = new File(Toolbox.getTxmHomePath(), "results")

def PNGFile = File.createTempFile("txm", ".png", resultsDirectory)
def PNGFilePath = PNGFile.getAbsolutePath()
println "PNG file: "+PNGFilePath

def SVGFile = File.createTempFile("txm", ".svg", resultsDirectory)
def SVGFilePath = SVGFile.getAbsolutePath()
println "SVG file: "+SVGFilePath
114 671 mdecorde
115 671 mdecorde
/// BEGINNING OF R SCRIPT
// Histogram of the schema lengths; the graphic device is opened by the caller-side prefix below
def script ="""
hist(flen, xaxt='n', col="gray", xlab="Length", breaks=$nbins, main="$corpusName Longueur des chaînes ($nbins bins)")
axis(side=1, at=len)
dev.off()
"""
/// END OF R SCRIPT

// Run the script once per output format, each time prefixed with the matching device call
def pngCommand = "png(file = \"${PNGFilePath}\"); "+script
r.plot(PNGFile, pngCommand)

def svgCommand = "svg(file = \"${SVGFilePath}\"); "+script
r.plot(SVGFile, svgCommand)

// Show the SVG graphic through monitor.syncExec
def graphTitle = corpusName+" Longueur des chaînes"
monitor.syncExec(new Runnable() {
        @Override
        public void run() {
                OpenSVGGraph.OpenSVGFile(SVGFilePath, graphTitle)
        }
})

// Macro result: the mean length and the raw length distribution
return [result: coef, data: lens]