Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / SchemaLengthsMacro.groovy @ 2105

History | View | Annotate | Download (5.2 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5

    
6
// STANDARD DECLARATIONS
7
package org.txm.macro.urs.exploit
8

    
9
import org.kohsuke.args4j.*
10
import groovy.transform.Field
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.macro.urs.AnalecUtils
14
import visuAnalec.elements.*
15
import org.txm.searchengine.cqp.corpus.*
16
import org.txm.Toolbox
17
import org.txm.rcp.commands.*
18
import org.txm.statsengine.r.core.RWorkspace
19

    
20
// Build the list of corpora to process from the Corpus view selection:
// a selected corpus is taken as is, a selected partition contributes all of its parts.
// Any other kind of selected object is ignored.
def selection = []
for (def s : corpusViewSelections) {
        if (s instanceof CQPCorpus) {
                selection << s
        } else if (s instanceof Partition) {
                selection.addAll(s.getParts())
        }
}

// Nothing usable selected: tell the user and stop the macro.
if (selection.size() == 0) {
        println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
        return false
}

// NOTE(review): presumably makes sure each selected corpus is computed before it is queried;
// the meaning of the 'false' argument is not visible from this script -- confirm.
for (def c : selection) {
        c.compute(false)
}
32

    
33
// MACRO PARAMETERS
// Each parameter is a script field annotated with @Option; the annotation must directly
// precede the declaration it describes.

// Query selecting the schemas to measure (format given by 'usage': TYPE@PROP=REGEX).
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
        String schema_ursql

// Lower and upper bounds on schema size passed to the schema selection.
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
        int minimum_schema_size
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
        int maximum_schema_size

// Schema property whose value is printed next to each length in the result table.
@Field @Option(name="schema_property_display", usage="schema property to show", widget="String", required=true, def="REF")
        String schema_property_display

// Query filtering the units counted in each schema (format given by 'usage': TYPE@PROP=REGEX).
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
        String unit_ursql

// Chart output is currently disabled: the option declaration is commented out
// and the flag is forced to false.
//@Field @Option(name="output_graph", usage="Show chart", widget="Boolean", required=true, def="false")
        output_graph = false

// Verbosity selector; the chosen label is converted to a number once the dialog is validated.
// NOTE(review): the metaVar choices are separated by runs of spaces here; they were presumably
// tab characters in the original file -- confirm against the repository before relying on them.
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
        debug
49

    
50
// Show the parameters dialog; stop the macro when it does not report success.
if (!ParametersDialog.open(this)) {
        return
}

// Translate the debug label chosen in the dialog into a numeric level (0 to 3).
// Any other value is left untouched.
if (debug == "OFF") {
        debug = 0
} else if (debug == "ON") {
        debug = 1
} else if (debug == "ALL") {
        debug = 2
} else if (debug == "REALLY ALL") {
        debug = 3
}
52

    
53
// For each selected corpus: count the units of every selected schema, print the mean schema
// length and the frequency table of lengths, and (when output_graph is set) plot a histogram
// of the lengths with R.
for (def corpus : selection) {
        def analecCorpus = URSCorpora.getCorpus(corpus)

        // Validate both queries; a non-empty error list is reported and stops the whole macro
        // (not only the current corpus).
        def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
        if (errors.size() > 0) {
                println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
                return;
        }

        errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
        if (errors.size() > 0) {
                println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
                return;
        }

        def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size);

        def lens = [:]      // schema length (number of filtered units) -> number of schemas of that length
        def lensnames = [:] // schema length -> displayed property value of each schema of that length
        int t = 0           // total number of filtered units over all schemas
        int n = 0           // number of schemas
        for (def schema : schemas) {
                def allUnites = schema.getUnitesSousjacentesNonTriees()
                def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
                int nUnites = units.size()

                if (!lens.containsKey(nUnites)) {
                        lens[nUnites] = 0
                        lensnames[nUnites] = []
                }
                lens[nUnites] = lens[nUnites] + 1
                lensnames[nUnites] << schema.getProp(schema_property_display)

                t += nUnites
                n++
        }

        // Without any schema the mean below would divide by zero: report it and move on
        // to the next corpus instead of crashing.
        if (n == 0) {
                println "** $scriptName: no schema selected with '$schema_ursql' in "+corpus.getName()+", nothing to measure."
                continue
        }

        // Mean schema length; assigned without 'def' so it stays a script binding variable.
        coef = (t/n)

        // Lengths ordered by decreasing frequency, ties broken by decreasing length.
        def slens = lens.sort { a, b -> -a.value <=> -b.value ?: -a.key <=> -b.key }

        // One entry per schema holding its length: the raw data of the histogram.
        def flens = []
        slens.each { key, value -> value.times { flens << key } }
        def nbins = flens.size()*2

        def cfreq = 0 // running cumulative frequency for the third column
        println "Longueur moyenne des chaînes de référence : $t/$n = "+coef
        println "Index hiérarchique des longueurs de chaînes :\nlen\tfreq\tcfreq"
        slens.each { println it.key+"        "+it.value+"        "+(cfreq+=it.value)+"        "+lensnames[it.key] }

        if (output_graph) {
                // Lengths by decreasing length, used for the x axis ticks.
                def slens2 = slens.sort { a, b -> -a.key <=> -b.key }

                def r = RWorkspace.getRWorkspaceInstance()

                r.addVectorToWorkspace("len", slens2.keySet() as int[])
                r.addVectorToWorkspace("freq", slens2.values() as int[])
                r.addVectorToWorkspace("flen", flens as int[])

                def corpusName = corpus.getName()
                println "corpusName=$corpusName"
                def PNGFile = File.createTempFile("txm_"+corpus.getID()+"_", ".png", new File(Toolbox.getTxmHomePath(), "results"))
                def PNGFilePath = PNGFile.getAbsolutePath()
                println "PNG file: "+PNGFilePath

                def SVGFile = File.createTempFile("txm_"+corpus.getID()+"_", ".svg", new File(Toolbox.getTxmHomePath(), "results"))
                def SVGFilePath = SVGFile.getAbsolutePath()
                println "SVG file: "+SVGFilePath

                /// BEGINNING OF R SCRIPT
                def script ="""
hist(flen, xaxt='n', col="gray", xlab="Length", breaks=$nbins, main="$corpusName Longueur des chaînes ($nbins bins)")
axis(side=1, at=len)
dev.off()
"""
                /// END OF R SCRIPT

                // Run the same R script twice, each time prefixed by the device opening call.
                r.plot(PNGFile, "png(file = \"${PNGFilePath}\"); "+script)
                r.plot(SVGFile, "svg(file = \"${SVGFilePath}\"); "+script)

                // Display the SVG result through monitor.syncExec.
                monitor.syncExec(new Runnable() {
                                        @Override
                                        public void run() { OpenBrowser.openfile(SVGFile.getAbsolutePath(), corpusName+" Longueur des chaînes") }
                                })
        }
        //return ["result":coef, "data":lens]
}
152

    
153