Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / analec / misc / UnitsCorrelationMacro.groovy @ 875

History | View | Annotate | Download (7.3 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.analec.misc
7

    
8
import org.apache.commons.lang.StringUtils
9
import org.txm.rcp.views.corpora.CorporaView
10
import groovy.transform.Field
11

    
12
import org.kohsuke.args4j.*
13
import org.txm.Toolbox
14
import org.txm.analec.*
15
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl
16
import org.txm.macro.analec.*
17
import org.txm.rcp.commands.*
18
import org.txm.rcp.swt.widget.parameters.*
19
import org.txm.searchengine.cqp.corpus.*
20
import org.txm.searchengine.cqp.corpus.query.Query;
21

    
22
import visuAnalec.donnees.*
23
import visuAnalec.elements.*
24
import cern.colt.matrix.DoubleFactory2D
25
import cern.colt.matrix.DoubleMatrix2D
26

    
27
// Name of the running macro class, used to prefix user-facing messages.
def scriptName = this.class.getSimpleName()
def parent
def selection = []

// corpusViewSelection is not declared in this script (presumably injected by
// the macro runner binding). Everything below calls corpus methods on it, so
// stop right after telling the user instead of failing later on a selection
// that is not a Corpus.
if (!(corpusViewSelection instanceof Corpus)) {
        println "** $scriptName please select a Corpus to run the macro"
        return
}
33

    
34

    
35
// BEGINNING OF PARAMETERS
// Each @Option describes one entry of the parameters dialog; @Field turns the
// variable into a script field so that it can be read from the whole script.

// --- schema selection ---
@Field
@Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE")
String schema_ursql

@Field
@Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size

@Field
@Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size

// --- unit selection ---
@Field
@Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql

@Field
@Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int limit_distance_in_schema

// --- structure limits ---
// left untyped on purpose: the value is whatever the "Query" widget provides
@Field
@Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
limit_cql

@Field
@Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion

@Field
@Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int limit_distance

// --- the two unit properties that are cross-tabulated ---
@Field
@Option(name="unit_prop1", usage="PROP1", widget="String", required=false, def="PROP1")
String unit_prop1

@Field
@Option(name="unit_prop2", usage="PROP2", widget="String", required=false, def="PROP2")
String unit_prop2

// --- correlation computation and plot rendering (values are inserted into the R script) ---
@Field
@Option(name="corr_method", usage="try them all", widget="StringArray", metaVar="pearson        spearman        kendall", required=false, def="pearson")
String corr_method

@Field
@Option(name="corr_style", usage="try them all", widget="StringArray", metaVar="circle        square        ellipse        number        shade        color        pie", required=false, def="number")
String corr_style

@Field
@Option(name="corr_layout", usage="try them all", widget="StringArray", metaVar="full        lower        upper", required=false, def="upper")
String corr_layout

@Field
@Option(name="corr_order", usage="try them all", widget="StringArray", metaVar="AOE        FPC        hclust        alphabet", required=false, def="hclust")
String corr_order

// --- output ---
// untyped: only tested for truthiness further down
@Field
@Option(name="output_lexicaltable", usage="create or not a lexical table with the result", widget="Boolean", required=true, def="false")
output_lexicaltable

// untyped: starts as the label chosen in the dialog and is replaced by a number afterwards
@Field
@Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
debug
68
// Open the parameters dialog; leave the macro when it does not report success.
if (!ParametersDialog.open(this)) {
        return
}

// Replace the debug label by its numeric level (OFF=0, ON=1, ALL=2, REALLY ALL=3).
// Any other value is left untouched.
if (debug == "OFF") {
        debug = 0
} else if (debug == "ON") {
        debug = 1
} else if (debug == "ALL") {
        debug = 2
} else if (debug == "REALLY ALL") {
        debug = 3
}
70

    
71

    
72
// CQP client handle; nothing in the visible part of the script reads it afterwards.
def CQI = CQPSearchEngine.getCqiClient()

// Accumulators filled while scanning the selected units:
//  - correlations: unit_prop1 value -> (unit_prop2 value -> number of units)
//  - values1 / values2: distinct values met for unit_prop1 / unit_prop2
def correlations = [:]
def values1 = new HashSet()
def values2 = new HashSet()

// Resolve the selected corpus, its main corpus and the matching Analec corpus.
def corpus = corpusViewSelection
mainCorpus = corpus.getMainCorpus()
def word = mainCorpus.getWordProperty()
def analecCorpus = AnalecCorpora.getCorpus(mainCorpus.getName())

// Delegate the unit selection to AnalecUtils, forwarding the dialog parameters.
def selectedUnits = AnalecUtils.selectUnitsInSchema(
        debug, analecCorpus, corpus,
        schema_ursql, minimum_schema_size, maximum_schema_size,
        unit_ursql, limit_distance_in_schema,
        limit_cql, strict_inclusion, limit_distance)
88

    
89
// Count, for every selected unit, the pair (unit_prop1 value, unit_prop2 value).
for (def unit : selectedUnits) {
        // A missing property is labelled "<null>", an empty one "<empty>".
        def raw1 = unit.getProp(unit_prop1)
        def rowValue = (raw1 == null) ? "<null>" : (raw1.length() == 0 ? "<empty>" : raw1)
        def raw2 = unit.getProp(unit_prop2)
        def colValue = (raw2 == null) ? "<null>" : (raw2.length() == 0 ? "<empty>" : raw2)

        values1.add(rowValue)
        values2.add(colValue)

        // Fetch the counters of this unit_prop1 value, creating them on first use.
        def counters = correlations[rowValue]
        if (counters == null) {
                counters = [:]
                correlations[rowValue] = counters
        }
        // A pair seen for the first time starts from 0.
        counters[colValue] = (counters[colValue] ?: 0) + 1
}
105

    
106
// Dense count table: one row per unit_prop1 value, one column per unit_prop2 value.
def matrix = new int[values1.size()][values2.size()]

// Tab-separated dump; the first line holds the column labels.
println "\t"+values2.join("\t")

values1.eachWithIndex { rowValue, rowIndex ->
        print rowValue
        def counters = correlations[rowValue]
        values2.eachWithIndex { colValue, colIndex ->
                // pairs that were never observed are stored as 0 in the map as well
                if (counters[colValue] == null) counters[colValue] = 0
                print "\t"+counters[colValue]

                matrix[rowIndex][colIndex] = counters[colValue]
        }
        println ""
}
122

    
123
// Send the labels and the count table to the R workspace.
def r = RWorkspace.getRWorkspaceInstance()
String[] rowLabels = values1 as String[]
String[] colLabels = values2 as String[]
r.addVectorToWorkspace("corrlines", rowLabels)
r.addVectorToWorkspace("corrcols", colLabels)
r.addMatrixToWorkspace("corrmatrix", matrix)

// Name the rows and columns of the R matrix after the property values.
["rownames(corrmatrix) = corrlines", "colnames(corrmatrix) = corrcols"].each { rCode ->
        r.eval(rCode)
}

// Temporary SVG file for the plot, created in the "results" directory of the TXM home.
def resultsDir = new File(Toolbox.getTxmHomePath(), "results")
resultsDir.mkdirs()
file = File.createTempFile("txm_corr_pairs_", ".svg", resultsDir)
133

    
134

    
135

    
136
// Descriptive title: corpus, unit selection, optional limits, and the two properties.
// NOTE(review): nothing in the visible part of the script reads this title afterwards.
def title = "${corpus.getMainCorpus()}.${corpus}\n${unit_ursql}"
if (limit_distance > 1) title += "[${limit_distance}]."
if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) title += "\n(${limit_cql} limits)"
// Both property names are interpolated; without the "\$" the text "unit_prop2" would be emitted verbatim.
title += "\t P1=$unit_prop1 P2=$unit_prop2"

// R code run for the plot: correlation (r1) and covariance (r2) of the count
// matrix, then a corrplot of r1 using the style options chosen in the dialog.
def plotScript = """

r1 = cor(corrmatrix, use="complete.obs", method="$corr_method");
r2 = cov(corrmatrix, use="complete.obs") ;

library(corrplot)
corrplot(r1, type="$corr_layout", order="$corr_order", method="$corr_style")
"""

// execute R script (only when no lexical table was requested)
if (!output_lexicaltable) {
        r.plot(file, plotScript)
}
title = "$unit_prop1 $corr_method correlations"
156

    
157

    
158
// Optionally store the counts as a lexical table; lt stays null otherwise.
def lt = null
if (output_lexicaltable) {
        int rowCount = values1.size()
        int colCount = values2.size()

        // Copy the int counts into a dense Colt matrix, cell by cell.
        mFactory = DoubleFactory2D.dense
        dmatrix = mFactory.make(rowCount, colCount)
        for (rowIndex in 0..<rowCount) {
                for (colIndex in 0..<colCount) {
                        dmatrix.set(rowIndex, colIndex, matrix[rowIndex][colIndex])
                }
        }

        String[] rowNames = values1 as String[]
        String[] colNames = values2 as String[]

        // With a Partition selection the table is built from the partition and
        // stored on it; otherwise it is built from and stored on the corpus.
        boolean fromPartition = corpusViewSelection instanceof Partition
        def tableCorpus = fromPartition ? corpusViewSelection.getCorpus() : corpus
        def wordProperty = tableCorpus.getProperty("word")
        if (fromPartition) {
                lt = new LexicalTableImpl(dmatrix, corpusViewSelection, wordProperty, rowNames, colNames)
        } else {
                lt = new LexicalTableImpl(dmatrix, wordProperty, rowNames, colNames)
        }
        lt.setCorpus(tableCorpus)
        (fromPartition ? corpusViewSelection : corpus).storeResult(lt)
}
179

    
180

    
181

    
182

    
183
        // Show the result to the user; the work is handed to monitor.syncExec as a Runnable.
        monitor.syncExec(new Runnable() {
                @Override
                public void run() {
                        try {
                                if (!UnitsCorrelationMacro.this.output_lexicaltable) {
                                        // no lexical table requested: open the SVG file produced by the plot
                                        OpenSVGGraph.OpenSVGFile(UnitsCorrelationMacro.this.file.getAbsolutePath(), "Correlations Units")
                                } else {
                                        // lexical table requested: refresh the corpus in the Corpora view and expand the new table
                                        CorporaView.refreshObject(corpus)
                                        CorporaView.expand(lt)
                                }
                        } catch (e) {
                                e.printStackTrace()
                        }
                }
        })

// The macro result is the nested count map: unit_prop1 value -> (unit_prop2 value -> count).
return correlations