Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / prototypes / stats / CAFilter.groovy @ 2929

History | View | Annotate | Download (7.5 kB)

1
// STANDARD DECLARATIONS
2
package org.txm.macro
3

    
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcpapplication.swt.widget.parameters.*
7
import org.txm.searchengine.cqp.clientExceptions.*
8
import org.txm.searchengine.cqp.corpus.*
9
import org.txm.searchengine.cqp.corpus.query.*
10
import org.apache.commons.lang.time.StopWatch
11
import java.util.Arrays
12
import org.jfree.chart.renderer.xy.*
13
import org.jfree.chart.renderer.*
14
import org.jfree.chart.plot.*
15
import org.jfree.data.xy.*
16
import org.jfree.chart.axis.*
17
import java.awt.*;
18
import java.awt.geom.*;
19
import org.jfree.chart.labels.*
20

    
21
import org.txm.ca.core.chartsengine.jfreechart.themes.highcharts.renderers.*
22
import org.txm.ca.rcp.editors.*
23
import org.txm.libs.office.ReadODS
24
import org.txm.ca.core.chartsengine.jfreechart.datasets.*
25
import org.jfree.chart.renderer.AbstractRenderer
26

    
27
import org.apache.commons.math3.stat.descriptive.*
28

    
29
// Trace which editor the macro was launched from.
println "editor: "+editor

// The macro only knows how to work on a CA editor: stop right away otherwise.
boolean launchedFromCAEditor = (editor instanceof CAEditor)
if (!launchedFromCAEditor) {
        println "editor is not a CA editor: $editor, Run the macro with F12 when the editor is selected :-)"
        return
}

// First nested editor of the CA editor; its chart is re-populated at the end of the script.
def chartEditor = editor.getEditors()[0]
// Composite of that nested editor, used to reload the chart once it has been updated.
def chartComposite = chartEditor.getComposite()
38

    
39
// BEGINNING OF PARAMETERS
40

    
41
// Declare each parameter here
42
// (available widget types: Query, File, Folder, String, Text, Boolean, Integer, Float and Date)
43

    
44
//@Field @Option(name="query", usage="an example query", widget="Query", required=true, def='[pos="V.*"]')
45
//def query
46

    
47
//@Field @Option(name="file", usage="an example file", widget="File", required=true, def="C:/Temp/foo.txt")
48
//def file
49

    
50
//@Field @Option(name="folder", usage="an example folder", widget="Folder", required=false, def="C:/Temp")
51
//def folder
52

    
53
//@Field @Option(name="date", usage="an example date", widget="Date", required=false, def="1984-09-01")
54
//def date
55

    
56
//@Field @Option(name="integer", usage="an example integer", widget="Integer", required=false, def="42")
57
//def integer
58

    
59
// Threshold compared with the sum of the cos² values on the two displayed axes.
@Field @Option(name="QMin", usage="min Q", widget="Float", required=true, def="0.01")
def QMin

// Threshold compared with the contribution on the first displayed axis.
@Field @Option(name="CTRXMin", usage="min CTR X", widget="Float", required=true, def="0.001")
def CTRXMin

// Threshold compared with the contribution on the second displayed axis.
@Field @Option(name="CTRYMin", usage="min CTR Y", widget="Float", required=true, def="0.001")
def CTRYMin

// Coordinate given to filtered points on the first displayed axis.
@Field @Option(name="stackX", usage="stack X", widget="Float", required=true, def="0.0")
def stackX

// Coordinate given to filtered points on the second displayed axis.
@Field @Option(name="stackY", usage="stack Y", widget="Float", required=true, def="0.0")
def stackY

//@Field @Option(name="string", usage="an example string", widget="String", required=false, def="hello world!")
//def string

// Regular expression matched (full match) against row and column names; empty means no regex filtering.
@Field @Option(name="regexFilter", usage="row property form regex", widget="Text", required=false, def="")
def regexFilter

// When true, the names of the filtered rows and columns and their counts are printed.
@Field @Option(name="debug", usage="debug (verbose) mode", widget="Boolean", required=true, def="true")
def debug
82

    
83
// The result window
CAresultWindow = editor

// The interpretation-aid data table.
// The CA data can be manipulated through the methods documented at
// http://txm.sourceforge.net/javadoc/TXM/TBX/org/txm/stat/engine/r/function/CA.html
ica = CAresultWindow.getCA()
ca = ica.getCA()

// Row side: names, cos² and contributions
rowNames = ica.getRowNames()
rowCos2 = ca.getRowCos2()
rowContrib = ca.getRowContrib()

// Column side: names, cos² and contributions
colNames = ica.getColNames()
colCos2 = ca.getColCos2()
colContrib = ca.getColContrib()

// Displayed dimensions, shifted by one so they can be used as array indices
F1 = ica.getFirstDimension() - 1
F2 = ica.getSecondDimension() - 1

println String.format("F1 = %d, F2 = %d, %d initial rows, %d initial cols", F1 + 1, F2 + 1, rowNames.length, colNames.length)
104

    
105
/**
 * Prints the min, first quartile, median, third quartile and max of one
 * factor of a table.
 *
 * vector: the table, iterated row by row (array or list); each row is indexed by factor
 * factor: index of the value to read in each row
 */
def stats = { vector, factor ->
        // Local accumulator: declared with 'def' so it does not leak into the script binding.
        def summary = new DescriptiveStatistics()
        vector.each { row ->
                summary.addValue(row[factor])
        }

        println "min       quartile  median    third quartile  max"
        println sprintf("%8f  %8f  %8f  %8f        %8f", summary.getMin(), summary.getPercentile(25), summary.getPercentile(50), summary.getPercentile(75), summary.getMax())
}
114

    
115
// rows
116

    
117
// Prints, for one side of the table (rows or columns), the summary of the
// contributions and then of the cos² on each of the two displayed axes.
def printFactorSummaries = { sectionHeader, contributions, cos2Values ->
        println sectionHeader

        [F1, F2].each { axis ->
                println "CTR"+(axis+1)
                stats(contributions, axis)
        }

        [F1, F2].each { axis ->
                println "Cos² "+(axis+1)
                stats(cos2Values, axis)
        }
}

// rows
printFactorSummaries("ROWS -----", rowContrib, rowCos2)

// cols
printFactorSummaries("COLS -----", colContrib, colCos2)
144

    
145

    
146
/**
 * Prints the min, first quartile, median, third quartile and max of the sum
 * of two factors of a table.
 *
 * vector:  the table, iterated row by row (array or list)
 * factor1: index of the first value to read in each row
 * factor2: index of the second value to read in each row
 */
def stats2 = { vector, factor1, factor2 ->
        // Local accumulator: declared with 'def' so it does not leak into the script binding.
        def summary = new DescriptiveStatistics()
        vector.each { row ->
                summary.addValue(row[factor1] + row[factor2])
        }

        println "min       quartile  median    third quartile  max"
        println sprintf("%8f  %8f  %8f  %8f        %8f", summary.getMin(), summary.getPercentile(25), summary.getPercentile(50), summary.getPercentile(75), summary.getMax())
}
155

    
156
// Summary of the cos² summed over the two displayed axes.
// The two blocks are labelled explicitly: they would otherwise print the
// same "Q<F1><F2>" title and could not be told apart in the console.
println "Q"+(F1+1)+(F2+1)+" (rows)"
stats2(rowCos2, F1, F2)

println "Q"+(F1+1)+(F2+1)+" (cols)"
stats2(colCos2, F1, F2)
161

    
162
// Open the parameters input dialog box; the macro stops here when the dialog
// does not return a true value (presumably when it is cancelled).
def parametersAccepted = ParametersDialog.open(this)
if (!parametersAccepted) {
        return
}
164

    
165
// END OF PARAMETERS
166

    
167

    
168
// Names of the rows to hide on the chart: filled first by the regex filter,
// then by the CTR/Q thresholds. Each name is stored only once.
filteredRows = []

println ""

if (debug) println "\nROWS -----"

// 1) Rows whose name fully matches the regex.
// Groovy truth: the step is skipped for a null filter as well as an empty one.
if (regexFilter) {
        // Compile the pattern once instead of once per row.
        def rowPattern = ~regexFilter
        // Names to display, collected so that separators are only put between printed names.
        def regexRowsToPrint = []
        nmatched = 0
        rowNames.each { rowName ->
                if (rowName ==~ rowPattern) {
                        if (!filteredRows.contains(rowName)) {
                                regexRowsToPrint << rowName
                                filteredRows << rowName
                        }
                        nmatched++
                }
        }

        if (debug) {
                if (nmatched > 0) println regexRowsToPrint.join(", ")
                println sprintf("\n%d rows filtered by regex", nmatched)
        }
}

// 2) Rows below all three thresholds: contribution on each displayed axis
// and summed cos² on the two axes.
nQCTR = 0
def thresholdRowsToPrint = []
rowNames.eachWithIndex { rowName, i ->
        def cos2 = rowCos2[i]
        boolean belowThresholds = (rowContrib[i][F1] < CTRXMin) && (rowContrib[i][F2] < CTRYMin) && (cos2[F1] + cos2[F2] < QMin)
        if (belowThresholds) {
                // Rows already filtered by the regex are counted but not listed again.
                if (!filteredRows.contains(rowName)) {
                        thresholdRowsToPrint << rowName
                        filteredRows << rowName
                }
                nQCTR++
        }
}

if (debug) {
        print thresholdRowsToPrint.join(", ")
        println sprintf("\n%d rows filtered by CTR or Q"+(F1+1)+(F2+1), nQCTR)
}
211

    
212
// cols
213

    
214
// Names of the columns to hide on the chart: filled first by the regex filter,
// then by the CTR/Q thresholds. Each name is stored only once.
filteredCols = []

println ""

if (debug) println "\nCOLS -----"

// 1) Columns whose name fully matches the regex.
// Groovy truth: the step is skipped for a null filter as well as an empty one.
if (regexFilter) {
        // Compile the pattern once instead of once per column.
        def colPattern = ~regexFilter
        // Names to display, collected so that separators are only put between printed names.
        def regexColsToPrint = []
        nmatched = 0
        colNames.each { colName ->
                if (colName ==~ colPattern) {
                        if (!filteredCols.contains(colName)) {
                                regexColsToPrint << colName
                                filteredCols << colName
                        }
                        nmatched++
                }
        }

        if (debug) {
                if (nmatched > 0) println regexColsToPrint.join(", ")
                println sprintf("\n%d cols filtered by regex", nmatched)
        }
}

// 2) Columns below all three thresholds: contribution on each displayed axis
// and summed cos² on the two axes. Strict comparisons, same rule as for the
// rows: a value exactly equal to its threshold keeps the column.
nQCTR = 0
def thresholdColsToPrint = []
colNames.eachWithIndex { colName, i ->
        def cos2 = colCos2[i]
        boolean belowThresholds = (colContrib[i][F1] < CTRXMin) && (colContrib[i][F2] < CTRYMin) && (cos2[F1] + cos2[F2] < QMin)
        if (belowThresholds) {
                // Columns already filtered by the regex are counted but not listed again.
                if (!filteredCols.contains(colName)) {
                        thresholdColsToPrint << colName
                        filteredCols << colName
                }
                nQCTR++
        }
}

if (debug) {
        print thresholdColsToPrint.join(", ")
        println sprintf("\n%d cols filtered by CTR or Q"+(F1+1)+(F2+1), nQCTR)
}
257

    
258
// Stacking coordinates as primitive doubles, ready to be written into the
// coordinates of the filtered points.
double stackXv = stackX
double stackYv = stackY

// Graphical display: first nested editor of the result window and its composite.
chartCAresultWindow = CAresultWindow.getEditors()[0]
chartComposite = chartCAresultWindow.getComposite()
266

    
267
// Rebuilds the chart dataset with every filtered row/column blanked and moved
// to (stackXv, stackYv), then reloads the chart. Executed through monitor.syncExec.
monitor.syncExec( new Runnable() {
        public void run() {

                println chartComposite
                def chart = chartEditor.getChart()

                // Everything below is local: nothing is leaked into the script binding.
                def stackedDataset = new CAXYDataset(ica)
                def xAxis = F1
                def yAxis = F2

                // Blanks the label and overwrites the coordinates of every point
                // whose label is in filteredNames; 'kind' is only used in the trace.
                def stackPoints = { pointLabels, pointCoords, filteredNames, kind ->
                        (pointLabels.length).times { idx ->
                                if (filteredNames.contains(pointLabels[idx])) {
                                        println "Moving "+pointLabels[idx]+" "+kind+" to origin."
                                        pointLabels[idx] = ""
                                        pointCoords[idx][xAxis] = stackXv
                                        pointCoords[idx][yAxis] = stackYv
                                }
                        }
                }

                stackPoints(stackedDataset.rowLabels, stackedDataset.rowCoordinates, filteredRows, "row")
                stackPoints(stackedDataset.columnLabels, stackedDataset.columnCoordinates, filteredCols, "col")

                chart.getXYPlot().setDataset(stackedDataset)

                // Needs to be called AFTER setRenderer() because this method changes some rendering parameters.
                ica.getChartCreator().getChartsEngine().getJFCTheme().apply(chart)
                chartComposite.loadChart()
        }
})