|
1 |
// STANDARD DECLARATIONS
|
|
2 |
package org.txm.macro
|
|
3 |
|
|
4 |
import org.kohsuke.args4j.*
|
|
5 |
import groovy.transform.Field
|
|
6 |
import org.txm.rcpapplication.swt.widget.parameters.*
|
|
7 |
import org.txm.searchengine.cqp.clientExceptions.*
|
|
8 |
import org.txm.searchengine.cqp.corpus.*
|
|
9 |
import org.txm.searchengine.cqp.corpus.query.*
|
|
10 |
import org.apache.commons.lang.time.StopWatch
|
|
11 |
import java.util.Arrays
|
|
12 |
import org.jfree.chart.renderer.xy.*
|
|
13 |
import org.jfree.chart.renderer.*
|
|
14 |
import org.jfree.chart.plot.*
|
|
15 |
import org.jfree.data.xy.*
|
|
16 |
import org.jfree.chart.axis.*
|
|
17 |
import java.awt.*;
|
|
18 |
import java.awt.geom.*;
|
|
19 |
import org.jfree.chart.labels.*
|
|
20 |
|
|
21 |
import org.txm.ca.core.chartsengine.jfreechart.themes.highcharts.renderers.*
|
|
22 |
import org.txm.ca.rcp.editors.*
|
|
23 |
import org.txm.libs.office.ReadODS
|
|
24 |
import org.txm.ca.core.chartsengine.jfreechart.datasets.*
|
|
25 |
import org.jfree.chart.renderer.AbstractRenderer
|
|
26 |
|
|
27 |
import org.apache.commons.math3.stat.descriptive.*
|
|
28 |
|
|
29 |
// Echo the object the macro was started on (useful to diagnose a wrong selection)
println "editor: "+editor

// The macro needs a CA editor: leave immediately for anything else
boolean isCAEditor = editor instanceof CAEditor
if (!isCAEditor) {
	println "editor is not a CA editor: $editor, Run the macro with F12 when the editor is selected :-)"
	return
}

// First sub-editor of the CA editor (its chart is read later) and that editor's composite
def chartEditor = editor.getEditors()[0]
def chartComposite = chartEditor.getComposite()
|
|
38 |
|
|
39 |
// BEGINNING OF PARAMETERS
|
|
40 |
|
|
41 |
// Declare each parameter here
|
|
42 |
// (available widget types: Query, File, Folder, String, Text, Boolean, Integer, Float and Date)
|
|
43 |
|
|
44 |
//@Field @Option(name="query", usage="an example query", widget="Query", required=true, def='[pos="V.*"]')
|
|
45 |
//def query
|
|
46 |
|
|
47 |
//@Field @Option(name="file", usage="an example file", widget="File", required=true, def="C:/Temp/foo.txt")
|
|
48 |
//def file
|
|
49 |
|
|
50 |
//@Field @Option(name="folder", usage="an example folder", widget="Folder", required=false, def="C:/Temp")
|
|
51 |
//def folder
|
|
52 |
|
|
53 |
//@Field @Option(name="date", usage="an example date", widget="Date", required=false, def="1984-09-01")
|
|
54 |
//def date
|
|
55 |
|
|
56 |
//@Field @Option(name="integer", usage="an example integer", widget="Integer", required=false, def="42")
|
|
57 |
//def integer
|
|
58 |
|
|
59 |
// Threshold compared with the sum of a point's squared cosines on the two displayed axes
@Field
@Option(name="QMin", usage="min Q", widget="Float", required=true, def="0.01")
def QMin

// Threshold compared with a point's contribution to the first displayed axis
@Field
@Option(name="CTRXMin", usage="min CTR X", widget="Float", required=true, def="0.001")
def CTRXMin

// Threshold compared with a point's contribution to the second displayed axis
@Field
@Option(name="CTRYMin", usage="min CTR Y", widget="Float", required=true, def="0.001")
def CTRYMin

// Coordinate given to every filtered point on the first displayed axis
@Field
@Option(name="stackX", usage="stack X", widget="Float", required=true, def="0.0")
def stackX

// Coordinate given to every filtered point on the second displayed axis
@Field
@Option(name="stackY", usage="stack Y", widget="Float", required=true, def="0.0")
def stackY

//@Field @Option(name="string", usage="an example string", widget="String", required=false, def="hello world!")
//def string

// Optional regular expression: row and column names fully matching it are filtered
@Field
@Option(name="regexFilter", usage="row property form regex", widget="Text", required=false, def="")
def regexFilter

// When true, the names of the filtered points and the filter counts are printed
@Field
@Option(name="debug", usage="debug (verbose) mode", widget="Boolean", required=true, def="true")
def debug
|
|
82 |
|
|
83 |
// The results window
CAresultWindow = editor

// The interpretation-aid data table
// The CA data can be handled with the methods documented at http://txm.sourceforge.net/javadoc/TXM/TBX/org/txm/stat/engine/r/function/CA.html

ica = CAresultWindow.getCA()
ca = ica.getCA()

// Row side: names, squared cosines and contributions
rowNames = ica.getRowNames()
rowCos2 = ca.getRowCos2()
rowContrib = ca.getRowContrib()

// Column side: same three tables
colNames = ica.getColNames()
colCos2 = ca.getColCos2()
colContrib = ca.getColContrib()

// Displayed dimensions, shifted by one so they can be used as zero-based indexes
F1 = ica.getFirstDimension()-1
F2 = ica.getSecondDimension()-1

// The dimensions are printed one-based again
println String.format("F1 = %d, F2 = %d, %d initial rows, %d initial cols",
		F1+1, F2+1, rowNames.length, colNames.length)
|
|
104 |
|
|
105 |
/**
 * Prints the minimum, first quartile, median, third quartile and maximum
 * of one column of a two-dimensional table.
 *
 * @param vector table read as vector[line][column]; every line is visited
 * @param factor zero-based index of the column to summarize
 */
def stats = { vector, factor ->
	// Declared with def: without it the accumulator becomes a variable of the
	// script binding, visible to (and overwritten by) the rest of the script.
	def summary = new DescriptiveStatistics()
	(vector.length).times { line ->
		summary.addValue(vector[line][factor])
	}

	println "min quartile median third quartile max"
	println sprintf("%8f %8f %8f %8f %8f", summary.getMin(), summary.getPercentile(25), summary.getPercentile(50), summary.getPercentile(75), summary.getMax())
}
|
|
114 |
|
|
115 |
// rows
|
|
116 |
|
|
117 |
// Rows: summary of the contributions, then of the squared cosines, for both displayed axes
println "ROWS -----"
for (axis in [F1, F2]) {
	println "CTR"+(axis+1)
	stats(rowContrib, axis)
}
for (axis in [F1, F2]) {
	println "Cos² "+(axis+1)
	stats(rowCos2, axis)
}

// Columns: same summaries in the same order
println "COLS -----"
for (axis in [F1, F2]) {
	println "CTR"+(axis+1)
	stats(colContrib, axis)
}
for (axis in [F1, F2]) {
	println "Cos² "+(axis+1)
	stats(colCos2, axis)
}
|
|
144 |
|
|
145 |
|
|
146 |
/**
 * Prints the minimum, first quartile, median, third quartile and maximum
 * of the line-by-line sum of two columns of a two-dimensional table.
 *
 * @param vector  table read as vector[line][column]; every line is visited
 * @param factor1 zero-based index of the first column to add
 * @param factor2 zero-based index of the second column to add
 */
def stats2 = { vector, factor1, factor2 ->
	// Declared with def: without it the accumulator becomes a variable of the
	// script binding, visible to (and overwritten by) the rest of the script.
	def summary = new DescriptiveStatistics()
	(vector.length).times { line ->
		summary.addValue(vector[line][factor1]+vector[line][factor2])
	}

	println "min quartile median third quartile max"
	println sprintf("%8f %8f %8f %8f %8f", summary.getMin(), summary.getPercentile(25), summary.getPercentile(50), summary.getPercentile(75), summary.getMax())
}
|
|
155 |
|
|
156 |
// Summary of the summed squared cosines on the two displayed axes.
// The same label is printed twice: first for the rows, then for the columns.
def planeLabel = "Q"+(F1+1)+(F2+1)
for (cos2Table in [rowCos2, colCos2]) {
	println planeLabel
	stats2(cos2Table, F1, F2)
}
|
|
161 |
|
|
162 |
// Show the parameters dialog; the macro stops here unless open() returns a true value
def dialogAccepted = ParametersDialog.open(this)
if (!dialogAccepted) return
|
|
164 |
|
|
165 |
// END OF PARAMETERS
|
|
166 |
|
|
167 |
|
|
168 |
// Names of the rows to hide; read again when the chart dataset is rebuilt.
// A LinkedHashSet keeps insertion order, stores each name once and makes
// contains() cheap (the list used before was scanned for every row).
filteredRows = new LinkedHashSet()

println ""

if (debug) println "\nROWS -----"

// 1) Regex filter. regexFilter is optional: null or empty means no regex filtering.
if (regexFilter) {
	def rowPattern = ~regexFilter // compiled once instead of once per row
	def rowRegexMatches = 0
	def rowRegexPrinted = 0
	rowNames.each { name ->
		if (name ==~ rowPattern) {
			if (debug && !filteredRows.contains(name)) {
				// the separator depends on what was actually printed, not on the match count
				if (rowRegexPrinted++ > 0) print ", "
				print name
			}
			filteredRows << name
			rowRegexMatches++
		}
	}

	if (debug && rowRegexMatches > 0) println ""

	if (debug) {
		println sprintf("\n%d rows filtered by regex", rowRegexMatches)
	}
}

// 2) Threshold filter: a row is filtered when its contribution to each displayed
// axis and its summed squared cosines are all strictly below the minimums.
def rowThresholdMatches = 0
def rowThresholdPrinted = 0
rowNames.eachWithIndex { name, i ->
	def cos2 = rowCos2[i]
	if ((rowContrib[i][F1] < CTRXMin) && (rowContrib[i][F2] < CTRYMin) && (cos2[F1] + cos2[F2] < QMin)) {
		// rows already filtered by the regex are counted but not printed a second time
		if (debug && !filteredRows.contains(name)) {
			if (rowThresholdPrinted++ > 0) print ", "
			print name
		}
		filteredRows << name
		rowThresholdMatches++
	}
}

if (debug) {
	println sprintf("\n%d rows filtered by CTR or Q"+(F1+1)+(F2+1), rowThresholdMatches)
}
|
|
211 |
|
|
212 |
// cols
|
|
213 |
|
|
214 |
// Names of the columns to hide; read again when the chart dataset is rebuilt.
// A LinkedHashSet keeps insertion order, stores each name once and makes
// contains() cheap (the list used before was scanned for every column).
filteredCols = new LinkedHashSet()

println ""

if (debug) println "\nCOLS -----"

// 1) Regex filter. regexFilter is optional: null or empty means no regex filtering.
if (regexFilter) {
	def colPattern = ~regexFilter // compiled once instead of once per column
	def colRegexMatches = 0
	def colRegexPrinted = 0
	colNames.each { name ->
		if (name ==~ colPattern) {
			if (debug && !filteredCols.contains(name)) {
				// the separator depends on what was actually printed, not on the match count
				if (colRegexPrinted++ > 0) print ", "
				print name
			}
			filteredCols << name
			colRegexMatches++
		}
	}

	if (debug && colRegexMatches > 0) println ""

	if (debug) {
		println sprintf("\n%d cols filtered by regex", colRegexMatches)
	}
}

// 2) Threshold filter: a column is filtered when its contribution to each displayed
// axis and its summed squared cosines are all strictly below the minimums.
// The comparison is strict, as for the rows: a value equal to its minimum is kept
// (the previous test also filtered values exactly equal to the minimum).
def colThresholdMatches = 0
def colThresholdPrinted = 0
colNames.eachWithIndex { name, i ->
	def cos2 = colCos2[i]
	if ((colContrib[i][F1] < CTRXMin) && (colContrib[i][F2] < CTRYMin) && (cos2[F1] + cos2[F2] < QMin)) {
		// columns already filtered by the regex are counted but not printed a second time
		if (debug && !filteredCols.contains(name)) {
			if (colThresholdPrinted++ > 0) print ", "
			print name
		}
		filteredCols << name
		colThresholdMatches++
	}
}

if (debug) {
	println sprintf("\n%d cols filtered by CTR or Q"+(F1+1)+(F2+1), colThresholdMatches)
}
|
|
257 |
|
|
258 |
// Coordinates given to the filtered points, converted to double once
double stackXv = stackX
double stackYv = stackY


// Chart display

chartCAresultWindow = CAresultWindow.getEditors()[0]
chartComposite = chartCAresultWindow.getComposite()

// The chart update is handed to monitor.syncExec as a Runnable
monitor.syncExec( new Runnable() {
	public void run() {

		println chartComposite
		def chart = chartEditor.getChart();

		// Fresh dataset built from the CA; its labels and coordinates are edited in place below
		dataset2 = new CAXYDataset(ica)

		// Rows: blank the label of every filtered row and move its point to (stackX, stackY)
		labels = dataset2.rowLabels
		coords = dataset2.rowCoordinates
		for (int idx = 0; idx < labels.length; idx++) {
			if (!filteredRows.contains(labels[idx])) continue
			println "Moving "+labels[idx]+" row to origin."
			labels[idx] = ""
			coords[idx][F1] = stackXv
			coords[idx][F2] = stackYv
		}

		// Columns: same treatment
		labels = dataset2.columnLabels
		coords = dataset2.columnCoordinates
		for (int idx = 0; idx < labels.length; idx++) {
			if (!filteredCols.contains(labels[idx])) continue
			println "Moving "+labels[idx]+" col to origin."
			labels[idx] = ""
			coords[idx][F1] = stackXv
			coords[idx][F2] = stackYv
		}

		chart.getXYPlot().setDataset(dataset2)

		// Original note: the theme must be applied AFTER setRenderer() because apply() changes
		// some rendering parameters (no setRenderer() call is visible in this script)
		ica.getChartCreator().getChartsEngine().getJFCTheme().apply(chart);
		chartComposite.loadChart()
	}
})
|