root / tmp / org.txm.core / src / groovy / org / txm / macroproto / partition / CreatePartitionByNumericMetadataMacro.groovy @ 306
History | View | Annotate | Download (3.3 kB)
1 |
// STANDARD DECLARATIONS
|
---|---|
2 |
package org.txm.macroproto.partition
|
3 |
|
4 |
import groovy.transform.Field |
5 |
|
6 |
import org.kohsuke.args4j.* |
7 |
import org.txm.rcpapplication.swt.widget.parameters.* |
8 |
import org.txm.rcpapplication.views.* |
9 |
import org.txm.searchengine.cqp.corpus.* |
10 |
|
11 |
//BEGINNING OF PARAMETERS
|
12 |
// The macro works on whatever `corpusViewSelection` holds; anything that is
// not a Corpus is rejected before any parameter is asked for.
def corpus = corpusViewSelection
boolean isCorpusSelected = (corpus instanceof Corpus)
if (!isCorpusSelected) {
	println "Error: this macro should be run with a Corpus selected"
	return
}

// Name of the text property whose numeric values drive the partition.
// It is inserted as-is into the queries (`_.text_<property>`), so it must be
// a single property name, not a list.
@Field @Option(name="property", usage="name of the text property (metadata) holding the numeric values", widget="String", required=true, def="sadnesst1")
def property = "sadnesst1"

// File listing the numeric values; it is read as UTF-8 further down and its
// content is cut into values on commas and line breaks.
@Field @Option(name="values_file", usage="txt file", widget="File", required=true, def="C:\\Users\\Fanny\\Desktop\\values_num.txt")
def values_file

// true: the parts are delimited by the quantiles R computes on the values;
// false: the parts are delimited by fixed thresholds.
// NOTE(review): the dialog default (def="true") and the initializer (false)
// disagree — confirm which one is intended.
@Field @Option(name="balance", usage="delimit the parts with the quantiles of the values (balanced partition)", widget="Boolean", required=true, def="true")
def balance = false

// Structure used in the "expand to <expand_struct>" clause of every query.
@Field @Option(name="expand_struct", usage="expand_struct", widget="String", required=true, def="question")
def expand_struct = "question"

// Open the parameters input dialog box; the macro stops when open() returns false
if (!ParametersDialog.open(this)) return;

// Read the values file (UTF-8). Values may be separated by commas and/or line
// breaks (\n, \r\n or \r). Blank entries — empty lines, doubled or trailing
// separators — are dropped instead of aborting the number parsing.
def tokens = values_file.getText("UTF-8").split(/[,\r\n]+/).collect { it.trim() }.findAll { !it.isEmpty() }
// Normalized comma-separated form of the values (no blank entry, no line break)
def property_values = tokens.join(",")

println "Balance de la métadonnée '$property' du corpus '$corpus'"

// Numeric values in file order: Integer when the text is an integer, Double otherwise.
// Double (not Float) keeps the decimal form of the text (the values are printed
// back into the queries) and compares exactly with double-precision quantiles.
def domain = []
for (def s : tokens) {
	if (s.isInteger()) {
		domain << s.toInteger()
	} else if (s.isDouble()) {
		domain << s.toDouble()
	} else {
		println "Error: '$s' in $values_file is not a number"
		return
	}
}
if (domain.isEmpty()) {
	println "Error: no numeric value found in $values_file"
	return
}
// Distinct values, in ascending order
def uniq_domain = new HashSet(domain).sort()
println "domain size "+domain.size()

// Accumulators filled while the parts are built
def names = []
def queries = []
def p = 2
String values = ""

// Records one part of the partition. `group` is the "|"-separated list of the
// property values that belong to the part; it is used both as the part name and
// as the alternation matched by the query. An empty group (nothing accumulated
// yet) is ignored so that no `=""` part is ever created.
def addPart = { String group ->
	if (group.isEmpty()) return
	queries << "[_.text_"+property+"=\"$group\"] expand to $expand_struct"
	names << group
}
// Decimal-faithful double of a parsed value (Integer, Float or Double): going
// through the printed form avoids float/double rounding mismatches in comparisons.
def asDouble = { d -> Double.parseDouble(d.toString()) }

String currentGroup = ""
String partSuffix
if (!balance) {
	// Unit-wide bins: every value below 2 goes to the first part, then one part
	// per [k, k+1) interval that actually contains values.
	double upperBound = 2
	for (def d : uniq_domain) {
		double value = asDouble(d)
		if (value < upperBound) {
			currentGroup = currentGroup.isEmpty() ? d.toString() : currentGroup + "|" + d
		} else {
			addPart(currentGroup)
			currentGroup = d.toString()
			// catch up with the value so the threshold does not lag behind after a gap
			upperBound = Math.floor(value) + 1
		}
	}
	partSuffix = "_non_balanced"
} else {
	// The quantiles are computed on the parsed values, so R sees exactly the
	// numbers that are compared below (and never a blank entry).
	String rVector = "c(" + domain.join(",") + ")"
	println "=========================="
	// NOTE(review): no import for RWorkspace is visible in this file — confirm it resolves at runtime
	def res = RWorkspace.getRWorkspaceInstance().eval("quantile($rVector)")
	def ranges = res.asDoubles()
	println "quantiles : $ranges"

	// Only the interior quantiles are cut points: the first one is the minimum and
	// the last one is the maximum, which belongs to the last part. Stopping there
	// also keeps the index inside the array when the maximum value is reached.
	def cutPoints = []
	for (int k = 1; k < ranges.size() - 1; k++) cutPoints << ranges[k]

	int next = 0
	for (def d : uniq_domain) {
		double value = asDouble(d)
		if (next < cutPoints.size() && value >= cutPoints[next]) {
			if (!currentGroup.isEmpty()) println """[_.text_${property}="$currentGroup"]"""
			addPart(currentGroup)
			currentGroup = d.toString()
			// skip every cut point already reached (tied quantiles, gaps in the values)
			while (next < cutPoints.size() && value >= cutPoints[next]) next++
		} else {
			currentGroup = currentGroup.isEmpty() ? d.toString() : currentGroup + "|" + d
		}
	}
	partSuffix = "_balanced"
}
// Last part still being accumulated
addPart(currentGroup)

if (queries.isEmpty()) {
	println "Error: no value to build the partition from"
	return
}
corpus.createPartition(property + partSuffix, queries, names);

// Dump the generated queries and the part names to the console
println(queries)
println(names)

// Run RestartTXM.reloadViews() synchronously through the monitor
// (presumably so that the new partition shows up in the views — TODO confirm)
monitor.syncExec({ org.txm.rcpapplication.commands.RestartTXM.reloadViews() } as Runnable)
120 |
|
121 |
/*
|
122 |
int[] counts = new int[51]
|
123 |
for (def d : domain) {
|
124 |
counts[(int)(10*d)]++
|
125 |
}
|
126 |
println counts
|
127 |
*/
|