Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / macroproto / partition / CreatePartitionByNumericMetadataMacro.groovy @ 306

History | View | Annotate | Download (3.3 kB)

1
// STANDARD DECLARATIONS
2
package org.txm.macroproto.partition
3

    
4
import groovy.transform.Field
5

    
6
import org.kohsuke.args4j.*
7
import org.txm.rcpapplication.swt.widget.parameters.*
8
import org.txm.rcpapplication.views.*
9
import org.txm.searchengine.cqp.corpus.*
10

    
11
//BEGINNING OF PARAMETERS
// The macro works on the object currently selected in the corpus view; it must be a Corpus.
def corpus = corpusViewSelection
if (!(corpus instanceof Corpus)) {
    println "Error: this macro should be run with a Corpus selected"
    return
}

// Name of the text metadata holding the numeric values; it is queried as _.text_<property>.
@Field @Option(name="property", usage="name of the numeric text metadata (text property) to partition on", widget="String", required=true, def="sadnesst1")
def property = "sadnesst1"

// File listing the values of the metadata.
// NOTE(review): the default path is specific to one workstation - confirm whether it should be kept.
@Field @Option(name="values_file", usage="UTF-8 text file listing the values of the metadata, separated by commas or line breaks", widget="File", required=true, def="C:\\Users\\Fanny\\Desktop\\values_num.txt")
def values_file

// Selects how the parts are built: from the quantiles of the values (true)
// or from successive integer thresholds (false).
// NOTE(review): the dialog default ("true") and the initial value (false) disagree - confirm which one is intended.
@Field @Option(name="balance", usage="build the parts from the quantiles of the values (true) or from successive integer thresholds (false)", widget="Boolean", required=true, def="true")
def balance = false

// Structure named in the "expand to" clause of every generated query.
@Field @Option(name="expand_struct", usage="name of the structure the query matches are expanded to", widget="String", required=true, def="question")
def expand_struct = "question"

// Open the parameters input dialog box; stop the macro if the dialog is not validated
if (!ParametersDialog.open(this)) return;
32

    
33
// Read the values file: numbers separated by commas and/or line breaks.
// Blank entries (empty lines, doubled or trailing separators) are dropped so that they can
// neither break the number parsing below nor leave an empty item in the comma-separated list.
def value_tokens = values_file.getText("UTF-8")
        .replaceAll("\r", "")
        .split(/[\n,]/)
        .collect { it.trim() }
        .findAll { it.length() > 0 }

// Normalised comma-separated form of the values (one item per number, no empty item).
def property_values = value_tokens.join(",")

println "Balance de la métadonnée '$property' du corpus '$corpus'"

// Parse every entry as an Integer when possible, as a Float otherwise.
// An entry that is not a number still raises a NumberFormatException, as before.
def domain = []
for (String s : value_tokens) {
    try {
        domain << Integer.parseInt(s)
    } catch (NumberFormatException notAnInteger) {
        domain << Float.parseFloat(s)
    }
}

// Distinct values in ascending order.
def uniq_domain = new HashSet(domain).sort()
println "domain size "+domain.size()
48

    
49
// CQL queries and names of the parts; both lists are filled below.
def names = []
def queries = []

// Nothing can be partitioned (nor sent to R) without at least one value.
if (uniq_domain.isEmpty()) {
    println "Error: no numeric value found, the partition is not created"
    return
}

/*
 * Groups the sorted values into parts and builds one query and one name per part.
 *
 * sortedValues  : the distinct values, in ascending order
 * nextThreshold : closure returning the next threshold each time it is called
 * trace         : when true, prints the value test of each part closed inside the loop
 *
 * A part is closed when a value reaches the current threshold; that value opens the next
 * part and the threshold advances by ONE step only.
 * NOTE(review): when a value jumps over several thresholds at once the following thresholds
 * lag behind the data - confirm whether that is intended.
 *
 * Returns [queries, names]. Empty parts are never emitted, and a part name is the
 * "|"-separated list of its values without any leading "|".
 */
def buildParts = { Collection sortedValues, Closure nextThreshold, boolean trace ->
    def partQueries = []
    def partNames = []
    def pending = []   // values of the part being built
    def threshold = nextThreshold()

    def closePart = { boolean show ->
        if (pending.isEmpty()) return   // e.g. the smallest value already reaches the first threshold
        String alternation = pending.join("|")
        if (show) println """[_.text_${property}="$alternation"]"""
        partQueries << "[_.text_"+property+"=\"$alternation\"] expand to $expand_struct"
        partNames << alternation
        pending.clear()
    }

    for (def d : sortedValues) {
        if (d >= threshold) {
            closePart(trace)
            threshold = nextThreshold()
        }
        pending << d
    }
    closePart(false)   // last part

    return [partQueries, partNames]
}

if (!balance) {
    // Thresholds are the successive integers 2, 3, 4, ...
    int bound = 2
    def built = buildParts(uniq_domain, { bound++ }, false)
    queries = built[0]
    names = built[1]

    corpus.createPartition(property+"_non_balanced", queries, names);
} else {
    // The R vector is built from the parsed numbers rather than from the raw file text, so
    // that only numbers reach the R evaluation and no empty item can appear in c(...).
    String v = "c(" + domain.join(",") + ")"
    println "=========================="
    // NOTE(review): RWorkspace is not covered by the imports visible in this file - confirm it resolves.
    def res = RWorkspace.getRWorkspaceInstance().eval("quantile($v)")
    def ranges = res.asDoubles()
    println "quantiles : $ranges"

    // Only the inner quantiles are used as cut points (the first and the last item are
    // skipped). Once they are used up the threshold becomes infinite, so that every
    // remaining value joins the last part instead of reading past the end of the array.
    int next = 1
    def nextCut = { next <= ranges.length - 2 ? ranges[next++] : Double.POSITIVE_INFINITY }
    def built = buildParts(uniq_domain, nextCut, true)
    queries = built[0]
    names = built[1]

    corpus.createPartition(property+"_balanced", queries, names);
}
107

    
108

    
109
// Print the queries and the part names that were built
println queries
println names

// Have the monitor run the reload of the TXM views synchronously
// (presumably so that the new partition shows up in the interface - confirm).
monitor.syncExec({
    org.txm.rcpapplication.commands.RestartTXM.reloadViews();
} as Runnable);
120

    
121
/*
122
int[] counts = new int[51]
123
for (def d : domain) {
124
        counts[(int)(10*d)]++
125
}
126
println counts
127
*/