Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macroproto / partition / CreatePartitionByNumericMetadataMacro.groovy @ 479

History | View | Annotate | Download (3.4 kB)

1
// STANDARD DECLARATIONS
2
package org.txm.macroproto.partition
3

    
4
import org.kohsuke.args4j.*
5

    
6
import groovy.transform.Field
7

    
8
import org.txm.rcpapplication.swt.widget.parameters.*
9
import org.txm.searchengine.cqp.corpus.*
10
import org.txm.utils.logger.Log;
11
import org.txm.rcpapplication.views.*
12
import org.txm.statsengine.r.core.RWorkspace
13
import org.txm.Toolbox
14

    
15
//BEGINNING OF PARAMETERS
16
// The macro works on the object currently selected in the corpus view;
// anything that is not a Corpus is rejected before any parameter is asked.
def corpus = corpusViewSelection
boolean corpus_selected = corpus instanceof Corpus
if (!corpus_selected) {
        println "Error: this macro should be run with a Corpus selected"
        return
}
21

    
22
// Name of the text metadata to partition on; it is used further down as the
// CQP attribute "text_<property>", so it is a single name, not a list.
@Field @Option(name="property", usage="the name of the numeric text metadata to partition on", widget="String", required=true, def="sadnesst1")
def property = "sadnesst1"

// Text file holding the numeric values of the metadata, separated by commas and/or line breaks.
// NOTE(review): the default below is a path on one developer's machine — consider a neutral default.
@Field @Option(name="values_file", usage="txt file", widget="File", required=true, def="C:\\Users\\Fanny\\Desktop\\values_num.txt")
def values_file

// true: the parts are delimited by the quantiles that R computes on the values;
// false: the parts are delimited by fixed thresholds starting at 2.
// NOTE(review): the dialog default (def="true") and the field initializer (false) disagree — confirm which one is intended.
@Field @Option(name="balance", usage="balance the parts using the quantiles of the values", widget="Boolean", required=true, def="true")
def balance = false

// Name of the structure each match is expanded to ("expand to <expand_struct>" in the generated queries).
@Field @Option(name="expand_struct", usage="expand_struct", widget="String", required=true, def="question")
def expand_struct = "question"

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;
36

    
37
// Read the values file: the values may be separated by commas and/or line breaks.
// Blank tokens (trailing newline, empty line, doubled comma) are dropped so that they
// neither break the number parsing below nor leave an empty argument in the
// comma-separated list kept in property_values.
def value_tokens = values_file.getText("UTF-8").split(/[,\r\n]+/).collect { it.trim() }.findAll { !it.isEmpty() }
def property_values = value_tokens.join(",")

println "Balance de la métadonnée '$property' du corpus '$corpus'"

// domain: every value of the file as a number (duplicates kept, file order)
def domain = []
for (def s : value_tokens) {
        try {
                domain << Integer.parseInt(s)
        } catch(NumberFormatException e) {
                // Not an integer: read it as a decimal number. Double (rather than Float) keeps
                // the precision of the written value; a non-numeric token still raises a
                // NumberFormatException here.
                domain << Double.parseDouble(s)
        }
}
if (domain.isEmpty()) {
        println "Error: no numeric value found in $values_file"
        return
}

// uniq_domain: the distinct values in ascending order
def uniq_domain = new HashSet(domain).sort()
println "domain size "+domain.size()
52

    
53
// ---- Build the partition: one CQP query and one name per part ----
def names = []
def queries = []

// Registers one part matching any of the given values of the metadata.
// An empty group is skipped so that no part with an empty query and an empty name is created.
// NOTE(review): the values are written unescaped into the query; if the engine reads them as a
// regular expression (the '|' alternation suggests so), a '.' matches any character — confirm.
def addPart = { List part_values, boolean trace = false ->
        if (part_values.isEmpty()) return
        String alternatives = part_values.join("|")
        if (trace) println """[_.text_${property}="$alternatives"]"""
        queries << "[_.text_"+property+"=\"$alternatives\"] expand to $expand_struct"
        names << alternatives
}

// values collected for the part currently being filled (uniq_domain is in ascending order)
def current = []

if (!balance) {
        // Fixed thresholds: the first part takes every value below 2, then each part
        // covers one unit ([2,3[, [3,4[, ...).
        def p = 2
        for (def d : uniq_domain) {
                if (d >= p) {
                        addPart(current)
                        current = []
                        // jump straight to the threshold above d, so that a gap in the data
                        // does not leave the following bins misaligned
                        p = Math.floor(d.doubleValue()) + 1
                }
                current << d
        }
        addPart(current)

        corpus.createPartition(property+"_non_balanced", queries, names);

} else {
        // The R vector is rebuilt from the parsed numbers, so blank entries or a trailing
        // separator in the values file cannot produce an empty argument in c(...).
        String v = "c(" + domain.join(",") + ")"
        println "=========================="
        def res = RWorkspace.getRWorkspaceInstance().eval("quantile($v)")
        def ranges = res.asDoubles()
        println "quantiles : $ranges"

        // With R's default probabilities the first and last quantiles are the minimum and the
        // maximum; only the inner ones are cut points between parts.
        int last_cut = ranges.length - 2
        int i = 1 // index of the next cut point not yet reached
        for (def d : uniq_domain) {
                if (i <= last_cut && d >= ranges[i]) {
                        addPart(current, true)
                        current = []
                        // skip every cut point that d has reached; never read past the last inner quantile
                        while (i <= last_cut && d >= ranges[i]) i++
                }
                current << d
        }
        addPart(current)

        corpus.createPartition(property+"_balanced", queries, names);
}
111

    
112

    
113
// Trace the generated queries and the part names
println queries
println names

// Ask TXM to reload its views through the monitor
// (presumably so that the new partition shows up — confirm).
monitor.syncExec({
        org.txm.rcpapplication.commands.RestartTXM.reloadViews();
} as Runnable);
124

    
125
/*
126
int[] counts = new int[51]
127
for (def d : domain) {
128
        counts[(int)(10*d)]++
129
}
130
println counts
131
*/