Révision 3590
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/cqp/StructIndexMacro.groovy (revision 3590) | ||
---|---|---|
1 |
// Copyright © 2023 ENS de Lyon |
|
2 |
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html) |
|
3 |
// @author sheiden |
|
4 |
|
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.cqp |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.macro.cqp.CQPUtils |
|
12 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
13 |
|
|
14 |
// Name of this macro, used as a prefix in every diagnostic message
scriptName = this.class.getSimpleName()

// BEGINNING OF PARAMETERS

// Structure and properties to index, written as "struct@prop1,prop2..."
@Field @Option(name="struct_props", usage="struct@prop1,prop2...", widget="String", required=true, def="")
def struct_props

// Maximum number of lines printed; the code below treats 0 as "no limit"
@Field @Option(name="Vmax", usage="maximum number of values displayed", widget="Integer", required=true, def="100")
def Vmax

// When true, identical values are counted and printed once with their frequency
@Field @Option(name="groupByValue", usage="group results by value before counting", widget="Boolean", required=false, def="true")
def groupByValue

// Open the parameters input dialog box; stop silently if the user cancels
if (!ParametersDialog.open(this)) return

// END OF PARAMETERS

utils = new CQPUtils()
corpusEngine = CQPSearchEngine.getCqiClient()

// check for a corpus selection: exactly one corpus is required.
// An empty selection must be rejected too, otherwise corpora[0] is null
// and the script fails later with a NullPointerException.
corpora = utils.getCorpora(this)

if ((corpora == null) || corpora.size() != 1) {
	println "** $scriptName: please select a corpus in the Corpus view or provide a corpus name. Aborting."
	return false
}

// check for the struct_props parameter
if ((struct_props == null) || struct_props.size() == 0) {
	println "** $scriptName: please set the 'struct_props' parameter. Aborting."
	return false
}
|
47 |
|
|
48 |
// Name of the main corpus of the selection; prefixes every CQP attribute name below
corpus = corpora[0].getMainCorpus().getName()

globalVmax = Vmax

// "struct@prop1,prop2" -> structure name and raw property list
(struct, prop) = struct_props.tokenize('@')*.trim()

// tokenize() also handles the single-property case (no comma); blank names are dropped
// so that an input such as "s@," is reported instead of querying the attribute "s_null"
def propNames = (prop ?: '').tokenize(',')*.trim().findAll { it }

if (propNames.isEmpty()) {
	println "** $scriptName: please set @properties in the 'struct_props' parameter."
} else {

	prop = propNames

	// Number of structure instances, read from the first property's attribute.
	// The half-open range is empty when the count is 0, whereas 0..(count-1)
	// would become the reverse range [0, -1] and query invalid structure indices.
	def structCount = corpusEngine.attributeSize("${corpus}.${struct}_${prop[0]}")

	// One value per structure instance: the property values joined with '_'
	occs = (0..<structCount).collect { structIdx ->
		prop.collect { propName ->
			corpusEngine.struc2Str("${corpus}.${struct}_${propName}", [structIdx] as int[])[0]
		}.join('_')
	}

	if (groupByValue) {
		// Frequency of each value, ordered by decreasing frequency then by value.
		// No prior sort is needed: this comparator is already a total order.
		occs = occs.countBy { it }.sort { a, b -> b.value <=> a.value ?: a.key <=> b.key }
	}

	// Vmax == 0 means "display everything"
	def shown = (globalVmax == 0) ? occs : occs.take(globalVmax)

	if (groupByValue) {
		shown.each { println sprintf("%s\t%d", it.key, it.value) }
	} else {
		shown.each { println it }
	}
}
|
85 |
|
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/cqp/StructsIndexMacro.groovy (revision 3590) | ||
---|---|---|
1 |
// Copyright © 2023 ENS de Lyon |
|
2 |
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html) |
|
3 |
// @author sheiden |
|
4 |
|
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.cqp |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.macro.cqp.CQPUtils |
|
12 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
13 |
|
|
14 |
// Name of this macro, used as a prefix in every diagnostic message
scriptName = this.class.getSimpleName()

// BEGINNING OF PARAMETERS

// Nested structures to index, outermost first: "struct1@prop1_prop2,struct2@prop3_prop4..."
@Field @Option(name = "structs_props", usage = "struct1@prop1_prop2,struct2@prop3_prop4...", widget = "String", required = true, def = "")
def structs_props

// Maximum number of values printed per structure level; 0 displays all
@Field @Option(name = "Vmax", usage = "maximum number of values displayed (use '0' to display all)", widget = "Integer", required = true, def = "0")
def Vmax

// Open the parameters input dialog box; stop silently if the user cancels
if (!ParametersDialog.open(this)) {
	return
}

// END OF PARAMETERS

utils = new CQPUtils()
corpusEngine = CQPSearchEngine.getCqiClient()

// check for a corpus selection: exactly one corpus is required.
// An empty selection must be rejected too, otherwise corpora[0] is null
// and the script fails later with a NullPointerException.
corpora = utils.getCorpora(this)

if ((corpora == null) || corpora.size() != 1) {
	println "** $scriptName: please select a corpus in the Corpus view or provide a corpus name. Aborting."
	return false
}

// check for structs_props parameter
if ((structs_props == null) || structs_props.size() == 0) {
	println "** $scriptName: please set the 'structs_props' parameter. Aborting."
	return false
}

corpus_name = corpora[0].getMainCorpus().getName()
corpus_size = corpora[0].getMainCorpus().getSize()

// One "struct@props" entry per structure level; tokenize() also covers the
// single-entry case (no comma), so no separate branch is needed
struct_props = structs_props.tokenize(',')*.trim()

globalVmax = Vmax
|
60 |
|
|
61 |
/**
 * Recursively prints one tab-separated line per combination of nested structures.
 *
 * The first entry of struct_prop names a structure and its properties
 * ("struct@prop1_prop2"). Every instance of that structure lying inside
 * [start, end] contributes its property values (joined with '_') to the
 * current line, and the remaining entries are then resolved inside that
 * instance's own span. When no entry is left, the accumulated line is printed.
 *
 * Reads the script variables scriptName, corpusEngine, corpus_name and globalVmax.
 *
 * @param start first corpus position of the enclosing span (inclusive)
 * @param end last corpus position of the enclosing span (inclusive)
 * @param struct_prop list of "struct@props" entries still to resolve, outermost first
 * @param output values accumulated so far for the current line ("" at the top level)
 */
def printPropValues(start, end, struct_prop, output) {

	// No structure level left: the line is complete
	if (struct_prop.size() == 0) {
		println output
		return
	}

	// All working state is local: this method is recursive, and writing it to
	// the script binding would let inner calls overwrite the outer call's values
	def parts = struct_prop[0].tokenize('@')*.trim()
	def struct = parts[0]
	def prop = parts[1]

	if ((prop == null) || prop.size() == 0) {
		println "** $scriptName: please set @properties in the 'structs_props' parameter."
		return
	}

	// tokenize() also covers the single-property case (no underscore)
	def propNames = prop.tokenize('_')*.trim()

	// The first property's attribute is used to enumerate and locate the structure instances
	String spanAttribute = "${corpus_name}.${struct}_${propNames[0]}"
	def structCount = corpusEngine.attributeSize(spanAttribute)
	def remaining = struct_prop.tail()
	def shown = 0

	// A counted loop runs zero times on an empty attribute, whereas 0..(count-1)
	// would become the reverse range [0, -1] and query invalid structure indices
	for (int idx = 0; idx < structCount; idx++) {

		// Vmax == 0 means "no limit"; otherwise stop as soon as enough values were emitted
		if (globalVmax != 0 && shown >= globalVmax) {
			break
		}

		// Single lookup of the instance bounds (start and end corpus positions)
		def bounds = corpusEngine.struc2Cpos(spanAttribute, idx as int)
		def structStart = bounds[0]
		def structEnd = bounds[1]

		// Keep only the instances fully contained in the enclosing span
		if (structStart < start || structEnd > end) {
			continue
		}

		def value = propNames.collect { propName ->
			corpusEngine.struc2Str("${corpus_name}.${struct}_${propName}", [idx] as int[])[0]
		}.join('_')

		def line = (output.length() > 0) ? output + "\t" + value : value
		printPropValues(structStart, structEnd, remaining, line)
		shown++
	}
}

// Start from the whole corpus with an empty line
printPropValues(0, corpus_size, struct_props, "")
|
111 |
|
|
112 |
|
Formats disponibles : Unified diff