root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / UnitsSummaryMacro.groovy @ 2167
History | View | Annotate | Download (4 kB)
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
|
---|---|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
|
3 |
// @author mdecorde
|
4 |
// @author sheiden
|
5 |
// STANDARD DECLARATIONS
|
6 |
package org.txm.macro.urs.exploit
|
7 |
|
8 |
import org.apache.commons.lang.StringUtils |
9 |
import org.kohsuke.args4j.* |
10 |
|
11 |
import groovy.transform.Field |
12 |
|
13 |
import org.txm.Toolbox |
14 |
import org.txm.rcp.swt.widget.parameters.* |
15 |
import org.txm.annotation.urs.* |
16 |
import org.txm.macro.urs.AnalecUtils |
17 |
import org.txm.searchengine.cqp.AbstractCqiClient |
18 |
import org.txm.searchengine.cqp.corpus.* |
19 |
import org.txm.searchengine.cqp.CQPSearchEngine |
20 |
|
21 |
import visuAnalec.donnees.Structure |
22 |
import visuAnalec.elements.* |
23 |
|
24 |
// Simple class name of this script, used as a prefix in the user-facing message below.
def scriptName = this.class.getSimpleName()

// Flatten the Corpus view selection into a list of corpora:
// a CQPCorpus is kept as is, a Partition contributes each of its parts,
// any other kind of selected object is ignored.
def selection = []
corpusViewSelections.each { selected ->
	if (selected instanceof CQPCorpus) {
		selection.add(selected)
	} else if (selected instanceof Partition) {
		selection.addAll(selected.getParts())
	}
}

// Nothing usable was selected: report it and stop the macro.
if (selection.isEmpty()) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// Call compute(false) on every retained corpus before it is queried.
selection.each { it.compute(false) }
38 |
|
39 |
// BEGINNING OF PARAMETERS
// Each @Field below is a macro parameter described by its @Option annotation;
// ParametersDialog.open(this) is called afterwards and the macro stops if it returns false.

// Schema selector, written as TYPE@PROP=VALUE.
@Field @Option(
	name="schema_ursql",
	usage="TYPE@PROP=VALUE",
	widget="String",
	required=true,
	def="CHAINE")
String schema_ursql

// Lower bound on the size of the schemas to consider.
@Field @Option(
	name="minimum_schema_size",
	usage="Minimum size needed to consider a schema",
	widget="Integer",
	required=true,
	def="3")
int minimum_schema_size

// Upper bound on the size of the schemas to consider.
@Field @Option(
	name="maximum_schema_size",
	usage="Maximum size needed to consider a schema",
	widget="Integer",
	required=true,
	def="9999999")
int maximum_schema_size

// Unit selector, written as TYPE@PROP=VALUE.
@Field @Option(
	name="unit_ursql",
	usage="TYPE@PROP=VALUE",
	widget="String",
	required=false,
	def="MENTION")
String unit_ursql

// Position of the unit inside its schema (0 disables this selection).
@Field @Option(
	name="position_in_schema",
	usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)",
	widget="Integer",
	required=true,
	def="0")
int position_in_schema

// Optional CQL query used to build structure limits.
@Field @Option(
	name="cql_limit",
	usage="CQL to build structure limits",
	widget="Query",
	required=false,
	def="")
cql_limit

// Whether units must be strictly included into the corpus matches.
@Field @Option(
	name="strict_inclusion",
	usage="Units must be strictly included into corpus matches",
	widget="Boolean",
	required=false,
	def="true")
boolean strict_inclusion

// Position of the unit relative to the structure limit (0 disables this selection).
@Field @Option(
	name="position_in_matches",
	usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)",
	widget="Integer",
	required=true,
	def="0")
int position_in_matches

// Debug verbosity, chosen by label and converted to a number below.
@Field @Option(
	name="debug",
	usage="Show internal variable content",
	widget="StringArray",
	metaVar="OFF ON ALL REALLY ALL",
	required=true,
	def="OFF")
debug

// Open the parameters dialog; stop here when it does not return true.
def parametersAccepted = ParametersDialog.open(this)
if (!parametersAccepted) return

// Convert the debug label into a numeric level (OFF=0, ON=1, ALL=2, REALLY ALL=3).
// Level 0 is false under Groovy truth, so later "if (debug)" tests are skipped when debug is OFF.
// Any other value is left untouched.
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}
60 |
|
61 |
// Units selected for each corpus; a LinkedHashMap so entries keep the order of `selection`.
def allresults = new LinkedHashMap()
// Union of the unit property names declared by the URS structure of every processed corpus.
def props = new HashSet()

for (def corpus : selection) {

	// URS annotations attached to this corpus.
	def analecCorpus = URSCorpora.getCorpus(corpus)

	// Collect the property names of every unit type declared in the annotation structure.
	def structure = analecCorpus.getStructure()
	for (def type : structure.getUnites()) {
		props.addAll(structure.getUniteProperties(type))
	}

	// Select the units of this corpus according to the macro parameters.
	def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size,
			unit_ursql, position_in_schema, cql_limit, strict_inclusion, position_in_matches)

	// Fixed: the message used to end with a stray "}" that was printed literally.
	if (debug) println "found ${selectedUnits.size()} units in $corpus"

	allresults[corpus] = selectedUnits
}
78 |
|
79 |
// tabulate summary
// Prints a tab-separated table: one column per corpus (sorted keys of allresults),
// a "Units" row with the number of selected units, then one row per unit property
// with the number of distinct values found for that property among the selected units.
def keys = allresults.keySet().sort()

println "Statistiques des unités de "+corpusViewSelections.join(",")
println "\t"+keys.join("\t")

// "Units" row: number of selected units in each corpus.
print "Units"
keys.each { corpusKey ->
	print "\t"+allresults[corpusKey].size()
}
println ""

// One row per property name.
props.each { propName ->
	print "$propName"

	keys.each { corpusKey ->
		// Distinct values taken by this property among the units of this corpus.
		def distinctValues = new HashSet(allresults[corpusKey].collect { unit -> unit.getProp(propName) })
		print "\t"+distinctValues.size()

		// Sorted copy of the distinct values, printed only when debug is enabled.
		def sortedValues = distinctValues.toList().sort()
		if (debug) {
			print "\t\""+sortedValues.join("\", \"")+"\""
		}
	}

	println ""
}
103 |
|