root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / UnitsListMacro.groovy @ 2144
History | View | Annotate | Download (6.5 kB)
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
|
---|---|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
|
3 |
// @author mdecorde
|
4 |
// @author sheiden
|
5 |
// STANDARD DECLARATIONS
|
6 |
package org.txm.macro.urs.exploit
|
7 |
|
8 |
import org.apache.commons.lang.StringUtils |
9 |
import org.kohsuke.args4j.* |
10 |
|
11 |
import groovy.transform.Field |
12 |
|
13 |
import org.txm.Toolbox |
14 |
import org.txm.rcp.swt.widget.parameters.* |
15 |
import org.txm.annotation.urs.* |
16 |
import org.txm.concordance.core.functions.Concordance |
17 |
import org.txm.concordance.rcp.editors.ConcordanceEditor |
18 |
import org.txm.concordance.rcp.handlers.* |
19 |
import org.txm.macro.urs.AnalecUtils |
20 |
import org.txm.searchengine.cqp.AbstractCqiClient |
21 |
import org.txm.searchengine.cqp.ReferencePattern |
22 |
import org.txm.searchengine.cqp.corpus.* |
23 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
24 |
import org.txm.searchengine.cqp.CQPSearchEngine |
25 |
|
26 |
import visuAnalec.donnees.Structure |
27 |
import visuAnalec.elements.* |
28 |
|
29 |
// Simple class name of this script, used to prefix console messages
def scriptName = this.class.getSimpleName()

// Collect the corpora to process from the Corpus view selection:
// a CQPCorpus is taken as is, a Partition contributes all of its parts,
// anything else is ignored.
def selection = []
for (def selected in corpusViewSelections) {
	if (selected instanceof CQPCorpus) {
		selection << selected
	} else if (selected instanceof Partition) {
		selection.addAll(selected.getParts())
	}
}

// Nothing usable selected: report it and stop the macro
if (selection.isEmpty()) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// Call compute(false) on every retained corpus before it is queried
for (def c in selection) {
	c.compute(false)
}
43 |
|
44 |
// BEGINNING OF PARAMETERS
// Each @Field/@Option pair below declares one macro parameter; the values are
// filled in by ParametersDialog.open(this) further down.

// Schema selector, written as TYPE@PROP=VALUE
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

// Lower bound on the size of the schemas to consider
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size

// Upper bound on the size of the schemas to consider
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size

// Unit selector, written as TYPE@PROP=VALUE
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql

// Rank of the unit inside its schema (0 disables this filter)
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int position_in_schema

// Optional CQL query whose matches are used as structure limits
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="")
cql_limit

// Whether units must lie strictly inside the corpus matches
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion

// Rank of the unit relative to the structure limit (0 disables this filter)
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int position_in_matches

// How the selected units are reported (see the output_mode dispatch below)
@Field @Option(name="output_mode", usage="If selected units properties and words are shown", widget="StringArray", metaVar="FORMATED TABULATED COUNT CQL CONCORDANCE", required=true, def="FORMATED")
output_mode

// Verbosity of the internal traces, chosen by label and converted to a number below
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

// Stop here when the parameters dialog does not report success
if (!ParametersDialog.open(this)) return

// Convert the debug label to its numeric level: OFF=0, ON=1, ALL=2, REALLY ALL=3.
// The labels are listed in level order, so the index of the match is the level.
// A value matching no label is left untouched.
def debugLevel = ["OFF", "ON", "ALL", "REALLY ALL"].findIndexOf { debug == it }
if (debugLevel >= 0) {
	debug = debugLevel
}
67 |
|
68 |
|
69 |
// CQi client, used by the FORMATED/TABULATED outputs to turn word positions into strings
def CQI = CQPSearchEngine.getCqiClient()

// Maps each selected corpus to the units AnalecUtils.selectUnitsInSchema returned for it.
// This map is also the value returned by the macro.
def allResults = [:]
for (def corpus : selection) {
	// Annotation corpus that URSCorpora associates with this corpus
	def analecCorpus = URSCorpora.getCorpus(corpus)

	// All filtering (schema, unit, positions, CQL limits) is delegated to AnalecUtils
	allResults[corpus] = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size,
			unit_ursql, position_in_schema, cql_limit, strict_inclusion, position_in_matches)
}
85 |
|
86 |
// OUTPUT

// Prints, for every corpus of allResults, the number of selected units followed by
// one line per unit built with the given sprintf pattern. The pattern receives, in
// order: the 1-based rank of the unit, its start position, its end position, its
// word forms joined with spaces, and its sorted properties.
// Shared by the FORMATED and TABULATED modes, which differ only by the pattern.
def printUnits = { String linePattern ->
	for (def corpus : allResults.keySet()) {
		def word = corpus.getWordProperty()
		def selectedUnits = allResults[corpus]
		println "$corpus units: "+selectedUnits.size()
		def n = 1
		for (def unit : selectedUnits) {
			// The range also covers single-position units: deb..deb has exactly one element
			int[] pos = (unit.getDeb()..unit.getFin())
			def form = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ")

			def props = unit.getProps().sort()
			println sprintf(linePattern, n, unit.getDeb(), unit.getFin(), form, props)
			n++
		}
	}
}

if (output_mode == "FORMATED") {
	// Human-readable listing
	printUnits("#%4d, %d-%d, \"%s\" %s")
} else if (output_mode == "TABULATED") {
	// Same content, tab-separated
	printUnits("#%4d\t%d\t%d\t%s\t%s")
} else if (output_mode == "COUNT") {
	// Only the number of selected units per corpus
	for (def corpus : allResults.keySet()) {
		def selectedUnits = allResults[corpus]
		println "$corpus units: "+selectedUnits.size()
	}
} else if (output_mode == "CQL") {
	// Unit count, then the CQL text AnalecUtils.getCQL builds from the selected units
	for (def corpus : allResults.keySet()) {
		def selectedUnits = allResults[corpus]
		println "$corpus units: "+selectedUnits.size()
		println AnalecUtils.getCQL(corpus.getID(), selectedUnits)
	}
} else if (output_mode == "CONCORDANCE") {
	// The editor can only be opened through the monitor: check it once, before
	// building any concordance, instead of once per corpus after the work is done.
	if (monitor == null) {
		println "Error: cannot open concordance editor: null monitor"
		return
	}

	// Query text passed to the fake query result; it does not depend on the corpus
	def query = ""
	if (schema_ursql != null) query+= " $schema_ursql"
	if (unit_ursql != null) query+= " $unit_ursql"

	for (def corpus : allResults.keySet()) {
		def word = corpus.getProperty("word")
		def text_id = corpus.getStructuralUnit("text").getProperty("id")
		def selectedUnits = allResults[corpus]

		// Wrap the unit positions in a FakeQueryResult so the concordance can be
		// computed from them without running a CQP query
		def ret = AnalecUtils.getStartsEndsTargetsArrays(selectedUnits)
		def starts = ret[0]
		def ends = ret[1]
		def targets = ret[2]
		FakeQueryResult fqr = new FakeQueryResult("Fake", corpus, new CQLQuery(query), starts, ends, targets)

		Concordance concordance = new Concordance(corpus);
		concordance.setParameters(new CQLQuery(""), [word],[word],[word],
				[word],[word],[word],
				new ReferencePattern(text_id), new ReferencePattern(text_id), 7, 10)
		concordance.setQueryResult(fqr)

		// Compute and open the editor through the monitor's syncExec;
		// any failure is reported on the console instead of aborting the macro.
		monitor.syncExec(new Runnable() {
			public void run() {
				try {
					concordance.compute(); // skip the CQI.query step

					ComputeConcordance.openEditor(concordance);

				} catch(Throwable error) {
					println "Error: "+error;
					error.printStackTrace();
				}
			}
		});
	}
}

// The macro result: corpus -> selected units
return allResults
|