Révision 2162
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/AnalecUtils.groovy (revision 2162) | ||
---|---|---|
15 | 15 |
def params = getFilterParameters(ursql) |
16 | 16 |
def typeRegexp = params[0] |
17 | 17 |
def propRegexp = params[1] |
18 |
println "params=$params" |
|
18 | 19 |
return isPropertyDefined(clazz, analecCorpus, typeRegexp, propRegexp) |
19 | 20 |
} |
20 | 21 |
|
... | ... | |
150 | 151 |
* filter groups elements with the elements positions |
151 | 152 |
* |
152 | 153 |
* |
153 |
* @param groups |
|
154 |
* @param groups [schema:units list]
|
|
154 | 155 |
* @param distance 0=no selection, 1=first, 2=second, -1 last, -2 last-last |
155 | 156 |
* @return |
156 | 157 |
*/ |
157 | 158 |
static def filterUniteByInclusionInSchema(def debug, def groups, Integer distance) { |
158 | 159 |
if (distance == 0) return groups; |
159 |
|
|
160 |
distance = distance-1; |
|
160 |
if (distance > 0) distance = distance-1; |
|
161 | 161 |
def newGroups = [:] |
162 | 162 |
for (def k : groups.keySet()) { |
163 |
newGroups[k] = []; |
|
164 | 163 |
def group = groups[k] |
165 |
if (Math.abs(distance) < group.size()) |
|
166 |
newGroups[k] << group[distance] |
|
164 |
if (group.size() == 0) { |
|
165 |
newGroups[k] = group; |
|
166 |
continue; |
|
167 |
} |
|
168 |
def indexes = null |
|
169 |
if (distance > 0) { |
|
170 |
indexes = 0..Math.min(distance, group.size()) |
|
171 |
} else { |
|
172 |
indexes = Math.max(distance, -group.size())..-1 |
|
173 |
} |
|
174 |
newGroups[k] = group[indexes]; |
|
167 | 175 |
} |
168 | 176 |
return newGroups |
169 | 177 |
} |
... | ... | |
216 | 224 |
static def findAllInCorpus(def debug, def analecCorpus, Class elemClazz, String URSQL) { |
217 | 225 |
def params = getFilterParameters(URSQL) |
218 | 226 |
if (debug >= 2) println "PARAMS=$params" |
219 |
return findAllInCorpus(debug, analecCorpus, elemClazz, params[0], params[1], params[2]) |
|
227 |
return findAllInCorpus(debug, analecCorpus, elemClazz, params[0], params[1], params[2], params[3])
|
|
220 | 228 |
} |
221 | 229 |
|
222 |
static def findAllInCorpus(def debug, Corpus analecCorpus, Class elemClazz, String typeRegex, String propName, String valueRegex) { |
|
230 |
static def findAllInCorpus(def debug, Corpus analecCorpus, Class elemClazz, String typeRegex, String propName, boolean eq, String valueRegex) {
|
|
223 | 231 |
def allElements = null; |
224 | 232 |
|
225 | 233 |
if (elemClazz != null) { |
... | ... | |
236 | 244 |
allElements.addAll(analecCorpus.getTousSchemas()) |
237 | 245 |
} |
238 | 246 |
|
239 |
return filterElements(debug, allElements, typeRegex, propName, valueRegex); |
|
247 |
return filterElements(debug, allElements, typeRegex, propName, eq, valueRegex);
|
|
240 | 248 |
} |
241 | 249 |
|
242 | 250 |
static def filterBySize(def elements, Integer minimum_schema_size, Integer maximum_schema_size) { |
... | ... | |
340 | 348 |
|
341 | 349 |
static def findAllUnitesInElements(def debug, def elements, String URSQL) { |
342 | 350 |
def params = getFilterParameters(URSQL) |
343 |
return findAllUnitesInElements(debug, elements, params[0], params[1], params[2]) |
|
351 |
return findAllUnitesInElements(debug, elements, params[0], params[1], params[2], params[3])
|
|
344 | 352 |
} |
345 | 353 |
|
346 |
static def findAllUnitesInElements(def debug, def elements, String typeRegex, String propName, String valueRegex) { |
|
354 |
static def findAllUnitesInElements(def debug, def elements, String typeRegex, String propName, boolean eq, String valueRegex) {
|
|
347 | 355 |
def allElements = [] |
348 | 356 |
|
349 | 357 |
for (Element element : elements) { |
350 |
allElements.addAll(filterElements(debug, element.getUnitesSousjacentes(), typeRegex, propName, valueRegex)); |
|
358 |
allElements.addAll(filterElements(debug, element.getUnitesSousjacentes(), typeRegex, propName, eq, valueRegex));
|
|
351 | 359 |
} |
352 | 360 |
|
353 | 361 |
return allElements; |
... | ... | |
372 | 380 |
*/ |
373 | 381 |
static def groupAllUnitesInElements(def debug, def elements, String URSQL) { |
374 | 382 |
def params = getFilterParameters(URSQL) |
375 |
return groupAllUnitesInElements(debug, elements, params[0], params[1], params[2]) |
|
383 |
return groupAllUnitesInElements(debug, elements, params[0], params[1], params[2], params[3])
|
|
376 | 384 |
} |
377 | 385 |
|
378 |
static def groupAllUnitesInElements(def debug, def elements, String typeRegex, String propName, String valueRegex) { |
|
386 |
static def groupAllUnitesInElements(def debug, def elements, String typeRegex, String propName, boolean eq, String valueRegex) {
|
|
379 | 387 |
def allElements = [:] |
380 | 388 |
|
381 | 389 |
for (Element element : elements) { |
382 |
allElements[element] = filterElements(debug, element.getUnitesSousjacentes(), typeRegex, propName, valueRegex); |
|
390 |
allElements[element] = filterElements(debug, element.getUnitesSousjacentes(), typeRegex, propName, eq, valueRegex);
|
|
383 | 391 |
} |
384 | 392 |
|
385 | 393 |
return allElements; |
... | ... | |
392 | 400 |
String value = ""; |
393 | 401 |
|
394 | 402 |
int atidx = URSQL.indexOf("@"); |
395 |
int equalidx = URSQL.indexOf("="); |
|
396 |
|
|
397 |
if (atidx >= 0 && equalidx >= 0 && atidx < equalidx) { // TYPE@PROP=VALUE |
|
403 |
int equal_start_idx = URSQL.indexOf("="); |
|
404 |
int equal_end_idx = equal_start_idx |
|
405 |
int differentidx = URSQL.indexOf("!="); |
|
406 |
boolean eq = differentidx < 0 || differentidx != equal_start_idx-1 |
|
407 |
if (!eq) { |
|
408 |
equal_start_idx-- |
|
409 |
} |
|
410 |
|
|
411 |
if (atidx >= 0 && equal_start_idx >= 0 && atidx < equal_start_idx) { // TYPE@PROP=VALUE |
|
398 | 412 |
type = URSQL.substring(0, atidx) |
399 |
prop = URSQL.substring(atidx+1, equalidx) |
|
400 |
value = URSQL.substring(equalidx+1) |
|
413 |
prop = URSQL.substring(atidx+1, equal_start_idx)
|
|
414 |
value = URSQL.substring(equal_end_idx+1)
|
|
401 | 415 |
} else if (atidx >= 0) { // TYPE@PROP |
402 | 416 |
type = URSQL.substring(0, atidx) |
403 | 417 |
prop = URSQL.substring(atidx+1) |
404 |
} else if (equalidx >= 0) { // TYPE=VALUE -> not well formed |
|
405 |
type = URSQL.substring(0, equalidx) |
|
406 |
value = URSQL.substring(equalidx+1) |
|
418 |
} else if (equal_start_idx >= 0) { // TYPE=VALUE -> not well formed
|
|
419 |
type = URSQL.substring(0, equal_start_idx)
|
|
420 |
value = URSQL.substring(equal_end_idx+1)
|
|
407 | 421 |
} else { // TYPE |
408 | 422 |
type = URSQL; |
409 | 423 |
} |
410 | 424 |
// println(["'"+type+"'", "'"+prop+"'", "'"+value+"'"]) |
411 | 425 |
|
412 |
return [type, prop, value] |
|
426 |
return [type, prop, eq, value]
|
|
413 | 427 |
} |
414 | 428 |
|
415 | 429 |
static def filterElements(def debug, def allElements, String URSQL) { |
416 | 430 |
def params = getFilterParameters(URSQL) |
417 |
return filterElements(debug, allElements, params[0], params[1], params[2]) |
|
431 |
return filterElements(debug, allElements, params[0], params[1], params[2], params[3])
|
|
418 | 432 |
} |
419 | 433 |
|
420 |
static def filterElements(def debug, def allElements, String typeRegex, String propName, String valueRegex) { |
|
434 |
static def filterElements(def debug, def allElements, String typeRegex, String propName, boolean eq, String valueRegex) {
|
|
421 | 435 |
if (debug >= 2) println "filtering "+allElements.size()+" elements with typeRegex='$typeRegex' propName='$propName' and valueRegex='$valueRegex'" |
422 | 436 |
if (typeRegex != null && typeRegex.length() > 0) { |
423 | 437 |
def filteredElements = [] |
... | ... | |
438 | 452 |
def matcher = /$valueRegex/ |
439 | 453 |
for (Element element : allElements) { |
440 | 454 |
def value = element.getProp(propName) |
441 |
if (value != null && value ==~ matcher) { |
|
442 |
filteredElements << element |
|
455 |
if (value ==~ matcher) { |
|
456 |
if (eq) filteredElements << element |
|
457 |
} else { |
|
458 |
if (!eq) filteredElements << element |
|
443 | 459 |
} |
444 | 460 |
} |
445 | 461 |
} else { // select only elements with the prop |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasSummaryMacro.groovy (revision 2162) | ||
---|---|---|
62 | 62 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
63 | 63 |
return; |
64 | 64 |
} |
65 |
|
|
66 |
errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
67 |
if (errors.size() > 0) { |
|
68 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
69 |
return; |
|
70 |
} |
|
71 |
|
|
65 |
|
|
72 | 66 |
for (def type : analecCorpus.getStructure().getUnites()) |
73 | 67 |
props.addAll(analecCorpus.getStructure().getUniteProperties(type)); |
74 | 68 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsIndexMacro.groovy (revision 2162) | ||
---|---|---|
173 | 173 |
else println "Index de la propriété $unit_property_display des unités $unit_ursql[$position_in_matches] de ${corpus.getMainCorpus()}.${corpusViewSelection}" |
174 | 174 |
println "$unit_property_display\t"+selection.join("\t") |
175 | 175 |
|
176 |
int total_freq = 0 |
|
176 | 177 |
keys.eachWithIndex { prop_val, i -> |
177 | 178 |
String line = "" |
178 | 179 |
if (prop_val.size() > 0) { |
... | ... | |
188 | 189 |
|
189 | 190 |
matrix.set(i, j, freq) |
190 | 191 |
line += "\t"+freq |
191 |
|
|
192 |
total_freq += freq |
|
192 | 193 |
if (min < freq) min = freq |
193 | 194 |
} |
194 | 195 |
if (min >= output_fmin) |
195 | 196 |
println "$line" |
196 | 197 |
} |
197 |
|
|
198 |
println "\nTOTAL=$total_freq" |
|
198 | 199 |
props = keys |
199 | 200 |
|
200 | 201 |
def r = RWorkspace.getRWorkspaceInstance() |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasCoverage.groovy (revision 2162) | ||
---|---|---|
1 |
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// STANDARD DECLARATIONS |
|
5 |
package org.txm.macro.urs.exploit |
|
6 |
|
|
7 |
import groovy.transform.Field |
|
8 |
|
|
9 |
import org.jfree.chart.JFreeChart |
|
10 |
import org.jfree.chart.editor.ChartEditor |
|
11 |
import org.kohsuke.args4j.* |
|
12 |
import org.txm.Toolbox |
|
13 |
import org.txm.annotation.urs.* |
|
14 |
import org.txm.chartsengine.core.preferences.ChartsEnginePreferences |
|
15 |
import org.txm.chartsengine.r.core.RChartsEngine |
|
16 |
import org.txm.macro.urs.AnalecUtils |
|
17 |
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer |
|
18 |
import org.txm.progression.core.functions.Progression |
|
19 |
import org.txm.rcp.Application |
|
20 |
import org.txm.rcp.IImageKeys |
|
21 |
import org.txm.rcp.swt.widget.parameters.* |
|
22 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
23 |
import org.txm.searchengine.cqp.corpus.* |
|
24 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
25 |
import org.txm.chartsengine.rcp.*; |
|
26 |
import visuAnalec.elements.* |
|
27 |
|
|
28 |
def scriptName = this.class.getSimpleName() |
|
29 |
|
|
30 |
def selection = [] |
|
31 |
for (def s : corpusViewSelections) { |
|
32 |
if (s instanceof CQPCorpus) selection << s |
|
33 |
else if (s instanceof Partition) selection.addAll(s.getParts()) |
|
34 |
} |
|
35 |
|
|
36 |
if (selection.size() == 0) { |
|
37 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
|
38 |
return false |
|
39 |
} else { |
|
40 |
for (def c : selection) c.compute(false) |
|
41 |
} |
|
42 |
|
|
43 |
// BEGINNING OF PARAMETERS |
|
44 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE") |
|
45 |
String schema_ursql |
|
46 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3") |
|
47 |
int minimum_schema_size |
|
48 |
@Field @Option(name="schema_property_display", usage="PROP", widget="String", required=true, def="REF") |
|
49 |
String schema_property_display |
|
50 |
|
|
51 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="") |
|
52 |
String unit_ursql |
|
53 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
54 |
boolean strict_inclusion |
|
55 |
|
|
56 |
@Field @Option(name="structure_properties", usage="name_prpperty,name2_property2,name3_property", widget="String", required=true, def="div_n,p_n") |
|
57 |
String structure_properties |
|
58 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
59 |
debug |
|
60 |
if (!ParametersDialog.open(this)) return |
|
61 |
|
|
62 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
63 |
if (structure_properties.length() == 0) return; |
|
64 |
|
|
65 |
def CQI = CQPSearchEngine.getCqiClient() |
|
66 |
|
|
67 |
for (def corpus : selection) { |
|
68 |
|
|
69 |
def properties = [corpus.getStructuralUnit("text").getProperty("id")] |
|
70 |
|
|
71 |
// build structural unit properties list from the "structure_properties" parameter |
|
72 |
for (def name : structure_properties.split(",")) { |
|
73 |
name = name.trim() |
|
74 |
String[] split = name.split("_", 2); |
|
75 |
if (split.length == 2) { |
|
76 |
def su = corpus.getStructuralUnit(split[0]) |
|
77 |
if (su == null) { |
|
78 |
println "No Structure for name=$name" |
|
79 |
} else { |
|
80 |
def p = su.getProperty(split[1]) |
|
81 |
if (p == null) { |
|
82 |
println "No Structure property for name=$name" |
|
83 |
} else { |
|
84 |
properties << p |
|
85 |
} |
|
86 |
} |
|
87 |
} else { |
|
88 |
println "Wrong structural unit name format: $name" |
|
89 |
} |
|
90 |
} |
|
91 |
def cql_limit_matches = corpus.getMatches() |
|
92 |
|
|
93 |
def word = corpus.getWordProperty() |
|
94 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
95 |
AnalecUtils.defineProperty(Schema.class, analecCorpus, schema_ursql, "LOCALISATION") |
|
96 |
URSCorpora.getVue(corpus).initVueParDefaut() |
|
97 |
|
|
98 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
99 |
if (errors.size() > 0) { |
|
100 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
101 |
return; |
|
102 |
} |
|
103 |
|
|
104 |
errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
105 |
if (errors.size() > 0) { |
|
106 |
println "** The $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
107 |
return; |
|
108 |
} |
|
109 |
|
|
110 |
if (schema_property_display.length() > 0) { |
|
111 |
errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, AnalecUtils.getFilterParameters(schema_ursql)[0], schema_property_display).size() |
|
112 |
if (errors > 0) { |
|
113 |
println "Error: some Schema types don't contain the $schema_property_display property: $errors" |
|
114 |
return |
|
115 |
} |
|
116 |
} |
|
117 |
|
|
118 |
def allUnits = [:] |
|
119 |
def allHighlightedUnits = [:] |
|
120 |
def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, 0, 99999999, strict_inclusion) |
|
121 |
|
|
122 |
if (allSchemas.size() == 0) { |
|
123 |
println "No schema match for '$schema_ursql' selection. Aborting" |
|
124 |
return |
|
125 |
} |
|
126 |
|
|
127 |
if (debug) println "Building selection of units to highlight..." |
|
128 |
allHighlightedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas, unit_ursql) |
|
129 |
|
|
130 |
if (allHighlightedUnits.size() == 0) { |
|
131 |
println "No schema&unit match for '$schema_ursql' and '$unit_ursql' selection. Aborting" |
|
132 |
return |
|
133 |
} |
|
134 |
if (debug) println "allHighlightedUnits=${allHighlightedUnits.size()}" |
|
135 |
|
|
136 |
println "annotating ${allSchemas.size()} schemas..." |
|
137 |
for (Schema schema : allSchemas) { |
|
138 |
if (debug) println " schema="+schema.getProps() |
|
139 |
def selectedUnits = allHighlightedUnits[schema] |
|
140 |
|
|
141 |
if (selectedUnits.size() == 0) continue; |
|
142 |
selectedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedUnits, corpus.getMatches(), strict_inclusion, 0) |
|
143 |
|
|
144 |
if (selectedUnits.size() == 0) continue; |
|
145 |
|
|
146 |
if (selectedUnits.size() < minimum_schema_size) { |
|
147 |
schema.getProps()["LOCALISATION"] = "N/A" |
|
148 |
if (debug) println schema.getProp(schema_property_display)+" -> N/A" |
|
149 |
continue; // no need to go further, process next selected element of corpora view |
|
150 |
} |
|
151 |
|
|
152 |
|
|
153 |
if (debug) println " selectedUnits=${selectedUnits.size()}" |
|
154 |
|
|
155 |
// get all positions for the selected units |
|
156 |
def positions = new TreeSet() |
|
157 |
for (def unit : selectedUnits) { |
|
158 |
positions.addAll(unit.getDeb()..unit.getFin()) |
|
159 |
} |
|
160 |
int[] positions_array = positions |
|
161 |
if (debug) println " positions=${positions.size()}" |
|
162 |
|
|
163 |
// test the indexes of each property |
|
164 |
for (def property : properties) { |
|
165 |
if (debug) println " testing $property..." |
|
166 |
def idx = CQI.cpos2Struc(property.getQualifiedName(), positions_array) |
|
167 |
def hash = new HashSet() |
|
168 |
hash.addAll(idx) |
|
169 |
if (debug) println " hash=$hash" |
|
170 |
if (hash.size() == 1) { // the units are only in ONE structure |
|
171 |
int[] struct = hash |
|
172 |
def ref = CQI.struc2Str(property.getQualifiedName(), struct)[0] |
|
173 |
println schema.getProp(schema_property_display)+" (${selectedUnits.size} units) -> "+property.getFullName()+" "+ref |
|
174 |
schema.getProps()["LOCALISATION"] = property.getFullName()+" "+ref |
|
175 |
} |
|
176 |
} |
|
177 |
} |
|
178 |
} |
|
179 |
|
|
180 |
//println ""+queries.size()+" selected schemas: "+queries |
|
181 |
|
Formats disponibles : Unified diff