Révision 2164
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasCoverage.groovy (revision 2164) | ||
---|---|---|
1 |
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// STANDARD DECLARATIONS |
|
5 |
package org.txm.macro.urs.exploit |
|
6 |
|
|
7 |
import groovy.transform.Field |
|
8 |
|
|
9 |
import org.jfree.chart.JFreeChart |
|
10 |
import org.jfree.chart.editor.ChartEditor |
|
11 |
import org.kohsuke.args4j.* |
|
12 |
import org.txm.Toolbox |
|
13 |
import org.txm.annotation.urs.* |
|
14 |
import org.txm.chartsengine.core.preferences.ChartsEnginePreferences |
|
15 |
import org.txm.chartsengine.r.core.RChartsEngine |
|
16 |
import org.txm.macro.urs.AnalecUtils |
|
17 |
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer |
|
18 |
import org.txm.progression.core.functions.Progression |
|
19 |
import org.txm.rcp.Application |
|
20 |
import org.txm.rcp.IImageKeys |
|
21 |
import org.txm.rcp.swt.widget.parameters.* |
|
22 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
23 |
import org.txm.searchengine.cqp.corpus.* |
|
24 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
25 |
import org.txm.chartsengine.rcp.*; |
|
26 |
import visuAnalec.elements.* |
|
27 |
|
|
28 |
// Simple class name of this script, used as a prefix in the user message below
def scriptName = this.class.getSimpleName()

// Gather the corpora to process from the Corpus view selection:
// a corpus is taken as is, a partition contributes all of its parts,
// any other kind of selected object is ignored.
def selection = []
for (def selected : corpusViewSelections) {
	if (selected instanceof CQPCorpus) {
		selection << selected
	} else if (selected instanceof Partition) {
		selection.addAll(selected.getParts())
	}
}

// Nothing usable was selected: tell the user and stop the macro
if (selection.size() == 0) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// compute(false) is called on every selected corpus before it is used
for (def selectedCorpus : selection) {
	selectedCorpus.compute(false)
}
|
42 |
|
|
43 |
// BEGINNING OF PARAMETERS |
|
44 |
// Macro parameters; each @Option field is exposed through the parameters dialog opened below

// URSQL selecting the schemas to process
@Field
@Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

// schemas with fewer selected units than this are marked "N/A"
@Field
@Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size

// schema property printed to identify a schema in the output
@Field
@Option(name="schema_property_display", usage="PROP", widget="String", required=true, def="REF")
String schema_property_display

// URSQL selecting the units of each schema
@Field
@Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="")
String unit_ursql

// passed to the AnalecUtils selection/filter helpers
@Field
@Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion

// comma separated list of STRUCTURE_PROPERTY names to test
@Field
@Option(name="structure_properties", usage="name_prpperty,name2_property2,name3_property", widget="String", required=true, def="div_n,p_n")
String structure_properties

// debug label chosen in the dialog, converted to a numeric level just below
@Field
@Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
def debug

// Open the parameters dialog; stop the macro when it returns false
if (!ParametersDialog.open(this)) return

// Convert the debug label to a numeric level (any other value is left unchanged)
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}

// Without any structure property there is nothing to test
if (structure_properties.length() == 0) return
|
64 |
|
|
65 |
// CQi client used below to map corpus positions to structure indexes (cpos2Struc)
// and structure indexes to property values (struc2Str)
def CQI = CQPSearchEngine.getCqiClient()

// For each selected corpus: select the schemas matching "schema_ursql" and, when all the
// selected units of a schema fall into a single structure of one of the tested structure
// properties, store that structure in the "LOCALISATION" property of the schema.
// NOTE: the "return" statements inside this loop stop the whole macro, not only the
// processing of the current corpus.
for (def corpus : selection) {

	// the "id" property of the "text" structure is always tested first
	def properties = [corpus.getStructuralUnit("text").getProperty("id")]

	// build structural unit properties list from the "structure_properties" parameter
	for (def name : structure_properties.split(",")) {
		name = name.trim()
		// split on the first "_" only: STRUCTURE_PROPERTY
		String[] split = name.split("_", 2)
		if (split.length != 2) {
			println "Wrong structural unit name format: $name"
			continue
		}

		def su = corpus.getStructuralUnit(split[0])
		if (su == null) {
			println "No Structure for name=$name"
			continue
		}

		def p = su.getProperty(split[1])
		if (p == null) {
			println "No Structure property for name=$name"
		} else {
			properties << p
		}
	}

	// corpus matches, fetched once and reused to filter the units of every schema
	def corpusMatches = corpus.getMatches()

	def analecCorpus = URSCorpora.getCorpus(corpus)
	AnalecUtils.defineProperty(Schema.class, analecCorpus, schema_ursql, "LOCALISATION")
	URSCorpora.getVue(corpus).initVueParDefaut()

	// check that the schema URSQL can be evaluated in this corpus
	def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
	if (errors.size() > 0) {
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
		return;
	}

	// check that the unit URSQL can be evaluated in this corpus
	errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
	if (errors.size() > 0) {
		println "** The $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
		return;
	}

	// check that the property used for display is defined for the selected schema type
	if (schema_property_display.length() > 0) {
		// keep the returned collection (not only its size) so the message below shows its content
		errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, AnalecUtils.getFilterParameters(schema_ursql)[0], schema_property_display)
		if (errors.size() > 0) {
			println "Error: some Schema types don't contain the $schema_property_display property: $errors"
			return
		}
	}

	def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, 0, 99999999, strict_inclusion)

	if (allSchemas.size() == 0) {
		println "No schema match for '$schema_ursql' selection. Aborting"
		return
	}

	if (debug) println "Building selection of units to highlight..."
	// indexed by schema below (allHighlightedUnits[schema])
	def allHighlightedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas, unit_ursql)

	if (allHighlightedUnits.size() == 0) {
		println "No schema&unit match for '$schema_ursql' and '$unit_ursql' selection. Aborting"
		return
	}
	if (debug) println "allHighlightedUnits=${allHighlightedUnits.size()}"

	println "annotating ${allSchemas.size()} schemas..."
	for (Schema schema : allSchemas) {
		if (debug) println " schema="+schema.getProps()
		def selectedUnits = allHighlightedUnits[schema]

		// skip schemas without selected units (also guards against a schema with no entry)
		if (selectedUnits == null || selectedUnits.size() == 0) continue;
		selectedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedUnits, corpusMatches, strict_inclusion, 0)

		if (selectedUnits.size() == 0) continue;

		if (selectedUnits.size() < minimum_schema_size) {
			schema.getProps()["LOCALISATION"] = "N/A"
			if (debug) println schema.getProp(schema_property_display)+" -> N/A"
			continue; // schema too small: no need to go further, process the next schema
		}

		if (debug) println " selectedUnits=${selectedUnits.size()}"

		// get all positions for the selected units (sorted, without duplicates)
		def positions = new TreeSet()
		for (def unit : selectedUnits) {
			positions.addAll(unit.getDeb()..unit.getFin())
		}
		int[] positions_array = positions
		if (debug) println " positions=${positions.size()}"

		// test each property indexes; when several properties match, the last one wins
		for (def property : properties) {
			if (debug) println " testing $property..."
			def idx = CQI.cpos2Struc(property.getQualifiedName(), positions_array)
			def hash = new HashSet()
			hash.addAll(idx)
			if (debug) println " hash=$hash"
			if (hash.size() == 1) { // the units are only in ONE structure
				int[] struct = hash
				def ref = CQI.struc2Str(property.getQualifiedName(), struct)[0]
				// size() must be called as a method: property access on a list is resolved per element
				println schema.getProp(schema_property_display)+" (${selectedUnits.size()} units) -> "+property.getFullName()+" "+ref
				schema.getProps()["LOCALISATION"] = property.getFullName()+" "+ref
			}
		}
	}
}
|
179 |
|
|
180 |
//println ""+queries.size()+" selected schemas: "+queries |
|
181 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasCoverageMacro.groovy (revision 2164) | ||
---|---|---|
1 |
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// STANDARD DECLARATIONS |
|
5 |
package org.txm.macro.urs.exploit |
|
6 |
|
|
7 |
import groovy.transform.Field |
|
8 |
|
|
9 |
import org.jfree.chart.JFreeChart |
|
10 |
import org.jfree.chart.editor.ChartEditor |
|
11 |
import org.kohsuke.args4j.* |
|
12 |
import org.txm.Toolbox |
|
13 |
import org.txm.annotation.urs.* |
|
14 |
import org.txm.chartsengine.core.preferences.ChartsEnginePreferences |
|
15 |
import org.txm.chartsengine.r.core.RChartsEngine |
|
16 |
import org.txm.macro.urs.AnalecUtils |
|
17 |
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer |
|
18 |
import org.txm.progression.core.functions.Progression |
|
19 |
import org.txm.rcp.Application |
|
20 |
import org.txm.rcp.IImageKeys |
|
21 |
import org.txm.rcp.swt.widget.parameters.* |
|
22 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
23 |
import org.txm.searchengine.cqp.corpus.* |
|
24 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
25 |
import org.txm.chartsengine.rcp.*; |
|
26 |
import visuAnalec.elements.* |
|
27 |
|
|
28 |
// Simple class name of this script, used as a prefix in the user message below
def scriptName = this.class.getSimpleName()

// Gather the corpora to process from the Corpus view selection:
// a corpus is taken as is, a partition contributes all of its parts,
// any other kind of selected object is ignored.
def selection = []
for (def selected : corpusViewSelections) {
	if (selected instanceof CQPCorpus) {
		selection << selected
	} else if (selected instanceof Partition) {
		selection.addAll(selected.getParts())
	}
}

// Nothing usable was selected: tell the user and stop the macro
if (selection.size() == 0) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// compute(false) is called on every selected corpus before it is used
for (def selectedCorpus : selection) {
	selectedCorpus.compute(false)
}
|
42 |
|
|
43 |
// BEGINNING OF PARAMETERS |
|
44 |
// Macro parameters; each @Option field is exposed through the parameters dialog opened below

// URSQL selecting the schemas to process
@Field
@Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

// schemas with fewer selected units than this are marked "N/A"
@Field
@Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size

// schema property printed to identify a schema in the output
@Field
@Option(name="schema_property_display", usage="PROP", widget="String", required=true, def="REF")
String schema_property_display

// URSQL selecting the units of each schema
@Field
@Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="")
String unit_ursql

// passed to the AnalecUtils selection/filter helpers
@Field
@Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion

// comma separated list of STRUCTURE_PROPERTY names to test
@Field
@Option(name="structure_properties", usage="name_prpperty,name2_property2,name3_property", widget="String", required=true, def="div_n,p_n")
String structure_properties

// debug label chosen in the dialog, converted to a numeric level just below
@Field
@Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
def debug

// Open the parameters dialog; stop the macro when it returns false
if (!ParametersDialog.open(this)) return

// Convert the debug label to a numeric level (any other value is left unchanged)
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}

// Without any structure property there is nothing to test
if (structure_properties.length() == 0) return
|
64 |
|
|
65 |
// CQi client used below to map corpus positions to structure indexes (cpos2Struc)
// and structure indexes to property values (struc2Str)
def CQI = CQPSearchEngine.getCqiClient()

// For each selected corpus: select the schemas matching "schema_ursql" and, when all the
// selected units of a schema fall into a single structure of one of the tested structure
// properties, store that structure in the "LOCALISATION" property of the schema.
// NOTE: the "return" statements inside this loop stop the whole macro, not only the
// processing of the current corpus.
for (def corpus : selection) {

	// the "id" property of the "text" structure is always tested first
	def properties = [corpus.getStructuralUnit("text").getProperty("id")]

	// build structural unit properties list from the "structure_properties" parameter
	for (def name : structure_properties.split(",")) {
		name = name.trim()
		// split on the first "_" only: STRUCTURE_PROPERTY
		String[] split = name.split("_", 2)
		if (split.length != 2) {
			println "Wrong structural unit name format: $name"
			continue
		}

		def su = corpus.getStructuralUnit(split[0])
		if (su == null) {
			println "No Structure for name=$name"
			continue
		}

		def p = su.getProperty(split[1])
		if (p == null) {
			println "No Structure property for name=$name"
		} else {
			properties << p
		}
	}

	// corpus matches, fetched once and reused to filter the units of every schema
	def corpusMatches = corpus.getMatches()

	def analecCorpus = URSCorpora.getCorpus(corpus)
	AnalecUtils.defineProperty(Schema.class, analecCorpus, schema_ursql, "LOCALISATION")
	URSCorpora.getVue(corpus).initVueParDefaut()

	// check that the schema URSQL can be evaluated in this corpus
	def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
	if (errors.size() > 0) {
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
		return;
	}

	// check that the unit URSQL can be evaluated in this corpus
	errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
	if (errors.size() > 0) {
		println "** The $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
		return;
	}

	// check that the property used for display is defined for the selected schema type
	if (schema_property_display.length() > 0) {
		// keep the returned collection (not only its size) so the message below shows its content
		errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, AnalecUtils.getFilterParameters(schema_ursql)[0], schema_property_display)
		if (errors.size() > 0) {
			println "Error: some Schema types don't contain the $schema_property_display property: $errors"
			return
		}
	}

	def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, 0, 99999999, strict_inclusion)

	if (allSchemas.size() == 0) {
		println "No schema match for '$schema_ursql' selection. Aborting"
		return
	}

	if (debug) println "Building selection of units to highlight..."
	// indexed by schema below (allHighlightedUnits[schema])
	def allHighlightedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas, unit_ursql)

	if (allHighlightedUnits.size() == 0) {
		println "No schema&unit match for '$schema_ursql' and '$unit_ursql' selection. Aborting"
		return
	}
	if (debug) println "allHighlightedUnits=${allHighlightedUnits.size()}"

	println "annotating ${allSchemas.size()} schemas..."
	for (Schema schema : allSchemas) {
		if (debug) println " schema="+schema.getProps()
		def selectedUnits = allHighlightedUnits[schema]

		// skip schemas without selected units (also guards against a schema with no entry)
		if (selectedUnits == null || selectedUnits.size() == 0) continue;
		selectedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedUnits, corpusMatches, strict_inclusion, 0)

		if (selectedUnits.size() == 0) continue;

		if (selectedUnits.size() < minimum_schema_size) {
			schema.getProps()["LOCALISATION"] = "N/A"
			if (debug) println schema.getProp(schema_property_display)+" -> N/A"
			continue; // schema too small: no need to go further, process the next schema
		}

		if (debug) println " selectedUnits=${selectedUnits.size()}"

		// get all positions for the selected units (sorted, without duplicates)
		def positions = new TreeSet()
		for (def unit : selectedUnits) {
			positions.addAll(unit.getDeb()..unit.getFin())
		}
		int[] positions_array = positions
		if (debug) println " positions=${positions.size()}"

		// test each property indexes; when several properties match, the last one wins
		for (def property : properties) {
			if (debug) println " testing $property..."
			def idx = CQI.cpos2Struc(property.getQualifiedName(), positions_array)
			def hash = new HashSet()
			hash.addAll(idx)
			if (debug) println " hash=$hash"
			if (hash.size() == 1) { // the units are only in ONE structure
				int[] struct = hash
				def ref = CQI.struc2Str(property.getQualifiedName(), struct)[0]
				// size() must be called as a method: property access on a list is resolved per element
				println schema.getProp(schema_property_display)+" (${selectedUnits.size()} units) -> "+property.getFullName()+" "+ref
				schema.getProps()["LOCALISATION"] = property.getFullName()+" "+ref
			}
		}
	}
}
|
179 |
|
|
180 |
//println ""+queries.size()+" selected schemas: "+queries |
|
181 |
|
Formats disponibles : Unified diff