Révision 2164
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasCoverage.groovy (revision 2164) | ||
|---|---|---|
| 1 |
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// STANDARD DECLARATIONS |
|
| 5 |
package org.txm.macro.urs.exploit |
|
| 6 |
|
|
| 7 |
import groovy.transform.Field |
|
| 8 |
|
|
| 9 |
import org.jfree.chart.JFreeChart |
|
| 10 |
import org.jfree.chart.editor.ChartEditor |
|
| 11 |
import org.kohsuke.args4j.* |
|
| 12 |
import org.txm.Toolbox |
|
| 13 |
import org.txm.annotation.urs.* |
|
| 14 |
import org.txm.chartsengine.core.preferences.ChartsEnginePreferences |
|
| 15 |
import org.txm.chartsengine.r.core.RChartsEngine |
|
| 16 |
import org.txm.macro.urs.AnalecUtils |
|
| 17 |
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer |
|
| 18 |
import org.txm.progression.core.functions.Progression |
|
| 19 |
import org.txm.rcp.Application |
|
| 20 |
import org.txm.rcp.IImageKeys |
|
| 21 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 22 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
| 23 |
import org.txm.searchengine.cqp.corpus.* |
|
| 24 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
| 25 |
import org.txm.chartsengine.rcp.*; |
|
| 26 |
import visuAnalec.elements.* |
|
| 27 |
|
|
| 28 |
// Simple class name of this script, used to prefix the user-facing message below.
def scriptName = this.class.getSimpleName()

// Gather the corpora to process from the Corpus view selection:
// a corpus is taken as-is, a partition contributes each of its parts.
def selection = []
for (item in corpusViewSelections) {
	if (item instanceof CQPCorpus) {
		selection.add(item)
	} else if (item instanceof Partition) {
		selection.addAll(item.getParts())
	}
}

// Nothing usable was selected: tell the user and stop the script.
if (selection.isEmpty()) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// Call compute(false) on every selected corpus before using it.
for (target in selection) {
	target.compute(false)
}
|
| 42 |
|
|
| 43 |
// BEGINNING OF PARAMETERS

// URSQL expression selecting the schemas to process
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

// Schemas with fewer selected units than this are flagged "N/A"
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size

// Schema property printed to identify a schema in the console output
@Field @Option(name="schema_property_display", usage="PROP", widget="String", required=true, def="REF")
String schema_property_display

// URSQL expression selecting the units of each schema (optional)
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="")
String unit_ursql

// Passed to AnalecUtils when selecting schemas and filtering units by corpus matches
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion

// Comma separated list of STRUCTURE_PROPERTY names to test
@Field @Option(name="structure_properties", usage="name_prpperty,name2_property2,name3_property", widget="String", required=true, def="div_n,p_n")
String structure_properties

// Verbosity, chosen as a label and converted to a number below
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

// Open the parameters dialog; stop when it reports false.
def dialogAccepted = ParametersDialog.open(this)
if (!dialogAccepted) {
	return
}

// Convert the debug label into a numeric level (0 is falsy in the "if (debug)" tests).
// Any other value is left untouched.
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}

// Without any structure property to test there is nothing to do.
if (structure_properties.isEmpty()) {
	return
}

// CQi client used to resolve corpus positions to structures
def CQI = CQPSearchEngine.getCqiClient()
|
| 66 |
|
|
| 67 |
// For each selected corpus: select the schemas matching "schema_ursql", collect the
// positions covered by their units and, for every tested structure property, check
// whether all those positions fall into a single structure. When they do, the schema
// "LOCALISATION" property is set to "<property full name> <structure value>".
// Properties are tested in list order without stopping, so the last matching property wins.
// Schemas with fewer than "minimum_schema_size" selected units get "N/A".
// Any validation failure stops the whole script (return), not only the current corpus.
for (def corpus : selection) {

	// the text "id" property is always tested first
	def properties = [corpus.getStructuralUnit("text").getProperty("id")]

	// build structural unit properties list from the "structure_properties" parameter
	for (def name : structure_properties.split(",")) {
		name = name.trim()
		// split on the first "_" only: the property name may itself contain "_"
		String[] split = name.split("_", 2)
		if (split.length != 2) {
			println "Wrong structural unit name format: $name"
			continue
		}
		def su = corpus.getStructuralUnit(split[0])
		if (su == null) {
			println "No Structure for name=$name"
			continue
		}
		def p = su.getProperty(split[1])
		if (p == null) {
			println "No Structure property for name=$name"
			continue
		}
		properties << p
	}

	// corpus matches, fetched once and reused for every schema below
	def cql_limit_matches = corpus.getMatches()

	def analecCorpus = URSCorpora.getCorpus(corpus)
	AnalecUtils.defineProperty(Schema.class, analecCorpus, schema_ursql, "LOCALISATION")
	URSCorpora.getVue(corpus).initVueParDefaut()

	def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
	if (errors.size() > 0) {
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
		return
	}

	errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
	if (errors.size() > 0) {
		println "** The $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
		return
	}

	if (schema_property_display.length() > 0) {
		// keep the returned collection (not only its size) so the message shows its content
		errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, AnalecUtils.getFilterParameters(schema_ursql)[0], schema_property_display)
		if (errors.size() > 0) {
			println "Error: some Schema types don't contain the $schema_property_display property: $errors"
			return
		}
	}

	def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, 0, 99999999, strict_inclusion)
	if (allSchemas.size() == 0) {
		println "No schema match for '$schema_ursql' selection. Aborting"
		return
	}

	if (debug) println "Building selection of units to highlight..."
	def allHighlightedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas, unit_ursql)
	if (allHighlightedUnits.size() == 0) {
		println "No schema&unit match for '$schema_ursql' and '$unit_ursql' selection. Aborting"
		return
	}
	if (debug) println "allHighlightedUnits=${allHighlightedUnits.size()}"

	println "annotating ${allSchemas.size()} schemas..."
	for (Schema schema : allSchemas) {
		if (debug) println " schema="+schema.getProps()
		def selectedUnits = allHighlightedUnits[schema]

		// guard against a schema having no entry in the grouped units
		if (selectedUnits == null || selectedUnits.size() == 0) continue
		selectedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedUnits, cql_limit_matches, strict_inclusion, 0)

		if (selectedUnits.size() == 0) continue

		if (selectedUnits.size() < minimum_schema_size) {
			schema.getProps()["LOCALISATION"] = "N/A"
			if (debug) println schema.getProp(schema_property_display)+" -> N/A"
			continue // schema too small: skip to the next schema
		}

		if (debug) println " selectedUnits=${selectedUnits.size()}"

		// get all positions for the selected units (sorted, without duplicates)
		def positions = new TreeSet()
		for (def unit : selectedUnits) {
			positions.addAll(unit.getDeb()..unit.getFin())
		}
		int[] positions_array = positions
		if (debug) println " positions=${positions.size()}"

		// test the structure indexes of each property
		for (def property : properties) {
			if (debug) println " testing $property..."
			def idx = CQI.cpos2Struc(property.getQualifiedName(), positions_array)
			def hash = new HashSet()
			hash.addAll(idx)
			if (debug) println " hash=$hash"
			if (hash.size() == 1) { // the units are only in ONE structure
				int[] struct = hash
				def ref = CQI.struc2Str(property.getQualifiedName(), struct)[0]
				// size() must be a method call: ".size" on a collection is resolved per element
				println schema.getProp(schema_property_display)+" (${selectedUnits.size()} units) -> "+property.getFullName()+" "+ref
				schema.getProps()["LOCALISATION"] = property.getFullName()+" "+ref
			}
		}
	}
}
|
| 179 |
|
|
| 180 |
//println ""+queries.size()+" selected schemas: "+queries |
|
| 181 |
|
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasCoverageMacro.groovy (revision 2164) | ||
|---|---|---|
| 1 |
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// STANDARD DECLARATIONS |
|
| 5 |
package org.txm.macro.urs.exploit |
|
| 6 |
|
|
| 7 |
import groovy.transform.Field |
|
| 8 |
|
|
| 9 |
import org.jfree.chart.JFreeChart |
|
| 10 |
import org.jfree.chart.editor.ChartEditor |
|
| 11 |
import org.kohsuke.args4j.* |
|
| 12 |
import org.txm.Toolbox |
|
| 13 |
import org.txm.annotation.urs.* |
|
| 14 |
import org.txm.chartsengine.core.preferences.ChartsEnginePreferences |
|
| 15 |
import org.txm.chartsengine.r.core.RChartsEngine |
|
| 16 |
import org.txm.macro.urs.AnalecUtils |
|
| 17 |
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer |
|
| 18 |
import org.txm.progression.core.functions.Progression |
|
| 19 |
import org.txm.rcp.Application |
|
| 20 |
import org.txm.rcp.IImageKeys |
|
| 21 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 22 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
| 23 |
import org.txm.searchengine.cqp.corpus.* |
|
| 24 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
| 25 |
import org.txm.chartsengine.rcp.*; |
|
| 26 |
import visuAnalec.elements.* |
|
| 27 |
|
|
| 28 |
// Simple class name of this script, used to prefix the user-facing message below.
def scriptName = this.class.getSimpleName()

// Gather the corpora to process from the Corpus view selection:
// a corpus is taken as-is, a partition contributes each of its parts.
def selection = []
for (item in corpusViewSelections) {
	if (item instanceof CQPCorpus) {
		selection.add(item)
	} else if (item instanceof Partition) {
		selection.addAll(item.getParts())
	}
}

// Nothing usable was selected: tell the user and stop the script.
if (selection.isEmpty()) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// Call compute(false) on every selected corpus before using it.
for (target in selection) {
	target.compute(false)
}
|
| 42 |
|
|
| 43 |
// BEGINNING OF PARAMETERS

// URSQL expression selecting the schemas to process
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

// Schemas with fewer selected units than this are flagged "N/A"
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size

// Schema property printed to identify a schema in the console output
@Field @Option(name="schema_property_display", usage="PROP", widget="String", required=true, def="REF")
String schema_property_display

// URSQL expression selecting the units of each schema (optional)
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="")
String unit_ursql

// Passed to AnalecUtils when selecting schemas and filtering units by corpus matches
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion

// Comma separated list of STRUCTURE_PROPERTY names to test
@Field @Option(name="structure_properties", usage="name_prpperty,name2_property2,name3_property", widget="String", required=true, def="div_n,p_n")
String structure_properties

// Verbosity, chosen as a label and converted to a number below
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

// Open the parameters dialog; stop when it reports false.
def dialogAccepted = ParametersDialog.open(this)
if (!dialogAccepted) {
	return
}

// Convert the debug label into a numeric level (0 is falsy in the "if (debug)" tests).
// Any other value is left untouched.
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}

// Without any structure property to test there is nothing to do.
if (structure_properties.isEmpty()) {
	return
}

// CQi client used to resolve corpus positions to structures
def CQI = CQPSearchEngine.getCqiClient()
|
| 66 |
|
|
| 67 |
// For each selected corpus: select the schemas matching "schema_ursql", collect the
// positions covered by their units and, for every tested structure property, check
// whether all those positions fall into a single structure. When they do, the schema
// "LOCALISATION" property is set to "<property full name> <structure value>".
// Properties are tested in list order without stopping, so the last matching property wins.
// Schemas with fewer than "minimum_schema_size" selected units get "N/A".
// Any validation failure stops the whole script (return), not only the current corpus.
for (def corpus : selection) {

	// the text "id" property is always tested first
	def properties = [corpus.getStructuralUnit("text").getProperty("id")]

	// build structural unit properties list from the "structure_properties" parameter
	for (def name : structure_properties.split(",")) {
		name = name.trim()
		// split on the first "_" only: the property name may itself contain "_"
		String[] split = name.split("_", 2)
		if (split.length != 2) {
			println "Wrong structural unit name format: $name"
			continue
		}
		def su = corpus.getStructuralUnit(split[0])
		if (su == null) {
			println "No Structure for name=$name"
			continue
		}
		def p = su.getProperty(split[1])
		if (p == null) {
			println "No Structure property for name=$name"
			continue
		}
		properties << p
	}

	// corpus matches, fetched once and reused for every schema below
	def cql_limit_matches = corpus.getMatches()

	def analecCorpus = URSCorpora.getCorpus(corpus)
	AnalecUtils.defineProperty(Schema.class, analecCorpus, schema_ursql, "LOCALISATION")
	URSCorpora.getVue(corpus).initVueParDefaut()

	def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
	if (errors.size() > 0) {
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
		return
	}

	errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
	if (errors.size() > 0) {
		println "** The $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
		return
	}

	if (schema_property_display.length() > 0) {
		// keep the returned collection (not only its size) so the message shows its content
		errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, AnalecUtils.getFilterParameters(schema_ursql)[0], schema_property_display)
		if (errors.size() > 0) {
			println "Error: some Schema types don't contain the $schema_property_display property: $errors"
			return
		}
	}

	def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, 0, 99999999, strict_inclusion)
	if (allSchemas.size() == 0) {
		println "No schema match for '$schema_ursql' selection. Aborting"
		return
	}

	if (debug) println "Building selection of units to highlight..."
	def allHighlightedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas, unit_ursql)
	if (allHighlightedUnits.size() == 0) {
		println "No schema&unit match for '$schema_ursql' and '$unit_ursql' selection. Aborting"
		return
	}
	if (debug) println "allHighlightedUnits=${allHighlightedUnits.size()}"

	println "annotating ${allSchemas.size()} schemas..."
	for (Schema schema : allSchemas) {
		if (debug) println " schema="+schema.getProps()
		def selectedUnits = allHighlightedUnits[schema]

		// guard against a schema having no entry in the grouped units
		if (selectedUnits == null || selectedUnits.size() == 0) continue
		selectedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedUnits, cql_limit_matches, strict_inclusion, 0)

		if (selectedUnits.size() == 0) continue

		if (selectedUnits.size() < minimum_schema_size) {
			schema.getProps()["LOCALISATION"] = "N/A"
			if (debug) println schema.getProp(schema_property_display)+" -> N/A"
			continue // schema too small: skip to the next schema
		}

		if (debug) println " selectedUnits=${selectedUnits.size()}"

		// get all positions for the selected units (sorted, without duplicates)
		def positions = new TreeSet()
		for (def unit : selectedUnits) {
			positions.addAll(unit.getDeb()..unit.getFin())
		}
		int[] positions_array = positions
		if (debug) println " positions=${positions.size()}"

		// test the structure indexes of each property
		for (def property : properties) {
			if (debug) println " testing $property..."
			def idx = CQI.cpos2Struc(property.getQualifiedName(), positions_array)
			def hash = new HashSet()
			hash.addAll(idx)
			if (debug) println " hash=$hash"
			if (hash.size() == 1) { // the units are only in ONE structure
				int[] struct = hash
				def ref = CQI.struc2Str(property.getQualifiedName(), struct)[0]
				// size() must be a method call: ".size" on a collection is resolved per element
				println schema.getProp(schema_property_display)+" (${selectedUnits.size()} units) -> "+property.getFullName()+" "+ref
				schema.getProps()["LOCALISATION"] = property.getFullName()+" "+ref
			}
		}
	}
}
|
| 179 |
|
|
| 180 |
//println ""+queries.size()+" selected schemas: "+queries |
|
| 181 |
|
|
Formats disponibles : Unified diff