Révision 2145
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemaAccessibilityScoresMacro.groovy (revision 2145) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// @author sheiden |
|
| 5 |
|
|
| 6 |
// STANDARD DECLARATIONS |
|
| 7 |
package org.txm.macro.urs.exploit |
|
| 8 |
|
|
| 9 |
import org.kohsuke.args4j.* |
|
| 10 |
import groovy.transform.Field |
|
| 11 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 12 |
import org.txm.annotation.urs.* |
|
| 13 |
import org.txm.macro.urs.AnalecUtils |
|
| 14 |
import visuAnalec.elements.* |
|
| 15 |
import org.txm.searchengine.cqp.corpus.* |
|
| 16 |
import org.txm.Toolbox |
|
| 17 |
import org.txm.rcp.commands.* |
|
| 18 |
import org.txm.statsengine.r.core.RWorkspace |
|
| 19 |
|
|
| 20 |
def selection = [] |
|
| 21 |
for (def s : corpusViewSelections) {
|
|
| 22 |
if (s instanceof CQPCorpus) selection << s |
|
| 23 |
else if (s instanceof Partition) selection.addAll(s.getParts()) |
|
| 24 |
} |
|
| 25 |
|
|
| 26 |
if (selection.size() == 0) {
|
|
| 27 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
|
| 28 |
return false |
|
| 29 |
} else {
|
|
| 30 |
for (def c : selection) c.compute(false) |
|
| 31 |
} |
|
| 32 |
|
|
| 33 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
|
| 34 |
String schema_ursql |
|
| 35 |
|
|
| 36 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
|
| 37 |
int minimum_schema_size |
|
| 38 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
|
| 39 |
int maximum_schema_size |
|
| 40 |
@Field @Option(name="schema_property_display", usage="schema property to show", widget="String", required=true, def="REF") |
|
| 41 |
String schema_property_display |
|
| 42 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION") |
|
| 43 |
String unit_ursql |
|
| 44 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
| 45 |
boolean strict_inclusion |
|
| 46 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
| 47 |
int position_in_matches |
|
| 48 |
@Field @Option(name="sep", usage="choose between the len or freq columns", widget="Separator", metaVar="freq len", required=true, def="") |
|
| 49 |
def sep |
|
| 50 |
@Field @Option(name="sort_column", usage="choose between the len or freq columns", widget="StringArray", metaVar="freq len", required=true, def="freq") |
|
| 51 |
String sort_column |
|
| 52 |
@Field @Option(name="max_lines", usage="cut the number of lines shown. '0' means no cut", widget="Integer", required=false, def="0") |
|
| 53 |
int max_lines |
|
| 54 |
//@Field @Option(name="output_graph", usage="Show chart", widget="Boolean", required=true, def="false") |
|
| 55 |
output_graph = false |
|
| 56 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
| 57 |
debug |
|
| 58 |
|
|
| 59 |
if (!ParametersDialog.open(this)) return; |
|
| 60 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
| 61 |
|
|
| 62 |
for (def corpus : selection) {
|
|
| 63 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
| 64 |
|
|
| 65 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
| 66 |
if (errors.size() > 0) {
|
|
| 67 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
| 68 |
return; |
|
| 69 |
} |
|
| 70 |
|
|
| 71 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
| 72 |
if (errors.size() > 0) {
|
|
| 73 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
| 74 |
return; |
|
| 75 |
} |
|
| 76 |
|
|
| 77 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion); |
|
| 78 |
|
|
| 79 |
int nSchemas = 0; |
|
| 80 |
|
|
| 81 |
def scores = [:] |
|
| 82 |
def lensnames = [:] |
|
| 83 |
for (def schema : schemas) {
|
|
| 84 |
|
|
| 85 |
def allUnites = schema.getUnitesSousjacentesNonTriees() |
|
| 86 |
|
|
| 87 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
|
| 88 |
|
|
| 89 |
selectedUnits = AnalecUtils.filterUniteByInclusion(debug, units, corpus.getMatches(), strict_inclusion, position_in_matches) |
|
| 90 |
|
|
| 91 |
int nUnites = selectedUnits.size(); |
|
| 92 |
|
|
| 93 |
if (!scores.containsKey(schema)) {
|
|
| 94 |
scores[schema] = 0; |
|
| 95 |
} |
|
| 96 |
|
|
| 97 |
scores[schema] = units.su |
|
| 98 |
|
|
| 99 |
String value = schema.getProp(schema_property_display); |
|
| 100 |
nSchemas++; |
|
| 101 |
} |
|
| 102 |
|
|
| 103 |
//println "nSchemas=$nSchemas" |
|
| 104 |
def freqs = lens.keySet(); |
|
| 105 |
freqs.sort(); |
|
| 106 |
int t = 0; |
|
| 107 |
int n = 0; |
|
| 108 |
//println "Fréquences ("+freqs.size()+")"
|
|
| 109 |
for (def f : freqs) {
|
|
| 110 |
t += f * lens[f] |
|
| 111 |
n += lens[f] |
|
| 112 |
} |
|
| 113 |
|
|
| 114 |
if (n == 0) {
|
|
| 115 |
println "No units selected for schemas=$schema_ursql and units=$unit_ursql" |
|
| 116 |
return false; |
|
| 117 |
} |
|
| 118 |
|
|
| 119 |
coef = (t/n) |
|
| 120 |
|
|
| 121 |
def slens = null |
|
| 122 |
if ("freq".equals(sort_column)) {
|
|
| 123 |
slens = lens.sort { a, b -> -a.value <=> -b.value ?: -a.key <=> -b.key }
|
|
| 124 |
} else {
|
|
| 125 |
slens = lens.sort { a, b -> -a.key <=> -b.key ?: -a.value <=> -b.value }
|
|
| 126 |
} |
|
| 127 |
|
|
| 128 |
def flens = [] |
|
| 129 |
slens.each { key, value -> value.times { flens << key } }
|
|
| 130 |
def nbins = flens.size()*2 |
|
| 131 |
|
|
| 132 |
println "Schema scores in '$corpus'. The average schema score is $t/$n = "+coef |
|
| 133 |
println "Index:\nlen\tfreq\tcfreq" |
|
| 134 |
int nShown = 0; |
|
| 135 |
for( def it : slens) {
|
|
| 136 |
println it.key+" "+it.value+" "+(cfreq+=it.value)+" "+lensnames[it.key].join(", ")
|
|
| 137 |
nShown++ |
|
| 138 |
if (max_lines > 0 && nShown >= max_lines) {
|
|
| 139 |
println "... (${slens.size() - max_lines})"
|
|
| 140 |
break; |
|
| 141 |
} |
|
| 142 |
} |
|
| 143 |
|
|
| 144 |
|
|
| 145 |
//return ["result":coef, "data":lens] |
|
| 146 |
} |
|
| 147 |
|
|
| 148 |
|
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasProgressionMacro.groovy (revision 2145) | ||
|---|---|---|
| 87 | 87 |
|
| 88 | 88 |
def CQI = CQPSearchEngine.getCqiClient() |
| 89 | 89 |
|
| 90 |
|
|
| 91 |
|
|
| 92 | 90 |
def queries = [] |
| 93 | 91 |
def queryResults = [] |
| 94 | 92 |
def informations = [] |
| ... | ... | |
| 191 | 189 |
n++ |
| 192 | 190 |
} |
| 193 | 191 |
|
| 194 |
|
|
| 195 |
|
|
| 196 | 192 |
def queryResult = new FakeQueryResult(corpus.getID(), corpus, query, starts, ends, null) |
| 197 | 193 |
|
| 198 | 194 |
queries << query |
| ... | ... | |
| 229 | 225 |
try {
|
| 230 | 226 |
//def charteditorpart = org.txm.chartsengine.rcp.SWTChartsComponentsProvider.openEditor(Application.swtComponentProvider.createProgressionChartEditorPart(IImageKeys.getImage(IImageKeys.ACTION_PROGRESSION), progression, progression.isMonochrome(), progression.isMonostyle(), progression.isDoCumulative())) |
| 231 | 227 |
def charteditorpart = org.txm.chartsengine.rcp.editors.ChartEditor.openEditor(progression); |
| 232 |
// JFreeChart chart = charteditorpart.getChart() |
|
| 233 |
// def plot = chart.getXYPlot() |
|
| 234 |
// |
|
| 235 |
// ProgressionItemSelectionRenderer renderer = plot.getRenderer(); |
|
| 236 |
// //renderer.setBaseItemLabelsVisible(true) |
|
| 237 |
// //renderer.setBaseLinesVisible(false) |
|
| 238 |
// //renderer.setBaseSeriesVisible(SchemasProgressionMacro.this.show_lines, true); |
|
| 239 |
// //renderer.setItemLabelsVisible(true) |
|
| 240 |
// //renderer.setLinesVisible(false) |
|
| 241 |
// renderer.setAdditionalLabelInformation(informations) |
|
| 242 |
// renderer.setAdditionalShapeScales(styles) |
|
| 243 |
// chart.getLegend().setVisible(SchemasProgressionMacro.this.show_legend) |
|
| 244 |
// chart.getTitle().setVisible(SchemasProgressionMacro.this.show_title) |
|
| 228 |
JFreeChart chart = charteditorpart.getChart() |
|
| 229 |
def plot = chart.getXYPlot() |
|
| 245 | 230 |
|
| 231 |
ProgressionItemSelectionRenderer renderer = plot.getRenderer(); |
|
| 232 |
//renderer.setBaseItemLabelsVisible(true) |
|
| 233 |
//renderer.setBaseLinesVisible(false) |
|
| 234 |
//renderer.setBaseSeriesVisible(SchemasProgressionMacro.this.show_lines, true); |
|
| 235 |
//renderer.setItemLabelsVisible(true) |
|
| 236 |
//renderer.setLinesVisible(false) |
|
| 237 |
renderer.setAdditionalLabelInformation(informations) |
|
| 238 |
renderer.setAdditionalShapeScales(styles) |
|
| 239 |
chart.getLegend().setVisible(SchemasProgressionMacro.this.show_legend) |
|
| 240 |
chart.getTitle().setVisible(SchemasProgressionMacro.this.show_title) |
|
| 241 |
|
|
| 246 | 242 |
} catch(Exception e) {e.printStackTrace()}
|
| 247 | 243 |
} |
| 248 | 244 |
}) |
| 249 |
|
|
| 250 | 245 |
} catch(Exception e) {
|
| 251 | 246 |
e.printStackTrace() |
| 252 | 247 |
return false |
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasListMacro.groovy (revision 2145) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
|
|
| 1 |
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
|
|
| 2 | 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
| 3 | 3 |
// @author mdecorde |
| 4 | 4 |
// @author sheiden |
| ... | ... | |
| 34 | 34 |
// BEGINNING OF PARAMETERS |
| 35 | 35 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE") |
| 36 | 36 |
String schema_ursql |
| 37 |
|
|
| 38 | 37 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
| 39 | 38 |
int minimum_schema_size |
| 40 | 39 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
| 41 | 40 |
int maximum_schema_size |
| 42 | 41 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
| 43 | 42 |
String unit_ursql |
| 43 |
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="") |
|
| 44 |
cql_limit |
|
| 44 | 45 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
| 45 | 46 |
boolean strict_inclusion |
| 47 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
| 48 |
int position_in_matches |
|
| 46 | 49 |
@Field @Option(name="word_property", usage="", widget="StringArray", metaVar="word lemma frlemma frolemma #forme# id", required=false, def="word") |
| 47 | 50 |
String word_property |
| 48 |
|
|
| 49 | 51 |
@Field @Option(name="separator", usage="", widget="String", required=true, def=", ") |
| 50 | 52 |
String separator |
| 51 |
|
|
| 52 | 53 |
@Field @Option(name="buildCQL", usage="générer la requête des unités", widget="Boolean", required=true, def='false') |
| 53 | 54 |
def buildCQL |
| 54 |
|
|
| 55 | 55 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
| 56 | 56 |
debug |
| 57 | 57 |
|
| 58 | 58 |
if (!ParametersDialog.open(this)) return; |
| 59 | 59 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
| 60 | 60 |
|
| 61 |
|
|
| 62 | 61 |
for (def corpus : selection) {
|
| 63 | 62 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
| 64 | 63 |
|
| ... | ... | |
| 68 | 67 |
return; |
| 69 | 68 |
} |
| 70 | 69 |
|
| 71 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
|
|
| 70 |
errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
|
|
| 72 | 71 |
if (errors.size() > 0) {
|
| 73 | 72 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
| 74 | 73 |
return; |
| ... | ... | |
| 76 | 75 |
|
| 77 | 76 |
def CQI = CQPSearchEngine.getCqiClient() |
| 78 | 77 |
|
| 78 |
def word_prop = null |
|
| 79 | 79 |
if (buildCQL) {
|
| 80 | 80 |
word_prop = corpus.getProperty("id")
|
| 81 | 81 |
} else {
|
| ... | ... | |
| 84 | 84 |
|
| 85 | 85 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion); |
| 86 | 86 |
schemas.sort() {it.getProps()}
|
| 87 |
def nSchemas = 0 |
|
| 88 | 87 |
|
| 89 |
def lens = [:] |
|
| 90 |
|
|
| 91 |
println "$corpus schemas (${schemas.size()}):"
|
|
| 88 |
println "$corpus schemas (${schemas.size()}): "
|
|
| 92 | 89 |
for (def schema : schemas) {
|
| 93 | 90 |
|
| 94 | 91 |
def allUnites = schema.getUnitesSousjacentesNonTriees() |
| 95 | 92 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
| 96 |
units.sort() |
|
| 93 |
units = AnalecUtils.filterUniteByInclusion(debug, allUnites, corpus.getMatches(), false, 0) |
|
| 94 |
|
|
| 95 |
if (cql_limit != null) {
|
|
| 96 |
def cql_limit_matches = null; |
|
| 97 |
if (cql_limit != null && !cql_limit.getQueryString().equals("\"\"")) {
|
|
| 98 |
def limitssubcorpus = corpus.createSubcorpus(cql_limit, corpus.getName().toUpperCase()) |
|
| 99 |
cql_limit_matches = limitssubcorpus.getMatches(); |
|
| 100 |
limitssubcorpus.delete(); |
|
| 101 |
} else {
|
|
| 102 |
cql_limit_matches = corpus.getMatches() |
|
| 103 |
} |
|
| 104 |
units = AnalecUtils.filterUniteByInclusion(debug, units, cql_limit_matches, strict_inclusion, position_in_matches) |
|
| 105 |
} |
|
| 97 | 106 |
|
| 98 |
print schema.getProps().toString()+ ": " |
|
| 107 |
print schema.getProps().toString()+ " ("+units.size()+"/"+allUnites.size()+"): "
|
|
| 99 | 108 |
def first = true |
| 109 |
def values = new HashSet() |
|
| 100 | 110 |
for (def unit : units) {
|
| 101 | 111 |
|
| 102 | 112 |
String forme = null; |
| ... | ... | |
| 122 | 132 |
if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()] |
| 123 | 133 |
else pos = (unit.getDeb()..unit.getFin()) |
| 124 | 134 |
|
| 125 |
forme = StringUtils.join(CQI.cpos2Str(word_prop.getQualifiedName(), pos), " ") // ids is enough
|
|
| 135 |
forme = StringUtils.join(CQI.cpos2Str(word_prop.getQualifiedName(), pos), " ") // ids is enough |
|
| 126 | 136 |
} |
| 127 | 137 |
|
| 128 |
if (first) { first = false } else { print separator }
|
|
| 129 |
print forme |
|
| 138 |
values << forme |
|
| 130 | 139 |
} |
| 131 | 140 |
} |
| 132 |
println "" |
|
| 133 |
|
|
| 134 |
nSchemas++ |
|
| 141 |
|
|
| 142 |
println values.join(separator) |
|
| 135 | 143 |
} |
| 136 | 144 |
} |
| 137 | 145 |
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasSummaryMacro.groovy (revision 2145) | ||
|---|---|---|
| 50 | 50 |
if (!ParametersDialog.open(this)) return |
| 51 | 51 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
| 52 | 52 |
|
| 53 |
def CQI = CQPSearchEngine.getCqiClient() |
|
| 54 |
|
|
| 55 | 53 |
def allresults = new LinkedHashMap() |
| 56 | 54 |
def props = new HashSet() |
| 57 | 55 |
for (def corpus : selection) {
|
| 58 | 56 |
|
| 59 |
mainCorpus = corpus.getMainCorpus() |
|
| 60 |
|
|
| 61 |
def word = mainCorpus.getWordProperty() |
|
| 62 |
def analecCorpus = URSCorpora.getCorpus(mainCorpus) |
|
| 57 |
def word = corpus.getWordProperty() |
|
| 58 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
| 59 |
|
|
| 60 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
| 61 |
if (errors.size() > 0) {
|
|
| 62 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
| 63 |
return; |
|
| 64 |
} |
|
| 65 |
|
|
| 66 |
errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
| 67 |
if (errors.size() > 0) {
|
|
| 68 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
| 69 |
return; |
|
| 70 |
} |
|
| 71 |
|
|
| 63 | 72 |
for (def type : analecCorpus.getStructure().getUnites()) |
| 64 | 73 |
props.addAll(analecCorpus.getStructure().getUniteProperties(type)); |
| 65 | 74 |
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/AnalecUtils.groovy (revision 2145) | ||
|---|---|---|
| 104 | 104 |
*/ |
| 105 | 105 |
static def selectUnitsInSchema(def debug, Corpus analecCorpus, org.txm.searchengine.cqp.corpus.CQPCorpus corpus, |
| 106 | 106 |
String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size, |
| 107 |
String unit_ursql, Integer position_in_schema, CQLQuery cql_limit, Boolean strict_inclusion, int position) {
|
|
| 107 |
String unit_ursql, Integer position_in_schema, CQLQuery cql_limit, Boolean strict_inclusion, int position_in_matches) {
|
|
| 108 | 108 |
def groupedUnits = [] |
| 109 | 109 |
if (schema_ursql != null && schema_ursql.length() > 0 || minimum_schema_size > 1) {
|
| 110 | 110 |
def allSchema = null; |
| ... | ... | |
| 137 | 137 |
if (debug >= 2) println "matches=${matches}"
|
| 138 | 138 |
def allUnits = [] |
| 139 | 139 |
for (def k : groupedUnits.keySet()) {
|
| 140 |
def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, groupedUnits[k], matches, strict_inclusion, position) |
|
| 140 |
def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, groupedUnits[k], matches, strict_inclusion, position_in_matches)
|
|
| 141 | 141 |
allUnits.addAll(selectedUnits) |
| 142 | 142 |
} |
| 143 | 143 |
if (debug >= 2) println "selectedUnits=${allUnits.size()}"
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/DemoMacro.groovy (revision 2145) | ||
|---|---|---|
| 2 | 2 |
|
| 3 | 3 |
import org.txm.annotation.urs.URSCorpora |
| 4 | 4 |
import org.txm.searchengine.cqp.corpus.CQPCorpus |
| 5 |
import visuAnalec.elements.* |
|
| 5 | 6 |
|
| 6 | 7 |
// get the CQP corpus |
| 7 | 8 |
if (!(corpusViewSelection instanceof CQPCorpus)) {
|
| ... | ... | |
| 10 | 11 |
} |
| 11 | 12 |
def corpus = corpusViewSelection |
| 12 | 13 |
|
| 13 |
// get the Analec corpus |
|
| 14 |
// get the Analec corpus : stores the annotations and annotation structure |
|
| 15 |
|
|
| 14 | 16 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
| 15 | 17 |
|
| 16 |
// get the annotations structure |
|
| 18 |
// get the annotations structure : stores the available annotation types, properties and values |
|
| 19 |
|
|
| 17 | 20 |
def structure = analecCorpus.getStructure() |
| 18 | 21 |
|
| 19 |
// get the annotations view |
|
| 22 |
// get the annotations view : stores annotations and annotation structure display rules |
|
| 23 |
|
|
| 20 | 24 |
def view = URSCorpora.getVue(corpus) |
| 21 | 25 |
|
| 22 |
// get annotations
|
|
| 26 |
// display annotations per Element type (Unite, Relation, Schema) and per type
|
|
| 23 | 27 |
|
| 24 |
// Units |
|
| 25 |
println "Units:" |
|
| 26 |
for (String type : structure.getUnites()) {
|
|
| 27 |
def units = analecCorpus.getUnites(type) |
|
| 28 |
if (units.size() > 0) {
|
|
| 29 |
println " ${units.size()} $type"
|
|
| 30 |
} |
|
| 31 |
} |
|
| 28 |
// Create annotation type |
|
| 32 | 29 |
|
| 33 |
// Relations |
|
| 34 |
println "Relations:" |
|
| 35 |
for (String type : structure.getRelations()) {
|
|
| 36 |
def relations = analecCorpus.getRelations(type) |
|
| 37 |
if (relations.size() > 0) {
|
|
| 38 |
println " ${relations.size()} $type"
|
|
| 39 |
} |
|
| 40 |
} |
|
| 30 |
structure.ajouterType(Unite.class, "EXEMPLE"); |
|
| 31 |
//structure.ajouterType(Relation.class, "EXEMPLE"); |
|
| 32 |
//structure.ajouterType(Schema.class, "EXEMPLE"); |
|
| 33 |
println structure.getTypes(Unite.class) |
|
| 41 | 34 |
|
| 42 |
// Schemas |
|
| 43 |
println "Schemas:" |
|
| 44 |
for (String type : structure.getSchemas()) {
|
|
| 45 |
def schemas = analecCorpus.getSchemas(type) |
|
| 46 |
if (schemas.size() > 0) {
|
|
| 47 |
println " ${schemas.size()} $type"
|
|
| 48 |
} |
|
| 49 |
} |
|
| 35 |
// Create annotation property |
|
| 50 | 36 |
|
| 37 |
structure.ajouterProp(Unite.class, "EXEMPLE", "PEXEMPLE") |
|
| 38 |
println structure.getNomsProps(Unite.class, "EXEMPLE") |
|
| 39 |
|
|
| 40 |
// Create annotation value |
|
| 41 |
|
|
| 42 |
structure.ajouterVal(Unite.class, "EXEMPLE", "PEXEMPLE", "oui") |
|
| 43 |
structure.ajouterVal(Unite.class, "EXEMPLE", "PEXEMPLE", "non") |
|
| 44 |
|
|
| 51 | 45 |
// Create annotations |
| 52 | 46 |
|
| 47 |
def props = [:] |
|
| 48 |
props["PEXEMPLE"] = "oui" |
|
| 49 |
def u = analecCorpus.addUniteSaisie("EXEMPLE", 0, 10, props)
|
|
| 50 |
println u |
|
| 51 |
|
|
| 53 | 52 |
// Edit annotations |
| 54 |
|
|
| 53 |
|
|
| 54 |
u.getProps()["EXEMPLE"] = "non" |
|
| 55 |
|
|
| 56 |
// Unit getters |
|
| 57 |
|
|
| 58 |
println u.getDeb() // start of unit |
|
| 59 |
println u.getFin() // end of unit |
|
| 60 |
|
|
| 61 |
println u.getProp("EXEMPLE")
|
|
| 62 |
println u.getType() |
|
| 63 |
|
|
| 64 |
// Browse Units |
|
| 65 |
|
|
| 66 |
println "Units:" |
|
| 67 |
for (String type : structure.getUnites()) {
|
|
| 68 |
def units = analecCorpus.getUnites(type) |
|
| 69 |
if (units.size() > 0) {
|
|
| 70 |
println " ${units.size()} $type"
|
|
| 71 |
} |
|
| 72 |
} |
|
| 73 |
|
|
| 74 |
// Browse Relations |
|
| 75 |
|
|
| 76 |
println "Relations:" |
|
| 77 |
for (String type : structure.getRelations()) {
|
|
| 78 |
def relations = analecCorpus.getRelations(type) |
|
| 79 |
if (relations.size() > 0) {
|
|
| 80 |
println " ${relations.size()} $type"
|
|
| 81 |
} |
|
| 82 |
} |
|
| 83 |
|
|
| 84 |
// Browse Schemas |
|
| 85 |
|
|
| 86 |
println "Schemas:" |
|
| 87 |
for (String type : structure.getSchemas()) {
|
|
| 88 |
def schemas = analecCorpus.getSchemas(type) |
|
| 89 |
if (schemas.size() > 0) {
|
|
| 90 |
println " ${schemas.size()} $type"
|
|
| 91 |
} |
|
| 92 |
} |
|
| 93 |
|
|
| 55 | 94 |
// Delete annotations |
| 56 | 95 |
|
| 96 |
analecCorpus.supUnite(u) |
|
| 97 |
//analecCorpus.supRelation(rel) |
|
| 98 |
//analecCorpus.supSchema(sch) |
|
| 99 |
|
|
| 100 |
// Delete annotation value |
|
| 101 |
|
|
| 102 |
structure.supprimerVal(Unite.class, "EXEMPLE", "PEXEMPLE", "oui") |
|
| 103 |
structure.supprimerVal(Unite.class, "EXEMPLE", "PEXEMPLE", "non") |
|
| 104 |
|
|
| 105 |
// Delete annotation property |
|
| 106 |
|
|
| 107 |
structure.supprimerProp(Unite.class, "EXEMPLE", "PEXEMPLE") |
|
| 108 |
|
|
| 109 |
// Delete annotation type |
|
| 110 |
|
|
| 111 |
structure.supprimerType(Unite.class, "EXEMPLE"); |
|
| 112 |
//structure.supprimerType(Relation.class, "EXEMPLE"); |
|
| 113 |
//structure.supprimerType(Schema.class, "EXEMPLE"); |
|
| 114 |
|
|
| 57 | 115 |
// Revert changes |
| 58 | 116 |
|
| 117 |
//URSCorpora.revert(corpus); |
|
| 118 |
|
|
| 59 | 119 |
// Save changes |
| 60 |
URSCorpora.getView(corpus) |
|
| 120 |
|
|
| 121 |
//URSCorpora.saveCorpus(corpus) |
|
| 122 |
|
|
| 123 |
|
|
| tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/exploit/SchemaAccessibilityScoresMacro.groovy (revision 2145) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// @author sheiden |
|
| 5 |
|
|
| 6 |
// STANDARD DECLARATIONS |
|
| 7 |
package org.txm.macroprototypes.urs.exploit |
|
| 8 |
|
|
| 9 |
import org.kohsuke.args4j.* |
|
| 10 |
import groovy.transform.Field |
|
| 11 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 12 |
import org.txm.annotation.urs.* |
|
| 13 |
import org.txm.macro.urs.AnalecUtils |
|
| 14 |
import visuAnalec.elements.* |
|
| 15 |
import org.txm.searchengine.cqp.corpus.* |
|
| 16 |
import org.txm.Toolbox |
|
| 17 |
import org.txm.rcp.commands.* |
|
| 18 |
import org.txm.statsengine.r.core.RWorkspace |
|
| 19 |
|
|
| 20 |
def selection = [] |
|
| 21 |
for (def s : corpusViewSelections) {
|
|
| 22 |
if (s instanceof CQPCorpus) selection << s |
|
| 23 |
else if (s instanceof Partition) selection.addAll(s.getParts()) |
|
| 24 |
} |
|
| 25 |
|
|
| 26 |
if (selection.size() == 0) {
|
|
| 27 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
|
| 28 |
return false |
|
| 29 |
} else {
|
|
| 30 |
for (def c : selection) c.compute(false) |
|
| 31 |
} |
|
| 32 |
|
|
| 33 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
|
| 34 |
String schema_ursql |
|
| 35 |
|
|
| 36 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
|
| 37 |
int minimum_schema_size |
|
| 38 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
|
| 39 |
int maximum_schema_size |
|
| 40 |
@Field @Option(name="schema_property_display", usage="schema property to show", widget="String", required=true, def="REF") |
|
| 41 |
String schema_property_display |
|
| 42 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION") |
|
| 43 |
String unit_ursql |
|
| 44 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
| 45 |
boolean strict_inclusion |
|
| 46 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
| 47 |
int position_in_matches |
|
| 48 |
@Field @Option(name="sep", usage="choose between the len or freq columns", widget="Separator", metaVar="freq len", required=true, def="") |
|
| 49 |
def sep |
|
| 50 |
@Field @Option(name="sort_column", usage="choose between the len or freq columns", widget="StringArray", metaVar="freq len", required=true, def="freq") |
|
| 51 |
String sort_column |
|
| 52 |
@Field @Option(name="max_lines", usage="cut the number of lines shown. '0' means no cut", widget="Integer", required=false, def="0") |
|
| 53 |
int max_lines |
|
| 54 |
//@Field @Option(name="output_graph", usage="Show chart", widget="Boolean", required=true, def="false") |
|
| 55 |
output_graph = false |
|
| 56 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
| 57 |
debug |
|
| 58 |
|
|
| 59 |
if (!ParametersDialog.open(this)) return; |
|
| 60 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
| 61 |
|
|
| 62 |
for (def corpus : selection) {
|
|
| 63 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
| 64 |
|
|
| 65 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
| 66 |
if (errors.size() > 0) {
|
|
| 67 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
| 68 |
return; |
|
| 69 |
} |
|
| 70 |
|
|
| 71 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
| 72 |
if (errors.size() > 0) {
|
|
| 73 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
| 74 |
return; |
|
| 75 |
} |
|
| 76 |
|
|
| 77 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion); |
|
| 78 |
|
|
| 79 |
int nSchemas = 0; |
|
| 80 |
|
|
| 81 |
def scores = [:] |
|
| 82 |
def lensnames = [:] |
|
| 83 |
for (def schema : schemas) {
|
|
| 84 |
|
|
| 85 |
def allUnites = schema.getUnitesSousjacentesNonTriees() |
|
| 86 |
|
|
| 87 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
|
| 88 |
|
|
| 89 |
selectedUnits = AnalecUtils.filterUniteByInclusion(debug, units, corpus.getMatches(), strict_inclusion, position_in_matches) |
|
| 90 |
|
|
| 91 |
int nUnites = selectedUnits.size(); |
|
| 92 |
|
|
| 93 |
if (!scores.containsKey(schema)) {
|
|
| 94 |
scores[schema] = 0; |
|
| 95 |
} |
|
| 96 |
|
|
| 97 |
scores[schema] = units.su |
|
| 98 |
|
|
| 99 |
String value = schema.getProp(schema_property_display); |
|
| 100 |
nSchemas++; |
|
| 101 |
} |
|
| 102 |
|
|
| 103 |
//println "nSchemas=$nSchemas" |
|
| 104 |
def freqs = lens.keySet(); |
|
| 105 |
freqs.sort(); |
|
| 106 |
int t = 0; |
|
| 107 |
int n = 0; |
|
| 108 |
//println "Fréquences ("+freqs.size()+")"
|
|
| 109 |
for (def f : freqs) {
|
|
| 110 |
t += f * lens[f] |
|
| 111 |
n += lens[f] |
|
| 112 |
} |
|
| 113 |
|
|
| 114 |
if (n == 0) {
|
|
| 115 |
println "No units selected for schemas=$schema_ursql and units=$unit_ursql" |
|
| 116 |
return false; |
|
| 117 |
} |
|
| 118 |
|
|
| 119 |
coef = (t/n) |
|
| 120 |
|
|
| 121 |
def slens = null |
|
| 122 |
if ("freq".equals(sort_column)) {
|
|
| 123 |
slens = lens.sort { a, b -> -a.value <=> -b.value ?: -a.key <=> -b.key }
|
|
| 124 |
} else {
|
|
| 125 |
slens = lens.sort { a, b -> -a.key <=> -b.key ?: -a.value <=> -b.value }
|
|
| 126 |
} |
|
| 127 |
|
|
| 128 |
def flens = [] |
|
| 129 |
slens.each { key, value -> value.times { flens << key } }
|
|
| 130 |
def nbins = flens.size()*2 |
|
| 131 |
|
|
| 132 |
println "Schema scores in '$corpus'. The average schema score is $t/$n = "+coef |
|
| 133 |
println "Index:\nlen\tfreq\tcfreq" |
|
| 134 |
int nShown = 0; |
|
| 135 |
for( def it : slens) {
|
|
| 136 |
println it.key+" "+it.value+" "+(cfreq+=it.value)+" "+lensnames[it.key].join(", ")
|
|
| 137 |
nShown++ |
|
| 138 |
if (max_lines > 0 && nShown >= max_lines) {
|
|
| 139 |
println "... (${slens.size() - max_lines})"
|
|
| 140 |
break; |
|
| 141 |
} |
|
| 142 |
} |
|
| 143 |
|
|
| 144 |
|
|
| 145 |
//return ["result":coef, "data":lens] |
|
| 146 |
} |
|
| 147 |
|
|
| 148 |
|
|
| tmp/org.txm.groovy.core/src/java/org/txm/groovy/core/GSERunner.java (revision 2145) | ||
|---|---|---|
| 95 | 95 |
defaultGSE.setConfig(configuration); |
| 96 | 96 |
|
| 97 | 97 |
File jardir = new File(rootDir, "lib"); //$NON-NLS-1$ |
| 98 |
try {
|
|
| 99 |
defaultGSE.getGroovyClassLoader().addClasspath(jardir.getAbsolutePath()); |
|
| 100 |
defaultGSE.getGroovyClassLoader().addURL(jardir.toURI().toURL()); |
|
| 101 |
} catch (MalformedURLException e) {
|
|
| 102 |
// TODO Auto-generated catch block |
|
| 103 |
e.printStackTrace(); |
|
| 104 |
} |
|
| 105 |
|
|
| 98 | 106 |
if (jardir.exists() && jardir.isDirectory()) {
|
| 99 | 107 |
for (File f: jardir.listFiles(new FilenameFilter() {
|
| 100 | 108 |
@Override |
| 101 | 109 |
public boolean accept(File dir, String name) {
|
| 102 |
return name.endsWith(".jar"); //$NON-NLS-1$
|
|
| 110 |
return name.endsWith(".jar") || name.endsWith(".so") || name.endsWith(".dylib") || name.endsWith(".dll"); //$NON-NLS-1$
|
|
| 103 | 111 |
} |
| 104 | 112 |
})) |
| 105 | 113 |
{
|
| tmp/org.txm.progression.core/src/org/txm/progression/core/chartsengine/jfreechart/themes/highcharts/renderers/ProgressionItemSelectionRenderer.java (revision 2145) | ||
|---|---|---|
| 254 | 254 |
return s; |
| 255 | 255 |
} |
| 256 | 256 |
|
| 257 |
public void setAdditionalShapeScales(List<List<Double>> shapeScales) {
|
|
| 258 |
this.additionalShapesScale = shapeScales; |
|
| 259 |
} |
|
| 260 |
|
|
| 261 |
public void setAdditionalLabelInformation(List<List<String>> informations) {
|
|
| 262 |
this.additionalInformations = informations; |
|
| 263 |
} |
|
| 257 | 264 |
|
| 258 | 265 |
/** |
| 259 | 266 |
* @param chartPanel the chartPanel to set |
| ... | ... | |
| 307 | 314 |
|
| 308 | 315 |
label += "\n" + RendererUtils.valuesNumberFormat.format(((XYSeriesCollection) dataset).getSeries(series).getY(item)) + " / " + (((XYSeriesCollection) dataset).getSeries(series).getItemCount() - 2); //$NON-NLS-1$ //$NON-NLS-2$ // -2 for the 0 point and the dummy last point |
| 309 | 316 |
|
| 317 |
if (additionalInformations != null) {
|
|
| 318 |
if (additionalInformations.size() > series) {
|
|
| 319 |
if (additionalInformations.get(series).size() > item) {
|
|
| 320 |
label += "\n "+additionalInformations.get(series).get(item); |
|
| 321 |
} |
|
| 322 |
} |
|
| 323 |
} |
|
| 324 |
|
|
| 310 | 325 |
return label; |
| 311 | 326 |
} |
| 312 | 327 |
|
| tmp/org.txm.chartsengine.jfreechart.core/src/org/txm/chartsengine/jfreechart/core/themes/highcharts/defaulttheme/renderers/ItemSelectionXYStepRenderer.java (revision 2145) | ||
|---|---|---|
| 452 | 452 |
super.drawItem(g2, state, dataArea, info, plot, domainAxis, rangeAxis, dataset, series, item, crosshairState, pass); |
| 453 | 453 |
|
| 454 | 454 |
// force shape rendering because the XYStepRenderer doesn't manage it by default |
| 455 |
// EntityCollection entities = null; |
|
| 456 |
// if(info != null) {
|
|
| 457 |
// entities = info.getOwner().getEntityCollection(); |
|
| 458 |
// } |
|
| 459 |
// |
|
| 460 |
// drawSecondaryPass(g2, plot, dataset, pass, series, item, domainAxis, dataArea, rangeAxis, crosshairState, entities); |
|
| 455 |
EntityCollection entities = null; |
|
| 456 |
if(info != null) {
|
|
| 457 |
entities = info.getOwner().getEntityCollection(); |
|
| 458 |
} |
|
| 459 |
drawSecondaryPass(g2, plot, dataset, pass, series, item, domainAxis, dataArea, rangeAxis, crosshairState, entities); |
|
| 461 | 460 |
} |
| 462 | 461 |
} |
| 463 | 462 |
|
| tmp/org.txm.rcp/src/main/java/org/txm/rcp/editors/TXMEditor.java (revision 2145) | ||
|---|---|---|
| 1037 | 1037 |
* @throws Exception |
| 1038 | 1038 |
*/ |
| 1039 | 1039 |
public final void refresh(boolean update) throws Exception {
|
| 1040 |
|
|
| 1040 |
if (this.parent.isDisposed()) return; // nothing to refresh |
|
| 1041 |
|
|
| 1041 | 1042 |
// skip refresh if the part creation has failed |
| 1042 | 1043 |
if (!createPartControlDoneSucessfully) {
|
| 1043 | 1044 |
return; |
| ... | ... | |
| 1077 | 1078 |
this.topToolBar.setComputingParametersVisible(false); |
| 1078 | 1079 |
} |
| 1079 | 1080 |
|
| 1080 |
this.topToolBar.redraw(); |
|
| 1081 |
if (!topToolBar.isDisposed()) this.topToolBar.redraw();
|
|
| 1081 | 1082 |
|
| 1082 | 1083 |
CorporaView.refreshObject(this); |
| 1083 | 1084 |
TXMResultDebugView.refreshView(); |
Formats disponibles : Unified diff