Révision 2144
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemaAccessibilityScoresMacro.groovy (revision 2144) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
|
|
6 |
// STANDARD DECLARATIONS |
|
7 |
package org.txm.macro.urs.exploit |
|
8 |
|
|
9 |
import org.kohsuke.args4j.* |
|
10 |
import groovy.transform.Field |
|
11 |
import org.txm.rcp.swt.widget.parameters.* |
|
12 |
import org.txm.annotation.urs.* |
|
13 |
import org.txm.macro.urs.AnalecUtils |
|
14 |
import visuAnalec.elements.* |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import org.txm.Toolbox |
|
17 |
import org.txm.rcp.commands.* |
|
18 |
import org.txm.statsengine.r.core.RWorkspace |
|
19 |
|
|
20 |
def selection = [] |
|
21 |
for (def s : corpusViewSelections) { |
|
22 |
if (s instanceof CQPCorpus) selection << s |
|
23 |
else if (s instanceof Partition) selection.addAll(s.getParts()) |
|
24 |
} |
|
25 |
|
|
26 |
if (selection.size() == 0) { |
|
27 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
|
28 |
return false |
|
29 |
} else { |
|
30 |
for (def c : selection) c.compute(false) |
|
31 |
} |
|
32 |
|
|
33 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
|
34 |
String schema_ursql |
|
35 |
|
|
36 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
|
37 |
int minimum_schema_size |
|
38 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
|
39 |
int maximum_schema_size |
|
40 |
@Field @Option(name="schema_property_display", usage="schema property to show", widget="String", required=true, def="REF") |
|
41 |
String schema_property_display |
|
42 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION") |
|
43 |
String unit_ursql |
|
44 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
45 |
boolean strict_inclusion |
|
46 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
47 |
int position_in_matches |
|
48 |
@Field @Option(name="sep", usage="choose between the len or freq columns", widget="Separator", metaVar="freq len", required=true, def="") |
|
49 |
def sep |
|
50 |
@Field @Option(name="sort_column", usage="choose between the len or freq columns", widget="StringArray", metaVar="freq len", required=true, def="freq") |
|
51 |
String sort_column |
|
52 |
@Field @Option(name="max_lines", usage="cut the number of lines shown. '0' means no cut", widget="Integer", required=false, def="0") |
|
53 |
int max_lines |
|
54 |
//@Field @Option(name="output_graph", usage="Show chart", widget="Boolean", required=true, def="false") |
|
55 |
output_graph = false |
|
56 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
57 |
debug |
|
58 |
|
|
59 |
if (!ParametersDialog.open(this)) return; |
|
60 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
61 |
|
|
62 |
for (def corpus : selection) { |
|
63 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
64 |
|
|
65 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
66 |
if (errors.size() > 0) { |
|
67 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
68 |
return; |
|
69 |
} |
|
70 |
|
|
71 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
72 |
if (errors.size() > 0) { |
|
73 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
74 |
return; |
|
75 |
} |
|
76 |
|
|
77 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion); |
|
78 |
|
|
79 |
int nSchemas = 0; |
|
80 |
|
|
81 |
def scores = [:] |
|
82 |
def lensnames = [:] |
|
83 |
for (def schema : schemas) { |
|
84 |
|
|
85 |
def allUnites = schema.getUnitesSousjacentesNonTriees() |
|
86 |
|
|
87 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
|
88 |
|
|
89 |
selectedUnits = AnalecUtils.filterUniteByInclusion(debug, units, corpus.getMatches(), strict_inclusion, position_in_matches) |
|
90 |
|
|
91 |
int nUnites = selectedUnits.size(); |
|
92 |
|
|
93 |
if (!scores.containsKey(schema)) { |
|
94 |
scores[schema] = 0; |
|
95 |
} |
|
96 |
|
|
97 |
scores[schema] = units.su |
|
98 |
|
|
99 |
String value = schema.getProp(schema_property_display); |
|
100 |
nSchemas++; |
|
101 |
} |
|
102 |
|
|
103 |
//println "nSchemas=$nSchemas" |
|
104 |
def freqs = lens.keySet(); |
|
105 |
freqs.sort(); |
|
106 |
int t = 0; |
|
107 |
int n = 0; |
|
108 |
//println "Fréquences ("+freqs.size()+")" |
|
109 |
for (def f : freqs) { |
|
110 |
t += f * lens[f] |
|
111 |
n += lens[f] |
|
112 |
} |
|
113 |
|
|
114 |
if (n == 0) { |
|
115 |
println "No units selected for schemas=$schema_ursql and units=$unit_ursql" |
|
116 |
return false; |
|
117 |
} |
|
118 |
|
|
119 |
coef = (t/n) |
|
120 |
|
|
121 |
def slens = null |
|
122 |
if ("freq".equals(sort_column)) { |
|
123 |
slens = lens.sort { a, b -> -a.value <=> -b.value ?: -a.key <=> -b.key } |
|
124 |
} else { |
|
125 |
slens = lens.sort { a, b -> -a.key <=> -b.key ?: -a.value <=> -b.value } |
|
126 |
} |
|
127 |
|
|
128 |
def flens = [] |
|
129 |
slens.each { key, value -> value.times { flens << key } } |
|
130 |
def nbins = flens.size()*2 |
|
131 |
|
|
132 |
println "Schema scores in '$corpus'. The average schema score is $t/$n = "+coef |
|
133 |
println "Index:\nlen\tfreq\tcfreq" |
|
134 |
int nShown = 0; |
|
135 |
for( def it : slens) { |
|
136 |
println it.key+" "+it.value+" "+(cfreq+=it.value)+" "+lensnames[it.key].join(", ") |
|
137 |
nShown++ |
|
138 |
if (max_lines > 0 && nShown >= max_lines) { |
|
139 |
println "... (${slens.size() - max_lines})" |
|
140 |
break; |
|
141 |
} |
|
142 |
} |
|
143 |
|
|
144 |
|
|
145 |
//return ["result":coef, "data":lens] |
|
146 |
} |
|
147 |
|
|
148 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsReferentialStabilityMacro.groovy (revision 2144) | ||
---|---|---|
41 | 41 |
|
42 | 42 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION") |
43 | 43 |
String unit_ursql |
44 |
|
|
44 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
45 |
boolean strict_inclusion |
|
45 | 46 |
@Field @Option(name="word_property", usage="", widget="String", required=false, def="word") |
46 | 47 |
String word_property |
47 | 48 |
|
... | ... | |
79 | 80 |
return; |
80 | 81 |
} |
81 | 82 |
} |
82 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size); |
|
83 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion);
|
|
83 | 84 |
allFormesSet = new HashSet(); |
84 | 85 |
def coefs = [] |
85 | 86 |
int n = 1 |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsIndexMacro.groovy (revision 2144) | ||
---|---|---|
92 | 92 |
def allresults = [:] |
93 | 93 |
for (def corpus : selection) { |
94 | 94 |
|
95 |
mainCorpus = corpus.getMainCorpus() |
|
95 |
def word = corpus.getWordProperty() |
|
96 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
96 | 97 |
|
97 |
def word = mainCorpus.getWordProperty() |
|
98 |
def analecCorpus = URSCorpora.getCorpus(mainCorpus) |
|
99 |
|
|
100 | 98 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
101 | 99 |
if (errors.size() > 0) { |
102 | 100 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasProgressionMacro.groovy (revision 2144) | ||
---|---|---|
125 | 125 |
|
126 | 126 |
def allUnits = [:] |
127 | 127 |
def allHighlightedUnits = [:] |
128 |
def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size) |
|
128 |
def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion)
|
|
129 | 129 |
|
130 | 130 |
if (allSchemas.size() == 0) { |
131 | 131 |
println "No schema match for '$schema_ursql' selection. Aborting" |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsListMacro.groovy (revision 2144) | ||
---|---|---|
72 | 72 |
def allResults = [:] |
73 | 73 |
for (def corpus : selection) { |
74 | 74 |
|
75 |
mainCorpus = corpus.getMainCorpus() |
|
75 |
def word = corpus.getWordProperty() |
|
76 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
76 | 77 |
|
77 |
def word = mainCorpus.getWordProperty() |
|
78 |
def analecCorpus = URSCorpora.getCorpus(mainCorpus) |
|
79 |
|
|
80 | 78 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, |
81 | 79 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position_in_matches); |
82 | 80 |
|
... | ... | |
87 | 85 |
|
88 | 86 |
if (output_mode == "FORMATED") { |
89 | 87 |
for (def corpus : allResults.keySet()) { |
90 |
def mainCorpus = corpus.getMainCorpus() |
|
91 |
def word = mainCorpus.getWordProperty() |
|
88 |
def word = corpus.getWordProperty() |
|
92 | 89 |
def selectedUnits = allResults[corpus] |
93 | 90 |
println "$corpus units: "+selectedUnits.size() |
94 | 91 |
def n = 1 |
... | ... | |
105 | 102 |
} |
106 | 103 |
} else if (output_mode == "TABULATED") { |
107 | 104 |
for (def corpus : allResults.keySet()) { |
108 |
def mainCorpus = corpus.getMainCorpus() |
|
109 |
def word = mainCorpus.getWordProperty() |
|
105 |
def word = corpus.getWordProperty() |
|
110 | 106 |
def selectedUnits = allResults[corpus] |
111 | 107 |
println "$corpus units: "+selectedUnits.size() |
112 | 108 |
def n = 1 |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasListMacro.groovy (revision 2144) | ||
---|---|---|
41 | 41 |
int maximum_schema_size |
42 | 42 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
43 | 43 |
String unit_ursql |
44 |
|
|
44 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
45 |
boolean strict_inclusion |
|
45 | 46 |
@Field @Option(name="word_property", usage="", widget="StringArray", metaVar="word lemma frlemma frolemma #forme# id", required=false, def="word") |
46 | 47 |
String word_property |
47 | 48 |
|
... | ... | |
81 | 82 |
word_prop = corpus.getProperty(word_property) |
82 | 83 |
} |
83 | 84 |
|
84 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size); |
|
85 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion);
|
|
85 | 86 |
schemas.sort() {it.getProps()} |
86 | 87 |
def nSchemas = 0 |
87 | 88 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsInterdistanceMacro.groovy (revision 2144) | ||
---|---|---|
41 | 41 |
|
42 | 42 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION") |
43 | 43 |
String unit_ursql |
44 |
|
|
44 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
45 |
boolean strict_inclusion |
|
45 | 46 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
46 | 47 |
debug |
47 | 48 |
|
... | ... | |
63 | 64 |
return; |
64 | 65 |
} |
65 | 66 |
|
66 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size); |
|
67 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion);
|
|
67 | 68 |
def distances = []; |
68 | 69 |
def nDistances = 0 |
69 | 70 |
def cadences = []; |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsSummaryMacro.groovy (revision 2144) | ||
---|---|---|
64 | 64 |
def props = new HashSet() |
65 | 65 |
for (def corpus : selection) { |
66 | 66 |
|
67 |
mainCorpus = corpus.getMainCorpus() |
|
68 |
|
|
69 |
def word = mainCorpus.getWordProperty() |
|
70 |
def analecCorpus = URSCorpora.getCorpus(mainCorpus) |
|
67 |
def word = corpus.getWordProperty() |
|
68 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
71 | 69 |
for (def type : analecCorpus.getStructure().getUnites()) |
72 | 70 |
props.addAll(analecCorpus.getStructure().getUniteProperties(type)); |
73 | 71 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasSummaryMacro.groovy (revision 2144) | ||
---|---|---|
43 | 43 |
int minimum_schema_size |
44 | 44 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
45 | 45 |
int maximum_schema_size |
46 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
47 |
boolean strict_inclusion |
|
46 | 48 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
47 | 49 |
debug |
48 | 50 |
if (!ParametersDialog.open(this)) return |
... | ... | |
61 | 63 |
for (def type : analecCorpus.getStructure().getUnites()) |
62 | 64 |
props.addAll(analecCorpus.getStructure().getUniteProperties(type)); |
63 | 65 |
|
64 |
def selectedSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size); |
|
66 |
def selectedSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion);
|
|
65 | 67 |
if (selectedSchemas.size() > 0) |
66 | 68 |
allresults[corpus] = selectedSchemas.sort(){it.getUnite0()}; |
67 | 69 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemaLengthsMacro.groovy (revision 2144) | ||
---|---|---|
43 | 43 |
String unit_ursql |
44 | 44 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
45 | 45 |
boolean strict_inclusion |
46 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
47 |
int position_in_matches |
|
46 | 48 |
@Field @Option(name="sep", usage="choose between the len or freq columns", widget="Separator", metaVar="freq len", required=true, def="") |
47 | 49 |
def sep |
48 | 50 |
@Field @Option(name="sort_column", usage="choose between the len or freq columns", widget="StringArray", metaVar="freq len", required=true, def="freq") |
... | ... | |
72 | 74 |
return; |
73 | 75 |
} |
74 | 76 |
|
75 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size); |
|
77 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion);
|
|
76 | 78 |
|
77 | 79 |
int nSchemas = 0; |
78 | 80 |
|
... | ... | |
84 | 86 |
|
85 | 87 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
86 | 88 |
|
87 |
selectedUnits = AnalecUtils.filterUniteByInclusion(debug, units, corpus.getMatches(), strict_inclusion, 0)
|
|
89 |
selectedUnits = AnalecUtils.filterUniteByInclusion(debug, units, corpus.getMatches(), strict_inclusion, position_in_matches)
|
|
88 | 90 |
|
89 | 91 |
int nUnites = selectedUnits.size(); |
90 | 92 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/ResetAllAnnotationsMacro.groovy (revision 2144) | ||
---|---|---|
8 | 8 |
import org.txm.searchengine.cqp.corpus.* |
9 | 9 |
import visuAnalec.elements.* |
10 | 10 |
|
11 |
if (!(corpusViewSelection instanceof MainCorpus)) {
|
|
11 |
if (!(corpusViewSelection instanceof CQPCorpus)) {
|
|
12 | 12 |
println "Corpora selection is not a Corpus" |
13 | 13 |
return; |
14 | 14 |
} |
... | ... | |
19 | 19 |
// Open the parameters input dialog box |
20 | 20 |
if (!ParametersDialog.open(this)) return; |
21 | 21 |
|
22 |
MainCorpus corpus = corpusViewSelection
|
|
22 |
def corpus = corpusViewSelection
|
|
23 | 23 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
24 | 24 |
def structure = analecCorpus.getStructure() |
25 | 25 |
|
... | ... | |
48 | 48 |
// fireMessage(new StructureEvent(analecCorpus, TypeModifStructure.SUPPR_TYPE, Schema.class, type)); |
49 | 49 |
} |
50 | 50 |
|
51 |
println "Done. Save the corpus to finish the reset." |
|
51 |
println "Done. Save the corpus to finish the annotations reset."
|
|
52 | 52 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/AnalecUtils.groovy (revision 2144) | ||
---|---|---|
53 | 53 |
|
54 | 54 |
static def selectSchemas(def debug, Corpus analecCorpus, String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size) { |
55 | 55 |
if (maximum_schema_size <= 0) maximum_schema_size = Integer.MAX_VALUE; |
56 |
if (minimum_schema_size < 0) minimum_schema_size = 0; |
|
56 | 57 |
def allSchemas = [] |
57 |
if (schema_ursql != null && schema_ursql.length() > 0 || minimum_schema_size > 1 || maximum_schema_size >= 1) { |
|
58 |
|
|
59 |
if (schema_ursql != null && schema_ursql.length() > 0) allSchemas = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql) |
|
60 |
else allSchemas = analecCorpus.getTousSchemas() |
|
58 | 61 |
|
59 |
if (schema_ursql != null && schema_ursql.length() > 0) allSchemas = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql) |
|
60 |
else allSchemas = analecCorpus.getTousSchemas() |
|
61 |
|
|
62 |
if (debug >= 2) println "allSchemas=${allSchemas.size()}" |
|
63 |
if (minimum_schema_size > 1 || maximum_schema_size >= 1) allSchemas = AnalecUtils.filterBySize(allSchemas, minimum_schema_size, maximum_schema_size); |
|
64 |
} else { |
|
65 |
allSchemas = analecCorpus.getTousSchemas() |
|
66 |
} |
|
62 |
if (debug >= 2) println "allSchemas=${allSchemas.size()}" |
|
63 |
allSchemas = AnalecUtils.filterBySize(allSchemas, minimum_schema_size, maximum_schema_size); |
|
64 |
|
|
67 | 65 |
return allSchemas |
68 | 66 |
} |
69 | 67 |
|
70 | 68 |
static def selectSchemasInCorpus(def debug, Corpus analecCorpus, org.txm.searchengine.cqp.corpus.CQPCorpus corpus, |
71 |
String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size) {
|
|
69 |
String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size, boolean strictInclusion) {
|
|
72 | 70 |
|
73 |
def allSchemas = AnalecUtils.selectSchemas(debug, analecCorpus, schema_ursql, minimum_schema_size, maximum_schema_size); |
|
71 |
if (maximum_schema_size <= 0) maximum_schema_size = Integer.MAX_VALUE; |
|
72 |
if (minimum_schema_size < 0) minimum_schema_size = 0; |
|
73 |
|
|
74 |
def allSchemas = [] |
|
75 |
if (schema_ursql != null && schema_ursql.length() > 0) allSchemas = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql) |
|
76 |
else allSchemas = analecCorpus.getTousSchemas() |
|
74 | 77 |
|
75 | 78 |
def selectedSchemas = [] |
76 | 79 |
for (Schema schema : allSchemas) { |
77 |
def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, schema.getUnitesSousjacentes(), corpus.getMatches(), true, 0) |
|
78 |
if (selectedUnits.size() > 0 ) { |
|
80 |
def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, schema.getUnitesSousjacentes(), corpus.getMatches(), strictInclusion, 0) |
|
81 |
|
|
82 |
if (minimum_schema_size <= selectedUnits.size() && selectedUnits.size() <= maximum_schema_size ) { |
|
79 | 83 |
selectedSchemas << schema |
80 | 84 |
} |
81 | 85 |
} |
... | ... | |
109 | 113 |
else allSchema = analecCorpus.getTousSchemas() |
110 | 114 |
if (debug >= 2) println "allSchema=${allSchema.size()}" |
111 | 115 |
|
112 |
if (minimum_schema_size > 1) allSchema = AnalecUtils.filterBySize(allSchema, minimum_schema_size, maximum_schema_size);
|
|
116 |
allSchema = AnalecUtils.filterBySize(allSchema, minimum_schema_size, maximum_schema_size); |
|
113 | 117 |
if (debug >= 2) println "allSchema=${allSchema.size()}" |
114 | 118 |
|
115 | 119 |
groupedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchema, unit_ursql) |
... | ... | |
121 | 125 |
} |
122 | 126 |
if (debug >= 2) println "groupedUnits=${groupedUnits.size()}" |
123 | 127 |
|
128 |
// limit units to corpus or cql_limit matches |
|
124 | 129 |
def matches = null |
125 | 130 |
if (cql_limit != null && !cql_limit.getQueryString().equals("\"\"")) { |
126 | 131 |
Subcorpus limitssubcorpus = corpus.createSubcorpus(cql_limit, corpus.getID().toUpperCase()) |
... | ... | |
151 | 156 |
*/ |
152 | 157 |
static def filterUniteByInclusionInSchema(def debug, def groups, Integer distance) { |
153 | 158 |
if (distance == 0) return groups; |
159 |
|
|
154 | 160 |
distance = distance-1; |
155 | 161 |
def newGroups = [:] |
156 | 162 |
for (def k : groups.keySet()) { |
... | ... | |
174 | 180 |
return [starts, ends, null] |
175 | 181 |
} |
176 | 182 |
|
183 |
static int[] toIntArray(Unite u) { |
|
184 |
if (u.getDeb() > u.getFin()) // error |
|
185 |
return (u.getFin()..u.getDeb()).toArray(new int[u.getDeb()-u.getFin()]) |
|
186 |
else |
|
187 |
return (u.getDeb()..u.getFin()).toArray(new int[u.getFin()-u.getDeb()]) |
|
188 |
} |
|
189 |
|
|
177 | 190 |
static String toString(Element e) { |
178 | 191 |
Schema r = null; |
179 | 192 |
|
... | ... | |
185 | 198 |
return sprintf("%s=%d", e.getContenu().size(), e.getProps().sort()) |
186 | 199 |
} |
187 | 200 |
|
188 |
static int[] toIntArray(Unite u) { |
|
189 |
if (u.getDeb() > u.getFin()) // error |
|
190 |
return (u.getFin()..u.getDeb()).toArray(new int[u.getDeb()-u.getFin()]) |
|
191 |
else |
|
192 |
return (u.getDeb()..u.getFin()).toArray(new int[u.getFin()-u.getDeb()]) |
|
193 |
} |
|
194 |
|
|
195 | 201 |
static String toString(def CQI, def wordProperty, Element e) { |
196 | 202 |
Schema r = null; |
197 | 203 |
|
... | ... | |
230 | 236 |
allElements.addAll(analecCorpus.getTousSchemas()) |
231 | 237 |
} |
232 | 238 |
|
233 |
|
|
234 | 239 |
return filterElements(debug, allElements, typeRegex, propName, valueRegex); |
235 | 240 |
} |
236 | 241 |
|
237 |
static def filterBySize(def elements, Integer minSize, Integer maximum_schema_size) {
|
|
242 |
static def filterBySize(def elements, Integer minimum_schema_size, Integer maximum_schema_size) {
|
|
238 | 243 |
if (maximum_schema_size == null || maximum_schema_size <= 0) maximum_schema_size = Integer.MAX_VALUE; |
239 |
if (minSize == null || minSize < 0) minSize = 0;
|
|
244 |
if (minimum_schema_size == null || minimum_schema_size < 0) minimum_schema_size = 0;
|
|
240 | 245 |
|
241 | 246 |
def filteredElements = [] |
242 | 247 |
for (Element e : elements) { |
243 | 248 |
Unite[] units = e.getUnitesSousjacentes(); |
244 | 249 |
int size = units.length; |
245 |
if (size < minSize) continue;
|
|
250 |
if (size < minimum_schema_size) continue;
|
|
246 | 251 |
if (size > maximum_schema_size) continue; |
247 | 252 |
filteredElements << e; |
248 | 253 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/check/CheckDuplicatesInSchemasMacro.groovy (revision 2144) | ||
---|---|---|
64 | 64 |
|
65 | 65 |
def allUnits = [:] |
66 | 66 |
|
67 |
def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus , schema_ursql, -1, Integer.MAX_VALUE) |
|
67 |
def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus , schema_ursql, -1, Integer.MAX_VALUE, false)
|
|
68 | 68 |
|
69 | 69 |
if (allSchemas.size() == 0) { |
70 | 70 |
println "No schema match for '$schema_ursql' selection. Aborting" |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/check/CheckAnnotationStructureValuesMacro.groovy (revision 2144) | ||
---|---|---|
8 | 8 |
import org.txm.searchengine.cqp.corpus.* |
9 | 9 |
import visuAnalec.elements.* |
10 | 10 |
|
11 |
if (!(corpusViewSelection instanceof MainCorpus)) {
|
|
11 |
if (!(corpusViewSelection instanceof CQPCorpus)) {
|
|
12 | 12 |
println "Corpora selection is not a Corpus" |
13 | 13 |
return; |
14 | 14 |
} |
... | ... | |
27 | 27 |
|
28 | 28 |
// END OF PARAMETERS |
29 | 29 |
|
30 |
MainCorpus corpus = corpusViewSelection
|
|
30 |
def corpus = corpusViewSelection
|
|
31 | 31 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
32 | 32 |
def structure = analecCorpus.getStructure() |
33 | 33 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/AjoutDefinitudeMacro.groovy (revision 2144) | ||
---|---|---|
22 | 22 |
import visuAnalec.elements.Unite; |
23 | 23 |
import visuAnalec.vue.Vue |
24 | 24 |
|
25 |
if (!(corpusViewSelection instanceof MainCorpus)) {
|
|
25 |
if (!(corpusViewSelection instanceof CQPCorpus)) {
|
|
26 | 26 |
println "Corpora selection is not a Corpus" |
27 | 27 |
return; |
28 | 28 |
} |
... | ... | |
35 | 35 |
|
36 | 36 |
if (!ParametersDialog.open(this)) return; |
37 | 37 |
|
38 |
MainCorpus corpus = corpusViewSelection
|
|
38 |
def corpus = corpusViewSelection
|
|
39 | 39 |
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient(); |
40 | 40 |
def word = corpus.getWordProperty() |
41 | 41 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/AccessibiliteMacro.groovy (revision 2144) | ||
---|---|---|
32 | 32 |
|
33 | 33 |
// CORPS DU SCRIPT |
34 | 34 |
|
35 |
if (!(corpusViewSelection instanceof MainCorpus)) {
|
|
35 |
if (!(corpusViewSelection instanceof CQPCorpus)) {
|
|
36 | 36 |
println "Corpora selection is not a Corpus: "+corpusViewSelection |
37 | 37 |
return |
38 | 38 |
} |
tmp/org.txm.analec.rcp/src/org/txm/annotation/urs/package.html (revision 2144) | ||
---|---|---|
1 |
<html> |
|
2 |
<head></head> |
|
3 |
<body> |
|
4 |
|
|
5 |
MAIN DOCUMENTATION<br> |
|
6 |
|
|
7 |
Get the Analec corpus using the {@link URSCorpora} class. |
|
8 |
|
|
9 |
</body> |
|
10 |
</html> |
|
0 | 11 |
Formats disponibles : Unified diff