Révision 2082
tmp/org.txm.analec.rcp/plugin.xml (revision 2082) | ||
---|---|---|
22 | 22 |
<menu |
23 | 23 |
id="menu.urs.tools" |
24 | 24 |
label="Tools"> |
25 |
<visibleWhen |
|
26 |
checkEnabled="false"> |
|
27 |
<reference |
|
28 |
definitionId="OneCorpusSelected"> |
|
29 |
</reference> |
|
30 |
</visibleWhen> |
|
31 | 25 |
<menu |
32 | 26 |
label="Annotation"> |
33 |
<command |
|
34 |
commandId="org.txm.rcp.commands.ExecuteMacro" |
|
35 |
label="Annotate concordance" |
|
36 |
style="push"> |
|
37 |
<parameter |
|
38 |
name="org.txm.rcp.command.parameter.file" |
|
39 |
value="org/txm/macro/urs/edit/ConcordanceToUnitMacro.groovy"> |
|
40 |
</parameter> |
|
41 |
</command> |
|
42 |
<command |
|
43 |
commandId="org.txm.rcp.commands.ExecuteMacro" |
|
44 |
label="Reset annotation" |
|
45 |
style="push"> |
|
46 |
<parameter |
|
47 |
name="org.txm.rcp.command.parameter.file" |
|
48 |
value="org/txm/macro/urs/edit/ResetAnnotationsMacro.groovy"> |
|
49 |
</parameter> |
|
50 |
</command> |
|
51 |
<command |
|
52 |
commandId="org.txm.rcp.commands.ExecuteMacro" |
|
53 |
label="SI to Singleton" |
|
54 |
style="push"> |
|
55 |
<parameter |
|
56 |
name="org.txm.rcp.command.parameter.file" |
|
57 |
value="org/txm/macro/urs/edit/Si2SingletonMacro.groovy"> |
|
58 |
</parameter> |
|
59 |
</command> |
|
60 |
<command |
|
61 |
commandId="org.txm.rcp.commands.ExecuteMacro" |
|
62 |
label="Annotate selection of units" |
|
63 |
style="push"> |
|
64 |
<parameter |
|
65 |
name="org.txm.rcp.command.parameter.file" |
|
66 |
value="org/txm/macro/urs/edit/UnitsAnnotateMacro.groovy"> |
|
67 |
</parameter> |
|
68 |
</command> |
|
27 |
<dynamic |
|
28 |
class="org.txm.annotation.urs.commands.URSToolsMenuContribution" |
|
29 |
id="edit"> |
|
30 |
</dynamic> |
|
69 | 31 |
</menu> |
70 | 32 |
<menu |
71 | 33 |
label="Verification"> |
72 |
<command |
|
73 |
commandId="org.txm.rcp.commands.ExecuteMacro" |
|
74 |
label="Check duplicated units in schema" |
|
75 |
style="push"> |
|
76 |
<parameter |
|
77 |
name="org.txm.rcp.command.parameter.file" |
|
78 |
value="org/txm/macro/urs/edit/CheckDuplicatesInSchemasMacro.groovy"> |
|
79 |
</parameter> |
|
80 |
</command> |
|
81 |
<command |
|
82 |
commandId="org.txm.rcp.commands.ExecuteMacro" |
|
83 |
label="Check annotation structure values" |
|
84 |
style="push"> |
|
85 |
<parameter |
|
86 |
name="org.txm.rcp.command.parameter.file" |
|
87 |
value="org/txm/macro/urs/edit/CheckAnnotationStructureValuesMacro.groovy"> |
|
88 |
</parameter> |
|
89 |
</command> |
|
34 |
<dynamic |
|
35 |
class="org.txm.annotation.urs.commands.URSToolsMenuContribution" |
|
36 |
id="check"> |
|
37 |
</dynamic> |
|
90 | 38 |
</menu> |
91 | 39 |
<menu |
92 | 40 |
label="Exploitation"> |
93 |
<command |
|
94 |
commandId="org.txm.rcp.commands.ExecuteMacro" |
|
95 |
label="Units summary" |
|
96 |
style="push"> |
|
97 |
<parameter |
|
98 |
name="org.txm.rcp.command.parameter.file" |
|
99 |
value="org/txm/macro/urs/exploit/UnitsSummaryMacro.groovy"> |
|
100 |
</parameter> |
|
101 |
</command> |
|
102 |
<command |
|
103 |
commandId="org.txm.rcp.commands.ExecuteMacro" |
|
104 |
label="Units list" |
|
105 |
style="push"> |
|
106 |
<parameter |
|
107 |
name="org.txm.rcp.command.parameter.file" |
|
108 |
value="org/txm/macro/urs/exploit/UnitsListMacro.groovy"> |
|
109 |
</parameter> |
|
110 |
</command> |
|
111 |
<command |
|
112 |
commandId="org.txm.rcp.commands.ExecuteMacro" |
|
113 |
label="Units index" |
|
114 |
style="push"> |
|
115 |
<parameter |
|
116 |
name="org.txm.rcp.command.parameter.file" |
|
117 |
value="org/txm/macro/urs/exploit/UnitsIndexMacro.groovy"> |
|
118 |
</parameter> |
|
119 |
</command> |
|
120 |
<command |
|
121 |
commandId="org.txm.rcp.commands.ExecuteMacro" |
|
122 |
label="Schemas progression" |
|
123 |
style="push"> |
|
124 |
<parameter |
|
125 |
name="org.txm.rcp.command.parameter.file" |
|
126 |
value="org/txm/macro/urs/exploit/SchemasProgressionMacro.groovy"> |
|
127 |
</parameter> |
|
128 |
</command> |
|
129 |
<command |
|
130 |
commandId="org.txm.rcp.commands.ExecuteMacro" |
|
131 |
label="Schemas summary" |
|
132 |
style="push"> |
|
133 |
<parameter |
|
134 |
name="org.txm.rcp.command.parameter.file" |
|
135 |
value="org/txm/macro/urs/exploit/SchemasSummaryMacro.groovy"> |
|
136 |
</parameter> |
|
137 |
</command> |
|
138 |
</menu> |
|
139 |
<menu |
|
140 |
label="Export"> |
|
141 |
<command |
|
142 |
commandId="org.txm.rcp.commands.ExecuteMacro" |
|
143 |
label="Export to Glozz" |
|
144 |
style="push"> |
|
145 |
<parameter |
|
146 |
name="org.txm.rcp.command.parameter.file" |
|
147 |
value="org/txm/macro/urs/export/ExportToGlozzMacro.groovy"> |
|
148 |
</parameter> |
|
149 |
</command> |
|
150 |
<command |
|
151 |
commandId="org.txm.rcp.commands.ExecuteMacro" |
|
152 |
label="Export as..." |
|
153 |
style="push"> |
|
154 |
<parameter |
|
155 |
name="org.txm.rcp.command.parameter.file" |
|
156 |
value="org/txm/macro/urs/export/ExportAsMacro.groovy"> |
|
157 |
</parameter> |
|
158 |
</command> |
|
159 |
</menu> |
|
160 |
<menu |
|
161 |
label="Exploitation2"> |
|
162 | 41 |
<dynamic |
163 | 42 |
class="org.txm.annotation.urs.commands.URSToolsMenuContribution" |
164 | 43 |
id="exploit"> |
165 | 44 |
</dynamic> |
166 | 45 |
</menu> |
167 | 46 |
<menu |
168 |
label="Annotation2">
|
|
47 |
label="Export">
|
|
169 | 48 |
<dynamic |
170 | 49 |
class="org.txm.annotation.urs.commands.URSToolsMenuContribution" |
171 |
id="edit">
|
|
50 |
id="export">
|
|
172 | 51 |
</dynamic> |
173 | 52 |
</menu> |
174 | 53 |
</menu> |
tmp/org.txm.analec.rcp/META-INF/MANIFEST.MF (revision 2082) | ||
---|---|---|
171 | 171 |
org.txm.macro.urs.democrat, |
172 | 172 |
org.txm.macro.urs.edit, |
173 | 173 |
org.txm.macro.urs.exploit, |
174 |
org.txm.macro.urs.exploit.mesures1, |
|
175 | 174 |
org.txm.macro.urs.export, |
176 | 175 |
org.txm.macro.urs.misc, |
177 | 176 |
visuAnalec, |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/check/CheckDuplicatesInSchemasMacro.groovy (revision 2082) | ||
---|---|---|
1 |
package org.txm.macro.urs.check |
|
2 |
|
|
3 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
4 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
5 |
// @author mdecorde |
|
6 |
// @author sheiden |
|
7 |
// STANDARD DECLARATIONS |
|
8 |
|
|
9 |
import groovy.transform.Field |
|
10 |
|
|
11 |
import org.jfree.chart.JFreeChart |
|
12 |
import org.kohsuke.args4j.* |
|
13 |
import org.txm.Toolbox |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.macro.urs.AnalecUtils |
|
16 |
import org.txm.rcp.Application |
|
17 |
import org.txm.rcp.IImageKeys |
|
18 |
import org.txm.rcp.swt.widget.parameters.* |
|
19 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
20 |
import org.txm.searchengine.cqp.corpus.* |
|
21 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
22 |
|
|
23 |
import visuAnalec.elements.* |
|
24 |
|
|
25 |
// Reports the units that belong to more than one of the schemas selected by
// schema_ursql, and returns them as a map: unit -> list of schemas holding it.

// Simple class name of this script, used to prefix the console message below.
def scriptName = this.class.getSimpleName()

// Nothing can be checked unless a CQP corpus is the current selection.
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "** $scriptName please select a Corpus to run the macro"
	return
}

// BEGINNING OF PARAMETERS
// URSQL expression selecting the schemas to inspect.
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql
// Schema property printed next to each schema in the report.
@Field @Option(name="schema_property_display", usage="PROP", widget="String", required=true, def="REF")
String schema_property_display
// Verbosity level; picked as text and converted to a number right after the dialog.
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug
if (!ParametersDialog.open(this)) return

// Map the textual debug level to its numeric rank; any other value is left untouched.
switch (debug) {
	case "OFF":
		debug = 0
		break
	case "ON":
		debug = 1
		break
	case "ALL":
		debug = 2
		break
	case "REALLY ALL":
		debug = 3
		break
}

def CQI = CQPSearchEngine.getCqiClient()
def corpus = corpusViewSelection
def word = corpus.getWordProperty()
def analecCorpus = URSCorpora.getCorpus(corpus)

// When a display property is given, stop if isPropertyDefined reports any problem for it.
if (schema_property_display.length() > 0) {
	def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql, schema_property_display).size()
	if (errors > 0) {
		println "Error: some Schema types don't contain the $schema_property_display property: $errors"
		return
	}
}

// Select every matching schema, without any size restriction.
def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, -1, Integer.MAX_VALUE)
if (allSchemas.size() == 0) {
	println "No schema match for '$schema_ursql' selection. Aborting"
	return
}

// Units grouped per schema; indexed by schema in the loop below.
def allUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas)
if (allUnits.size() == 0) {
	println "No unit selection. Aborting"
	return
}

if (debug) println "allUnits=${allUnits.size()}"

// Index every unit by the schemas that contain it.
def duplicates = [:]
for (Schema schema : allSchemas) {
	for (def unit : allUnits[schema]) {
		def owners = duplicates[unit]
		if (owners == null) {
			owners = []
			duplicates[unit] = owners
		}
		owners << schema
	}
}

// Keep only the units held by at least two schemas.
def entries = duplicates.entrySet().iterator()
while (entries.hasNext()) {
	if (entries.next().value.size() < 2) entries.remove()
}

if (duplicates.isEmpty()) {
	println "No duplicates found in $schema_ursql units"
} else {
	println "Duplicates found"
	duplicates.each { unit, owners ->
		println AnalecUtils.toString(CQI, word, unit)+" in: "
		for (Schema schema : owners) {
			println " '"+schema.getProp(schema_property_display)+"'\t"+schema.getProps()
		}
	}
}

return duplicates
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/check/CheckAnnotationStructureValuesMacro.groovy (revision 2082) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro.urs.check |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcp.swt.widget.parameters.* |
|
7 |
import org.txm.annotation.urs.* |
|
8 |
import org.txm.searchengine.cqp.corpus.* |
|
9 |
import visuAnalec.elements.* |
|
10 |
|
|
11 |
// Lists the values declared in the annotation structure for one unit property
// that no unit actually uses, and optionally removes them from the structure.

// The macro works on a main corpus only.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return
}

// Unit type whose property values are checked.
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="MENTION")
String unit_type

// Name of the unit property whose declared values are checked.
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="REF")
String unit_property_name

// When true, the unused values are removed from the structure.
@Field @Option(name="pruneUnusedValues", usage="", widget="Boolean", required=false, def="false")
boolean pruneUnusedValues

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// END OF PARAMETERS

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)
def structure = analecCorpus.getStructure()

// The unit type must exist in the structure.
def knownTypes = structure.getTypes(Unite.class)
if (!knownTypes.contains(unit_type)) {
	println "Missing unit type: $unit_type"
	return
}

// The property must be declared for that unit type.
def knownProperties = analecCorpus.getStructure().getUniteProperties(unit_type)
if (!knownProperties.contains(unit_property_name)) {
	println "No properties '$unit_property_name' in '$unit_type' unit."
	return
}

// Start from every value declared in the structure...
def tmpvalues = new HashSet()
tmpvalues.addAll(structure.getValeursProp(Unite.class, unit_type, unit_property_name))
println "Values stored in the structure: $tmpvalues"

// ...collect the values really carried by the units of that type...
def usedValues = new HashSet()
for (Unite unite : analecCorpus.getUnites(unit_type)) {
	usedValues.add(unite.getProp(unit_property_name))
}

// ...and what remains after subtraction is declared but never used.
tmpvalues.removeAll(usedValues)

if (tmpvalues.isEmpty()) {
	println "All the values are used."
} else {
	println "The following values ("+tmpvalues.size()+") are not used: "+tmpvalues.join(", ")
	if (pruneUnusedValues) {
		println "Pruning the values..."
		for (String val : tmpvalues) {
			structure.supprimerVal(Unite.class, unit_type, unit_property_name, val)
		}
	}
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/AllMesuresMacro.groovy (revision 2082) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
|
|
10 |
import groovy.transform.Field |
|
11 |
|
|
12 |
import org.txm.* |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import org.apache.commons.lang.StringUtils; |
|
17 |
|
|
18 |
// BEGINNING OF PARAMETERS |
|
19 |
|
|
20 |
// Runs a fixed series of measure macros on every main corpus selected in the
// corpora view and writes one table line per corpus in a TSV file.

// Output file receiving the measures table (tab-separated values, UTF-8).
@Field @Option(name="tsvFile",usage="", widget="FileSave", required=true, def="result.tsv")
File tsvFile

// URSQL schema selection forwarded to the measures as "schema_ursql".
@Field @Option(name="default_schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String default_schema_ursql

// Minimal schema size forwarded to the measures as "minimum_schema_size".
@Field @Option(name="default_minimum_schema_size", usage="", widget="Integer", required=true, def="3")
int default_minimum_schema_size

// Schema property used by the measures that display a schema name.
@Field @Option(name="schema_property_display_name",usage="", widget="String", required=false, def="REF")
String schema_property_display_name

// URSQL unit selection forwarded to the measures as "unit_ursql".
@Field @Option(name="default_unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String default_unit_ursql

// Word property forwarded to StabilityScoreMacro.
@Field @Option(name="default_word_property", usage="", widget="String", required=false, def="word")
String default_word_property
// Category property forwarded to the two category-based measures.
@Field @Option(name="default_pos_property", usage="", widget="String", required=false, def="CATEGORIE")
String default_pos_property

if (!ParametersDialog.open(this)) return;
// END OF PARAMETERS

println "Corpora selections: "+corpusViewSelections

// table, mesures, params and returnedValue are assigned without "def" on purpose:
// undeclared script variables live in the script binding, which is the only way
// the execMesure() method of this script can reach "mesures".
table = [] // contains all table lines
mesures = []

for (def corpus : corpusViewSelections) { // for each corpus selected in the corpora view
	if (!(corpus instanceof MainCorpus)) continue; // check if the corpus is a maincorpus
	def line = [] ; table << line // create and add a new table line
	line << corpus.getID() // add the corpus name in the first column

	println "*** Computing mesures for $corpus" // each macro return a "result" and a "data"

	params = [
		"unit_ursql":default_unit_ursql,
	]
	returnedValue = execMesure(ReferentialDensityMacro, line, corpus, params)
	line << returnedValue["result"]

	params = [
		"schema_ursql":default_schema_ursql,
		"minimum_schema_size":default_minimum_schema_size,
		"unit_ursql":default_unit_ursql,
	]
	returnedValue = execMesure(MeanDistanceMacro, line, corpus, params)
	line << returnedValue["result"]

	params = [
		"schema_ursql":default_schema_ursql,
		"minimum_schema_size":default_minimum_schema_size,
		"unit_ursql":default_unit_ursql,
	]
	returnedValue = execMesure(NumberOfSchemaMacro, line, corpus, params)
	line << returnedValue["result"]

	params = [
		"schema_ursql":default_schema_ursql,
		"minimum_schema_size":default_minimum_schema_size,
		"schema_property_display_name":schema_property_display_name,
		"unit_ursql":default_unit_ursql+"@CATEGORIE=GN Défini|GN Démonstratif|Nom Propre",
		"word_property":default_word_property,
	]
	returnedValue = execMesure(StabilityScoreMacro, line, corpus, params)
	line << returnedValue["result"]

	params = [
		"schema_ursql":default_schema_ursql,
		"minimum_schema_size":default_minimum_schema_size,
		"unit_ursql":default_unit_ursql,
	]
	returnedValue = execMesure(InterDistanceMacro, line, corpus, params)
	line << returnedValue["result"]

	// The two category-based measures contribute their "data" entry, not "result".
	params = [
		"schema_ursql":default_schema_ursql,
		"minimum_schema_size":default_minimum_schema_size,
		"unit_ursql":default_unit_ursql,
		"word_property":default_pos_property,
	]
	returnedValue = execMesure(NatureOfTheFirstUnitMacro, line, corpus, params)
	line << returnedValue["data"]

	// The keys below must be the option names declared by GrammaticalCategoryMacro:
	// "schema_display_property_name" and "property". The former keys
	// ("schema_property_display_name", "word_property") matched none of its options.
	params = [
		"schema_ursql":default_schema_ursql,
		"minimum_schema_size":default_minimum_schema_size,
		"schema_display_property_name":schema_property_display_name,
		"unit_ursql":default_unit_ursql,
		"property":default_pos_property,
	]
	returnedValue = execMesure(GrammaticalCategoryMacro, line, corpus, params)
	line << returnedValue["data"]
}

// WRITE RESULTS IN THE TSV FILE
// Header: an empty first cell (corpus column) followed by the measure names.
tsvFile.withWriter("UTF-8") { writer ->
	writer.println "\t"+mesures.join("\t")
	table.each { line -> writer.println line.join("\t") }
}

println "Done. Results are saved in ${tsvFile.getAbsolutePath()} file."
|
123 |
|
|
124 |
// UTILITY FUNCTIONS |
|
125 |
/**
 * Runs one measure macro on one corpus and hands back whatever the macro returned.
 *
 * The measure name (the macro class simple name without its "Macro" suffix) is
 * recorded in the script-level "mesures" list, which becomes the header of the
 * TSV file. A name is recorded only once: this method is called once per corpus,
 * so appending on every call would repeat the header for each extra corpus and
 * the header would no longer match the width of the table lines.
 *
 * "mesures" is set by the script body; "gse" and "monitor" are not defined in
 * this script and are expected to be supplied by the script binding.
 *
 * @param mesure the macro class to run
 * @param line the table line being filled for the corpus (not modified here)
 * @param corpus the corpus passed to the macro as "corpusViewSelection"
 * @param params the macro arguments, passed as "args"
 * @return the non-null value returned by the macro
 * @throws Exception if the macro returned null
 */
def execMesure(def mesure, def line, def corpus, def params) {
	def className = mesure.getSimpleName()
	// Strip the "Macro" suffix; keep the full name when the class is not named that way
	// (substring(0, -1) would throw).
	def suffixStart = className.indexOf("Macro")
	def m = suffixStart >= 0 ? className.substring(0, suffixStart) : className

	// One header column per measure, whatever the number of corpora processed.
	if (!mesures.contains(m)) mesures << m

	println "***** ${mesures.indexOf(m) + 1}- $m with parameters: $params"
	def r = gse.run(mesure, ["args":params, "corpusViewSelection":corpus, "monitor":monitor])
	if (r == null) throw new Exception("Null result");
	return r;
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/GrammaticalCategoryMacro.groovy (revision 2082) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.macro.urs.AnalecUtils |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.searchengine.cqp.* |
|
16 |
import org.txm.searchengine.cqp.corpus.* |
|
17 |
import org.apache.commons.lang.StringUtils; |
|
18 |
|
|
19 |
// BEGINNING OF PARAMETERS |
|
20 |
|
|
21 |
// Counts, for the units of the selected schemas, the values of a property
// (a CQP word property when the corpus has one of that name, an annotation unit
// property otherwise). Prints one index per schema plus a global index, and
// returns ["result": label of the most frequent value, "data": global counts].

// URSQL expression selecting the schemas.
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

// Minimal size a schema must have to be selected.
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// Schema property printed in the title of each per-schema index.
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
String schema_display_property_name

// URSQL expression filtering the units of each schema.
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql

// Name of the property whose values are counted.
@Field @Option(name="property", usage="", widget="String", required=false, def="CATEGORIE")
String property

// Verbosity level; picked as text and converted to a number right after the dialog.
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3

// END OF PARAMETERS

// Typed as CQPCorpus to agree with the guard above: a MainCorpus-typed variable
// made the assignment fail for any other kind of CQPCorpus that passed the guard.
CQPCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
	return;
}

if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
	return;
}

def CQI = CQPSearchEngine.getCqiClient()

// null when the corpus has no property of that name: the value is then read
// from the annotation unit itself (see the unit loop below).
def prop = corpus.getProperty(property)

def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
def allFreqs = [:] // value -> count over all the schemas
def n = 0
for (def schema : schemas) {
	n++

	def freqs = [:] // value -> count for this schema only

	def allUnites = schema.getUnitesSousjacentesNonTriees()

	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)

	for (def unit : units) { // no need to sort units

		String forme = null;
		if (prop == null) { // property is the analec unit property to use
			forme = unit.getProp(property)
		} else {
			// Positions covered by the unit, from its first to its last token.
			int[] pos = null;
			if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
			else pos = unit.getDeb()..unit.getFin()

			forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
		}

		freqs[forme] = (freqs[forme] ?: 0) + 1
		allFreqs[forme] = (allFreqs[forme] ?: 0) + 1
	}

	if (schema_display_property_name != null) {
		println "Index des natures de $unit_ursql de '"+schema.getProp(schema_display_property_name)+"' : "
	} else {
		println "Index des natures de $schema_ursql - $n : "
	}

	// Most frequent first, ties broken by value.
	for (def forme : freqs.sort() { a, b -> -a.value <=> -b.value ?: a.key <=> b.key }) {
		println forme.key+"\t"+forme.value
	}
}

int max = 0;
def result = "";

println "Index des natures de $schema_ursql : "
for (def forme : allFreqs.sort() { a, b -> -a.value <=> -b.value ?: a.key <=> b.key }) {
	println forme.key+"\t"+forme.value
	if (max < forme.value) {
		max = forme.value
		// Label built from the entry key: interpolating the entry itself
		// rendered "key=value" and repeated the count.
		result = "${forme.key}: "+forme.value
	}
}

return ["result":result, "data":allFreqs]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsSummaryMacro.groovy (revision 2082) | ||
---|---|---|
45 | 45 |
String unit_ursql |
46 | 46 |
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
47 | 47 |
int limit_distance_in_schema |
48 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
|
|
48 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=false, def="")
|
|
49 | 49 |
limit_cql |
50 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
|
|
50 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=false, def="true")
|
|
51 | 51 |
boolean strict_inclusion |
52 | 52 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
53 | 53 |
int limit_distance |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasMacro.groovy (revision 2082) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
|
|
6 |
// STANDARD DECLARATIONS |
|
7 |
package org.txm.macro.urs.exploit |
|
8 |
|
|
9 |
import org.kohsuke.args4j.* |
|
10 |
import groovy.transform.Field |
|
11 |
import org.txm.rcp.swt.widget.parameters.* |
|
12 |
import org.txm.annotation.urs.* |
|
13 |
import visuAnalec.elements.* |
|
14 |
import org.txm.searchengine.cqp.corpus.* |
|
15 |
import org.txm.macro.urs.AnalecUtils |
|
16 |
import org.txm.Toolbox |
|
17 |
import org.txm.rcp.commands.* |
|
18 |
import org.apache.commons.lang.StringUtils |
|
19 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
20 |
|
|
21 |
// Prints one line per selected schema: the schema properties followed either by
// the forms of its units or, when buildCQL is set, by a CQL alternative per unit.

if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Corpus view selection is not a Corpus"
	return
}

// BEGINNING OF PARAMETERS
// URSQL expression selecting the schemas.
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

// Minimal size a schema must have to be selected.
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// URSQL expression filtering the units of each schema.
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql

// Property used to render a unit: a corpus word property, or a unit property
// when the corpus has no word property of that name.
@Field @Option(name="word_property", usage="", widget="StringArray", metaVar="word lemma frlemma frolemma #forme# id", required=false, def="word")
String word_property

// Text printed between two units of the same schema (form mode only).
@Field @Option(name="separator", usage="", widget="String", required=true, def=", ")
String separator

// When true, print a CQL query per unit instead of its forms.
@Field @Option(name="buildCQL", usage="générer la requête des unités", widget="Boolean", required=true, def='false')
def buildCQL

// Verbosity level; picked as text and converted to a number right after the dialog.
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

if (!ParametersDialog.open(this)) return

// Map the textual debug level to its numeric rank; any other value is left untouched.
switch (debug) {
	case "OFF":
		debug = 0
		break
	case "ON":
		debug = 1
		break
	case "ALL":
		debug = 2
		break
	case "REALLY ALL":
		debug = 3
		break
}

CQPCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
	return
}

if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
	return
}

def CQI = CQPSearchEngine.getCqiClient()

// Queries are always built on the "id" property; forms use the requested one.
// (word_prop is a script binding variable, as in the rest of this script.)
word_prop = buildCQL ? corpus.getProperty("id") : corpus.getProperty(word_property)

def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999)
schemas.sort { it.getProps() }
def nSchemas = 0

def lens = [:]
for (def schema : schemas) {

	def units = AnalecUtils.filterElements(debug, schema.getUnitesSousjacentesNonTriees(), unit_ursql)

	print schema.getProps().toString()+ ": "
	def printed = 0 // number of units already written on the current line
	for (def unit : units) {

		if (buildCQL) {
			// One [id="..."] token per position of the unit, joined by spaces.
			int[] pos = (unit.getDeb() == unit.getFin()) ? [unit.getDeb()] : (unit.getDeb()..unit.getFin())
			q = pos.collect { cpos ->
				int[] single = [cpos]
				"["+word_prop+"=\""+CQI.cpos2Str(word_prop.getQualifiedName(), single)[0]+"\"]"
			}.join(" ")

			if (printed++ > 0) print "|"
			print "("+q+")"
		} else {
			String forme = null
			if (word_prop != null) {
				int[] pos = (unit.getDeb() == unit.getFin()) ? [unit.getDeb()] : (unit.getDeb()..unit.getFin())
				forme = StringUtils.join(CQI.cpos2Str(word_prop.getQualifiedName(), pos), " ") // ids is enough
			} else { // word_property is the analec unit property to use
				forme = unit.getProp(word_property)
			}

			if (printed++ > 0) print separator
			print forme
		}
	}
	println ""

	nSchemas++
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/MeanDistanceMacro.groovy (revision 2082) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
|
|
6 |
// STANDARD DECLARATIONS |
|
7 |
package org.txm.macro.urs.exploit |
|
8 |
|
|
9 |
import org.kohsuke.args4j.* |
|
10 |
import groovy.transform.Field |
|
11 |
import org.txm.rcp.swt.widget.parameters.* |
|
12 |
import org.txm.annotation.urs.* |
|
13 |
import org.txm.macro.urs.AnalecUtils |
|
14 |
import visuAnalec.elements.* |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import org.txm.Toolbox |
|
17 |
import org.txm.rcp.commands.* |
|
18 |
import org.txm.statsengine.r.core.RWorkspace |
|
19 |
|
|
20 |
// Computes the mean number of selected units per schema, prints the
// distribution of schema lengths and draws it as an R histogram (PNG + SVG).
// Returns a map: "result" = mean length, "data" = length -> number of schemas.

// The macro only works when the corpora view selection is a CQP corpus.
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
// URSQL selector (TYPE@PROP=REGEX) of the schemas to measure
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

// lower size bound passed to the schema selection
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// schema property printed next to each length in the index
@Field @Option(name="schema_property_display", usage="schema property to show", widget="String", required=true, def="REF")
String schema_property_display

// URSQL selector (TYPE@PROP=REGEX) of the units counted in each schema
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql

@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

if (!ParametersDialog.open(this)) return;
// turn the debug label chosen in the dialog into the numeric level given to AnalecUtils
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3

def corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
	return;
}

if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
	return;
}

def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);

// lens:      number of selected units -> number of schemas having that many units
// lensnames: number of selected units -> display property of each of those schemas
def lens = [:]
def lensnames = [:]
for (def schema : schemas) {

	def allUnites = schema.getUnitesSousjacentesNonTriees()

	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)

	int nUnites = units.size();

	if (!lens.containsKey(nUnites)) {
		lens[nUnites] = 0;
		lensnames[nUnites] = [];
	}

	lens[nUnites] = lens[nUnites] + 1;
	lensnames[nUnites] << schema.getProp(schema_property_display)
}

int t = 0; // total number of selected units over all schemas
int n = 0; // number of schemas
for (def f : lens.keySet()) {
	t += f * lens[f]
	n += lens[f]
}

// Without any schema the mean is undefined: t/n would throw an ArithmeticException
// and the histogram would be requested with 0 bins. Stop here with a neutral result.
if (n == 0) {
	println "No schema match for '$schema_ursql' selection. Aborting"
	return ["result":0, "data":lens]
}

coef = (t/n)
// most frequent lengths first; ties are broken by the longest length first
def slens = lens.sort { a, b -> -a.value <=> -b.value ?: -a.key <=> -b.key }
// flat list holding one length value per schema: the raw sample given to hist()
def flens = []
slens.each { key, value -> value.times { flens << key } }
def nbins = flens.size()*2

def cfreq = 0 // cumulated frequency printed in the third column
println "Longueur moyenne des chaînes de référence : $t/$n = "+coef
println "Index hiérarchique des longueurs de chaînes :\nlen\tfreq\tcfreq"
slens.each { println it.key+" "+it.value+" "+(cfreq+=it.value)+" "+lensnames[it.key] }

// same entries ordered by decreasing length
def slens2 = slens.sort { a, b -> -a.key <=> -b.key }

def r = RWorkspace.getRWorkspaceInstance()

// vectors referenced by name (len, flen) in the R script below
r.addVectorToWorkspace("len", slens2.keySet() as int[])
r.addVectorToWorkspace("freq", slens2.values() as int[])
r.addVectorToWorkspace("flen", flens as int[])

def corpusName = corpus.getID()

def PNGFile = File.createTempFile("txm", ".png", new File(Toolbox.getTxmHomePath(), "results"))
def PNGFilePath = PNGFile.getAbsolutePath()
println "PNG file: "+PNGFilePath

def SVGFile = File.createTempFile("txm", ".svg", new File(Toolbox.getTxmHomePath(), "results"))
def SVGFilePath = SVGFile.getAbsolutePath()
println "SVG file: "+SVGFilePath

/// BEGINNING OF R SCRIPT
def script ="""
hist(flen, xaxt='n', col="gray", xlab="Length", breaks=$nbins, main="$corpusName Longueur des chaînes ($nbins bins)")
axis(side=1, at=len)
dev.off()
"""
/// END OF R SCRIPT

// execute the R script once per output device; the device-opening call is prepended to the shared script
r.plot(PNGFile, "png(file = \"${PNGFilePath}\"); "+script)
r.plot(SVGFile, "svglite(file = \"${SVGFilePath}\"); "+script)

// display the SVG result graphic
monitor.syncExec(new Runnable() {
	@Override
	public void run() { OpenBrowser.openfile(SVGFilePath, corpusName+" Longueur des chaînes") }
})

return ["result":coef, "data":lens]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/InterDistanceMacro.groovy (revision 2082) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.macro.urs.AnalecUtils |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import org.apache.commons.lang.StringUtils; |
|
17 |
|
|
18 |
// Measures, for every selected schema, the distance between consecutive units
// ("distances") and, per unit, the smaller of the distances to its two
// neighbours ("cadences"); prints mean, median and quartiles of both.
// Returns a map: "result" = mean distance, "result2" = mean cadence, "data" = raw lists.

// The macro only works when the corpora view selection is a CQP corpus.
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
// URSQL selector (TYPE@PROP=REGEX) of the schemas to measure
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

// lower size bound passed to the schema selection
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// declared in the dialog but not read by the code below
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
String schema_display_property_name

// URSQL selector (TYPE@PROP=REGEX) of the units kept in each schema
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql

@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

if (!ParametersDialog.open(this)) return;
// turn the debug label chosen in the dialog into the numeric level given to AnalecUtils
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3

CQPCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
	return;
}

if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
	return;
}

def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
def distances = [];
def nDistances = 0
def cadences = [];
for (def schema : schemas) {

	def allUnites = schema.getUnitesSousjacentesNonTriees()

	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)

	// the units come from an unsorted collection: order them before comparing neighbours
	Collections.sort(units)

	for (int i = 0 ; i < units.size() ; i++) {
		int d1 = 0; // distance from the end of this unit to the start of the next one
		int d2 = 0; // distance from the end of the previous unit to the start of this one
		if (i < units.size()-1) d1 = units[i+1].getDeb() - units[i].getFin();
		if (d1 < 0) {
			//println "D1 "+units[i+1].getDeb()+" - "+units[i].getFin()+" = "+d1
			d1 = 0; // overlapping units: a negative distance is clamped to 0
		}
		if (i > 0) d2 = units[i].getDeb() - units[i-1].getFin();
		if (d2 < 0) {
			//println "D2 "+units[i].getDeb()+" - "+units[i-1].getFin()+" = "+d2
			d2 = 0; // overlapping units: a negative distance is clamped to 0
		}
		// NOTE(review): the last unit of a schema has no next unit, so it adds a 0 to
		// "distances", and the first and last units always get a cadence of 0 because
		// one of d1/d2 keeps its initial value — confirm this is the intended measure.
		distances << d1

		if (d1 < d2) cadences << d1 else cadences << d2

		nDistances++
	}
}

// Nothing was measured: sum() of an empty list is null and the means below would
// divide by zero, so stop here with neutral results instead of failing.
if (nDistances == 0) {
	println "No unit selection. Aborting"
	return ["result":0, "result2":0, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]
}

// sorted so that median and quartiles can be read by position
distances = distances.sort()
cadences = cadences.sort()

int distances_total = distances.sum()
int cadences_total = cadences.sum()
coef = (distances_total / nDistances)
cadence = (cadences_total / nDistances)
println "distances $distances"
println "distance moyenne inter-mayonnaise : $distances_total / $nDistances = $coef"
println "distance medianne inter-mayonnaise : "+distances[(int)(distances.size() / 2)]
println "distance quartils : "+distances[0]+" "+distances[(int)(distances.size() / 4)] + " "+distances[(int)(distances.size() / 2)]+" "+distances[(int)(3*distances.size() / 4)]+" "+distances[(int)(distances.size() -1)]
println "cadences $cadences"
println "cadence moyenne : $cadences_total / $nDistances = $cadence"
println "cadence medianne : "+cadences[(int)(cadences.size() / 2)]
println "cadence quartils : "+cadences[0]+" "+cadences[(int)(cadences.size() / 4)] + " "+cadences[(int)(cadences.size() / 2)]+" "+cadences[(int)(3*cadences.size() / 4)]+" "+cadences[(int)(cadences.size() -1)]

return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/NatureOfTheFirstUnitMacro.groovy (revision 2082) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.macro.urs.AnalecUtils |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
17 |
import org.apache.commons.lang.StringUtils; |
|
18 |
|
|
19 |
// Builds a frequency index of the "nature" of the first unit of every selected
// schema. The nature is either a unit property or, when word_property names a
// corpus property, the values of that property over the words of the unit.
// Returns a map: "result" = most frequent nature with its count, "data" = the index.

// The macro only works when the corpora view selection is a CQP corpus.
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
// URSQL selector (TYPE@PROP=REGEX) of the schemas to inspect
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

// lower size bound passed to the schema selection
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// URSQL selector (TYPE@PROP=REGEX) of the units kept in each schema
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql

// corpus word property if one exists under that name, otherwise a unit property
@Field @Option(name="word_property", usage="", widget="String", required=false, def="CATEGORIE")
String word_property

@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

if (!ParametersDialog.open(this)) return;
// turn the debug label chosen in the dialog into the numeric level given to AnalecUtils
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3


CQPCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
	return;
}

if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
	return;
}

def CQI = CQPSearchEngine.getCqiClient()

// null when the corpus has no property with that name
def prop = corpus.getProperty(word_property)

def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
// nature of the first unit -> number of schemas starting with it
def freqs = [:]

for (def schema : schemas) {

	def allUnites = schema.getUnitesSousjacentesNonTriees()

	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)

	if (units.size() == 0) continue;

	// NOTE(review): the units come from an unsorted collection and are not sorted
	// here, so units[0] is the first in text order only if filterElements orders
	// its result — TODO confirm.
	def unit = units[0]

	String forme = null;
	if (prop == null) { // word_property is the analec unit property to use
		forme = unit.getProp(word_property)
	} else {
		// corpus positions covered by the unit, both bounds included
		int[] pos = null;
		if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
		else pos = unit.getDeb()..unit.getFin()

		forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
	}

	freqs[forme] = (freqs[forme] ?: 0) + 1
}

println "Index des natures de premier maillon :"
int max = 0;
def result = "";
// printed by decreasing frequency; result keeps the first nature reaching the highest count
for (def forme : freqs.keySet().sort() {it -> -freqs[it]}) {
	println "$forme\t"+freqs[forme]
	if (max < freqs[forme]) {
		max = freqs[forme]
		result = "$forme: "+freqs[forme]
	}
}

["result": result, "data": freqs]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/NumberOfSchemaMacro.groovy (revision 2082) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.txm.macro.urs.AnalecUtils |
|
15 |
|
|
16 |
// Counts the schemas of the selected corpus that match the schema selector.
// Returns a map: "result" = number of schemas, "data" = the selected schemas.

// Nothing can be done unless the corpora view selection is a CQP corpus.
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Corpora selection is not a Corpus"
	return
}

// BEGINNING OF PARAMETERS
// URSQL selector (TYPE@PROP=REGEX) of the schemas to count
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

// lower size bound passed to the schema selection
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// URSQL selector (TYPE@PROP=REGEX) of units; only validated below, not used for counting
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql

@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

if (!ParametersDialog.open(this)) {
	return
}

// map the debug label chosen in the dialog to the numeric level given to AnalecUtils
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}

CQPCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// check the schema selector first, then the unit selector
def schemaSelectorDefined = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
if (!schemaSelectorDefined) {
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
	return
}

def unitSelectorDefined = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
if (!unitSelectorDefined) {
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
	return
}

def selectedSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999)
int schemaCount = selectedSchemas.size()

println "Nombre de chaînes de référence d'un texte : $schemaCount"

return ["result":schemaCount, "data":selectedSchemas]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/ReferentialDensityMacro.groovy (revision 2082) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.txm.macro.urs.AnalecUtils |
|
15 |
|
|
16 |
// Computes the referential density of the selected corpus: the number of
// selected units divided by the size of the corpus.
// Returns a map: "result" = density, "data" = the two counts.

// The macro only works when the corpora view selection is a CQP corpus.
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
// URSQL selector (TYPE@PROP=VALUE) of the units to count
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
limit_cql
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
strict_inclusion
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
limit_distance

@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

if (!ParametersDialog.open(this)) return;
// turn the debug label chosen in the dialog into the numeric level given to AnalecUtils
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3


CQPCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);

int nMots = corpus.getSize();

// no schema selector is given (empty string, zero bounds): only the unit parameters select
def units = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, "", 0, 0,
		unit_ursql, 0, limit_cql, strict_inclusion, limit_distance);

int nUnites = units.size();

// nUnites/nMots would throw an ArithmeticException on a corpus of size 0
if (nMots == 0) {
	println "** The corpus is empty: the referential density cannot be computed."
	return ["result":0, "data":["nUnites":nUnites, "nMots":nMots]]
}

coef = (nUnites /nMots)
println "Densité référentielle : nUnites/nMots = $nUnites/$nMots = $coef = ${coef*100}%"
if (nUnites >= nMots) {
	// the text now says "greater than or equal to" so that it matches the >= test
	println "WARNING: possible encoding error. Number of units ($nUnites) is greater than or equal to the number of words ($nMots)"
}
return ["result":coef, "data":["nUnites":nUnites, "nMots":nMots]]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/StabilityScoreMacro.groovy (revision 2082) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.rcp.swt.widget.parameters.* |
|
12 |
import org.txm.annotation.urs.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.apache.commons.lang.StringUtils; |
|
15 |
import org.txm.macro.urs.AnalecUtils |
|
16 |
import visuAnalec.elements.* |
|
17 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
18 |
|
|
19 |
// Computes a stability score per selected schema: the number of selected units
// divided by the number of distinct forms of those units, and the same ratio
// over all schemas together.
// Returns a map: "result" = list of per-schema scores, "data" = global counts,
// "coef" = global score. A score of -1 means that no form was collected.

// The macro only works when the corpora view selection is a CQP corpus.
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
// URSQL selector (TYPE@PROP=REGEX) of the schemas to score
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

// lower size bound passed to the schema selection
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

// schema property printed in front of each score line
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
String schema_display_property_name

// URSQL selector (TYPE@PROP=REGEX) of the units kept in each schema
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql

// corpus word property if one exists under that name, otherwise a unit property
@Field @Option(name="word_property", usage="", widget="String", required=false, def="word")
String word_property

@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

if (!ParametersDialog.open(this)) return;
// turn the debug label chosen in the dialog into the numeric level given to AnalecUtils
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3


def corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
	return;
}

if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
	return;
}

def CQI = CQPSearchEngine.getCqiClient()

// null when the corpus has no property with that name
def prop = corpus.getProperty(word_property)

def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
allFormesSet = new HashSet(); // distinct forms over all schemas
nUnitesGrandTotal = 0; // selected units over all schemas
def coefs = []
int n = 1 // 1-based rank of the schema, used as fallback label
for (def schema : schemas) {
	def formesSet = new HashSet(); // holds every form of the current schema
	nUnitesTotal = 0;

	def allUnites = schema.getUnitesSousjacentesNonTriees()

	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
	def nUnites = units.size()
	for (def unit : units) {

		String forme = null;
		if (prop == null) { // word_property is the analec unit property to use
			forme = unit.getProp(word_property)
		} else {
			// corpus positions covered by the unit, both bounds included
			int[] pos = null;
			if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
			else pos = unit.getDeb()..unit.getFin()

			forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
		}

		formesSet.add(forme)

		nUnitesTotal++
	}
	if (formesSet.size() == 0) {
		coef = -1 // no form collected: the ratio is undefined
	} else {
		coef = (nUnitesTotal/formesSet.size())
	}
	coefs << coef
	nUnitesGrandTotal += nUnitesTotal;
	allFormesSet.addAll(formesSet)

	if (schema_display_property_name != null) {
		print schema.getProp(schema_display_property_name)
	} else {
		print schema_ursql+"-"+n+" : "
	}

	println " ($nUnites units) : $nUnitesTotal selected units / ${formesSet.size()} forms = $coef"
	n++
}

// Same convention as the per-schema score: -1 when no form was collected
// (for instance when no schema is selected), instead of dividing by zero.
def globalCoef = -1
if (allFormesSet.size() > 0) {
	globalCoef = (nUnitesGrandTotal/allFormesSet.size())
}

return ["result":coefs, "data":["nUnitesTotal":nUnitesGrandTotal, "allFormesSet":allFormesSet], "coef":globalCoef]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/CheckDuplicatesInSchemasMacro.groovy (revision 2082) | ||
---|---|---|
1 |
package org.txm.macro.urs.edit |
|
2 |
|
|
3 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
4 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
5 |
// @author mdecorde |
|
6 |
// @author sheiden |
|
7 |
// STANDARD DECLARATIONS |
|
8 |
|
|
9 |
import groovy.transform.Field |
|
10 |
|
|
11 |
import org.jfree.chart.JFreeChart |
|
12 |
import org.kohsuke.args4j.* |
|
13 |
import org.txm.Toolbox |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.macro.urs.AnalecUtils |
|
16 |
import org.txm.rcp.Application |
|
17 |
import org.txm.rcp.IImageKeys |
|
18 |
import org.txm.rcp.swt.widget.parameters.* |
|
19 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
20 |
import org.txm.searchengine.cqp.corpus.* |
|
21 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
22 |
|
|
23 |
import visuAnalec.elements.* |
|
24 |
|
|
25 |
// Reports the units that belong to more than one of the selected schemas.
// Returns a map: duplicated unit -> list of the schemas containing it.

def scriptName = this.class.getSimpleName()

// Nothing can be done unless the corpora view selection is a CQP corpus.
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "** $scriptName please select a Corpus to run the macro"
	return
}

// BEGINNING OF PARAMETERS
// URSQL selector (TYPE@PROP=VALUE) of the schemas to check
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql
// schema property printed to identify each schema in the report
@Field @Option(name="schema_property_display", usage="PROP", widget="String", required=true, def="REF")
String schema_property_display
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug

if (!ParametersDialog.open(this)) {
	return
}

// map the debug label chosen in the dialog to the numeric level given to AnalecUtils
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}

def CQI = CQPSearchEngine.getCqiClient()
def corpus = corpusViewSelection
def word = corpus.getWordProperty()
def analecCorpus = URSCorpora.getCorpus(corpus)

// when a display property is given, the check must report no error for the selected schemas
if (schema_property_display.length() > 0) {
	def errorCount = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql, schema_property_display).size()
	if (errorCount > 0) {
		println "Error: some Schema types don't contain the $schema_property_display property: $errorCount"
		return
	}
}

def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus , schema_ursql, -1, Integer.MAX_VALUE)
if (allSchemas.size() == 0) {
	println "No schema match for '$schema_ursql' selection. Aborting"
	return
}

// result is indexed by schema in the loop below
def unitsBySchema = AnalecUtils.groupAllUnitesInElements(debug, allSchemas)
if (unitsBySchema.size() == 0) {
	println "No unit selection. Aborting"
	return
}

if (debug) println "allUnits=${unitsBySchema.size()}"

// invert the grouping: unit -> every schema that contains it
def schemasByUnit = [:]
for (Schema schema : allSchemas) {
	for (def unit : unitsBySchema[schema]) {
		def owners = schemasByUnit[unit]
		if (owners == null) {
			owners = []
			schemasByUnit[unit] = owners
		}
		owners << schema
	}
}

// keep only the units found in at least two schemas
def duplicates = schemasByUnit.findAll { unit, owners -> owners.size() >= 2 }

if (duplicates.isEmpty()) {
	println "No duplicates found in $schema_ursql units"
} else {
	println "Duplicates found"
	for (def entry : duplicates.entrySet()) {
		println AnalecUtils.toString(CQI, word, entry.key)+" in: "
		for (Schema schema : entry.value) {
			println " '"+schema.getProp(schema_property_display)+"'\t"+schema.getProps()
		}
	}
}

return duplicates
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/CheckAnnotationStructureValuesMacro.groovy (revision 2082) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro.urs.edit |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcp.swt.widget.parameters.* |
|
7 |
import org.txm.annotation.urs.* |
|
8 |
import org.txm.searchengine.cqp.corpus.* |
|
9 |
import visuAnalec.elements.* |
|
10 |
|
Formats disponibles : Unified diff