Révision 2099
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/check/CheckAnnotationStructureValuesMacro.groovy (revision 2099) | ||
---|---|---|
16 | 16 |
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="MENTION") |
17 | 17 |
String unit_type |
18 | 18 |
|
19 |
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="REF")
|
|
20 |
String unit_property_name
|
|
19 |
@Field @Option(name="unit_property", usage="", widget="String", required=false, def="REF") |
|
20 |
String unit_property |
|
21 | 21 |
|
22 | 22 |
@Field @Option(name="pruneUnusedValues", usage="", widget="Boolean", required=false, def="false") |
23 | 23 |
boolean pruneUnusedValues |
... | ... | |
38 | 38 |
} |
39 | 39 |
|
40 | 40 |
def props = analecCorpus.getStructure().getUniteProperties(unit_type); |
41 |
if (!props.contains(unit_property_name)) {
|
|
42 |
println "No properties '$unit_property_name' in '$unit_type' unit."
|
|
41 |
if (!props.contains(unit_property)) { |
|
42 |
println "No properties '$unit_property' in '$unit_type' unit." |
|
43 | 43 |
return |
44 | 44 |
} |
45 | 45 |
|
46 |
def prop = unit_property_name
|
|
46 |
def prop = unit_property |
|
47 | 47 |
|
48 | 48 |
def tmpvalues = new HashSet() |
49 | 49 |
tmpvalues.addAll(structure.getValeursProp(Unite.class, unit_type, prop)); |
... | ... | |
61 | 61 |
if (pruneUnusedValues) { |
62 | 62 |
println "Pruning the values..." |
63 | 63 |
for (String val : tmpvalues) { |
64 |
structure.supprimerVal(Unite.class, unit_type, unit_property_name, val);
|
|
64 |
structure.supprimerVal(Unite.class, unit_type, unit_property, val); |
|
65 | 65 |
} |
66 | 66 |
} |
67 | 67 |
} else { |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/Si2SingletonMacro.groovy (revision 2099) | ||
---|---|---|
1 |
// @author Bruno Oberlé |
|
2 |
// v1.0.0 20170711 |
|
3 |
|
|
4 |
// Cette macro individualise tous les noms de référents "SI" en leur attribuant un numéro unique (SI_1, SI_2, etc.). |
|
5 |
|
|
6 |
package org.txm.macro.urs.democrat |
|
7 |
|
|
8 |
import org.apache.commons.lang.* |
|
9 |
import org.kohsuke.args4j.* |
|
10 |
import groovy.transform.* |
|
11 |
import org.txm.* |
|
12 |
import org.txm.rcp.swt.widget.parameters.* |
|
13 |
import org.txm.annotation.urs.* |
|
14 |
import org.txm.searchengine.cqp.* |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import visuAnalec.Message.* |
|
17 |
import visuAnalec.donnees.* |
|
18 |
import visuAnalec.elements.* |
|
19 |
import visuAnalec.vue.* |
|
20 |
|
|
21 |
// CORPS DU SCRIPT |
|
22 |
|
|
23 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
24 |
println "Corpora selection is not a Corpus" |
|
25 |
return |
|
26 |
} |
|
27 |
|
|
28 |
// BEGINNING OF PARAMETERS |
|
29 |
@Field @Option(name="unitType", usage="", widget="String", required=true, def="MENTION") |
|
30 |
def unitType |
|
31 |
@Field @Option(name="refPropertyName", usage="", widget="String", required=true, def="REF") |
|
32 |
def refPropertyName |
|
33 |
if (!ParametersDialog.open(this)) return |
|
34 |
|
|
35 |
// Resolve the selected corpus and its URS (Analec) annotation structure.
// These are intentionally script-binding variables (no 'def'), as in the original script.
corpus = corpusViewSelection
CQI = CQPSearchEngine.getCqiClient()
word = corpus.getWordProperty()
analecCorpus = URSCorpora.getCorpus(corpus)
vue = URSCorpora.getVue(corpus)
structure = analecCorpus.getStructure()

// Stop early if the annotation structure does not declare the requested unit type.
if (!structure.getUnites().contains(unitType)) {
	println "Error: corpus structure does not contains unit with name=$unitType"
	return
}

// Stop early if the unit type does not declare the requested property.
// The message reports the missing property name (it previously printed the unit type).
if (!structure.getUniteProperties(unitType).contains(refPropertyName)) {
	println "Error: corpus structure does not contains property name=$refPropertyName"
	return
}
|
50 |
|
|
51 |
// Counters reported at the end of the run
def nModified = 0
def nIgnored = 0

def mentions = analecCorpus.getUnites(unitType)

// First pass: remember every referent name already present, so that
// generated names never collide with an existing one.
def usedNames = new HashSet()
mentions.each { Unite mention ->
	usedNames.add(mention.getProp(refPropertyName))
}

// Second pass: give each "SI" unit its own numbered name (SI_1, SI_2, ...).
def nextIndex = 1
for (Unite mention : mentions) {
	def current = mention.getProp(refPropertyName)
	if (current != "SI") { // null or any other referent name: leave the unit untouched
		nIgnored++
		continue
	}

	// skip the numbers whose name is already taken
	while (usedNames.contains("SI_" + nextIndex)) {
		nextIndex++
	}
	def freshName = "SI_" + nextIndex
	nextIndex++

	mention.getProps().put(refPropertyName, freshName)
	nModified++
}

println "Result:"
println "- $nModified units of type $unitType have been modified."
println "- $nIgnored units of type $unitType have not been modified.\n"
|
86 |
|
|
87 |
// update the view (see also |
|
88 |
// http://forge.cbp.ens-lyon.fr/redmine/issues/2065) |
|
89 |
URSCorpora.getVue(analecCorpus).retablirVueParDefaut() |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/AllMesuresMacro.groovy (revision 2099) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
|
|
10 |
import groovy.transform.Field |
|
11 |
|
|
12 |
import org.txm.* |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.macroprototypes.urs.exploit.GrammaticalCategoryMacro |
|
16 |
import org.txm.macroprototypes.urs.exploit.NatureOfTheFirstUnitMacro |
|
17 |
import org.txm.macroprototypes.urs.exploit.NumberOfSchemaMacro |
|
18 |
import org.txm.searchengine.cqp.corpus.* |
|
19 |
import org.apache.commons.lang.StringUtils; |
|
20 |
|
|
21 |
// BEGINNING OF PARAMETERS |
|
22 |
|
|
23 |
@Field @Option(name="tsvFile",usage="", widget="FileSave", required=true, def="result.tsv") |
|
24 |
File tsvFile |
|
25 |
|
|
26 |
@Field @Option(name="default_schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
|
27 |
String default_schema_ursql |
|
28 |
|
|
29 |
@Field @Option(name="default_minimum_schema_size", usage="", widget="Integer", required=true, def="3") |
|
30 |
int default_minimum_schema_size |
|
31 |
|
|
32 |
|
|
33 |
@Field @Option(name="schema_property_display_name",usage="", widget="String", required=false, def="REF") |
|
34 |
String schema_property_display_name |
|
35 |
|
|
36 |
@Field @Option(name="default_unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION") |
|
37 |
String default_unit_ursql |
|
38 |
|
|
39 |
@Field @Option(name="default_word_property", usage="", widget="String", required=false, def="word") |
|
40 |
String default_word_property |
|
41 |
@Field @Option(name="default_pos_property", usage="", widget="String", required=false, def="CATEGORIE") |
|
42 |
String default_pos_property |
|
43 |
|
|
44 |
if (!ParametersDialog.open(this)) return; |
|
45 |
// END OF PARAMETERS |
|
46 |
|
|
47 |
println "Corpora selections: "+corpusViewSelections |
|
48 |
|
|
49 |
// 'table' holds one line per processed corpus; 'mesures' holds the measure names
// used as the TSV header. Both are script-binding variables on purpose:
// execMesure() fills 'mesures' through the binding.
table = []
mesures = []

for (def corpus : corpusViewSelections) { // for each corpus selected in the corpora view
	if (!(corpus instanceof MainCorpus)) continue; // only main corpora are measured

	def line = []
	table << line // create and register the table line of this corpus
	line << corpus.getID() // first column: the corpus name

	println "*** Computing mesures for $corpus" // each macro returns a "result" and a "data"

	// 1- referential density
	params = [
		"unit_ursql":default_unit_ursql,
	]
	returnedValue = execMesure(UnitsReferentialDensityMacro, line, corpus, params)
	line << returnedValue["result"]

	// 2- schema lengths
	params = [
		"schema_ursql":default_schema_ursql,
		"minimum_schema_size":default_minimum_schema_size,
		"unit_ursql":default_unit_ursql,
	]
	returnedValue = execMesure(SchemaLengthsMacro, line, corpus, params)
	line << returnedValue["result"]

	// 3- number of schemas
	params = [
		"schema_ursql":default_schema_ursql,
		"minimum_schema_size":default_minimum_schema_size,
		"unit_ursql":default_unit_ursql,
	]
	returnedValue = execMesure(NumberOfSchemaMacro, line, corpus, params)
	line << returnedValue["result"]

	// 4- stability score
	// UnitsStabilityScoreMacro declares its option as "schema_display_property_name":
	// the key must match that name, otherwise the value chosen by the user is not transmitted.
	params = [
		"schema_ursql":default_schema_ursql,
		"minimum_schema_size":default_minimum_schema_size,
		"schema_display_property_name":schema_property_display_name,
		"unit_ursql":default_unit_ursql+"@CATEGORIE=GN Défini|GN Démonstratif|Nom Propre",
		"word_property":default_word_property,
	]
	returnedValue = execMesure(UnitsStabilityScoreMacro, line, corpus, params)
	line << returnedValue["result"]

	// 5- inter-distance
	params = [
		"schema_ursql":default_schema_ursql,
		"minimum_schema_size":default_minimum_schema_size,
		"unit_ursql":default_unit_ursql,
	]
	returnedValue = execMesure(UnitsInterDistanceMacro, line, corpus, params)
	line << returnedValue["result"]

	// 6- nature of the first unit (the whole "data" is stored)
	params = [
		"schema_ursql":default_schema_ursql,
		"minimum_schema_size":default_minimum_schema_size,
		"unit_ursql":default_unit_ursql,
		"word_property":default_pos_property,
	]
	returnedValue = execMesure(NatureOfTheFirstUnitMacro, line, corpus, params)
	line << returnedValue["data"]

	// 7- grammatical category (the whole "data" is stored)
	// NOTE(review): the display-name key below is kept as it was; GrammaticalCategoryMacro
	// is not visible here - confirm the option name it declares.
	params = [
		"schema_ursql":default_schema_ursql,
		"minimum_schema_size":default_minimum_schema_size,
		"schema_property_display_name":schema_property_display_name,
		"unit_ursql":default_unit_ursql,
		"word_property":default_pos_property,
	]
	returnedValue = execMesure(GrammaticalCategoryMacro, line, corpus, params)
	line << returnedValue["data"]
}
|
118 |
|
|
119 |
// WRITE RESULTS IN THE TSV FILE |
|
120 |
// Header line first (empty first cell above the corpus names), then one row per corpus.
tsvFile.withWriter("UTF-8") { out ->
	def header = "\t" + mesures.join("\t")
	out.println(header)
	for (def row : table) {
		out.println(row.join("\t"))
	}
}

println "Done. Results are saved in ${tsvFile.getAbsolutePath()} file."
|
126 |
|
|
127 |
// UTILITY FUNCTIONS |
|
128 |
/**
 * Runs one measure macro on a corpus and returns what the macro returned.
 *
 * The measure name (the macro class simple name without its "Macro" suffix) is
 * registered once in the 'mesures' header list, so the TSV header keeps one
 * column per measure whatever the number of corpora processed.
 *
 * @param mesure the macro class to run
 * @param line the table line of the corpus (not used here, kept for the callers)
 * @param corpus the corpus given to the macro as "corpusViewSelection"
 * @param params the macro parameters, given as "args"
 * @return the non-null value returned by the macro
 * @throws Exception if the macro returned null
 */
def execMesure(def mesure, def line, def corpus, def params) {
	def className = mesure.getSimpleName()
	int suffixIndex = className.indexOf("Macro")
	// keep the whole class name if it does not contain "Macro" (substring(0, -1) would throw)
	def m = (suffixIndex >= 0) ? className.substring(0, suffixIndex) : className

	// register the header only once: this method is called again for each corpus
	if (!mesures.contains(m)) mesures << m

	println "***** ${mesures.indexOf(m) + 1}- $m with parameters: $params"
	def r = gse.run(mesure, ["args":params, "corpusViewSelection":corpus, "monitor":monitor])
	if (r == null) throw new Exception("Null result for the '$m' measure")
	return r;
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsInterDistanceMacro.groovy (revision 2099) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.macro.urs.AnalecUtils |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import org.apache.commons.lang.StringUtils; |
|
17 |
|
|
18 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
19 |
println "Corpora selection is not a Corpus" |
|
20 |
return; |
|
21 |
} |
|
22 |
|
|
23 |
// BEGINNING OF PARAMETERS |
|
24 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
|
25 |
String schema_ursql |
|
26 |
|
|
27 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
|
28 |
int minimum_schema_size |
|
29 |
|
|
30 |
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF") |
|
31 |
String schema_display_property_name |
|
32 |
|
|
33 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION") |
|
34 |
String unit_ursql |
|
35 |
|
|
36 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
37 |
debug |
|
38 |
|
|
39 |
if (!ParametersDialog.open(this)) return; |
|
40 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
41 |
|
|
42 |
CQPCorpus corpus = corpusViewSelection |
|
43 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
44 |
|
|
45 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
46 |
if (errors.size() > 0) { |
|
47 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
48 |
return; |
|
49 |
} |
|
50 |
|
|
51 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
52 |
if (errors.size() > 0) { |
|
53 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
54 |
return; |
|
55 |
} |
|
56 |
|
|
57 |
// Select the schemas, then for each selected unit of each schema record:
// - the distance to the next unit (0 for the last unit of the schema)
// - the "cadence": the smallest of the distances to the next and to the previous unit
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
def distances = [];
def nDistances = 0
def cadences = [];
for (def schema : schemas) {

	def allUnites = schema.getUnitesSousjacentesNonTriees()
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
	Collections.sort(units)

	for (int i = 0 ; i < units.size() ; i++) {
		int d1 = 0; // distance to the next unit
		int d2 = 0; // distance to the previous unit
		if (i < units.size()-1) d1 = units[i+1].getDeb() - units[i].getFin();
		if (d1 < 0) d1 = 0; // overlapping units: no negative distance
		if (i > 0) d2 = units[i].getDeb() - units[i-1].getFin();
		if (d2 < 0) d2 = 0; // overlapping units: no negative distance

		distances << d1
		cadences << Math.min(d1, d2)
		nDistances++
	}
}

// Nothing selected: sum() of an empty list is null and the means would divide by zero.
// Report it and return "NA" scores instead of failing.
if (nDistances == 0) {
	println "** No unit selected with schema_ursql=$schema_ursql and unit_ursql=$unit_ursql: distances cannot be computed."
	return ["result":"NA", "result2":"NA", "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]
}

distances = distances.sort()
cadences = cadences.sort()

int distances_total = distances.sum()
int cadences_total = cadences.sum()
coef = (distances_total / nDistances) // mean distance
cadence = (cadences_total / nDistances) // mean cadence
println "distances $distances"
println "distance moyenne inter-mayonnaise : $distances_total / $nDistances = $coef"
println "distance medianne inter-mayonnaise : "+distances[(int)(distances.size() / 2)]
println "distance quartils : "+distances[0]+" "+distances[(int)(distances.size() / 4)] + " "+distances[(int)(distances.size() / 2)]+" "+distances[(int)(3*distances.size() / 4)]+" "+distances[(int)(distances.size() -1)]
println "cadences $cadences"
println "cadence moyenne : $cadences_total / $nDistances = $cadence"
println "cadence medianne : "+cadences[(int)(cadences.size() / 2)]
println "cadence quartils : "+cadences[0]+" "+cadences[(int)(cadences.size() / 4)] + " "+cadences[(int)(cadences.size() / 2)]+" "+cadences[(int)(3*cadences.size() / 4)]+" "+cadences[(int)(cadences.size() -1)]

return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsStabilityScoreMacro.groovy (revision 2099) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.rcp.swt.widget.parameters.* |
|
12 |
import org.txm.annotation.urs.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.apache.commons.lang.StringUtils; |
|
15 |
import org.txm.macro.urs.AnalecUtils |
|
16 |
import visuAnalec.elements.* |
|
17 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
18 |
|
|
19 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
20 |
println "Corpora selection is not a Corpus" |
|
21 |
return; |
|
22 |
} |
|
23 |
|
|
24 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
|
25 |
String schema_ursql |
|
26 |
|
|
27 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
|
28 |
int minimum_schema_size |
|
29 |
|
|
30 |
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF") |
|
31 |
String schema_display_property_name |
|
32 |
|
|
33 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION") |
|
34 |
String unit_ursql |
|
35 |
|
|
36 |
@Field @Option(name="word_property", usage="", widget="String", required=false, def="word") |
|
37 |
String word_property |
|
38 |
|
|
39 |
@Field @Option(name="show_values", usage="", widget="Boolean", required=false, def="false") |
|
40 |
boolean show_values |
|
41 |
|
|
42 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
43 |
debug |
|
44 |
|
|
45 |
if (!ParametersDialog.open(this)) return; |
|
46 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
47 |
|
|
48 |
|
|
49 |
def corpus = corpusViewSelection |
|
50 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
51 |
|
|
52 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
53 |
if (errors.size() > 0) { |
|
54 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
55 |
return; |
|
56 |
} |
|
57 |
|
|
58 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
59 |
if (errors.size() > 0) { |
|
60 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
61 |
return; |
|
62 |
} |
|
63 |
def CQI = CQPSearchEngine.getCqiClient() |
|
64 |
|
|
65 |
def prop = corpus.getProperty(word_property) |
|
66 |
if (prop == null) { // no CQP property called $word_property |
|
67 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, AnalecUtils.getFilterParameters(unit_ursql)[0], word_property) |
|
68 |
if (errors.size() > 0) { |
|
69 |
println "** $word_property unit property cannot be computed in the corpus with types: $errors." |
|
70 |
return; |
|
71 |
} |
|
72 |
} |
|
73 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999); |
|
74 |
allFormesSet = new HashSet(); |
|
75 |
def coefs = [] |
|
76 |
int n = 1 |
|
77 |
|
|
78 |
int nUnitesAllSchemas = 0 |
|
79 |
int nUnitesTotalSchemas = 0 |
|
80 |
|
|
81 |
// For each schema: stability score = number of selected units / number of distinct forms.
for (def schema : schemas) {
	def formesSet = new HashSet(); // the distinct forms found in the current schema

	def allUnites = schema.getUnitesSousjacentesNonTriees()

	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
	def nUnites = schema.getUnitesSousjacentes().size()
	def nUnitesTotal = units.size()
	for (def unit : units) {

		String forme = null;
		if (prop == null) { // word_property is the analec unit property to use
			forme = unit.getProp(word_property)
		} else { // word_property is a CQP property: join its values over the unit positions
			int[] pos = null;
			if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
			else pos = unit.getDeb()..unit.getFin()

			forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
		}

		formesSet.add(forme)
	}

	if (formesSet.size() == 0 || nUnitesTotal == 0) {
		coef = "NA" // no score without unit or form
	} else {
		coef = (nUnitesTotal/formesSet.size())
	}
	coefs << coef
	allFormesSet.addAll(formesSet)

	// label of the schema: its display property value, or its rank in the selection
	if (schema_display_property_name != null && schema_display_property_name.length() > 0) {
		print schema.getProp(schema_display_property_name)
	} else {
		print schema_ursql+"-"+n+" : "
	}

	println " ($nUnites units) : $nUnitesTotal selected units / ${formesSet.size()} ${word_property}s = $coef"
	if (show_values) {
		println "\t${word_property}s="+formesSet
	}
	n++

	nUnitesAllSchemas += nUnites
	nUnitesTotalSchemas += nUnitesTotal
}

// Global score over all the schemas. Without any form (no schema or no unit selected)
// the division would throw an ArithmeticException: use "NA" as for the per-schema score.
if (allFormesSet.size() == 0) {
	coef = "NA"
} else {
	coef = nUnitesTotalSchemas/allFormesSet.size()
}

return ["result":coefs, "data":["nUnitesTotal":nUnitesTotalSchemas, "allFormesSet":allFormesSet], "coef":(coef)]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasProgressionMacro.groovy (revision 2099) | ||
---|---|---|
36 | 36 |
parent = corpusViewSelection |
37 | 37 |
|
38 | 38 |
// BEGINNING OF PARAMETERS |
39 |
@Field @Option(name="sep", usage="Schemas and units selection part", widget="Separator", required=true, def="Selection") |
|
40 |
String sep |
|
39 | 41 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE") |
40 | 42 |
String schema_ursql |
41 | 43 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3") |
... | ... | |
52 | 54 |
cql_limit |
53 | 55 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
54 | 56 |
boolean strict_inclusion |
55 |
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
56 |
int position |
|
57 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
58 |
int position_in_matches |
|
59 |
@Field @Option(name="sep2", usage="Progression command parameters", widget="Separator", required=true, def="Progression") |
|
60 |
String sep2 |
|
57 | 61 |
@Field @Option(name="struct_name", usage="Structure to display", widget="String", required=true, def="div") |
58 | 62 |
String struct_name |
59 | 63 |
@Field @Option(name="struct_prop", usage="Structure property to display", widget="String", required=true, def="n") |
... | ... | |
153 | 157 |
if (cql_limit_matches != null) { |
154 | 158 |
if (debug) println "corpus matches: "+parent.getMatches() |
155 | 159 |
if (debug) println "filter cql_limit_matches=${cql_limit_matches} with "+selectedAndHighlightedUnits.size()+" units." |
156 |
selectedAndHighlightedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedAndHighlightedUnits, cql_limit_matches, strict_inclusion, position) |
|
160 |
selectedAndHighlightedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedAndHighlightedUnits, cql_limit_matches, strict_inclusion, position_in_matches)
|
|
157 | 161 |
if (debug) println "selectedAndHighlightedUnits=${selectedAndHighlightedUnits.size()}" |
158 | 162 |
} |
159 | 163 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsListMacro.groovy (revision 2099) | ||
---|---|---|
54 | 54 |
cql_limit |
55 | 55 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
56 | 56 |
boolean strict_inclusion |
57 |
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
58 |
int position |
|
59 |
@Field @Option(name="output_mode", usage="If selected units properties and words are shown", widget="StringArray", metaVar="COUNT TABULATED FORMATED CONCORDANCE CQL", required=true, def="FORMATED")
|
|
57 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
58 |
int position_in_matches
|
|
59 |
@Field @Option(name="output_mode", usage="If selected units properties and words are shown", widget="StringArray", metaVar="FORMATED TABULATED COUNT CQL CONCORDANCE", required=true, def="FORMATED")
|
|
60 | 60 |
output_mode |
61 | 61 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
62 | 62 |
debug |
... | ... | |
76 | 76 |
def analecCorpus = URSCorpora.getCorpus(mainCorpus) |
77 | 77 |
|
78 | 78 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, |
79 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position); |
|
79 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position_in_matches);
|
|
80 | 80 |
|
81 | 81 |
def n = 1 |
82 | 82 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasListMacro.groovy (revision 2099) | ||
---|---|---|
29 | 29 |
|
30 | 30 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
31 | 31 |
int minimum_schema_size |
32 |
|
|
32 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
|
33 |
int maximum_schema_size |
|
33 | 34 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
34 | 35 |
String unit_ursql |
35 | 36 |
|
... | ... | |
72 | 73 |
word_prop = corpus.getProperty(word_property) |
73 | 74 |
} |
74 | 75 |
|
75 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
|
|
76 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size);
|
|
76 | 77 |
schemas.sort() {it.getProps()} |
77 | 78 |
def nSchemas = 0 |
78 | 79 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsInterdistanceMacro.groovy (revision 2099) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.macro.urs.AnalecUtils |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import org.apache.commons.lang.StringUtils; |
|
17 |
|
|
18 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
19 |
println "Corpora selection is not a Corpus" |
|
20 |
return; |
|
21 |
} |
|
22 |
|
|
23 |
// BEGINNING OF PARAMETERS |
|
24 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
|
25 |
String schema_ursql |
|
26 |
|
|
27 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
|
28 |
int minimum_schema_size |
|
29 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
|
30 |
int maximum_schema_size |
|
31 |
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF") |
|
32 |
String schema_display_property_name |
|
33 |
|
|
34 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION") |
|
35 |
String unit_ursql |
|
36 |
|
|
37 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
38 |
debug |
|
39 |
|
|
40 |
if (!ParametersDialog.open(this)) return; |
|
41 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
42 |
|
|
43 |
CQPCorpus corpus = corpusViewSelection |
|
44 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
45 |
|
|
46 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
47 |
if (errors.size() > 0) { |
|
48 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
49 |
return; |
|
50 |
} |
|
51 |
|
|
52 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
53 |
if (errors.size() > 0) { |
|
54 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
55 |
return; |
|
56 |
} |
|
57 |
|
|
58 |
// Select the schemas, then for each selected unit of each schema record:
// - the distance to the next unit (0 for the last unit of the schema)
// - the "cadence": the smallest of the distances to the next and to the previous unit
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size);
def distances = [];
def nDistances = 0
def cadences = [];
for (def schema : schemas) {

	def allUnites = schema.getUnitesSousjacentesNonTriees()
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
	Collections.sort(units)

	for (int i = 0 ; i < units.size() ; i++) {
		int d1 = 0; // distance to the next unit
		int d2 = 0; // distance to the previous unit
		if (i < units.size()-1) d1 = units[i+1].getDeb() - units[i].getFin();
		if (d1 < 0) d1 = 0; // overlapping units: no negative distance
		if (i > 0) d2 = units[i].getDeb() - units[i-1].getFin();
		if (d2 < 0) d2 = 0; // overlapping units: no negative distance

		distances << d1
		cadences << Math.min(d1, d2)
		nDistances++
	}
}

// Nothing selected: sum() of an empty list is null and the means would divide by zero.
// Report it and return "NA" scores instead of failing.
if (nDistances == 0) {
	println "** No unit selected with schema_ursql=$schema_ursql and unit_ursql=$unit_ursql: distances cannot be computed."
	return ["result":"NA", "result2":"NA", "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]
}

distances = distances.sort()
cadences = cadences.sort()

int distances_total = distances.sum()
int cadences_total = cadences.sum()
coef = (distances_total / nDistances) // mean distance
cadence = (cadences_total / nDistances) // mean cadence
println "distances $distances"
println "distance moyenne inter-mayonnaise : $distances_total / $nDistances = $coef"
println "distance medianne inter-mayonnaise : "+distances[(int)(distances.size() / 2)]
println "distance quartils : "+distances[0]+" "+distances[(int)(distances.size() / 4)] + " "+distances[(int)(distances.size() / 2)]+" "+distances[(int)(3*distances.size() / 4)]+" "+distances[(int)(distances.size() -1)]
println "cadences $cadences"
println "cadence moyenne : $cadences_total / $nDistances = $cadence"
println "cadence medianne : "+cadences[(int)(cadences.size() / 2)]
println "cadence quartils : "+cadences[0]+" "+cadences[(int)(cadences.size() / 4)] + " "+cadences[(int)(cadences.size() / 2)]+" "+cadences[(int)(3*cadences.size() / 4)]+" "+cadences[(int)(cadences.size() -1)]

return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsSummaryMacro.groovy (revision 2099) | ||
---|---|---|
49 | 49 |
cql_limit |
50 | 50 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=false, def="true") |
51 | 51 |
boolean strict_inclusion |
52 |
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
53 |
int position |
|
52 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
53 |
int position_in_matches
|
|
54 | 54 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
55 | 55 |
debug |
56 | 56 |
if (!ParametersDialog.open(this)) return |
... | ... | |
70 | 70 |
props.addAll(analecCorpus.getStructure().getUniteProperties(type)); |
71 | 71 |
|
72 | 72 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, |
73 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position); |
|
73 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position_in_matches);
|
|
74 | 74 |
|
75 | 75 |
allresults[corpus] = selectedUnits; |
76 | 76 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsReferentialDensityMacro.groovy (revision 2099) | ||
---|---|---|
18 | 18 |
return; |
19 | 19 |
} |
20 | 20 |
|
21 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE") |
|
22 |
String schema_ursql |
|
23 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3") |
|
24 |
int minimum_schema_size |
|
25 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
|
26 |
int maximum_schema_size |
|
21 | 27 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
22 | 28 |
String unit_ursql |
29 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
30 |
int position_in_schema |
|
23 | 31 |
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div") |
24 | 32 |
cql_limit |
25 | 33 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
26 | 34 |
strict_inclusion |
27 |
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
28 |
position |
|
35 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
36 |
position_in_matches
|
|
29 | 37 |
|
30 | 38 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
31 | 39 |
debug |
... | ... | |
39 | 47 |
|
40 | 48 |
int nMots = corpus.getSize(); |
41 | 49 |
|
42 |
def units = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, "", 0, 0,
|
|
43 |
unit_ursql, 0, cql_limit, strict_inclusion, position);
|
|
50 |
def units = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size,
|
|
51 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position_in_matches);
|
|
44 | 52 |
|
45 | 53 |
int nUnites = units.size(); |
46 | 54 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemaLengthsMacro.groovy (revision 2099) | ||
---|---|---|
27 | 27 |
|
28 | 28 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
29 | 29 |
int minimum_schema_size |
30 |
|
|
30 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
|
31 |
int maximum_schema_size |
|
31 | 32 |
@Field @Option(name="schema_property_display", usage="schema property to show", widget="String", required=true, def="REF") |
32 | 33 |
String schema_property_display |
33 | 34 |
|
... | ... | |
55 | 56 |
return; |
56 | 57 |
} |
57 | 58 |
|
58 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
|
|
59 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size);
|
|
59 | 60 |
|
60 | 61 |
int nSchemas = 0; |
61 | 62 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsReferentialStabilityMacro.groovy (revision 2099) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.rcp.swt.widget.parameters.* |
|
12 |
import org.txm.annotation.urs.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.apache.commons.lang.StringUtils; |
|
15 |
import org.txm.macro.urs.AnalecUtils |
|
16 |
import visuAnalec.elements.* |
|
17 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
18 |
|
|
19 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
20 |
println "Corpora selection is not a Corpus" |
|
21 |
return; |
|
22 |
} |
|
23 |
|
|
24 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
|
25 |
String schema_ursql |
|
26 |
|
|
27 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
|
28 |
int minimum_schema_size |
|
29 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
|
30 |
int maximum_schema_size |
|
31 |
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF") |
|
32 |
String schema_display_property_name |
|
33 |
|
|
34 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION") |
|
35 |
String unit_ursql |
|
36 |
|
|
37 |
@Field @Option(name="word_property", usage="", widget="String", required=false, def="word") |
|
38 |
String word_property |
|
39 |
|
|
40 |
@Field @Option(name="show_values", usage="", widget="Boolean", required=false, def="false") |
|
41 |
boolean show_values |
|
42 |
|
|
43 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
44 |
debug |
|
45 |
|
|
46 |
if (!ParametersDialog.open(this)) return; |
|
47 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
48 |
|
|
49 |
|
|
50 |
def corpus = corpusViewSelection |
|
51 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
52 |
|
|
53 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
54 |
if (errors.size() > 0) { |
|
55 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
56 |
return; |
|
57 |
} |
|
58 |
|
|
59 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
60 |
if (errors.size() > 0) { |
|
61 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
62 |
return; |
|
63 |
} |
|
64 |
def CQI = CQPSearchEngine.getCqiClient() |
|
65 |
|
|
66 |
def prop = corpus.getProperty(word_property) |
|
67 |
if (prop == null) { // no CQP property called $word_property |
|
68 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, AnalecUtils.getFilterParameters(unit_ursql)[0], word_property) |
|
69 |
if (errors.size() > 0) { |
|
70 |
println "** $word_property unit property cannot be computed in the corpus with types: $errors." |
|
71 |
return; |
|
72 |
} |
|
73 |
} |
|
74 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size); |
|
75 |
allFormesSet = new HashSet(); |
|
76 |
def coefs = [] |
|
77 |
int n = 1 |
|
78 |
|
|
79 |
int nUnitesAllSchemas = 0 |
|
80 |
int nUnitesTotalSchemas = 0 |
|
81 |
|
|
82 |
for (def schema : schemas) { |
|
83 |
def formesSet = new HashSet(); // contient toutes les formes du CR courant |
|
84 |
nUnitesTotal = 0; |
|
85 |
|
|
86 |
def allUnites = schema.getUnitesSousjacentesNonTriees() |
|
87 |
|
|
88 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
|
89 |
def nUnites = schema.getUnitesSousjacentes().size() |
|
90 |
def nUnitesTotal = units.size() |
|
91 |
for (def unit : units) { |
|
92 |
|
|
93 |
String forme = null; |
|
94 |
if (prop == null) { // word_property is the analec unit property to use |
|
95 |
forme = unit.getProp(word_property) |
|
96 |
} else { |
|
97 |
int[] pos = null; |
|
98 |
if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()] |
|
99 |
else pos = unit.getDeb()..unit.getFin() |
|
100 |
|
|
101 |
forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough |
|
102 |
} |
|
103 |
|
|
104 |
formesSet.add(forme) |
|
105 |
} |
|
106 |
|
|
107 |
if (formesSet.size() == 0 || nUnitesTotal == 0) { |
|
108 |
coef = "NA" |
|
109 |
} else { |
|
110 |
coef = (nUnitesTotal/formesSet.size()) |
|
111 |
} |
|
112 |
coefs << coef |
|
113 |
allFormesSet.addAll(formesSet) |
|
114 |
|
|
115 |
if (schema_display_property_name != null && schema_display_property_name.length() > 0) { |
|
116 |
print schema.getProp(schema_display_property_name) |
|
117 |
} else { |
|
118 |
print schema_ursql+"-"+n+" : " |
|
119 |
} |
|
120 |
|
|
121 |
println " ($nUnites units) : $nUnitesTotal selected units / ${formesSet.size()} ${word_property}s = $coef" |
|
122 |
if (show_values) { |
|
123 |
println "\t${word_property}s="+formesSet |
|
124 |
} |
|
125 |
n++ |
|
126 |
|
|
127 |
nUnitesAllSchemas += nUnites |
|
128 |
nUnitesTotalSchemas += nUnitesTotal |
|
129 |
} |
|
130 |
|
|
131 |
coef = (allFormesSet.size() == 0) ? "NA" : nUnitesTotalSchemas/allFormesSet.size() // global stability coefficient; "NA" (as for each schema) when no form was collected, instead of dividing by zero |
|
132 |
//println "ALL : ($nUnitesAllSchemas units) : $nUnitesTotalSchemas selected units / ${allFormesSet.size()} ${word_property}s = $coef" |
|
133 |
|
|
134 |
return ["result":coefs, "data":["nUnitesTotal":nUnitesTotalSchemas, "allFormesSet":allFormesSet], "coef":(coef)] |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsIndexMacro.groovy (revision 2099) | ||
---|---|---|
49 | 49 |
String unit_ursql |
50 | 50 |
@Field @Option(name="unit_property_display", usage="Unit property to count", widget="String", required=true, def="CATEGORIE") |
51 | 51 |
String unit_property_display |
52 |
@Field @Option(name="cqp_property_display", usage="Word property to display instead of the unit property", widget="String", required=false, def="")
|
|
53 |
String cqp_property_display
|
|
52 |
@Field @Option(name="word_property_display", usage="Word property to display instead of the unit property", widget="String", required=false, def="")
|
|
53 |
String word_property_display
|
|
54 | 54 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
55 | 55 |
int position_in_schema |
56 | 56 |
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div") |
57 | 57 |
cql_limit |
58 | 58 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
59 | 59 |
strict_inclusion |
60 |
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
61 |
position |
|
60 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
61 |
position_in_matches
|
|
62 | 62 |
//@Field @Option(name="output_2D", usage="output barplot or 3D plot", widget="Boolean", required=true, def="true") |
63 | 63 |
output_2D = true |
64 | 64 |
@Field @Option(name="output_showlegend", usage="output barplot or 3D plot", widget="Boolean", required=true, def="true") |
... | ... | |
75 | 75 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
76 | 76 |
|
77 | 77 |
|
78 |
if (cqp_property_display != null && cqp_property_display.length() > 0 && unit_property_display != null && unit_property_display.length() > 0) {
|
|
79 |
println "Warning: both unit_property_display=$unit_property_display and cqp_property_display=$cqp_property_display are set. The index will be computed with the $cqp_property_display CQP property."
|
|
78 |
if (word_property_display != null && word_property_display.length() > 0 && unit_property_display != null && unit_property_display.length() > 0) {
|
|
79 |
println "Warning: both unit_property_display=$unit_property_display and word_property_display=$word_property_display are set. The index will be computed with the $word_property_display CQP property."
|
|
80 | 80 |
} |
81 | 81 |
|
82 |
if (cqp_property_display.length() == 0 && unit_property_display.length() == 0) {
|
|
83 |
println "Error: no analysis property specified in unit_property_display or cqp_property_display. Aborting."
|
|
82 |
if (word_property_display.length() == 0 && unit_property_display.length() == 0) {
|
|
83 |
println "Error: no analysis property specified in unit_property_display or word_property_display. Aborting."
|
|
84 | 84 |
return |
85 | 85 |
} |
86 | 86 |
|
... | ... | |
115 | 115 |
} |
116 | 116 |
|
117 | 117 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, |
118 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position); |
|
118 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position_in_matches);
|
|
119 | 119 |
selectedUnits = new HashSet(selectedUnits) |
120 | 120 |
def counts = null |
121 |
if (cqp_property_display != null && cqp_property_display.length() > 0) {
|
|
122 |
Property p = corpus.getProperty(cqp_property_display)
|
|
121 |
if (word_property_display != null && word_property_display.length() > 0) {
|
|
122 |
Property p = corpus.getProperty(word_property_display)
|
|
123 | 123 |
int[] pos = null |
124 | 124 |
counts = [:] |
125 | 125 |
for (def unit : selectedUnits) { |
... | ... | |
163 | 163 |
else { corpus = corpusViewSelection } |
164 | 164 |
|
165 | 165 |
def title = "${corpus.getMainCorpus()}.${corpusViewSelection}\n${unit_ursql}" |
166 |
title += "[${position}]." |
|
167 |
if (cqp_property_display.length() > 0) title += "${cqp_property_display} frequencies"
|
|
166 |
title += "[${position_in_matches}]."
|
|
167 |
if (word_property_display.length() > 0) title += "${word_property_display} frequencies"
|
|
168 | 168 |
else if (unit_property_display.length() > 0) title += "${unit_property_display} frequencies" |
169 | 169 |
if (cql_limit != null && !cql_limit.getQueryString().equals("\"\"")) title += "\n(${cql_limit} limits)" |
170 | 170 |
|
171 |
if (cqp_property_display.length() > 0) println "Index de la propriété $cqp_property_display des mots des unités $unit_ursql[$position] de ${corpus.getMainCorpus()}.${corpusViewSelection}"
|
|
172 |
else println "Index de la propriété $unit_property_display des unités $unit_ursql[$position] de ${corpus.getMainCorpus()}.${corpusViewSelection}" |
|
171 |
if (word_property_display.length() > 0) println "Index de la propriété $word_property_display des mots des unités $unit_ursql[$position_in_matches] de ${corpus.getMainCorpus()}.${corpusViewSelection}"
|
|
172 |
else println "Index de la propriété $unit_property_display des unités $unit_ursql[$position_in_matches] de ${corpus.getMainCorpus()}.${corpusViewSelection}"
|
|
173 | 173 |
println "$unit_property_display\t"+selection.join("\t") |
174 | 174 |
|
175 | 175 |
keys.eachWithIndex { prop_val, i -> |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/Si2SingletonMacro.groovy (revision 2099) | ||
---|---|---|
1 |
// @author Bruno Oberlé |
|
2 |
// v1.0.0 20170711 |
|
3 |
|
|
4 |
// Cette macro individualise tous les noms de référents "SI" en leur attribuant un numéro unique (SI_1, SI_2, etc.). |
|
5 |
|
|
6 |
package org.txm.macro.urs.edit |
|
7 |
|
|
8 |
import org.apache.commons.lang.* |
|
9 |
import org.kohsuke.args4j.* |
|
10 |
import groovy.transform.* |
|
11 |
import org.txm.* |
|
12 |
import org.txm.rcp.swt.widget.parameters.* |
|
13 |
import org.txm.annotation.urs.* |
|
14 |
import org.txm.searchengine.cqp.* |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import visuAnalec.Message.* |
|
17 |
import visuAnalec.donnees.* |
|
18 |
import visuAnalec.elements.* |
|
19 |
import visuAnalec.vue.* |
|
20 |
|
|
21 |
// CORPS DU SCRIPT |
|
22 |
|
|
23 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
24 |
println "Corpora selection is not a Corpus" |
|
25 |
return |
|
26 |
} |
|
27 |
|
|
28 |
// BEGINNING OF PARAMETERS |
|
29 |
@Field @Option(name="unitType", usage="", widget="String", required=true, def="MENTION") |
|
30 |
def unitType |
|
31 |
@Field @Option(name="refPropertyName", usage="", widget="String", required=true, def="REF") |
|
32 |
def refPropertyName |
|
33 |
if (!ParametersDialog.open(this)) return |
|
34 |
|
|
35 |
corpus = corpusViewSelection |
|
36 |
CQI = CQPSearchEngine.getCqiClient() |
|
37 |
word = corpus.getWordProperty() |
|
38 |
analecCorpus = URSCorpora.getCorpus(corpus) |
|
39 |
vue = URSCorpora.getVue(corpus) |
|
40 |
structure = analecCorpus.getStructure() |
|
41 |
if (!structure.getUnites().contains(unitType)) { // check if the structure contains the unitType units |
|
42 |
println "Error: corpus structure does not contains unit with name=$unitType" |
|
43 |
return |
|
44 |
} |
|
45 |
|
|
46 |
if (!structure.getUniteProperties(unitType).contains(refPropertyName)) { |
|
47 |
println "Error: corpus structure does not contains property name=$refPropertyName" // report the missing property name, not the unit type |
|
48 |
return |
|
49 |
} |
|
50 |
|
|
51 |
def nModified = 0 |
|
52 |
def nIgnored = 0 |
|
53 |
|
|
54 |
def units = analecCorpus.getUnites(unitType) |
|
55 |
//units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() } |
|
56 |
|
|
57 |
def refSet = new HashSet() |
|
58 |
for (Unite unit : units) { // process all units |
|
59 |
def prop = unit.getProp(refPropertyName) |
|
60 |
refSet.add(prop) |
|
61 |
} |
|
62 |
|
|
63 |
def counter = 1 |
|
64 |
for (Unite unit : units) { // process all units |
|
65 |
|
|
66 |
def prop = unit.getProp(refPropertyName) |
|
67 |
if (prop && prop == "SI") { |
|
68 |
def name = "SI_" + counter |
|
69 |
while (refSet.contains(name)) { |
|
70 |
counter++ |
|
71 |
name = "SI_" + counter |
|
72 |
} |
|
73 |
counter++ |
|
74 |
//println "old prop"+ prop |
|
75 |
unit.getProps().put(refPropertyName, name) |
|
76 |
//println "new prop"+ name |
|
77 |
nModified++ |
|
78 |
} else { |
|
79 |
nIgnored++ |
|
80 |
} |
|
81 |
} |
|
82 |
|
|
83 |
println "Result:" |
|
84 |
println "- $nModified units of type $unitType have been modified." |
|
85 |
println "- $nIgnored units of type $unitType have not been modified.\n" |
|
86 |
|
|
87 |
// update the view (see also |
|
88 |
// http://forge.cbp.ens-lyon.fr/redmine/issues/2065) |
|
89 |
URSCorpora.getVue(analecCorpus).retablirVueParDefaut() |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/ConcordanceToUnitsMacro.groovy (revision 2099) | ||
---|---|---|
22 | 22 |
def create_only_if_new |
23 | 23 |
|
24 | 24 |
@Field @Option(name="prop", usage="prop", widget="String", required=true, def="REF") |
25 |
def prop |
|
25 |
def property_name
|
|
26 | 26 |
|
27 | 27 |
@Field @Option(name="value", usage="default value", widget="String", required=true, def="NAME") |
28 |
def value |
|
28 |
def property_value
|
|
29 | 29 |
|
30 | 30 |
// END OF PARAMETERS |
31 | 31 |
|
... | ... | |
64 | 64 |
analecCorpus.getStructure().ajouterType(Unite.class, unit_type); |
65 | 65 |
} |
66 | 66 |
|
67 |
if (!analecCorpus.getStructure().getNomsProps(Unite.class, unit_type).contains(prop)) { |
|
67 |
if (!analecCorpus.getStructure().getNomsProps(Unite.class, unit_type).contains(property_name)) {
|
|
68 | 68 |
//println "The corpus structure does not contains unit with type=$unit_type" |
69 | 69 |
//return; |
70 |
analecCorpus.getStructure().ajouterProp(Unite.class, unit_type, prop) |
|
70 |
analecCorpus.getStructure().ajouterProp(Unite.class, unit_type, property_name)
|
|
71 | 71 |
} |
72 | 72 |
|
73 |
if (!analecCorpus.getStructure().getValeursProp(Unite.class, unit_type, prop).contains(value)) {
|
|
73 |
if (!analecCorpus.getStructure().getValeursProp(Unite.class, unit_type, property_name).contains(property_value)) {
|
|
74 | 74 |
//println "The corpus structure does not contains unit with type=$unit_type" |
75 | 75 |
//return; |
76 |
analecCorpus.getStructure().ajouterVal(Unite.class, unit_type, prop, value) |
|
76 |
analecCorpus.getStructure().ajouterVal(Unite.class, unit_type, property_name, property_value) // register the same value that the guard above tested (field renamed from 'value' to 'property_value')
|
|
77 | 77 |
} |
78 | 78 |
|
79 | 79 |
// browse lines and check |
... | ... | |
103 | 103 |
if (do_create) { |
104 | 104 |
n++ |
105 | 105 |
def props = [:] |
106 |
props[prop] = value
|
|
106 |
props[property_name] = property_value
|
|
107 | 107 |
Unite u = analecCorpus.addUniteSaisie(unit_type, m.getStart(), m.getEnd(), props) |
108 | 108 |
// println "$props -> "+u.getProps() |
109 | 109 |
} |
110 | 110 |
} |
111 |
println "$n $unit_type created." |
|
111 |
if (property_name != null) { |
|
112 |
println "$n $unit_type created with $property_name='$property_value'." |
|
113 |
} else { |
|
114 |
println "$n $unit_type created." |
|
115 |
} |
|
116 |
|
|
112 | 117 |
if (n > 0) corpus.setIsModified(true); |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/UnitsDeleteMacro.groovy (revision 2099) | ||
---|---|---|
41 | 41 |
String schema_ursql |
42 | 42 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3") |
43 | 43 |
int minimum_schema_size |
44 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
|
45 |
int maximum_schema_size |
|
44 | 46 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
45 | 47 |
String unit_ursql |
46 | 48 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
... | ... | |
49 | 51 |
cql_limit |
50 | 52 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
51 | 53 |
boolean strict_inclusion |
52 |
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
53 |
int position |
|
54 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
55 |
int position_in_matches
|
|
54 | 56 |
|
55 | 57 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
56 | 58 |
debug |
... | ... | |
68 | 70 |
Vue analecView = URSCorpora.getVue(corpus) |
69 | 71 |
Structure structure = analecCorpus.getStructure() |
70 | 72 |
|
71 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, Integer.MAX_VALUE,
|
|
72 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position); |
|
73 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size,
|
|
74 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position_in_matches);
|
|
73 | 75 |
|
74 | 76 |
def n = 0 |
75 | 77 |
def nerrors = 0 |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/ResetAllAnnotationsMacro.groovy (revision 2099) | ||
---|---|---|
13 | 13 |
return; |
14 | 14 |
} |
15 | 15 |
|
16 |
@Field @Option(name="I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS", usage="an example boolean", widget="Boolean", required=false, def="false")
|
|
17 |
def I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS
|
|
16 |
@Field @Option(name="CONFIRMATION", usage="select to confirm the annotations reset", widget="Boolean", required=false, def="false")
|
|
17 |
def CONFIRMATION
|
|
18 | 18 |
|
19 | 19 |
// Open the parameters input dialog box |
20 | 20 |
if (!ParametersDialog.open(this)) return; |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/UnitsAnnotateMacro.groovy (revision 2099) | ||
---|---|---|
41 | 41 |
String schema_ursql |
42 | 42 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3") |
43 | 43 |
int minimum_schema_size |
44 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
|
45 |
int maximum_schema_size |
|
44 | 46 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
45 | 47 |
String unit_ursql |
46 | 48 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
... | ... | |
49 | 51 |
cql_limit |
50 | 52 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
51 | 53 |
boolean strict_inclusion |
52 |
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
53 |
int position |
|
54 |
|
|
55 |
@Field @Option(name="unit_property_to_set", usage="PROP", widget="String", required=false, def="TESTPROP") |
|
56 |
String unit_property_to_set |
|
57 |
@Field @Option(name="unit_property_value_to_set", usage="VALUE", widget="String", required=false, def="TESTVALUE") |
|
58 |
String unit_property_value_to_set |
|
54 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
55 |
int position_in_matches |
|
56 |
@Field @Option(name="property_name", usage="PROP", widget="String", required=false, def="TESTPROP") |
|
57 |
String property_name |
|
58 |
@Field @Option(name="property_value", usage="VALUE", widget="String", required=false, def="TESTVALUE") |
|
59 |
String property_value |
|
59 | 60 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
60 | 61 |
debug |
61 | 62 |
if (!ParametersDialog.open(this)) return |
62 | 63 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
63 | 64 |
|
64 |
if (unit_property_to_set.length() == 0) {
|
|
65 |
println "unit_property_to_set not set: aborting."
|
|
65 |
if (property_name.length() == 0) {
|
|
66 |
println "property_name not set: aborting."; return // actually stop here: nothing can be annotated without a property name
|
|
66 | 67 |
} |
67 | 68 |
|
68 | 69 |
//corpus = corpusViewSelection |
... | ... | |
76 | 77 |
Vue analecView = URSCorpora.getVue(corpus) |
77 | 78 |
Structure structure = analecCorpus.getStructure() |
78 | 79 |
|
79 |
AnalecUtils.defineProperty(Unite.class, analecCorpus, unit_ursql, unit_property_to_set)
|
|
80 |
AnalecUtils.defineProperty(Unite.class, analecCorpus, unit_ursql, property_name)
|
|
80 | 81 |
analecView.initVueParDefaut() |
81 | 82 |
|
82 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, Integer.MAX_VALUE,
|
|
83 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position); |
|
83 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size,
|
|
84 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position_in_matches);
|
|
84 | 85 |
|
85 | 86 |
println " "+selectedUnits.size()+" units to annotate..." |
86 | 87 |
def n = 0 |
87 | 88 |
def nerrors = 0 |
88 | 89 |
for (Unite unit : selectedUnits) { |
89 | 90 |
|
90 |
if (!analecView.setValeurChamp(unit, unit_property_to_set, unit_property_value_to_set)) {
|
|
91 |
if (!analecView.setValeurChamp(unit, property_name, property_value)) {
|
|
91 | 92 |
errors << ""+unit.getDeb()+"->"+unit.getFin() |
92 | 93 |
nerrors++ |
93 | 94 |
} else { |
... | ... | |
101 | 102 |
println " $nerrors erreurs lors de l'annotation de $corpus" |
102 | 103 |
} |
103 | 104 |
if (errors.size() > 0) { |
104 |
println " Errors while annotating $unit_property_to_set=$unit_property_value_to_set"
|
|
105 |
println " Errors while annotating $property_name=$property_value"
|
|
105 | 106 |
println errors.join("\n") |
106 | 107 |
} |
107 | 108 |
return allResults |
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/exploit/AllMesuresMacro.groovy (revision 2099) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
|
|
10 |
import groovy.transform.Field |
|
11 |
|
|
12 |
import org.txm.* |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import org.apache.commons.lang.StringUtils; |
|
17 |
|
|
18 |
// BEGINNING OF PARAMETERS

// File receiving the results table (written as tab-separated values at the end of the script).
@Field @Option(name="tsvFile",usage="", widget="FileSave", required=true, def="result.tsv")
File tsvFile

// Forwarded as "schema_ursql" to every measure that takes a schema selector.
@Field @Option(name="default_schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String default_schema_ursql

// Forwarded as "minimum_schema_size" to every measure that takes a schema selector.
@Field @Option(name="default_minimum_schema_size", usage="", widget="Integer", required=true, def="3")
int default_minimum_schema_size

// Forwarded as "schema_property_display_name" to the stability and grammatical category measures.
@Field @Option(name="schema_property_display_name",usage="", widget="String", required=false, def="REF")
String schema_property_display_name

// Forwarded as "unit_ursql" to all measures (suffixed with a CATEGORIE filter for the stability score).
@Field @Option(name="default_unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String default_unit_ursql

// Forwarded as "word_property" to the stability score measure.
@Field @Option(name="default_word_property", usage="", widget="String", required=false, def="word")
String default_word_property
// Forwarded as "word_property" to the first-unit nature and grammatical category measures.
@Field @Option(name="default_pos_property", usage="", widget="String", required=false, def="CATEGORIE")
String default_pos_property

if (!ParametersDialog.open(this)) return;
// END OF PARAMETERS

println "Corpora selections: "+corpusViewSelections

// 'table' and 'mesures' are intentionally left undeclared (script binding variables):
// execMesure() below appends the measure names to 'mesures'.
table = []   // one row per processed corpus
mesures = [] // measure names, written as the TSV header

for (def corpus : corpusViewSelections) { // every corpus selected in the corpora view
	if (corpus instanceof MainCorpus) { // anything that is not a main corpus is ignored
		def row = []
		table.add(row) // register the new table row
		row.add(corpus.getID()) // first column: the corpus name

		println "*** Computing mesures for $corpus" // each macro returns a "result" and a "data"

		// Runs one measure and appends the chosen entry ("result" or "data") of its returned value to the row.
		// 'params' and 'returnedValue' stay binding variables, exactly as in the unrolled form.
		def measure = { macro, key, macroArgs ->
			params = macroArgs
			returnedValue = execMesure(macro, row, corpus, params)
			row.add(returnedValue[key])
		}

		measure(UnitsReferentialDensityMacro, "result", [
			"unit_ursql":default_unit_ursql,
		])

		measure(SchemaLengthsMacro, "result", [
			"schema_ursql":default_schema_ursql,
			"minimum_schema_size":default_minimum_schema_size,
			"unit_ursql":default_unit_ursql,
		])

		measure(NumberOfSchemaMacro, "result", [
			"schema_ursql":default_schema_ursql,
			"minimum_schema_size":default_minimum_schema_size,
			"unit_ursql":default_unit_ursql,
		])

		measure(UnitsStabilityScoreMacro, "result", [
			"schema_ursql":default_schema_ursql,
			"minimum_schema_size":default_minimum_schema_size,
			"schema_property_display_name":schema_property_display_name,
			"unit_ursql":default_unit_ursql+"@CATEGORIE=GN Défini|GN Démonstratif|Nom Propre",
			"word_property":default_word_property,
		])

		measure(UnitsInterDistanceMacro, "result", [
			"schema_ursql":default_schema_ursql,
			"minimum_schema_size":default_minimum_schema_size,
			"unit_ursql":default_unit_ursql,
		])

		measure(NatureOfTheFirstUnitMacro, "data", [
			"schema_ursql":default_schema_ursql,
			"minimum_schema_size":default_minimum_schema_size,
			"unit_ursql":default_unit_ursql,
			"word_property":default_pos_property,
		])

		measure(GrammaticalCategoryMacro, "data", [
			"schema_ursql":default_schema_ursql,
			"minimum_schema_size":default_minimum_schema_size,
			"schema_property_display_name":schema_property_display_name,
			"unit_ursql":default_unit_ursql,
			"word_property":default_pos_property,
		])
	}
}

// WRITE RESULTS IN THE TSV FILE
tsvFile.withWriter("UTF-8") { out ->
	// header: empty cell above the corpus column, then the measure names
	out.println "\t"+mesures.join("\t")
	for (def row : table) {
		out.println row.join("\t")
	}
}

println "Done. Results are saved in ${tsvFile.getAbsolutePath()} file."
|
123 |
|
|
124 |
// UTILITY FUNCTIONS |
|
125 |
/**
 * Runs one measure macro on a corpus and hands back whatever the macro returned.
 *
 * The measure name (simple class name up to "Macro") is registered in the script-level
 * 'mesures' list, which the script writes as the TSV header. A name is registered only
 * once: this function is called for every selected corpus, and re-appending the names
 * each time made the header grow by one full set of columns per corpus while each row
 * kept a single set.
 *
 * @param mesure the macro class to run
 * @param line the current table row (not used here; the caller appends the result itself)
 * @param corpus the corpus passed to the macro as "corpusViewSelection"
 * @param params the macro arguments, passed as "args"
 * @return the non-null value returned by gse.run
 * @throws Exception if the macro returned null
 */
def execMesure(def mesure, def line, def corpus, def params) {
	def simpleName = mesure.getSimpleName()
	// indexOf returns -1 when the class name has no "Macro" part; substring(0, -1) would throw
	int suffixStart = simpleName.indexOf("Macro")
	def m = suffixStart >= 0 ? simpleName.substring(0, suffixStart) : simpleName

	if (!mesures.contains(m)) mesures << m // keep one header column per measure

	// 1-based header position of the measure (same numbers as before for the first corpus)
	println "***** ${mesures.indexOf(m) + 1}- $m with parameters: $params"
	def r = gse.run(mesure, ["args":params, "corpusViewSelection":corpus, "monitor":monitor])
	if (r == null) throw new Exception("Null result");
	return r;
}
tmp/org.txm.libs.args4j/src/org/kohsuke/args4j/CmdLineParser.java (revision 2099) | ||
---|---|---|
56 | 56 |
* Kohsuke Kawaguchi (kk@kohsuke.org) |
57 | 57 |
*/ |
58 | 58 |
public class CmdLineParser { |
59 |
|
|
60 |
public static List<String> widgets = Arrays.asList("Query", "Integer", "File", "CreateFile", "FileSave", "FileOpen", "Folder", "StringArray", "StringArrayMultiple", "StructuralUnits" , "String", "Text", "Password", "Separator", "Float", "Boolean", "Date", "Date-Time"); |
|
61 |
|
|
59 | 62 |
/** |
60 | 63 |
* Discovered {@link OptionHandler}s for options. |
61 | 64 |
*/ |
... | ... | |
153 | 156 |
} |
154 | 157 |
} |
155 | 158 |
|
156 |
public static List<String> widgets = Arrays.asList("Query", "Integer", "File", "CreateFile", "FileSave", "FileOpen", "Folder", "StringArray", "StringArrayMultiple", "StructuralUnits" , "String", "Text", "Float", "Boolean", "Date", "Date-Time"); |
|
157 | 159 |
private void checkWidgetNotInMap(String widget) throws IllegalAnnotationError { |
158 | 160 |
if(!widgets.contains(widget)) { |
159 | 161 |
throw new IllegalAnnotationError(Messages.UNDEFINNED_WIDGET.format(widget, widgets)); |
tmp/org.txm.rcp/src/main/java/org/txm/rcp/handlers/scripts/CreateMacro.java (revision 2099) | ||
---|---|---|
180 | 180 |
writer.println("//@Field @Option(name=\"longString\", usage=\"an example longString\", widget=\"Text\", required=false, def=\"hello world!\")"); //$NON-NLS-1$ |
181 | 181 |
writer.println("//def longString"); //$NON-NLS-1$ |
182 | 182 |
writer.println(""); //$NON-NLS-1$ |
183 |
writer.println("//@Field @Option(name=\"password\", usage=\"an example password\", widget=\"Password\", required=false, def=\"hello world!\")"); //$NON-NLS-1$ |
|
184 |
writer.println("//def password"); //$NON-NLS-1$ |
|
185 |
writer.println(""); //$NON-NLS-1$ |
|
183 | 186 |
writer.println("//@Field @Option(name=\"bool\", usage=\"an example boolean\", widget=\"Boolean\", required=false, def=\"true\")"); //$NON-NLS-1$ |
184 | 187 |
writer.println("//def bool"); //$NON-NLS-1$ |
185 | 188 |
writer.println(""); //$NON-NLS-1$ |
tmp/org.txm.rcp/src/main/java/org/txm/rcp/swt/widget/parameters/ParametersDialog.java (revision 2099) | ||
---|---|---|
20 | 20 |
import org.eclipse.swt.SWT; |
21 | 21 |
import org.eclipse.swt.events.SelectionAdapter; |
22 | 22 |
import org.eclipse.swt.events.SelectionEvent; |
23 |
import org.eclipse.swt.layout.GridData; |
|
23 | 24 |
import org.eclipse.swt.layout.GridLayout; |
24 | 25 |
import org.eclipse.swt.widgets.Button; |
25 | 26 |
import org.eclipse.swt.widgets.Composite; |
... | ... | |
139 | 140 |
Log.severe("Wrong default date format: "+stringValue+". Waiting for: "+DateField.STRINGFORMAT+". Error = "+e + "."); |
140 | 141 |
value = new Date(); |
141 | 142 |
} |
142 |
} else if ("Time".equals(widgetName)) { //$NON-NLS-1$
|
|
143 |
} else if ("Time".equals(widgetName)) { //$NON-NLS-1$ |
|
143 | 144 |
value = Integer.parseInt(stringValue); |
144 | 145 |
} else if ("String".equals(widgetName)) { //$NON-NLS-1$ |
145 | 146 |
value = stringValue; |
147 |
} else if ("Separator".equals(widgetName)) { //$NON-NLS-1$ |
|
148 |
value = stringValue; |
|
149 |
} else if ("Password".equals(widgetName)) { //$NON-NLS-1$ |
|
150 |
value = stringValue; |
|
146 | 151 |
} else if ("StringArray".equals(widgetName)) { //$NON-NLS-1$ |
Formats disponibles : Unified diff