Révision 2094
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/ChercherRemplacer.txt (revision 2094) | ||
---|---|---|
1 |
Macro ChercherRemplacer |
|
2 |
Auteur : Matthieu QUIGNARD |
|
3 |
Version : 05 Février 2019 |
|
4 |
|
|
5 |
Retouche la valeur d'une propriété pour la remplacer par une autre. |
|
6 |
Par exemple : CATEGORIE=PRO.CHECK => CATEGORIE=ERREUR |
|
7 |
|
|
8 |
Possibilité d'inclure aussi les mentions dont la valeur initiale est vide. |
|
9 |
Par exemple : CATEGORIE= => CATEGORIE=ERREUR |
|
10 |
|
|
11 |
NB : on peut utiliser cette macro pour retoucher le nom des référents. |
|
12 |
Exemple : REF=roi de france => REF=Le Roi de France |
|
13 |
|
|
0 | 14 |
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/ChercherSupprimerMacro.groovy (revision 2094) | ||
---|---|---|
1 |
// ChercherSupprimer |
|
2 |
// Auteur Matthieu Quignard |
|
3 |
// Date : 14 janvier 2019 |
|
4 |
|
|
5 |
/********** |
|
6 |
Sert à supprimer des mentions qui ont une valeur particulière attribuée |
|
7 |
Par exemple : CATEGORIE=ERREUR |
|
8 |
ou bien : REF=NON_REF |
|
9 |
ou encore : CHECK= |
|
10 |
|
|
11 |
ATTENTION : CETTE MACRO N'EST PAS REVERSIBLE |
|
12 |
***********/ |
|
13 |
|
|
14 |
package org.txm.macroprototypes.urs.misc |
|
15 |
|
|
16 |
import org.apache.commons.lang.* |
|
17 |
import org.kohsuke.args4j.* |
|
18 |
import groovy.transform.* |
|
19 |
import org.txm.* |
|
20 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
21 |
import org.txm.analec.* |
|
22 |
import org.txm.searchengine.cqp.* |
|
23 |
import org.txm.searchengine.cqp.corpus.* |
|
24 |
import visuAnalec.Message.* |
|
25 |
import visuAnalec.donnees.* |
|
26 |
import visuAnalec.elements.* |
|
27 |
import visuAnalec.vue.* |
|
28 |
|
|
29 |
// CORPS DU SCRIPT |
|
30 |
|
|
31 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
32 |
println "Corpora selection is not a Corpus" |
|
33 |
return |
|
34 |
} |
|
35 |
|
|
36 |
// BEGINNING OF PARAMETERS |
|
37 |
@Field @Option(name="unit_type", usage="Unité", widget="String", required=true, def="MENTION") |
|
38 |
def unit_type |
|
39 |
@Field @Option(name="prop_name", usage="Propriété", widget="String", required=true, def="CATEGORIE") |
|
40 |
def prop_name |
|
41 |
@Field @Option(name="val_cherche", usage="Valeur recherchée", widget="String", required=true, def="") |
|
42 |
def val_cherche |
|
43 |
@Field @Option(name="inclureVides", usage="Inclure les valeurs vides", widget="Boolean", required=true, def="true") |
|
44 |
def inclureVides |
|
45 |
|
|
46 |
if (!ParametersDialog.open(this)) return |
|
47 |
|
|
48 |
corpus = corpusViewSelection |
|
49 |
analecCorpus = AnalecCorpora.getCorpus(corpus.getName()) |
|
50 |
vue = AnalecCorpora.getVue(corpus.getName()) |
|
51 |
structure = analecCorpus.getStructure() |
|
52 |
|
|
53 |
if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units |
|
54 |
println "Erreur : le corpus ne contient d'unité de type $unit_type" |
|
55 |
println "Script terminé" |
|
56 |
return |
|
57 |
} |
|
58 |
|
|
59 |
if (!structure.getUniteProperties(unit_type).contains(prop_name)) { |
|
60 |
println "Erreur : les unités $unit_type n'ont pas de propriété $prop_name" |
|
61 |
println "Script terminé" |
|
62 |
return |
|
63 |
} |
|
64 |
|
|
65 |
println "Option 'inclure les valeurs vides' : $inclureVides" |
|
66 |
|
|
67 |
def nDeleted = 0 |
|
68 |
def nIgnored = 0 |
|
69 |
|
|
70 |
def units = analecCorpus.getUnites(unit_type) |
|
71 |
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() } |
|
72 |
|
|
73 |
def garbageBin = [] |
|
74 |
|
|
75 |
for (Unite unit : units) { // process all units |
|
76 |
def val = unit.getProp( prop_name ) |
|
77 |
|
|
78 |
if ( (val == val_cherche) || ( inclureVides && (val == "")) ) { |
|
79 |
garbageBin.add( unit ) |
|
80 |
nDeleted++ |
|
81 |
} else { |
|
82 |
nIgnored++ |
|
83 |
} |
|
84 |
} |
|
85 |
|
|
86 |
// Suppression effective des unités ciblées |
|
87 |
garbageBin.each { |
|
88 |
analecCorpus.supUnite( it ) |
|
89 |
} |
|
90 |
|
|
91 |
if (nDeleted > 0) corpus.setIsModified(true); |
|
92 |
|
|
93 |
println "Result:" |
|
94 |
println "- $nDeleted units of type $unit_type have been deleted." |
|
95 |
println "- $nIgnored units of type $unit_type have not been modified." |
|
96 |
|
|
97 |
AnalecCorpora.getVue(analecCorpus).retablirVueParDefaut() |
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/ChercherRemplacerMacro.groovy (revision 2094) | ||
---|---|---|
1 |
// ChercherRemplacer |
|
2 |
// Auteur Matthieu Quignard |
|
3 |
// Date : 14 janvier 2019 |
|
4 |
|
|
5 |
/********** |
|
6 |
Sert à retoucher une valeur attribuée à une mention et la remplacer par une autre |
|
7 |
Par exemple : CATEGORIE=PRO.CHECK => CATEGORIE=PRO.PER |
|
8 |
ou bien : REF=roi de France => REF=Le Roi de France |
|
9 |
ou encore : REF= => REF=<EMPTY> |
|
10 |
***********/ |
|
11 |
|
|
12 |
package org.txm.macroprototypes.urs.misc |
|
13 |
|
|
14 |
import org.apache.commons.lang.* |
|
15 |
import org.kohsuke.args4j.* |
|
16 |
import groovy.transform.* |
|
17 |
import org.txm.* |
|
18 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
19 |
import org.txm.analec.* |
|
20 |
import org.txm.searchengine.cqp.* |
|
21 |
import org.txm.searchengine.cqp.corpus.* |
|
22 |
import visuAnalec.Message.* |
|
23 |
import visuAnalec.donnees.* |
|
24 |
import visuAnalec.elements.* |
|
25 |
import visuAnalec.vue.* |
|
26 |
|
|
27 |
// CORPS DU SCRIPT |
|
28 |
|
|
29 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
30 |
println "Corpora selection is not a Corpus" |
|
31 |
return |
|
32 |
} |
|
33 |
|
|
34 |
// BEGINNING OF PARAMETERS |
|
35 |
@Field @Option(name="unit_type", usage="Unité", widget="String", required=true, def="MENTION") |
|
36 |
def unit_type |
|
37 |
@Field @Option(name="prop_name", usage="Propriété", widget="String", required=true, def="CATEGORIE") |
|
38 |
def prop_name |
|
39 |
@Field @Option(name="val_cherche", usage="Valeur recherchée", widget="String", required=true, def="") |
|
40 |
def val_cherche |
|
41 |
@Field @Option(name="val_remplace", usage="Valeur de remplacement", widget="String", required=true, def="") |
|
42 |
def val_remplace |
|
43 |
@Field @Option(name="inclureVides", usage="Inclure les valeurs vides", widget="Boolean", required=true, def="true") |
|
44 |
def inclureVides |
|
45 |
|
|
46 |
if (!ParametersDialog.open(this)) return |
|
47 |
|
|
48 |
corpus = corpusViewSelection |
|
49 |
analecCorpus = AnalecCorpora.getCorpus(corpus.getName()) |
|
50 |
vue = AnalecCorpora.getVue(corpus.getName()) |
|
51 |
structure = analecCorpus.getStructure() |
|
52 |
|
|
53 |
if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units |
|
54 |
println "Erreur : le corpus ne contient d'unité de type $unit_type" |
|
55 |
println "Script terminé" |
|
56 |
return |
|
57 |
} |
|
58 |
|
|
59 |
if (!structure.getUniteProperties(unit_type).contains(prop_name)) { |
|
60 |
println "Erreur : les unités $unit_type n'ont pas de propriété $prop_name" |
|
61 |
println "Script terminé" |
|
62 |
return |
|
63 |
} |
|
64 |
|
|
65 |
println "Option 'inclure les valeurs vides' : $inclureVides" |
|
66 |
|
|
67 |
def nModified = 0 |
|
68 |
def nIgnored = 0 |
|
69 |
|
|
70 |
def units = analecCorpus.getUnites(unit_type) |
|
71 |
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() } |
|
72 |
|
|
73 |
for (Unite unit : units) { // process all units |
|
74 |
def val = unit.getProp( prop_name ) |
|
75 |
|
|
76 |
if ( (val == val_cherche) || ( inclureVides && (val == "")) ) { |
|
77 |
vue.setValeurChamp(unit, prop_name, val_remplace) |
|
78 |
nModified++ |
|
79 |
} else { |
|
80 |
nIgnored++ |
|
81 |
} |
|
82 |
} |
|
83 |
|
|
84 |
|
|
85 |
|
|
86 |
if (nModified > 0) corpus.setIsModified(true); |
|
87 |
|
|
88 |
println "Result:" |
|
89 |
println "- $nModified units of type $unit_type have been modified." |
|
90 |
println "- $nIgnored units of type $unit_type have not been modified." |
|
91 |
|
|
92 |
AnalecCorpora.getVue(analecCorpus).retablirVueParDefaut() |
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/ChercherSupprimer.txt (revision 2094) | ||
---|---|---|
1 |
Macro ChercherSupprimer |
|
2 |
Auteur : Matthieu QUIGNARD |
|
3 |
Version : 05 Février 2019 |
|
4 |
|
|
5 |
Supprime de façon **DEFINITIVE** les mentions qui ont une certaine valeur de propriété. |
|
6 |
Comme pour ChercherRemplacer, on peut choisir d'inclure aussi |
|
7 |
les mentions qui ont une valeur vide pour la propriété donnée. |
|
8 |
|
|
9 |
Par exemple : |
|
10 |
CATEGORIE=ERREUR |
|
11 |
CHECK= |
|
12 |
REF=NON_REF (suppression des mentions non référentielles) |
|
13 |
REF=SI (suppression de tous les singletons) |
|
14 |
|
|
15 |
|
|
0 | 16 |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsSummaryMacro.groovy (revision 2094) | ||
---|---|---|
43 | 43 |
int maximum_schema_size |
44 | 44 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
45 | 45 |
String unit_ursql |
46 |
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
47 |
int limit_distance_in_schema
|
|
48 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=false, def="")
|
|
49 |
limit_cql
|
|
46 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
47 |
int position_in_schema
|
|
48 |
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="")
|
|
49 |
cql_limit
|
|
50 | 50 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=false, def="true") |
51 | 51 |
boolean strict_inclusion |
52 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
53 |
int limit_distance
|
|
52 |
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
53 |
int position
|
|
54 | 54 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
55 | 55 |
debug |
56 | 56 |
if (!ParametersDialog.open(this)) return |
... | ... | |
70 | 70 |
props.addAll(analecCorpus.getStructure().getUniteProperties(type)); |
71 | 71 |
|
72 | 72 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, |
73 |
unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);
|
|
73 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position);
|
|
74 | 74 |
|
75 | 75 |
allresults[corpus] = selectedUnits; |
76 | 76 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsReferentialDensityMacro.groovy (revision 2094) | ||
---|---|---|
20 | 20 |
|
21 | 21 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
22 | 22 |
String unit_ursql |
23 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
|
|
24 |
limit_cql
|
|
23 |
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
|
|
24 |
cql_limit
|
|
25 | 25 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
26 | 26 |
strict_inclusion |
27 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
28 |
limit_distance
|
|
27 |
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
28 |
position
|
|
29 | 29 |
|
30 | 30 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
31 | 31 |
debug |
... | ... | |
40 | 40 |
int nMots = corpus.getSize(); |
41 | 41 |
|
42 | 42 |
def units = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, "", 0, 0, |
43 |
unit_ursql, 0, limit_cql, strict_inclusion, limit_distance);
|
|
43 |
unit_ursql, 0, cql_limit, strict_inclusion, position);
|
|
44 | 44 |
|
45 | 45 |
int nUnites = units.size(); |
46 | 46 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsStabilityScoreMacro.groovy (revision 2094) | ||
---|---|---|
36 | 36 |
@Field @Option(name="word_property", usage="", widget="String", required=false, def="word") |
37 | 37 |
String word_property |
38 | 38 |
|
39 |
@Field @Option(name="show_values", usage="", widget="Boolean", required=false, def="false") |
|
40 |
boolean show_values |
|
41 |
|
|
39 | 42 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
40 | 43 |
debug |
41 | 44 |
|
... | ... | |
60 | 63 |
def CQI = CQPSearchEngine.getCqiClient() |
61 | 64 |
|
62 | 65 |
def prop = corpus.getProperty(word_property) |
63 |
|
|
66 |
if (prop == null) { // no CQP property called $word_property |
|
67 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, AnalecUtils.getFilterParameters(unit_ursql)[0], word_property) |
|
68 |
if (errors.size() > 0) { |
|
69 |
println "** $word_property unit property cannot be computed in the corpus with types: $errors." |
|
70 |
return; |
|
71 |
} |
|
72 |
} |
|
64 | 73 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999); |
65 | 74 |
allFormesSet = new HashSet(); |
66 |
nUnitesGrandTotal = 0; |
|
67 | 75 |
def coefs = [] |
68 | 76 |
int n = 1 |
77 |
|
|
78 |
int nUnitesAllSchemas = 0 |
|
79 |
int nUnitesTotalSchemas = 0 |
|
80 |
|
|
69 | 81 |
for (def schema : schemas) { |
70 | 82 |
def formesSet = new HashSet(); // contient toutes les formes du CR courant |
71 | 83 |
nUnitesTotal = 0; |
... | ... | |
73 | 85 |
def allUnites = schema.getUnitesSousjacentesNonTriees() |
74 | 86 |
|
75 | 87 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
76 |
def nUnites = units.size() |
|
88 |
def nUnites = schema.getUnitesSousjacentes().size() |
|
89 |
def nUnitesTotal = units.size() |
|
77 | 90 |
for (def unit : units) { |
78 | 91 |
|
79 | 92 |
String forme = null; |
... | ... | |
88 | 101 |
} |
89 | 102 |
|
90 | 103 |
formesSet.add(forme) |
91 |
|
|
92 |
nUnitesTotal++ |
|
93 | 104 |
} |
94 |
if (formesSet.size() == 0) { |
|
95 |
coef = -1 |
|
105 |
|
|
106 |
if (formesSet.size() == 0 || nUnitesTotal == 0) { |
|
107 |
coef = "NA" |
|
96 | 108 |
} else { |
97 | 109 |
coef = (nUnitesTotal/formesSet.size()) |
98 | 110 |
} |
99 | 111 |
coefs << coef |
100 |
nUnitesGrandTotal += nUnitesTotal; |
|
101 | 112 |
allFormesSet.addAll(formesSet) |
102 | 113 |
|
103 |
if (schema_display_property_name != null) { |
|
114 |
if (schema_display_property_name != null && schema_display_property_name.length() > 0) {
|
|
104 | 115 |
print schema.getProp(schema_display_property_name) |
105 | 116 |
} else { |
106 | 117 |
print schema_ursql+"-"+n+" : " |
107 | 118 |
} |
108 | 119 |
|
109 |
println " ($nUnites units) : $nUnitesTotal selected units / ${formesSet.size()} forms = $coef" |
|
120 |
println " ($nUnites units) : $nUnitesTotal selected units / ${formesSet.size()} ${word_property}s = $coef" |
|
121 |
if (show_values) { |
|
122 |
println "\t${word_property}s="+formesSet |
|
123 |
} |
|
110 | 124 |
n++ |
125 |
|
|
126 |
nUnitesAllSchemas += nUnites |
|
127 |
nUnitesTotalSchemas += nUnitesTotal |
|
111 | 128 |
} |
112 | 129 |
|
113 |
return ["result":coefs, "data":["nUnitesTotal":nUnitesGrandTotal, "allFormesSet":allFormesSet], "coef":(nUnitesGrandTotal/allFormesSet.size())] |
|
130 |
coef = nUnitesTotalSchemas/allFormesSet.size() |
|
131 |
//println "ALL : ($nUnitesAllSchemas units) : $nUnitesTotalSchemas selected units / ${allFormesSet.size()} ${word_property}s = $coef" |
|
132 |
|
|
133 |
return ["result":coefs, "data":["nUnitesTotal":nUnitesTotalSchemas, "allFormesSet":allFormesSet], "coef":(coef)] |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsIndexMacro.groovy (revision 2094) | ||
---|---|---|
51 | 51 |
String unit_property_display |
52 | 52 |
@Field @Option(name="cqp_property_display", usage="Word property to display instead of the unit property", widget="String", required=false, def="") |
53 | 53 |
String cqp_property_display |
54 |
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
55 |
int limit_distance_in_schema
|
|
56 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
|
|
57 |
limit_cql
|
|
54 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
55 |
int position_in_schema
|
|
56 |
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
|
|
57 |
cql_limit
|
|
58 | 58 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
59 | 59 |
strict_inclusion |
60 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
61 |
limit_distance
|
|
62 |
@Field @Option(name="output_2D", usage="output barplot or 3D plot", widget="Boolean", required=true, def="true") |
|
63 |
output_2D |
|
60 |
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
61 |
position
|
|
62 |
//@Field @Option(name="output_2D", usage="output barplot or 3D plot", widget="Boolean", required=true, def="true")
|
|
63 |
output_2D = true
|
|
64 | 64 |
@Field @Option(name="output_showlegend", usage="output barplot or 3D plot", widget="Boolean", required=true, def="true") |
65 | 65 |
output_showlegend |
66 | 66 |
@Field @Option(name="output_fmin", usage="minimal frequency displayed", widget="Integer", required=true, def="0") |
... | ... | |
115 | 115 |
} |
116 | 116 |
|
117 | 117 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, |
118 |
unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);
|
|
118 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position);
|
|
119 | 119 |
selectedUnits = new HashSet(selectedUnits) |
120 | 120 |
def counts = null |
121 | 121 |
if (cqp_property_display != null && cqp_property_display.length() > 0) { |
... | ... | |
163 | 163 |
else { corpus = corpusViewSelection } |
164 | 164 |
|
165 | 165 |
def title = "${corpus.getMainCorpus()}.${corpusViewSelection}\n${unit_ursql}" |
166 |
title += "[${limit_distance}]."
|
|
166 |
title += "[${position}]."
|
|
167 | 167 |
if (cqp_property_display.length() > 0) title += "${cqp_property_display} frequencies" |
168 | 168 |
else if (unit_property_display.length() > 0) title += "${unit_property_display} frequencies" |
169 |
if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) title += "\n(${limit_cql} limits)"
|
|
169 |
if (cql_limit != null && !cql_limit.getQueryString().equals("\"\"")) title += "\n(${cql_limit} limits)"
|
|
170 | 170 |
|
171 |
if (cqp_property_display.length() > 0) println "Index de la propriété $cqp_property_display des mots des unités $unit_ursql[$limit_distance] de ${corpus.getMainCorpus()}.${corpusViewSelection}"
|
|
172 |
else println "Index de la propriété $unit_property_display des unités $unit_ursql[$limit_distance] de ${corpus.getMainCorpus()}.${corpusViewSelection}"
|
|
171 |
if (cqp_property_display.length() > 0) println "Index de la propriété $cqp_property_display des mots des unités $unit_ursql[$position] de ${corpus.getMainCorpus()}.${corpusViewSelection}"
|
|
172 |
else println "Index de la propriété $unit_property_display des unités $unit_ursql[$position] de ${corpus.getMainCorpus()}.${corpusViewSelection}"
|
|
173 | 173 |
println "$unit_property_display\t"+selection.join("\t") |
174 | 174 |
|
175 | 175 |
keys.eachWithIndex { prop_val, i -> |
... | ... | |
223 | 223 |
library(latticeExtra) |
224 | 224 |
library(lattice) |
225 | 225 |
library(RColorBrewer) |
226 |
cloud(value~col+row, m, panel.3d.cloud=panel.3dbars, col.facet='grey', xbase=0.4, ybase=0.4, scales=list(arrows=FALSE, col=1), par.settings = list(axis.line = list(col = "transparent")))
|
|
226 |
cloud(value~col+row, ..., panel.3d.cloud=panel.3dbars, col.facet='grey', xbase=0.4, ybase=0.4, scales=list(arrows=FALSE, col=1), par.settings = list(axis.line = list(col = "transparent")))
|
|
227 | 227 |
""" |
228 | 228 |
} |
229 | 229 |
|
... | ... | |
260 | 260 |
@Override |
261 | 261 |
public void run() { try { |
262 | 262 |
if (UnitsIndexMacro.this.output_histogram) { |
263 |
OpenSVGGraph.OpenSVGFile(file.getAbsolutePath(), selection.toString()+" Units") |
|
263 |
//OpenSVGGraph.OpenSVGFile(file.getAbsolutePath(), selection.toString()+" Units") |
|
264 |
OpenBrowser.openfile(file.getAbsolutePath()) |
|
264 | 265 |
} |
265 | 266 |
if (UnitsIndexMacro.this.output_lexicaltable) { |
266 | 267 |
CorporaView.refreshObject(corpus) |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasProgressionMacro.groovy (revision 2094) | ||
---|---|---|
46 | 46 |
String schema_property_display |
47 | 47 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="") |
48 | 48 |
String unit_ursql |
49 |
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
50 |
int limit_distance_in_schema
|
|
51 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=false, def="")
|
|
52 |
limit_cql
|
|
49 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
50 |
int position_in_schema
|
|
51 |
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="")
|
|
52 |
cql_limit
|
|
53 | 53 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
54 | 54 |
boolean strict_inclusion |
55 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
56 |
int limit_distance
|
|
55 |
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
56 |
int position
|
|
57 | 57 |
@Field @Option(name="struct_name", usage="Structure to display", widget="String", required=true, def="div") |
58 | 58 |
String struct_name |
59 | 59 |
@Field @Option(name="struct_prop", usage="Structure property to display", widget="String", required=true, def="n") |
... | ... | |
77 | 77 |
|
78 | 78 |
def CQI = CQPSearchEngine.getCqiClient() |
79 | 79 |
|
80 |
def limit_cql_matches = null;
|
|
81 |
if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) {
|
|
82 |
def limitssubcorpus = parent.createSubcorpus(limit_cql, parent.getName().toUpperCase())
|
|
83 |
limit_cql_matches = limitssubcorpus.getMatches();
|
|
80 |
def cql_limit_matches = null;
|
|
81 |
if (cql_limit != null && !cql_limit.getQueryString().equals("\"\"")) {
|
|
82 |
def limitssubcorpus = parent.createSubcorpus(cql_limit, parent.getName().toUpperCase())
|
|
83 |
cql_limit_matches = limitssubcorpus.getMatches();
|
|
84 | 84 |
limitssubcorpus.delete(); |
85 | 85 |
} else { |
86 |
limit_cql_matches = parent.getMatches()
|
|
86 |
cql_limit_matches = parent.getMatches()
|
|
87 | 87 |
} |
88 | 88 |
|
89 | 89 |
def queries = [] |
... | ... | |
121 | 121 |
} |
122 | 122 |
|
123 | 123 |
allUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas) |
124 |
if ((unit_ursql != null && unit_ursql.length() > 0) || (limit_cql != null && !limit_cql.getQueryString().equals("\"\""))) {
|
|
124 |
if ((unit_ursql != null && unit_ursql.length() > 0) || (cql_limit != null && !cql_limit.getQueryString().equals("\"\""))) {
|
|
125 | 125 |
if (debug) println "Building selection of units to highlight..." |
126 | 126 |
allHighlightedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas, unit_ursql) |
127 | 127 |
} |
128 | 128 |
|
129 |
if (limit_distance_in_schema > 0) allHighlightedUnits = AnalecUtils.filterUniteByInclusionInSchema(debug, allSchemas, limit_distance_in_schema)
|
|
129 |
if (position_in_schema > 0) allHighlightedUnits = AnalecUtils.filterUniteByInclusionInSchema(debug, allSchemas, position_in_schema)
|
|
130 | 130 |
|
131 | 131 |
if (allUnits.size() == 0) { |
132 | 132 |
println "No unit match for '$unit_ursql' selection. Aborting" |
... | ... | |
150 | 150 |
def selectedAndHighlightedUnits = new HashSet() // faster to find items |
151 | 151 |
if (allHighlightedUnits != null && allHighlightedUnits.containsKey(schema)) selectedAndHighlightedUnits.addAll(allHighlightedUnits[schema]) |
152 | 152 |
|
153 |
if (limit_cql_matches != null) {
|
|
153 |
if (cql_limit_matches != null) {
|
|
154 | 154 |
if (debug) println "corpus matches: "+parent.getMatches() |
155 |
if (debug) println "filter limit_cql_matches=${limit_cql_matches} with "+selectedAndHighlightedUnits.size()+" units."
|
|
156 |
selectedAndHighlightedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedAndHighlightedUnits, limit_cql_matches, strict_inclusion, limit_distance)
|
|
155 |
if (debug) println "filter cql_limit_matches=${cql_limit_matches} with "+selectedAndHighlightedUnits.size()+" units."
|
|
156 |
selectedAndHighlightedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedAndHighlightedUnits, cql_limit_matches, strict_inclusion, position)
|
|
157 | 157 |
if (debug) println "selectedAndHighlightedUnits=${selectedAndHighlightedUnits.size()}" |
158 | 158 |
} |
159 | 159 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsListMacro.groovy (revision 2094) | ||
---|---|---|
48 | 48 |
int maximum_schema_size |
49 | 49 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
50 | 50 |
String unit_ursql |
51 |
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
52 |
int limit_distance_in_schema
|
|
53 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=false, def="")
|
|
54 |
limit_cql
|
|
51 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
52 |
int position_in_schema
|
|
53 |
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="")
|
|
54 |
cql_limit
|
|
55 | 55 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
56 | 56 |
boolean strict_inclusion |
57 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
58 |
int limit_distance
|
|
57 |
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
58 |
int position
|
|
59 | 59 |
@Field @Option(name="output_mode", usage="If selected units properties and words are shown", widget="StringArray", metaVar="COUNT TABULATED FORMATED CONCORDANCE CQL", required=true, def="FORMATED") |
60 | 60 |
output_mode |
61 | 61 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
... | ... | |
76 | 76 |
def analecCorpus = URSCorpora.getCorpus(mainCorpus) |
77 | 77 |
|
78 | 78 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, |
79 |
unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);
|
|
79 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position);
|
|
80 | 80 |
|
81 | 81 |
def n = 1 |
82 | 82 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/ConcordanceToUnitMacro.groovy (revision 2094) | ||
---|---|---|
/*
 * ConcordanceToUnitMacro
 *
 * Creates one URS unit per line of a concordance.
 *
 * The concordance is taken from the active Concordance editor or, failing
 * that, from the CorporaView selection. For each concordance line a unit of
 * type `unit_type` is created over the positions of the line match
 * (match start .. match end) and its property `prop` is set to `value`.
 * The unit type, the property and the value are added to the annotation
 * structure when they are not declared yet.
 *
 * When `create_only_if_new` is true, a line is skipped (and reported) if a
 * unit of the same type already has exactly the same start and end positions.
 */

// STANDARD DECLARATIONS
package org.txm.macro.urs.edit

import org.kohsuke.args4j.*
import groovy.transform.Field
import org.txm.rcp.swt.widget.parameters.*
import org.txm.rcp.editors.concordances.*
import org.txm.searchengine.cqp.corpus.CQPCorpus
import org.txm.searchengine.cqp.corpus.MainCorpus
import org.txm.functions.concordances.*
import org.txm.annotation.urs.*
import org.txm.concordance.core.functions.Concordance
import org.txm.concordance.rcp.editors.ConcordanceEditor
import visuAnalec.elements.Unite

// BEGINNING OF PARAMETERS

@Field @Option(name="unit_type", usage="The unit type to create", widget="String", required=true, def="MENTION")
def unit_type

@Field @Option(name="create_only_if_new", usage="Create the unit if not already annotated", widget="Boolean", required=true, def="true")
def create_only_if_new

@Field @Option(name="prop", usage="prop", widget="String", required=true, def="REF")
def prop

@Field @Option(name="value", usage="default value", widget="String", required=true, def="NAME")
def value

// END OF PARAMETERS

// get a Concordance from 1) the current Concordance editor or 2) the CorporaView selection
Concordance concordance
if (editor instanceof ConcordanceEditor) {
	concordance = editor.getConcordance()
} else if (corpusViewSelection instanceof Concordance) {
	concordance = corpusViewSelection
} else {
	println "You must select a concordance or open a concordance result to run this macro."
	return false
}

if (concordance == null) {
	println "You must compute a concordance before."
	return
}

// check the analec corpus is ready
CQPCorpus corpus = concordance.getCorpus().getMainCorpus();
if (!URSCorpora.isAnnotationStructureReady(corpus)) {
	println "Annotation structure is not ready."
	return
}

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;

// make sure the annotation structure declares the requested unit type,
// property and value; each missing element is created on the fly
def analecCorpus = URSCorpora.getCorpus(corpus)
def structure = analecCorpus.getStructure()

if (!structure.getUnites().contains(unit_type)) {
	structure.ajouterType(Unite.class, unit_type);
}

if (!structure.getNomsProps(Unite.class, unit_type).contains(prop)) {
	structure.ajouterProp(Unite.class, unit_type, prop)
}

if (!structure.getValeursProp(Unite.class, unit_type, prop).contains(value)) {
	structure.ajouterVal(Unite.class, unit_type, prop, value)
}

// browse the concordance lines and create the units
def units = analecCorpus.getUnites(unit_type)
def lines = concordance.getLines()

int n = 0 // number of units created
for (def line : lines) {
	def m = line.getMatch()

	if (create_only_if_new) {
		// find() stops at the first unit with exactly the same bounds as the
		// match, so a line is reported once and the scan ends as soon as possible
		def existing = units.find { it.getDeb() == m.getStart() && it.getFin() == m.getEnd() }
		if (existing != null) { // skip and print the line
			println("skiping concordance line '"+line.keywordToString()+"' at "+line.getViewRef().toString()+" ("+existing.getDeb()+ ", "+existing.getFin()+")")
			continue
		}
	}

	def props = [:]
	props[prop] = value
	analecCorpus.addUniteSaisie(unit_type, m.getStart(), m.getEnd(), props)
	n++
}

println "$n $unit_type created."
// flag the corpus as modified only when something was actually created
if (n > 0) corpus.setIsModified(true);
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/ConcordanceToUnitsMacro.groovy (revision 2094) | ||
---|---|---|
/*
 * ConcordanceToUnitsMacro
 *
 * Creates one URS unit per line of a concordance.
 *
 * The concordance is taken from the active Concordance editor or, failing
 * that, from the CorporaView selection. For each concordance line a unit of
 * type `unit_type` is created over the positions of the line match
 * (match start .. match end) and its property `prop` is set to `value`.
 * The unit type, the property and the value are added to the annotation
 * structure when they are not declared yet.
 *
 * When `create_only_if_new` is true, a line is skipped (and reported) if a
 * unit of the same type already has exactly the same start and end positions.
 */

// STANDARD DECLARATIONS
package org.txm.macro.urs.edit

import org.kohsuke.args4j.*
import groovy.transform.Field
import org.txm.rcp.swt.widget.parameters.*
import org.txm.rcp.editors.concordances.*
import org.txm.searchengine.cqp.corpus.CQPCorpus
import org.txm.searchengine.cqp.corpus.MainCorpus
import org.txm.functions.concordances.*
import org.txm.annotation.urs.*
import org.txm.concordance.core.functions.Concordance
import org.txm.concordance.rcp.editors.ConcordanceEditor
import visuAnalec.elements.Unite

// BEGINNING OF PARAMETERS

@Field @Option(name="unit_type", usage="The unit type to create", widget="String", required=true, def="MENTION")
def unit_type

@Field @Option(name="create_only_if_new", usage="Create the unit if not already annotated", widget="Boolean", required=true, def="true")
def create_only_if_new

@Field @Option(name="prop", usage="prop", widget="String", required=true, def="REF")
def prop

@Field @Option(name="value", usage="default value", widget="String", required=true, def="NAME")
def value

// END OF PARAMETERS

// get a Concordance from 1) the current Concordance editor or 2) the CorporaView selection
Concordance concordance
if (editor instanceof ConcordanceEditor) {
	concordance = editor.getConcordance()
} else if (corpusViewSelection instanceof Concordance) {
	concordance = corpusViewSelection
} else {
	println "You must select a concordance or open a concordance result to run this macro."
	return false
}

if (concordance == null) {
	println "You must compute a concordance before."
	return
}

// check the analec corpus is ready
CQPCorpus corpus = concordance.getCorpus().getMainCorpus();
if (!URSCorpora.isAnnotationStructureReady(corpus)) {
	println "Annotation structure is not ready."
	return
}

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;

// make sure the annotation structure declares the requested unit type,
// property and value; each missing element is created on the fly
def analecCorpus = URSCorpora.getCorpus(corpus)
def structure = analecCorpus.getStructure()

if (!structure.getUnites().contains(unit_type)) {
	structure.ajouterType(Unite.class, unit_type);
}

if (!structure.getNomsProps(Unite.class, unit_type).contains(prop)) {
	structure.ajouterProp(Unite.class, unit_type, prop)
}

if (!structure.getValeursProp(Unite.class, unit_type, prop).contains(value)) {
	structure.ajouterVal(Unite.class, unit_type, prop, value)
}

// browse the concordance lines and create the units
def units = analecCorpus.getUnites(unit_type)
def lines = concordance.getLines()

int n = 0 // number of units created
for (def line : lines) {
	def m = line.getMatch()

	if (create_only_if_new) {
		// find() stops at the first unit with exactly the same bounds as the
		// match, so a line is reported once and the scan ends as soon as possible
		def existing = units.find { it.getDeb() == m.getStart() && it.getFin() == m.getEnd() }
		if (existing != null) { // skip and print the line
			println("skiping concordance line '"+line.keywordToString()+"' at "+line.getViewRef().toString()+" ("+existing.getDeb()+ ", "+existing.getFin()+")")
			continue
		}
	}

	def props = [:]
	props[prop] = value
	analecCorpus.addUniteSaisie(unit_type, m.getStart(), m.getEnd(), props)
	n++
}

println "$n $unit_type created."
// flag the corpus as modified only when something was actually created
if (n > 0) corpus.setIsModified(true);
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/UnitsDeleteMacro.groovy (revision 2094) | ||
---|---|---|
43 | 43 |
int minimum_schema_size |
44 | 44 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
45 | 45 |
String unit_ursql |
46 |
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
47 |
int limit_distance_in_schema
|
|
48 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
|
|
49 |
limit_cql
|
|
46 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
47 |
int position_in_schema
|
|
48 |
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
|
|
49 |
cql_limit
|
|
50 | 50 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
51 | 51 |
boolean strict_inclusion |
52 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
53 |
int limit_distance
|
|
52 |
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
53 |
int position
|
|
54 | 54 |
|
55 | 55 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
56 | 56 |
debug |
... | ... | |
69 | 69 |
Structure structure = analecCorpus.getStructure() |
70 | 70 |
|
71 | 71 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, Integer.MAX_VALUE, |
72 |
unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);
|
|
72 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position);
|
|
73 | 73 |
|
74 | 74 |
def n = 0 |
75 | 75 |
def nerrors = 0 |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/UnitsAnnotateMacro.groovy (revision 2094) | ||
---|---|---|
43 | 43 |
int minimum_schema_size |
44 | 44 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
45 | 45 |
String unit_ursql |
46 |
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
47 |
int limit_distance_in_schema
|
|
48 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
|
|
49 |
limit_cql
|
|
46 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
47 |
int position_in_schema
|
|
48 |
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="<div> [] expand to div")
|
|
49 |
cql_limit
|
|
50 | 50 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
51 | 51 |
boolean strict_inclusion |
52 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
53 |
int limit_distance
|
|
52 |
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
53 |
int position
|
|
54 | 54 |
|
55 | 55 |
@Field @Option(name="unit_property_to_set", usage="PROP", widget="String", required=false, def="TESTPROP") |
56 | 56 |
String unit_property_to_set |
... | ... | |
80 | 80 |
analecView.initVueParDefaut() |
81 | 81 |
|
82 | 82 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, Integer.MAX_VALUE, |
83 |
unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);
|
|
83 |
unit_ursql, position_in_schema, cql_limit, strict_inclusion, position);
|
|
84 | 84 |
|
85 | 85 |
println " "+selectedUnits.size()+" units to annotate..." |
86 | 86 |
def n = 0 |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/AnalecUtils.groovy (revision 2094) | ||
---|---|---|
93 | 93 |
* @param minimum_schema_size |
94 | 94 |
* @param maximum_schema_size |
95 | 95 |
* @param unit_ursql |
96 |
* @param limit_cql
|
|
96 |
* @param cql_limit
|
|
97 | 97 |
* @param strict_inclusion |
98 |
* @param limit_distance
|
|
98 |
* @param position
|
|
99 | 99 |
* @return |
100 | 100 |
*/ |
101 | 101 |
static def selectUnitsInSchema(def debug, Corpus analecCorpus, org.txm.searchengine.cqp.corpus.CQPCorpus corpus, |
102 | 102 |
String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size, |
103 |
String unit_ursql, Integer limit_distance_in_schema, CQLQuery limit_cql, Boolean strict_inclusion, int limit_distance) {
|
|
103 |
String unit_ursql, Integer position_in_schema, CQLQuery cql_limit, Boolean strict_inclusion, int position) {
|
|
104 | 104 |
def groupedUnits = [] |
105 | 105 |
if (schema_ursql != null && schema_ursql.length() > 0 || minimum_schema_size > 1) { |
106 | 106 |
def allSchema = null; |
... | ... | |
114 | 114 |
|
115 | 115 |
groupedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchema, unit_ursql) |
116 | 116 |
|
117 |
if (limit_distance_in_schema >= 0) groupedUnits = AnalecUtils.filterUniteByInclusionInSchema(debug, groupedUnits, limit_distance_in_schema)
|
|
117 |
if (position_in_schema >= 0) groupedUnits = AnalecUtils.filterUniteByInclusionInSchema(debug, groupedUnits, position_in_schema)
|
|
118 | 118 |
|
119 | 119 |
} else { |
120 | 120 |
groupedUnits = ["all":AnalecUtils.findAllInCorpus(debug, analecCorpus, Unite.class, unit_ursql)] |
... | ... | |
122 | 122 |
if (debug >= 2) println "groupedUnits=${groupedUnits.size()}" |
123 | 123 |
|
124 | 124 |
def matches = null |
125 |
if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) {
|
|
126 |
Subcorpus limitssubcorpus = corpus.createSubcorpus(limit_cql, corpus.getID().toUpperCase())
|
|
125 |
if (cql_limit != null && !cql_limit.getQueryString().equals("\"\"")) {
|
|
126 |
Subcorpus limitssubcorpus = corpus.createSubcorpus(cql_limit, corpus.getID().toUpperCase())
|
|
127 | 127 |
matches = limitssubcorpus.getMatches(); |
128 | 128 |
limitssubcorpus.delete(); |
129 | 129 |
} else { |
... | ... | |
132 | 132 |
if (debug >= 2) println "matches=${matches}" |
133 | 133 |
def allUnits = [] |
134 | 134 |
for (def k : groupedUnits.keySet()) { |
135 |
def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, groupedUnits[k], matches, strict_inclusion, limit_distance)
|
|
135 |
def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, groupedUnits[k], matches, strict_inclusion, position)
|
|
136 | 136 |
allUnits.addAll(selectedUnits) |
137 | 137 |
} |
138 | 138 |
if (debug >= 2) println "selectedUnits=${allUnits.size()}" |
... | ... | |
306 | 306 |
return selectedUnitsPerMatch |
307 | 307 |
} |
308 | 308 |
|
309 |
static def filterUniteByInclusion(def debug, def allUnites, def matches, boolean strict_inclusion, int limit_distance) {
|
|
309 |
static def filterUniteByInclusion(def debug, def allUnites, def matches, boolean strict_inclusion, int position) {
|
|
310 | 310 |
|
311 | 311 |
def selectedUnitsPerMatch = groupByMatch(debug, allUnites, matches, strict_inclusion); |
312 | 312 |
//println "selectedUnitsPerMatch size="+selectedUnitsPerMatch.size() |
313 | 313 |
def selectedUnits = [] |
314 |
if (limit_distance != 0) {
|
|
315 |
if (limit_distance > 0) limit_distance--
|
|
314 |
if (position != 0) {
|
|
315 |
if (position > 0) position--
|
|
316 | 316 |
|
317 | 317 |
for (def m : selectedUnitsPerMatch.keySet()) { |
318 |
if (selectedUnitsPerMatch[m].size() > limit_distance && selectedUnitsPerMatch[m].size() > 0) {
|
|
318 |
if (selectedUnitsPerMatch[m].size() > position && selectedUnitsPerMatch[m].size() > 0) {
|
|
319 | 319 |
def units = selectedUnitsPerMatch[m] |
320 | 320 |
//println "$m -> "+units.collect() {it -> it.getDeb()} |
321 | 321 |
units = units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() } |
322 | 322 |
//println "$m -> "+units.collect() {it -> it.getDeb()} |
323 |
selectedUnits << units[limit_distance]
|
|
324 |
if (debug >=3) println "dist select: "+units[limit_distance].getDeb()
|
|
323 |
selectedUnits << units[position]
|
|
324 |
if (debug >=3) println "dist select: "+units[position].getDeb()
|
|
325 | 325 |
} |
326 | 326 |
} |
327 | 327 |
} else { |
... | ... | |
387 | 387 |
int atidx = URSQL.indexOf("@"); |
388 | 388 |
int equalidx = URSQL.indexOf("="); |
389 | 389 |
|
390 |
if (atidx >= 0 && equalidx >= 0 && atidx < equalidx) { |
|
390 |
if (atidx >= 0 && equalidx >= 0 && atidx < equalidx) { // TYPE@PROP=VALUE
|
|
391 | 391 |
type = URSQL.substring(0, atidx) |
392 | 392 |
prop = URSQL.substring(atidx+1, equalidx) |
393 | 393 |
value = URSQL.substring(equalidx+1) |
394 |
} else if (atidx >= 0) { |
|
394 |
} else if (atidx >= 0) { // TYPE@PROP
|
|
395 | 395 |
type = URSQL.substring(0, atidx) |
396 | 396 |
prop = URSQL.substring(atidx+1) |
397 |
} else if (equalidx >= 0) { |
|
397 |
} else if (equalidx >= 0) { // TYPE=VALUE -> not well formed
|
|
398 | 398 |
type = URSQL.substring(0, equalidx) |
399 | 399 |
value = URSQL.substring(equalidx+1) |
400 |
} else { |
|
400 |
} else { // TYPE
|
|
401 | 401 |
type = URSQL; |
402 | 402 |
} |
403 | 403 |
// println(["'"+type+"'", "'"+prop+"'", "'"+value+"'"]) |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/check/CheckDuplicatesInSchemasMacro.groovy (revision 2094) | ||
---|---|---|
96 | 96 |
} |
97 | 97 |
|
98 | 98 |
if (duplicates.size() > 0) { |
99 |
println "Duplicates found"
|
|
99 |
println "${duplicates.size()} duplicates found"
|
|
100 | 100 |
for (def unit : duplicates.keySet()) { |
101 | 101 |
println AnalecUtils.toString(CQI, word, unit)+" in: " |
102 | 102 |
for (Schema schema : duplicates[unit]) { |
... | ... | |
104 | 104 |
} |
105 | 105 |
} |
106 | 106 |
} else { |
107 |
println "No duplicates found in $schema_ursql units" |
|
107 |
println "No duplicates found in $schema_ursql schema units"
|
|
108 | 108 |
} |
109 | 109 |
|
110 | 110 |
return duplicates |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/VerificationsMacro.groovy (revision 2094) | ||
---|---|---|
// Author: Matthieu Quignard
// Date: January 14, 2019

/**********
Automatic checks on the units of a given type. Problems found on a unit are
written into its CHECK property.

1. Flags mentions without a category: CHECK > CAT
2. Flags mentions without a referent: CHECK > REF
3. Strips punctuation at the beginning and at the end of a mention:
   CHECK > BORNESG (left bound moved) / BORNESD (right bound moved);
   a one-token mention that cannot be shrunk is flagged CHECK > SUPPR
4. Strips leading prepositions (tokens tagged PRE; the original note says
   "other than 'de'" -- presumably 'de' carries another tag, TODO confirm)
5. Automatically deletes all empty mentions (no word, length 0) and the
   mentions whose end is before their start
6. Detects mentions having exactly the same bounds: CHECK > DOUBLON
7 (optional). Detects pronouns not covered by any mention: CHECK > NEW
***********/

package org.txm.macro.urs.democrat

import org.apache.commons.lang.*
import org.kohsuke.args4j.*
import groovy.transform.*
import org.txm.*
import org.txm.rcpapplication.swt.widget.parameters.*
import org.txm.analec.*
import org.txm.searchengine.cqp.*
import org.txm.searchengine.cqp.corpus.*
import visuAnalec.Message.*
import visuAnalec.donnees.*
import visuAnalec.elements.*
import visuAnalec.vue.*

// TODO: add the equivalent tags from the TreeTagger tagset
// Punctuation and prepositions
def interditsAuDebut = ["PONfbl", "PONfrt", "PONpxx", "PRE"]
// Punctuation
def interditsALaFin = ["PONfbl", "PONfrt", "PONpxx"]
// Pronouns of all kinds
def listePronoms = ["PROadv", "PROcar", "PROdem", "PROimp", "PROind", "PROint", "PROper", "PROpos", "PROrel"]

// SCRIPT BODY

if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return
}

// BEGINNING OF PARAMETERS
@Field @Option(name="unit_type", usage="Unité", widget="String", required=true, def="MENTION")
def unit_type
@Field @Option(name="pos_property_name", usage="Etiquette de morphosyntaxe", widget="String", required=true, def="frpos")
def pos_property_name
@Field @Option(name="cat_name", usage="Propriété CATEGORIE", widget="String", required=true, def="CATEGORIE")
def cat_name
@Field @Option(name="ref_name", usage="Propriété REF", widget="String", required=true, def="REF")
def ref_name
@Field @Option(name="checkPronouns", usage="Vérifier les pronoms oubliés", widget="Boolean", required=true, def="true")
def checkPronouns

if (!ParametersDialog.open(this)) return

corpus = corpusViewSelection
CQI = Toolbox.getCqiClient()
word = corpus.getWordProperty()
posProperty = corpus.getProperty(pos_property_name)
if (posProperty == null) {
	println "Error: CQP corpus does not contains the word property with name=$pos_property_name"
	return
}

analecCorpus = AnalecCorpora.getCorpus(corpus.getName())
vue = AnalecCorpora.getVue(corpus.getName())
structure = analecCorpus.getStructure()

if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
	println "Error: corpus structure does not contains unit with name=$unit_type"
	return
}

if (!structure.getUniteProperties(unit_type).contains(cat_name)) {
	println "Erreur : les unités $unit_type n'ont pas de propriété $cat_name"
	return
}

if (!structure.getUniteProperties(unit_type).contains(ref_name)) {
	println "Erreur : les unités $unit_type n'ont pas de propriété $ref_name"
	return
}

println "Détection des pronoms oubliés : $checkPronouns"

// Reset the CHECK property: create it if missing, otherwise drop all its former values
if (!structure.getUniteProperties(unit_type).contains("CHECK")) {
	analecCorpus.ajouterProp(Unite.class, unit_type, "CHECK")
} else {
	println "Nettoyage des anciennes annotations CHECK"
	// iterate over a copy -- presumably supprimerVal() alters the collection
	// returned by getValeursProp() (TODO confirm)
	def tmpvalues = new HashSet()
	tmpvalues.addAll(structure.getValeursProp(Unite.class, unit_type, "CHECK"));
	for (String val : tmpvalues) {
		structure.supprimerVal(Unite.class, unit_type, "CHECK", val);
	}
}

structure.ajouterVal(Unite.class, unit_type, "CHECK", "DONE")

def nModified = 0
def nIgnored = 0
def nDeleted = 0
def nAdded = 0

// units to delete once the main loop is over
def garbageBin = []

def nToks = corpus.getSize()
// covered[p] is true when the token at position p lies inside at least one unit
def covered = new boolean[nToks]

errors = new HashMap()
def units = analecCorpus.getUnites(unit_type)
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }

// Positions spanned by a unit, from its start to its end (both included)
def positionsOf = { Unite u ->
	int[] cpos = (u.getDeb() == u.getFin()) ? [u.getDeb()] : (u.getDeb()..u.getFin())
	return cpos
}

// Values of the part-of-speech property at the given positions
def posTagsOf = { int[] cpos ->
	return CQI.cpos2Str(posProperty.getQualifiedName(), cpos)
}

// previous unit in sort order, used for the duplicate detection
def lastUnit = null

for (Unite unit : units) { // process all units
	def erreur = ""

	// 1. Empty categories
	def cat = unit.getProp( cat_name );
	if (cat == "") erreur += "CAT "

	// 2. Empty referents (more serious); categories not needed
	def ref = unit.getProp( ref_name );
	if (ref == "") erreur += "REF "

	// 3. Strip the forbidden tokens at the beginning of the unit; needs the POS tags
	int[] positions = positionsOf(unit)
	def tags = posTagsOf(positions)
	while (interditsAuDebut.contains(tags[0])) {
		if (positions.size() == 1) {
			// a one-token unit cannot be shrunk any further: only flag it
			erreur += "SUPPR"
			break
		}
		unit.setDeb( unit.getDeb() + 1 )
		positions = positionsOf(unit)
		tags = posTagsOf(positions)
		if (!erreur.contains("BORNESG")) erreur += "BORNESG "
	}

	// 4. Strip the forbidden tokens at the end of the unit; needs the POS tags
	positions = positionsOf(unit)
	tags = posTagsOf(positions)
	while (interditsALaFin.contains(tags[ tags.size()-1 ])) {
		if (positions.size() == 1) {
			if (!erreur.contains("SUPPR")) erreur += "SUPPR"
			break
		}
		unit.setFin( unit.getFin() - 1 )
		positions = positionsOf(unit)
		tags = posTagsOf(positions)
		if (!erreur.contains("BORNESD")) erreur += "BORNESD "
	}

	// 5. Deletion of the broken units (incoherent bounds or empty word); categories not needed
	def forme = CQI.cpos2Str(word.getQualifiedName(), positions)[0].trim().toLowerCase()

	// a boolean is used instead of a marker stored in `erreur`, so that the
	// duplicate flag appended below can never cancel a deletion
	boolean toRemove = false
	if (unit.getFin() < unit.getDeb()) {
		println "ERREUR GRAVE : segmentation incohérente"
		toRemove = true
	} else if ( forme.length() == 0 ) {
		println "ERREUR GRAVE : unité sans mot"
		toRemove = true
	}

	// 6. Duplicate detection; categories not needed
	if (!toRemove && lastUnit != null) {
		if ((unit.getDeb() == lastUnit.getDeb()) && (unit.getFin() == lastUnit.getFin()) ) {
			erreur += " DOUBLON "
		}
	}
	lastUnit = unit

	erreur = erreur.trim()
	if (toRemove) {
		garbageBin.add( unit )
		nDeleted++
	} else if (erreur != "") {
		vue.setValeurChamp(unit, "CHECK", erreur)
		nModified++
	} else {
		nIgnored++
	}

	// update the covered tokens
	for (int p=unit.getDeb() ; p <= unit.getFin() ; p++) {
		covered[p] = true
	}
}

// Actual deletion of the incoherent units
garbageBin.each {
	analecCorpus.supUnite( it )
}

// 7. Add the pronouns not covered by any annotation; needs the POS tags
// Browse the uncovered tokens, get their POS and create a unit when it is a pronoun

if (checkPronouns) {
	println "Détection des pronoms oubliés"
	for (int i=0 ; i < nToks ; i++) {
		if (!covered[i]) {
			def tag = CQI.cpos2Str(posProperty.getQualifiedName(), i)
			if (listePronoms.contains(tag[0])) {
				def props = [:]
				props["CHECK"] = "NEW"
				Unite u = analecCorpus.addUniteSaisie(unit_type, i, i, props)
				vue.setValeurChamp(u, "CHECK", "NEW")
				nAdded++
			}
		}
	}
}

if (nAdded + nModified + nDeleted > 0) corpus.setIsModified(true);

println "Result:"
println "- $nModified units of type $unit_type have been modified."
println "- $nDeleted units of type $unit_type have been deleted."
println "- $nIgnored units of type $unit_type have not been modified."
println "- $nAdded forgotten pronominal units of type $unit_type have been added.\n"

AnalecCorpora.getVue(analecCorpus).retablirVueParDefaut()
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/RetoucheComplementDuNomMacro.groovy (revision 2094) | ||
---|---|---|
5 | 5 |
import org.kohsuke.args4j.* |
6 | 6 |
import groovy.transform.* |
7 | 7 |
import org.txm.* |
8 |
import org.txm.rcp.swt.widget.parameters.* |
|
9 |
import org.txm.annotation.urs.*
|
|
8 |
import org.txm.rcpapplication.swt.widget.parameters.*
|
|
9 |
import org.txm.analec.*
|
|
10 | 10 |
import org.txm.searchengine.cqp.* |
11 | 11 |
import org.txm.searchengine.cqp.corpus.* |
12 | 12 |
import visuAnalec.Message.* |
... | ... | |
16 | 16 |
|
17 | 17 |
|
18 | 18 |
/* MACRO pour corriger une erreur d'annotation |
19 |
Retirer le "De" du complément du nom |
|
20 |
Algo : |
|
21 |
POUR CHAQUE MENTION dont le premier mot est "de" (en minuscules) |
|
22 |
SI il existe une autre MENTION dans laquelle celle-ci est totalement incluse |
|
23 |
ALORS incrémenter d'un mot la frontière gauche de la mention |
|
24 |
Ajouter la categorie CDN.CHECK pour qu'on puisse verifier facilement le job. |
|
25 |
*/ |
|
19 |
Retirer le "de" ou le "d'" (forme élidée) du complément du nom |
|
26 | 20 |
|
21 |
Algo : |
|
22 |
POUR CHAQUE MENTION dont le premier mot est "de" (en minuscules) |
|
23 |
SI il existe une autre MENTION dans laquelle celle-ci est totalement incluse |
|
24 |
ALORS incrémenter d'un mot la frontière gauche de la mention |
|
25 |
Ajouter la categorie CDN.CHECK pour qu'on puisse verifier facilement le job. |
|
26 |
|
|
27 |
Cette macro ne s'appuie pas sur des catégories morphosyntaxiques. |
|
28 |
On peut donc la faire tourner avant les frpos2cattex ou fropos2cattex |
|
29 |
|
|
30 |
Auteur : Matthieu QUIGNARD |
|
31 |
Date : 18 janvier 2019 |
|
32 |
*/ |
|
33 |
|
|
27 | 34 |
// BEGINNING OF PARAMETERS |
28 |
if (!(corpusViewSelection instanceof org.txm.searchengine.cqp.corpus.CQPCorpus)) { |
|
29 |
println "Selection must be a Corpus: "+corpusViewSelection |
|
30 |
return; |
|
31 |
} |
|
35 |
|
|
32 | 36 |
// Declare each parameter here |
33 | 37 |
// BEGINNING OF PARAMETERS |
34 | 38 |
@Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION") |
35 |
def unit_type
|
|
39 |
def unit_type |
|
36 | 40 |
@Field @Option(name="category_name", usage="", widget="String", required=true, def="CATEGORIE") |
37 |
def category_name
|
|
41 |
def category_name |
|
38 | 42 |
if (!ParametersDialog.open(this)) return |
39 | 43 |
|
40 |
corpus = corpusViewSelection.getMainCorpus()
|
|
41 |
CQI = CQPSearchEngine.getCqiClient()
|
|
44 |
corpus = corpusViewSelection
|
|
45 |
CQI = Toolbox.getCqiClient()
|
|
42 | 46 |
word = corpus.getWordProperty() |
43 | 47 |
|
44 |
analecCorpus = URSCorpora.getCorpus(corpus)
|
|
45 |
vue = URSCorpora.getVue(corpus)
|
|
48 |
analecCorpus = AnalecCorpora.getCorpus(corpus.getName())
|
|
49 |
vue = AnalecCorpora.getVue(corpus.getName())
|
|
46 | 50 |
structure = analecCorpus.getStructure() |
47 | 51 |
if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units |
48 | 52 |
println "Error: corpus structure does not contains unit with name=$unit_type" |
... | ... | |
51 | 55 |
|
52 | 56 |
|
53 | 57 |
// Si la structure d'annotation ne contient pas CATEGORIE, on la crée avec ses valeurs |
54 |
if (!structure.getUniteProperties(unit_type).contains(category_name)) { |
|
55 |
structure.ajouterProp(Unite.class, unit_type, category_name)
|
|
58 |
if (!structure.getUniteProperties(unit_type).contains(category_name)) {
|
|
59 |
structure.ajouterProp(Unite.class, unit_type, category_name)
|
|
56 | 60 |
} |
57 | 61 |
|
58 | 62 |
def check_cat = "CDN.CHECK" |
... | ... | |
69 | 73 |
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() } |
70 | 74 |
|
71 | 75 |
/* Test sur la premiere mention : |
72 |
def debut1 = units[1].getDeb()
|
|
73 |
def fin1 = units[1].getFin()
|
|
74 |
println "$debut1 - $fin1"
|
|
75 |
units[1].setDeb( debut1 + 1)
|
|
76 |
units[1].setFin( fin1 + 1)
|
|
77 |
URSCorpora.getVue(analecCorpus).retablirVueParDefaut()
|
|
78 |
def debut2 = units[1].getDeb()
|
|
79 |
def fin2 = units[1].getFin()
|
|
80 |
println "$debut2 - $fin2"
|
|
81 |
*/
|
|
76 |
def debut1 = units[1].getDeb() |
|
77 |
def fin1 = units[1].getFin() |
|
78 |
println "$debut1 - $fin1" |
|
79 |
units[1].setDeb( debut1 + 1) |
|
80 |
units[1].setFin( fin1 + 1) |
|
81 |
AnalecCorpora.getVue(analecCorpus).retablirVueParDefaut()
|
|
82 |
def debut2 = units[1].getDeb() |
|
83 |
def fin2 = units[1].getFin() |
|
84 |
println "$debut2 - $fin2" |
|
85 |
*/ |
|
82 | 86 |
|
83 | 87 |
|
84 | 88 |
for (Unite unit : units) { // process all units |
85 |
|
|
89 |
|
|
86 | 90 |
def debut = unit.getDeb() |
87 | 91 |
def fin = unit.getFin() |
88 | 92 |
def premierMot = CQI.cpos2Str(word.getQualifiedName(), debut)[0] |
89 |
|
|
90 |
if (premierMot != "de") {
|
|
93 |
|
|
94 |
if ((premierMot != "de") && (premierMot !="d'")) {
|
|
91 | 95 |
nIgnored1++ |
92 | 96 |
compteur++ |
93 | 97 |
continue |
94 | 98 |
} else { |
95 |
for (i = compteur-1; i >= 0 ; i--) {
|
|
99 |
for (i = compteur-1; i>=0 ; i--) {
|
|
96 | 100 |
def u = units[i] |
97 | 101 |
def udeb = u.getDeb() |
98 | 102 |
def ufin = u.getFin() |
99 | 103 |
if (ufin >= fin) { |
100 |
println "\nAVANT => Unit $compteur : $debut - $fin"
|
|
101 |
if (fin > debut) unit.setDeb( debut++ )
|
|
102 |
else println "not resizing"
|
|
103 |
def debut2 = unit.getDeb()
|
|
104 |
def fin2 = unit.getFin()
|
|
105 |
URSCorpora.getVue(analecCorpus).retablirVueParDefaut()
|
|
106 |
println "APRES => Unit $compteur : $debut2 - $fin2"
|
|
107 |
unit.getProps().put(category_name, check_cat)
|
|
108 |
break
|
|
104 |
println "\nAVANT => Unit $compteur : $debut - $fin"
|
|
105 |
if (fin > debut) unit.setDeb( ++debut )
|
|
106 |
else println "not resizing"
|
|
107 |
def debut2 = unit.getDeb()
|
|
108 |
def fin2 = unit.getFin()
|
|
109 |
AnalecCorpora.getVue(analecCorpus).retablirVueParDefaut()
|
|
110 |
println "APRES => Unit $compteur : $debut2 - $fin2"
|
|
111 |
unit.getProps().put(category_name, check_cat)
|
|
112 |
break
|
|
109 | 113 |
} |
110 | 114 |
} |
111 |
if (i < 0) nIgnored2++
|
|
115 |
if (i <0) nIgnored2++ |
|
112 | 116 |
else nModified++ |
113 | 117 |
compteur++ |
114 |
} |
|
118 |
}
|
|
115 | 119 |
} |
116 | 120 |
|
117 | 121 |
println "\nResult:" |
... | ... | |
121 | 125 |
println "Total ($compteur)." |
122 | 126 |
|
123 | 127 |
// END OF PARAMETERS |
124 |
URSCorpora.getVue(analecCorpus).retablirVueParDefaut()
|
|
128 |
AnalecCorpora.getVue(analecCorpus).retablirVueParDefaut()
|
|
125 | 129 |
println "corpora selection: "+corpusViewSelection |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/SUJ-PARTINF.txt (revision 2094) | ||
---|---|---|
1 |
Macro : SUJ-PARTINF |
|
2 |
Auteur : Matthieu QUIGNARD |
|
3 |
Date : 05 Février 2019 |
|
4 |
|
|
5 |
Recatégorise les mentions étiquetées en SUJ.ZERO pour distinguer les cas |
|
6 |
des verbes conjugués (qui restent en SUJ.ZERO) |
|
7 |
des verbes à l'infinitif ou au participe passé/présent qui, eux, vont |
|
8 |
passer sous la catégorie SUJ.PARTINF |
|
9 |
|
|
10 |
Cette macro est réversible. |
|
11 |
Il suffit d'invoquer la macro ChercherRemplacer et substituer |
|
12 |
CATEGORIE=SUJ.PARTINF |
|
13 |
en |
|
14 |
CATEGORIE=SUJ.ZERO |
|
15 |
|
|
16 |
NB : fonctionne avec le tagset fro (ancien français) et le tagset TreeTagger. |
|
17 |
Infinitif : VERinf VER:infi |
|
18 |
Part.Pass : VERppa VER:pper |
|
19 |
Part.Pres : VERppe VER:ppre |
|
0 | 20 |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/Verifications_alMacro.groovy (revision 2094) | ||
---|---|---|
1 |
// Auteur Matthieu Quignard |
|
2 |
// Date : 14 janvier 2019 |
|
3 |
|
|
4 |
/********** |
|
5 |
Vérifications automatiques |
|
6 |
1. Repère les mentions sans catégorie : CHECK > CAT |
|
7 |
2. Repère les mentions sans référent : CHECK > REF |
|
8 |
3. Supprime les ponctuations en début et en fin de mention : CHECK > BORNES |
|
9 |
4. Supprime les prépositions autres que 'de' en début de mention : CHECK > BORNES |
|
10 |
5. Supprime automatiquement toutes les mentions vides = sans aucun mot = de longueur 0 |
|
11 |
6. Détecter les mentions qui ont exactement les mêmes bornes : CHECK > DOUBLON |
|
12 |
7 (option). Détecter les pronoms hors mention : CHECK > NEW |
|
13 |
***********/ |
|
14 |
|
|
15 |
package org.txm.macro.urs.democrat |
|
16 |
|
|
17 |
import org.apache.commons.lang.* |
|
18 |
import org.kohsuke.args4j.* |
|
19 |
import groovy.transform.* |
|
20 |
import org.txm.* |
|
21 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
22 |
import org.txm.analec.* |
|
23 |
import org.txm.searchengine.cqp.* |
Formats disponibles : Unified diff