Révision 1852
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/AjoutDefinitudeMacro.groovy (revision 1852) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils; |
|
9 |
import org.kohsuke.args4j.* |
|
10 |
|
|
11 |
import groovy.transform.Field |
|
12 |
|
|
13 |
import org.txm.Toolbox; |
|
14 |
import org.txm.rcp.swt.widget.parameters.* |
|
15 |
import org.txm.annotation.urs.* |
|
16 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
17 |
import org.txm.searchengine.cqp.corpus.* |
|
18 |
|
|
19 |
import visuAnalec.Message.StructureEvent; |
|
20 |
import visuAnalec.Message.TypeModifStructure; |
|
21 |
import visuAnalec.donnees.Structure; |
|
22 |
import visuAnalec.elements.Unite; |
|
23 |
import visuAnalec.vue.Vue |
|
24 |
|
|
25 |
// Annotates every URS unit of type `unit_type` with a DEFINITUDE property
// (DEFINI, INDEFINI, DEMONSTRATIF, AMBIGU or NONE) chosen from the first
// word of the unit, then prints how many units fell in each category.

// The macro only works on a main corpus selected in the Corpora view.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
// unit_type: type of the URS units to annotate
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="Maillon")
String unit_type
// reset: when true every unit is (re)annotated; when false only the units whose
// DEFINITUDE is missing, empty or "NONE" are annotated
@Field @Option(name="reset",usage="", widget="Boolean", required=true, def="true")
boolean reset

if (!ParametersDialog.open(this)) return;

MainCorpus corpus = corpusViewSelection
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
def word = corpus.getWordProperty()
def analecCorpus = URSCorpora.getCorpus(corpus);
Structure structure = analecCorpus.getStructure()
if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
	println "Error: corpus structure does not contains unit with name=$unit_type"
	return
}
def props = structure.getUniteProperties(unit_type)

String DEFINITUDE = "DEFINITUDE"
if (!props.contains(DEFINITUDE)) { // update the structure if needed
	analecCorpus.ajouterProp(Unite.class, unit_type, DEFINITUDE);
	for (String value : ["DEFINI", "INDEFINI", "DEMONSTRATIF", "AMBIGU", "NONE"]) {
		analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, value);
	}
}

// Determiner patterns, compiled once. The whole alternation is grouped so that
// '^' applies to EVERY determiner: the unit text must START with it.
// (Previously only the first alternative was anchored, so e.g. "la " found in
// the middle of a unit was enough to tag the unit as DEFINI.)
// possible additions: ses, son, sa, leur(s), tous, toutes
def definiPattern = ~/^(?:(?:le|Le|la|La|les|Les|au|Au|aux|Aux|du|Du)\s|[lL]').+/
def indefiniPattern = ~/^(?:un|Un|une|Une|chaque|Chaque|certains|Certains|certaines|Certaines|aucun|Aucun|aucune|Aucune|Aucunes|autre|Autre|autres|Autres|quelque|Quelque|quelques|Quelques)\s.+/
def demonstratifPattern = ~/^(?:ce|Ce|cet|Cet|cette|Cette|ces|Ces)\s.+/
def ambiguPattern = ~/^(?:de|De|des|Des)\s.+/

// Rules are tried in this order; the first matching one gives the value.
def rules = [
	"DEFINI": definiPattern,
	"INDEFINI": indefiniPattern,
	"DEMONSTRATIF": demonstratifPattern,
	"AMBIGU": ambiguPattern
]

int nIgnored = 0 // number of ignored units
int nModified = 0 // number of modified units
// number of units per assigned DEFINITUDE value
def counts = ["DEFINI": 0, "INDEFINI": 0, "DEMONSTRATIF": 0, "AMBIGU": 0, "NONE": 0]

def units = analecCorpus.getUnites(unit_type)
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
for (Unite unit : units) { // process all units

	def prop = unit.getProp(DEFINITUDE);
	boolean alreadyAnnotated = prop != null && prop.length() > 0 && !prop.equals("NONE")
	if (!reset && alreadyAnnotated) {
		// nothing to do, and no need to query the search engine for this unit
		nIgnored++
		continue
	}

	// word forms of the unit, from its first to its last position, joined with spaces
	int[] pos = (unit.getDeb()..unit.getFin()) as int[]
	String form = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ")

	String value = "NONE" // default when no determiner is recognized
	for (def rule : rules) {
		if (rule.value.matcher(form).find()) {
			value = rule.key
			break
		}
	}

	unit.getProps().put(DEFINITUDE, value)
	counts[value] = counts[value] + 1
	nModified++
}

println "nIgnored=$nIgnored"
println "nModified=$nModified"
println " nDefini=${counts['DEFINI']}"
println " nIndefini=${counts['INDEFINI']}"
println " nDemonstratif=${counts['DEMONSTRATIF']}"
println " nAmbigu=${counts['AMBIGU']}"
println " nNone=${counts['NONE']}"
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/CreationRelationsMacro.groovy (revision 1852) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// STANDARD DECLARATIONS |
|
5 |
package org.txm.macro.urs |
|
6 |
|
|
7 |
import org.kohsuke.args4j.* |
|
8 |
|
|
9 |
import groovy.transform.Field |
|
10 |
|
|
11 |
import org.txm.rcp.swt.widget.parameters.* |
|
12 |
import org.txm.annotation.urs.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
|
|
15 |
import visuAnalec.donnees.Structure; |
|
16 |
import visuAnalec.elements.Relation; |
|
17 |
import visuAnalec.elements.Schema |
|
18 |
import visuAnalec.elements.Unite; |
|
19 |
import visuAnalec.vue.Vue |
|
20 |
|
|
21 |
// Builds one "ANAPHORE" relation (TYPE=COREFERENTE) between each pair of
// consecutive `unit_type` units of every `schema_type` schema, the later unit
// being linked to the one that precedes it.

// The macro only works on a main corpus selected in the Corpora view.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
// unit_type: type of the units to link together
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
String unit_type
// schema_type: type of the schemas whose units are linked
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE")
String schema_type
if (!ParametersDialog.open(this)) return;

int nCreated = 0 // count the number of created RELATION

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus); // URS corpus associated with the TXM corpus
Structure structure = analecCorpus.getStructure()

// both the unit type and the schema type must be declared in the structure
if (!structure.getUnites().contains(unit_type)) {
	println "Error: corpus structure does not contains unit with name=$unit_type"
	return
}
if (!structure.getSchemas().contains(schema_type)) {
	println "Error: corpus structure does not contains schema with name=$schema_type"
	return
}

// declare the relation type and its TYPE property when the structure lacks them
if (!structure.getRelations().contains("ANAPHORE")) {
	println "Creating the 'ANAPHORE' relation in the structure"
	structure.ajouterType(Relation.class, "ANAPHORE")
	analecCorpus.ajouterProp(Relation.class, "ANAPHORE", "TYPE")
	analecCorpus.ajouterVal(Relation.class, "ANAPHORE", "TYPE", "COREFERENTE")
	analecCorpus.ajouterVal(Relation.class, "ANAPHORE", "TYPE", "ASSOCIATIVE")
}

// the macro only creates relations: it refuses to run when some already exist
if (analecCorpus.getRelations("ANAPHORE").size() > 0) {
	println "Error: This macro can't update existing Relations"
	return
}

for (Schema schema : analecCorpus.getSchemas(schema_type)) {
	// keep only the 'unit_type' units of the schema
	def members = []
	for (Unite candidate : schema.getUnitesSousjacentes()) {
		if (candidate.type.equals(unit_type)) {
			members.add(candidate)
		}
	}

	// order them by start position, then by end position
	members.sort() { left, right ->
		int byStart = left.getDeb() <=> right.getDeb()
		byStart != 0 ? byStart : (left.getFin() <=> right.getFin())
	}

	// link each unit (from the second one on) to the unit just before it
	for (int k = 1 ; k < members.size() ; k++) {
		def later = members[k]
		def earlier = members[k - 1]
		println "creating "+later+", "+earlier
		Relation relation = new Relation("ANAPHORE", later, earlier)
		relation.getProps().put("TYPE", "COREFERENTE")
		analecCorpus.addRelationLue(relation) // add the new relation
		nCreated++;
	}
}

println "nCreated=$nCreated"
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/SchemaTypesMacro.groovy (revision 1852) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
// Prints the number of schemas of each schema type found in the URS
// annotations of the selected corpus, sorted by increasing count.

// The macro only works on a main corpus selected in the Corpora view.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);

def schemas = analecCorpus.getTousSchemas()

// schema type -> number of schemas of that type
def set = new HashMap()
for (def s : schemas.collect { it.getType() }) {
	if (!set.containsKey(s)) set[s] = 0;
	set[s] = set[s] +1
}

// Map.sort with a one-parameter closure receives Map.Entry objects: sort on the
// entry value (the count). Looking the entry itself up in the map always gave
// null, which left the map unsorted.
println "Schemas types: "+set.sort() { it.value }
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/NombreDeChainesMacro.groovy (revision 1852) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
|
|
15 |
// Counts the schemas of type `schema_type` that hold at least
// `minimum_schema_size` units once the optional schema and unit filters are
// applied. The script evaluates to a map: "result" is the count, "data" the
// (unfiltered) schemas of the requested type.

// BEGINNING OF PARAMETERS
// schema_type: type of the schemas to count
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence")
String schema_type

// minimum_schema_size: minimum number of retained units for a schema to be counted
@Field @Option(name="minimum_schema_size",usage="", widget="Integer", required=true, def="3")
int minimum_schema_size

// schema_property_name / schema_property_value: optional schema filter, the
// value is a regular expression that the whole property value must match
@Field @Option(name="schema_property_name",usage="", widget="String", required=false, def="")
String schema_property_name

@Field @Option(name="schema_property_value",usage="", widget="String", required=false, def=".*")
String schema_property_value

// unit_type: type of the units to take into account (empty = any type once past validation)
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="Maillon")
String unit_type

// unit_property_name / unit_property_value: optional unit filter, same rules as for schemas
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="")
String unit_property_name

@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
String unit_property_value

// The macro only works on a main corpus selected in the Corpora view.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;
// END OF PARAMETERS

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// check Schema parameters: the type must exist, and so must the filter property when one is given
if (!analecCorpus.getStructure().getSchemas().contains(schema_type)) {
	println "No schema with name=$schema_type"
	return;
}
boolean schemaFilterActive = schema_property_name.length() > 0 && schema_property_value.length() > 0
if (schemaFilterActive) {
	def schemaProps = analecCorpus.getStructure().getSchemaProperties(schema_type);
	if (!schemaProps.contains(schema_property_name)) {
		println "Schema $schema_type has no property named $schema_property_name"
		return;
	}
}

// check unit parameters, same rules as above
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
	println "No unit with name=$unit_type"
	return;
}
boolean unitFilterActive = unit_property_name.length() > 0 && unit_property_value.length() > 0
if (unitFilterActive) {
	def unitProps = analecCorpus.getStructure().getUniteProperties(unit_type);
	if (!unitProps.contains(unit_property_name)) {
		println "Unit $unit_type has no property named $unit_property_name"
		return;
	}
}

def schemas = analecCorpus.getSchemas(schema_type)

int nSchemas = 0;
for (def schema : schemas) {

	// a schema rejected by the schema filter is not counted
	boolean schemaKept = !schemaFilterActive || schema.getProp(schema_property_name).matches(schema_property_value)
	if (schemaKept) {

		// count the units of the schema that pass the type test, then the property test
		int retained = 0;
		for (def member : schema.getUnitesSousjacentesNonTriees()) {
			boolean typeKept = unit_type.length() == 0 || member.getType().equals(unit_type)
			if (typeKept && (!unitFilterActive || member.getProp(unit_property_name).matches(unit_property_value))) {
				retained++
			}
		}

		if (retained >= minimum_schema_size) {
			nSchemas++;
		}
	}
}

println "nombre de chaînes de référence d'un texte : $nSchemas"

return ["result":nSchemas, "data":schemas]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/DistanceInterMaillonnaireMacro.groovy (revision 1852) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.rcp.swt.widget.parameters.* |
|
12 |
import org.txm.annotation.urs.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.apache.commons.lang.StringUtils; |
|
15 |
|
|
16 |
// Computes the mean distance between consecutive units of the `schema_type`
// schemas: for each retained schema, the units are ordered by start position
// and the gap "start of next unit - end of current unit" is accumulated.
// Returns a map: "result" is the mean, "data" holds the sum and the number of gaps.

// BEGINNING OF PARAMETERS
// schema_type: type of the schemas to measure
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence")
String schema_type

// minimum_schema_size: minimum number of units of a schema for it to be measured
@Field @Option(name="minimum_schema_size", usage="", widget="Integer", required=true, def="3")
int minimum_schema_size

// schema_property_name / schema_property_value: optional schema filter, the
// value is a regular expression that the whole property value must match
@Field @Option(name="schema_property_name",usage="", widget="String", required=false, def="")
String schema_property_name

@Field @Option(name="schema_property_value",usage="", widget="String", required=false, def=".*")
String schema_property_value

// unit_type: type of the units to take into account
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="Maillon")
String unit_type

// unit_property_name / unit_property_value: optional unit filter, same rules as for schemas
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="")
String unit_property_name

@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
String unit_property_value

// The macro only works on a main corpus selected in the Corpora view.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

if (!ParametersDialog.open(this)) return;

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// check Schema parameters
if (!analecCorpus.getStructure().getSchemas().contains(schema_type)) {
	println "No schema with name=$schema_type"
	return;
} else {
	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
		// test property existence
		def props = analecCorpus.getStructure().getSchemaProperties(schema_type);
		if (!props.contains(schema_property_name)) {
			println "Schema $schema_type has no property named $schema_property_name"
			return;
		}
	}
}

// check unit parameters
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
	println "No unit with name=$unit_type"
	return;
} else {
	if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
		// test property existence
		def props = analecCorpus.getStructure().getUniteProperties(unit_type);
		if (!props.contains(unit_property_name)) {
			println "Unit $unit_type has no property named $unit_property_name"
			return;
		}
	}
}

def schemas = analecCorpus.getSchemas(schema_type)
def distances = 0; // sum of the gaps between consecutive units
def nDistances = 0 // number of gaps measured
for (def schema : schemas) {

	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
		if (!schema.getProp(schema_property_name).matches(schema_property_value)) {
			// ignoring this schema
			continue
		}
	}

	// NOTE(review): the size threshold is applied to ALL the units of the schema,
	// before the unit type/property filters below - confirm this is intended.
	def allUnites = schema.getUnitesSousjacentesNonTriees()
	int nUnites = allUnites.size()
	if (nUnites < minimum_schema_size) continue;

	def units = []
	for (def unit : allUnites) {

		if (unit_type.length() > 0) {
			if (!unit.getType().equals(unit_type)) {
				continue
			}
		}

		if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
			if (!unit.getProp(unit_property_name).matches(unit_property_value)) {
				// ignoring this unit
				continue
			}
		}

		units << unit
	}

	units.sort() { u1, u2 -> u1.getDeb() <=> u2.getDeb()}

	for (int i = 0 ; i < units.size() -1 ; i++) {
		distances += units[i+1].getDeb() - units[i].getFin()
		nDistances++
	}
}

// Without any pair of consecutive units there is nothing to average: dividing
// by zero would abort the macro with an ArithmeticException, so report it and
// return a mean of 0 instead.
// `coef` is deliberately left undeclared, as before: it is stored in the script binding.
if (nDistances == 0) {
	println "No pair of consecutive units found: the mean distance cannot be computed"
	coef = 0
	return ["result":coef, "data":["distances":distances, "nDistances":nDistances]]
}

coef = (distances / nDistances)
println "distance moyenne inter-mayonnaise : $distances / $nDistances = $coef"

return ["result":coef, "data":["distances":distances, "nDistances":nDistances]]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/NatureDuPremierMaillonMacro.groovy (revision 1852) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.rcp.swt.widget.parameters.* |
|
12 |
import org.txm.annotation.urs.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.apache.commons.lang.StringUtils; |
|
15 |
|
|
16 |
// Builds the frequency index of the "nature" of the first unit of each
// retained `schema_type` schema. The nature is read from `word_property`,
// which is either a word property of the corpus (the values of all the words
// of the unit are joined with spaces) or, when the corpus has no such
// property, a property of the unit itself.
// The script evaluates to a map: "result" is the most frequent value with its
// frequency, "data" the whole frequency table.

// BEGINNING OF PARAMETERS
// schema_type: type of the schemas to inspect
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence")
String schema_type

// minimum_schema_size: minimum number of units of a schema for it to be inspected
@Field @Option(name="minimum_schema_size", usage="", widget="Integer", required=true, def="3")
int minimum_schema_size

// schema_property_name / schema_property_value: optional schema filter, the
// value is a regular expression that the whole property value must match
@Field @Option(name="schema_property_name",usage="", widget="String", required=false, def="")
String schema_property_name

@Field @Option(name="schema_property_value",usage="", widget="String", required=false, def=".*")
String schema_property_value

// unit_type: type of the units to take into account
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="Maillon")
String unit_type

// unit_property_name / unit_property_value: optional unit filter, same rules as for schemas
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="")
String unit_property_name

@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
String unit_property_value

// word_property: corpus word property, or unit property, giving the nature of a unit
@Field @Option(name="word_property", usage="", widget="StringArray", metaVar="Catégorie	pos	fropos	frpos", required=false, def="Catégorie")
String word_property

// The macro only works on a main corpus selected in the Corpora view.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;
// END OF PARAMETERS

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// check Schema parameters
if (!analecCorpus.getStructure().getSchemas().contains(schema_type)) {
	println "No schema with name=$schema_type"
	return;
} else {
	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
		// test property existence
		def props = analecCorpus.getStructure().getSchemaProperties(schema_type);
		if (!props.contains(schema_property_name)) {
			println "Schema $schema_type has no property named $schema_property_name"
			return;
		}
	}
}

// check unit parameters
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
	println "No unit with name=$unit_type"
	return;
} else {
	if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
		// test property existence
		def props = analecCorpus.getStructure().getUniteProperties(unit_type);
		if (!props.contains(unit_property_name)) {
			println "Unit $unit_type has no property named $unit_property_name"
			return;
		}
	}
}

def CQI = CQPSearchEngine.getCqiClient()

// null when the corpus has no word property with that name
def prop = corpus.getProperty(word_property)

def schemas = analecCorpus.getSchemas(schema_type)
def freqs = [:] // nature of the first unit -> number of schemas

for (def schema : schemas) {

	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
		if (!schema.getProp(schema_property_name).matches(schema_property_value)) {
			// ignoring this schema
			continue
		}
	}

	def allUnites = schema.getUnitesSousjacentesNonTriees()
	int nUnites = allUnites.size()
	if (nUnites < minimum_schema_size) continue;

	def units = []
	for (def unit : allUnites) {

		if (unit_type.length() > 0) {
			if (!unit.getType().equals(unit_type)) {
				continue
			}
		}

		if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
			if (!unit.getProp(unit_property_name).matches(unit_property_value)) {
				// ignoring this unit
				continue
			}
		}

		units << unit
	}

	// order by start position; <=> cannot overflow, unlike a subtraction
	units.sort() { u1, u2 -> u1.getDeb() <=> u2.getDeb() }
	if (units.size() == 0) continue;

	def unit = units[0] // first unit of the schema

	String forme = null;
	if (prop == null) { // word_property is the analec unit property to use
		forme = unit.getProp(word_property)
	} else {
		int[] pos = null;
		if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
		else pos = unit.getDeb()..unit.getFin()

		forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ")
	}

	if (!freqs.containsKey(forme)) freqs[forme] = 0;

	freqs[forme] = freqs[forme] + 1;
}

println "index des natures de premier maillon :"
int max = 0;
def result = "";
// values are listed by decreasing frequency; the first maximum found is kept as result
for (def forme : freqs.keySet().sort() {it -> -freqs[it]}) {
	println "$forme\t"+freqs[forme]
	if (max < freqs[forme]) {
		max = freqs[forme]
		result = "$forme: "+freqs[forme]
	}
}

return ["result": result, "data": freqs]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/LongueurMoyenneMacro.groovy (revision 1852) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
|
|
6 |
// STANDARD DECLARATIONS |
|
7 |
package org.txm.macro.urs |
|
8 |
|
|
9 |
import org.kohsuke.args4j.* |
|
10 |
|
|
11 |
import groovy.transform.Field |
|
12 |
|
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import org.txm.statsengine.r.core.RWorkspace; |
|
17 |
import org.txm.Toolbox |
|
18 |
import org.txm.rcp.commands.* |
|
19 |
|
|
20 |
// Computes the mean length (number of retained units) of the `schema_type`
// schemas, prints the ranked index of the lengths and draws their histogram
// with R in an SVG file that is then opened.
// Returns a map: "result" is the mean length, "data" the table length -> number of schemas.

// BEGINNING OF PARAMETERS
// schema_type: type of the schemas to measure
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence")
String schema_type

// minimum_schema_size: minimum number of retained units for a schema to be measured
@Field @Option(name="minimum_schema_size",usage="", widget="Integer", required=true, def="3")
int minimum_schema_size

// schema_property_name / schema_property_value: optional schema filter, the
// value is a regular expression that the whole property value must match
@Field @Option(name="schema_property_name",usage="", widget="String", required=false, def="")
String schema_property_name

@Field @Option(name="schema_property_value",usage="", widget="String", required=false, def=".*")
String schema_property_value

// unit_type: type of the units to take into account
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="Maillon")
String unit_type

// unit_property_name / unit_property_value: optional unit filter, same rules as for schemas
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="")
String unit_property_name

@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
String unit_property_value

// The macro only works on a main corpus selected in the Corpora view.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

if (!ParametersDialog.open(this)) return;
// END OF PARAMETERS

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// check Schema parameters
if (!analecCorpus.getStructure().getSchemas().contains(schema_type)) {
	println "No schema with name=$schema_type"
	return;
} else {
	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
		// test property existence
		def props = analecCorpus.getStructure().getSchemaProperties(schema_type);
		if (!props.contains(schema_property_name)) {
			println "Schema $schema_type has no property named $schema_property_name"
			return;
		}
	}
}

// check unit parameters
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
	println "No unit with name=$unit_type"
	return;
} else {
	if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
		// test property existence
		def props = analecCorpus.getStructure().getUniteProperties(unit_type);
		if (!props.contains(unit_property_name)) {
			println "Unit $unit_type has no property named $unit_property_name"
			return;
		}
	}
}

def schemas = analecCorpus.getSchemas(schema_type)

def lens = [:] // schema length -> number of schemas of that length
for (def schema : schemas) {

	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
		if (!schema.getProp(schema_property_name).matches(schema_property_value)) {
			// ignoring this schema
			continue
		}
	}

	// length of the schema = number of its units passing the unit filters
	int nUnites = 0;
	for (def unit : schema.getUnitesSousjacentesNonTriees()) {
		if (unit_type.length() > 0) {
			if (!unit.getType().equals(unit_type)) {
				continue
			}
		}

		if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
			if (!unit.getProp(unit_property_name).matches(unit_property_value)) {
				// ignoring this unit
				continue
			}
		}

		nUnites++
	}

	if (nUnites < minimum_schema_size) continue;

	if (!lens.containsKey(nUnites)) lens[nUnites] = 0;

	lens[nUnites] = lens[nUnites] + 1;
}

int t = 0; // total number of units over the retained schemas
int n = 0; // number of retained schemas
lens.each { len, count ->
	t += len * count
	n += count
}

// No retained schema: the mean is undefined (t/n would throw an
// ArithmeticException) and there is nothing to plot, so stop here.
// `coef` is deliberately left undeclared, as before: it is stored in the script binding.
if (n == 0) {
	println "No schema retained: the mean length cannot be computed"
	coef = 0
	return ["result":coef, "data":lens]
}

coef = (t/n)
// lengths ranked by decreasing frequency, then by decreasing length
def slens = lens.sort { a, b -> -a.value <=> -b.value ?: -a.key <=> -b.key }
// one entry per schema, holding its length: the raw data of the histogram
def flens = []
slens.each { key, value -> value.times { flens << key } }
def nbins = flens.size()*2

def cfreq = 0 // cumulated frequency
println "longueur moyenne des chaînes de référence : $t/$n = "+coef
println "index hiérarchique des longueurs de chaînes :\nlen\tfreq\tcfreq"
slens.each { println it.key+" "+it.value+" "+(cfreq+=it.value) }

// lengths by decreasing length, used for the axis ticks
def slens2 = slens.sort { a, b -> -a.key <=> -b.key }

def r = RWorkspace.getRWorkspaceInstance()

r.addVectorToWorkspace("len", slens2.keySet() as int[])
r.addVectorToWorkspace("freq", slens2.values() as int[])
r.addVectorToWorkspace("flen", flens as int[])

def corpusName = corpus.getName()

def file = File.createTempFile("txm", ".svg", new File(Toolbox.getTxmHomePath(), "results"))
println "SVG file: "+file.getAbsolutePath()
/// BEGINNING OF R SCRIPT
def script ="""
hist(flen, xaxt='n', col="gray", xlab="Length", breaks=$nbins, main="$corpusName Longueur des chaînes ($nbins bins)")
axis(side=1, at=len)
"""
/// END OF R SCRIPT

// execute R script
r.plot(file, script)

// display the SVG results graphic
monitor.syncExec(new Runnable() {
	@Override
	public void run() { OpenSVGGraph.OpenSVGFile(file.getAbsolutePath(), corpusName+" Longueur des chaînes") }
})

return ["result":coef, "data":lens]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/UnitTypesNotInSchemaMacro.groovy (revision 1852) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
// Macro body: counts, per unit type, the units of the selected corpus that do
// not belong to any schema of type 'schema_type', and prints the counts.

// The macro only works when the current selection is a MainCorpus.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence")
String schema_type
if (!ParametersDialog.open(this)) return;

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);

// Gather every unit referenced by at least one schema of the requested type.
def unitesInSchema = new HashSet()
for (def schema : analecCorpus.getSchemas(schema_type)) {
	unitesInSchema.addAll(schema.getUnitesSousjacentes())
}
println "unites: "+analecCorpus.getToutesUnites().size()
println "unites in schema: "+unitesInSchema.size()

// Count the remaining units, grouped by unit type.
def counts = new HashMap()
for (def u : analecCorpus.getToutesUnites()) {
	if (unitesInSchema.contains(u)) continue;

	def type = u.getType()
	counts[type] = (counts[type] ?: 0) + 1
}

// Sort by ascending count. The one-parameter closure of Map.sort receives a
// Map.Entry, so the count must be read from the entry itself; looking the
// entry up as a key in the map always yields null and leaves the map unsorted.
def sortedCounts = counts.sort { it.value }
println "unites not in schema: "+sortedCounts
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/Frpos2CategorieMacro.groovy (revision 1852) | ||
---|---|---|
1 |
// @author Bruno Oberlé (2017-04-01 21:50) |
|
2 |
|
|
3 |
/* |
|
4 |
Définit la catégorie grammaticale du maillon d'après le champ `frpos' |
|
5 |
(tagset de TreeTagger). Le script est adapté de |
|
6 |
http://svn.code.sf.net/p/txm/code/trunk/plugins/Analec/AnalecRCP/src/org/txm/macro/analec/Fropos2CategorieMacro.groovy. |
|
7 |
|
|
8 |
Voici la liste des catégories grammaticales retenues (manuel d'annotation de |
|
9 |
Democrat): |
|
10 |
- GN: Groupe Nominal (le petit chat, le chat, le même, ce chat etc.) |
|
11 |
- POSS: Possessif (mon, ton son, ma, ta, sa, mes, tes, ses, notre, votre, |
|
12 |
leur, nos, vos, leurs) |
|
13 |
- PR: Pronom (moi, toi, lui, elle, nous, vous, eux, elles, le tien, le mien, |
|
14 |
moi-même etc.) |
|
15 |
- PR_CL_O: Pronom Clitique Objet (me, te, le, la, les, lui, leur, y, en) |
|
16 |
- PR_CL_R: Pronom Clitique Réfléchi |
|
17 |
- PR_CL_S: Pronom Clitique Sujet (je, tu, il, elle, on, nous, vous, ils, |
|
18 |
elles) |
|
19 |
- PR_REL: Pronom Relatif (qui, que, quoi, dont, où, lequel, quiconque etc.) |
|
20 |
- PR_WH: Pronom Interrogatif (qui, que, quoi, lequel etc.) |
|
21 |
|
|
22 |
Le script ne peut pas désambiguïser les pronoms clitiques de même forme |
|
23 |
(`nous' est-il un sujet, un objet ou un réfléchi?). Dans ce cas, le script |
|
24 |
opte pour le sujet (ou pour l'objet si l'ambiguïté n'est qu'entre objet et |
|
25 |
réfléchi). |
|
26 |
|
|
27 |
Quand il n'y a aucune information disponible (erreurs de l'étiqueteur), la |
|
28 |
valeur est UNDEFINED. |
|
29 |
|
|
30 |
L'algorithme est décrit ici: |
|
31 |
https://groupes.renater.fr/wiki/democrat/prive/txm_annotation_exploitation |
|
32 |
|
|
33 |
*/ |
|
34 |
|
|
35 |
package org.txm.macro.urs |
|
36 |
|
|
37 |
import org.apache.commons.lang.* |
|
38 |
import org.kohsuke.args4j.* |
|
39 |
import groovy.transform.* |
|
40 |
import org.txm.* |
|
41 |
import org.txm.rcp.swt.widget.parameters.* |
|
42 |
import org.txm.annotation.urs.* |
|
43 |
import org.txm.searchengine.cqp.* |
|
44 |
import org.txm.searchengine.cqp.corpus.* |
|
45 |
import visuAnalec.Message.* |
|
46 |
import visuAnalec.donnees.* |
|
47 |
import visuAnalec.elements.* |
|
48 |
import visuAnalec.vue.* |
|
49 |
|
|
50 |
/**
 * Classifies a single-token personal pronoun as a subject, object or
 * reflexive clitic, based on its lower-cased surface form.
 *
 * Reads the script variables CQI and word to fetch the form at 'position'.
 *
 * @param position corpus position handed to CQI.cpos2Str
 * @param frpos part-of-speech tag of the token (not used by this method)
 * @return "PR_CL_S", "PR_CL_O", "PR_CL_R", or null when the form is not listed
 */
def testClitic(def position, def frpos) {

	// The surface form is used rather than the lemma because the lemma of
	// "elle" ("il"?), "te" ("tu"?) or "leur" is hard to predict.
	def form = CQI.cpos2Str(word.getQualifiedName(), position)[0].toLowerCase()

	// Forms are tested in this order: subject first, then object, then reflexive.
	def subjectForms = ["je", "j'", "tu", "t'", "il", "elle", "on", "vous", "nous", "ils", "elles"]
	def objectForms = ["me", "m'", "te", "le", "l'", "la", "lui", "leur", "les"]
	def reflexiveForms = ["se", "s'"]

	if (subjectForms.contains(form)) return "PR_CL_S"
	if (objectForms.contains(form)) return "PR_CL_O"
	if (reflexiveForms.contains(form)) return "PR_CL_R"

	return null
}
|
79 |
|
|
80 |
/**
 * Decides between noun phrase and relative pronoun by looking at which
 * relevant tag appears first in the mention.
 *
 * - a noun comes first, as in `le petit chat que j'ai adopté'  -> "GN"
 * - a relative pronoun comes first, as in `(le livre) dans lequel j'ai lu
 *   cette histoire...'                                          -> "PR_REL"
 *
 * NOTE: in Democrat the relative clause is not annotated inside the mention,
 * so a GN never includes a relative pronoun: one always gets
 * `[le petit chat] [que] [j']ai adopté'. Other annotation schemes may differ.
 *
 * @param positions corpus positions of the mention tokens (not used here)
 * @param Mention part-of-speech tags of the mention tokens, in text order
 * @return "GN", "PR_REL", or null when no tag is NOM, NAM or PRO:REL
 */
def testPhrase(def positions, def Mention) {

	for (def tag in Mention) {
		switch (tag) {
			case "NOM":
			case "NAM":
				return "GN"
			case "PRO:REL":
				return "PR_REL"
		}
	}

	return null
}
|
103 |
|
|
104 |
/**
 * Tests whether a single token is an interrogative pronoun.
 *
 * Reads the script variables CQI and word to fetch the form at 'position'.
 *
 * @param position corpus position handed to CQI.cpos2Str
 * @param mention part-of-speech tag of the token
 * @return "PR_WH" when the tag is "PRO" and the form is qui/que/lequel, else null
 */
def testWhPronoun(position, mention) {
	// Lower-case the surface form, as testClitic does, so that capitalised
	// sentence-initial forms ("Qui", "Que", "Lequel") are recognised too.
	def form = CQI.cpos2Str(word.getQualifiedName(), position)[0].toLowerCase()
	if (mention == "PRO" && (form == "qui" || form == "que" || form == "lequel")) {
		return "PR_WH"
	}
	return null
}
|
112 |
|
|
113 |
/**
 * Applies the categorisation rules, in order, to one mention and returns the
 * first category found.
 *
 * When no rule matches, the category is "UNDEFINED" and the tag pattern and
 * word forms are recorded in the script variable 'errors' (tag pattern ->
 * set of word-form sequences) so they can be reported at the end of the run.
 *
 * NOTE(review): the last pronoun rule emits "PRO" while the file header lists
 * that category as "PR" — confirm which spelling is intended.
 *
 * @param positions corpus positions of the mention tokens
 * @param Mention part-of-speech tags of the mention tokens, in text order
 * @return the category name; never null
 */
def testRules(def positions, def Mention) {
	def category = null

	// a possessive (mon, ma...)
	if (Mention.length == 1 && Mention.contains("DET:POS"))
		category = "POSS"

	// a clitic (subject: je, tu...; object: me, te; reflexive: se)
	if (!category && Mention.length == 1 && Mention.contains("PRO:PER"))
		category = testClitic(positions[0], Mention[0])

	// an interrogative pronoun
	if (!category && Mention.length == 1)
		category = testWhPronoun(positions[0], Mention[0])

	// a noun phrase or a relative pronoun
	if (!category)
		category = testPhrase(positions, Mention)

	// some other kind of pronouns
	if (!category
		&& (   Mention.contains("PRO")
			|| Mention.contains("PRO:POSS")
			|| Mention.contains("PRO:IND")
			|| Mention.contains("PRO:DEM")
			|| Mention.contains("PRO:PER") )
		&& !Mention.contains("NOM")
		&& !Mention.contains("NAM") )
		category = "PRO"

	// End of the rules: none matched. Record the pattern for the final report.
	if (!category) {
		category = "UNDEFINED" // clear the field

		// Arrays hash and compare by identity, so using them directly as map
		// keys / set members would create one entry per mention and never
		// merge identical patterns. Lists compare by content.
		def pattern = Mention.toList()
		def forms = CQI.cpos2Str(word.getQualifiedName(), positions).toList()
		if (!errors.containsKey(pattern)) errors[pattern] = new HashSet()
		errors[pattern] << forms
	}

	return category
}
|
153 |
|
|
154 |
// |
|
155 |
// FIN DE LA DÉFINITION DES RÈGLES |
|
156 |
// |
|
157 |
|
|
158 |
// CORPS DU SCRIPT |
|
159 |
|
|
160 |
// Script body: sets the CATEGORIE property of every unit of type 'unit_type'
// from the part-of-speech tags of the words it covers, then prints a summary.

if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return
}

// BEGINNING OF PARAMETERS
@Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
def unit_type
@Field @Option(name="pos_property_name", usage="", widget="String", required=true, def="pos")
def pos_property_name
@Field @Option(name="reset", usage="", widget="Boolean", required=true, def="true")
def reset
if (!ParametersDialog.open(this)) return

// The variables below are assigned without 'def' on purpose: that makes them
// script-wide variables, which the rule methods of this file (testClitic,
// testWhPronoun, testRules) read as CQI, word and errors.
corpus = corpusViewSelection
CQI = CQPSearchEngine.getCqiClient()
word = corpus.getWordProperty()
posProperty = corpus.getProperty(pos_property_name)
if (posProperty == null) {
	println "Error: CQP corpus does not contains the word property with name=$pos_property_name"
	return
}
analecCorpus = URSCorpora.getCorpus(corpus)
vue = URSCorpora.getVue(corpus.getName())
structure = analecCorpus.getStructure()
if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
	println "Error: corpus structure does not contains unit with name=$unit_type"
	return
}

CATEGORIE = "CATEGORIE"
// If the annotation structure has no CATEGORIE property yet, create it with its values
if (!structure.getUniteProperties(unit_type).contains(CATEGORIE)) {

	// FIXME: the original script (see also
	// http://forge.cbp.ens-lyon.fr/redmine/issues/2065) uses
	// analecCorpus.ajouterProp/Val, but that did not work with the author's
	// version of TXM-Analec, hence structure.ajouterProp/Val is used instead.

	// the property
	structure.ajouterProp(Unite.class, unit_type, CATEGORIE)
	// the values
	for (def value : ["GN", "POSS", "PRO", "PR_CL_O", "PR_CL_S", "PR_CL_R", "PR_REL", "PR_WH"]) {
		structure.ajouterVal(Unite.class, unit_type, CATEGORIE, value)
	}
	//...
}

def nModified = 0
def nIgnored = 0

errors = new HashMap()
def units = analecCorpus.getUnites(unit_type)
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
for (Unite unit : units) { // process all units

	def prop = unit.getProp(CATEGORIE)
	if (!reset && prop != null && prop.length() > 0) {
		// The unit already has a CATEGORIE and reset is off: leave it alone,
		// but count it so the "have not been modified" total is accurate.
		// (testRules never returns null, so without this the counter stays 0.)
		nIgnored++
		continue
	}

	int[] positions = null
	if (unit.getDeb() == unit.getFin()) positions = [unit.getDeb()]
	else positions = (unit.getDeb()..unit.getFin())

	def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
	def cat = testRules(positions, Mention)

	if (cat != null) {
		// the following line is in the original script but did not work for the author:
		// vue.setValeurChamp(unit, CATEGORIE, cat)
		unit.getProps().put(CATEGORIE, cat)
		nModified++
	} else {
		nIgnored++
	}
}

println "Result:"
println "- $nModified units of type $unit_type have been modified."
println "- $nIgnored units of type $unit_type have not been modified.\n"

if (errors.size() > 0) {
	println "Some rules should be added to this macro to process the following remaining 'FROPOS / words' values:"
	errors.keySet().each { println "fropos="+it+"\twords="+errors[it].join(" | ") }
}

// update the view (see also
// http://forge.cbp.ens-lyon.fr/redmine/issues/2065)
URSCorpora.getVue(analecCorpus).retablirVueParDefaut()
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/CreationChainesMacro.groovy (revision 1852) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// STANDARD DECLARATIONS |
|
5 |
package org.txm.macro.urs |
|
6 |
|
|
7 |
import org.kohsuke.args4j.* |
|
8 |
import groovy.transform.Field |
|
9 |
import org.txm.rcp.swt.widget.parameters.* |
|
10 |
import org.txm.annotation.urs.* |
|
11 |
import org.txm.searchengine.cqp.corpus.* |
|
12 |
import visuAnalec.donnees.Structure; |
|
13 |
import visuAnalec.elements.Schema |
|
14 |
import visuAnalec.elements.Unite; |
|
15 |
import visuAnalec.vue.Vue |
|
16 |
|
|
17 |
// Script body: groups the units of type 'unit_type' by the value of their REF
// property and stores each group in a 'CHAINE' schema, updating the schemas
// that already exist and creating the missing ones.

if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
String unit_type
if (!ParametersDialog.open(this)) return;

int nCreated = 0 // number of CHAINE schemas created
int nUpdated = 0 // number of CHAINE schemas that received new units

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus); // URS corpus associated with the TXM corpus
Structure structure = analecCorpus.getStructure()
if (!structure.getUnites().contains(unit_type)) { // the structure must declare the unit type
	println "Error: corpus structure does not contains unit with name=$unit_type"
	return
}

// Declare the 'CHAINE' schema type with its properties and values if needed.
if (!structure.getSchemas().contains("CHAINE")) {
	println "Creating the 'CHAINE' schema in the structure"
	analecCorpus.ajouterType(Schema.class, "CHAINE")

	// property name -> allowed values, in declaration order
	def chaineProperties = [
		"REF"          : [],
		"GENRE"        : ["INDETERMINABLE", "FEMININ", "MASCULIN"],
		"NOMBRE"       : ["GROUPE_FLOU", "GROUPE_STRICT", "SINGULIER"],
		"NB MAILLONS"  : [],
		"TYPE REFERENT": ["UNKNOWN", "CONCRET_OBJECT", "ABSTRACT_OBJECT", "TIME", "PRODUCT",
			"AMOUNT", "EVENT", "GPE", "ORG", "PERSON", "LIEU"]
	]
	for (def declaration : chaineProperties.entrySet()) {
		analecCorpus.ajouterProp(Schema.class, "CHAINE", declaration.getKey())
		for (def value : declaration.getValue()) {
			analecCorpus.ajouterVal(Schema.class, "CHAINE", declaration.getKey(), value)
		}
	}
}

String REF = "REF"
def props = structure.getUniteProperties(unit_type)
if (!props.contains(REF)) { // the units must carry the REF property
	println "Error: $unit_type units have no proprerty named 'REF'"
	return
}

// Group the units by REF value, in text order (start position, then end position).
def chaines = [:]
def units = analecCorpus.getUnites(unit_type)
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
for (Unite unit : units) {
	def ref = unit.getProp(REF)
	def members = chaines[ref]
	if (members == null) {
		members = []
		chaines[ref] = members
	}
	members << unit
}

// Update the CHAINE schemas that already exist.
for (Schema schema : analecCorpus.getSchemas("CHAINE")) {
	String ref = schema.getProp(REF)
	if (!chaines.containsKey(ref)) continue

	int sizeBefore = schema.getContenu().size()

	// Take the group out of 'chaines' so it is not created again below,
	// then add its units to the existing schema.
	def members = chaines.remove(ref)
	for (def unit : members) {
		schema.ajouter(unit)
	}

	// Refresh the stored chain length.
	int sizeAfter = schema.getContenu().size()
	schema.props.put("NB MAILLONS", Integer.toString(sizeAfter))

	// The schema counts as updated only if it actually grew.
	if (sizeBefore < sizeAfter) {
		nUpdated++
	}
}

// Create a CHAINE schema for every group that had no existing schema.
for (def group : chaines.entrySet()) {
	def members = group.getValue()
	nCreated++;

	Schema schema = new Schema()
	schema.type = "CHAINE"
	schema.props.put("REF", group.getKey())
	schema.props.put("GENRE", "") // default value
	schema.props.put("NOMBRE", "") // default value
	schema.props.put("NB MAILLONS", Integer.toString(members.size()))
	schema.props.put("TYPE REFERENT", "") // default value

	for (def unit : members) {
		schema.ajouter(unit)
	}

	analecCorpus.addSchemaLu(schema) // register the new schema in the corpus
}

println "nUpdated=$nUpdated"
println "nCreated=$nCreated"
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/ResetAnnotationsMacro.groovy (revision 1852) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro.urs |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcp.swt.widget.parameters.* |
|
7 |
import org.txm.annotation.urs.* |
|
8 |
import org.txm.searchengine.cqp.corpus.* |
|
9 |
import visuAnalec.elements.* |
|
10 |
|
|
11 |
// Script body: removes every unit, relation and schema from the URS
// annotations of the selected corpus. Destructive: it only runs when the
// user ticks the confirmation parameter.

if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

@Field @Option(name="I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS", usage="an example boolean", widget="Boolean", required=false, def="false")
def I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;

// The confirmation flag defaults to false; without this check the macro
// would delete all annotations whatever the user answered.
if (!I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS) {
	println "Reset aborted: set I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS to confirm."
	return;
}

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);
def structure = analecCorpus.getStructure()

// Each inner loop iterates over an array copy (toArray) of the elements,
// not over the collection returned by the corpus.
println "Removing unites..."
for (String type : structure.getTypes(Unite.class)) {
	for (Unite unite : analecCorpus.getUnites(type).toArray(new Unite[0])) {
		analecCorpus.supUnite(unite)
	}
	// fireMessage(new StructureEvent(analecCorpus, TypeModifStructure.SUPPR_TYPE, Unite.class, type));
}

println "Removing relations..."
for (String type : structure.getTypes(Relation.class)) {
	for (Relation relation : analecCorpus.getRelations(type).toArray(new Relation[0])) {
		analecCorpus.supRelation(relation)
	}
	// fireMessage(new StructureEvent(analecCorpus, TypeModifStructure.SUPPR_TYPE, Relation.class, type));
}

println "Removing schemas..."
for (String type : structure.getTypes(Schema.class)) {
	for (Schema schema : analecCorpus.getSchemas(type).toArray(new Schema[0])) {
		analecCorpus.supSchema(schema)
	}
	// fireMessage(new StructureEvent(analecCorpus, TypeModifStructure.SUPPR_TYPE, Schema.class, type));
}

println "Done. Save the corpus to finish the reset."
|
52 |
|
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/RelationsMacro.groovy (revision 1852) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils; |
|
9 |
import org.apache.tools.ant.types.resources.selectors.InstanceOf; |
|
10 |
import org.kohsuke.args4j.* |
|
11 |
|
|
12 |
import groovy.transform.Field |
|
13 |
|
|
14 |
import org.txm.Toolbox; |
|
15 |
import org.txm.rcp.swt.widget.parameters.* |
|
16 |
import org.txm.annotation.urs.* |
|
17 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
18 |
import org.txm.searchengine.cqp.corpus.* |
|
19 |
|
|
20 |
import visuAnalec.donnees.Structure; |
|
21 |
import visuAnalec.elements.Relation |
|
22 |
import visuAnalec.elements.Unite; |
|
23 |
|
|
24 |
// Script body: prints every relation of type 'relation_type' with its
// properties and, when both ends are units, the words each unit covers.

if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
@Field @Option(name="relation_type",usage="", widget="String", required=true, def="ANAPHORE")
String relation_type

if (!ParametersDialog.open(this)) return;

MainCorpus corpus = corpusViewSelection
// CQPSearchEngine is referenced by its fully qualified name because this file
// imports only AbstractCqiClient from org.txm.searchengine.cqp, not the
// engine class itself.
AbstractCqiClient CQI = org.txm.searchengine.cqp.CQPSearchEngine.getCqiClient();
def word = corpus.getWordProperty()
def analecCorpus = URSCorpora.getCorpus(corpus);

// Returns the word forms covered by a unit, joined with single spaces.
def formsOf = { Unite unit ->
	int[] positions = null
	if (unit.getDeb() == unit.getFin()) positions = [unit.getDeb()]
	else positions = (unit.getDeb()..unit.getFin())
	return StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), positions), " ")
}

int n = 1; // 1-based rank of the relation in the listing
def relations = analecCorpus.getRelations(relation_type)
for (Relation relation : relations) {

	def unit1 = relation.getElt1();
	def unit2 = relation.getElt2();
	def props = relation.getProps()

	if (unit1 instanceof Unite && unit2 instanceof Unite) {
		def form1 = formsOf(unit1)
		def form2 = formsOf(unit2)

		println "$n - $props : $form1 -> $form2"
	} else {
		// at least one end is not a unit: print the properties only
		println "$n - $props"
	}
	n++
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/DensiteReferentielleMacro.groovy (revision 1852) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
// Script body: computes the referential density of the selected corpus, i.e.
// the number of selected units divided by the number of words, prints it and
// returns it together with both counts.

if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

@Field @Option(name="unit_type",usage="", widget="String", required=false, def="Maillon")
String unit_type

@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="")
String unit_property_name

@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
String unit_property_value

if (!ParametersDialog.open(this)) return;
// END OF PARAMETERS

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);

// check unit parameters
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
	println "No unit with name=$unit_type"
	return;
} else {
	if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
		// test property existence
		def props = analecCorpus.getStructure().getUniteProperties(unit_type);
		if (!props.contains(unit_property_name)) {
			println "Unit $unit_type has no property named $unit_property_name"
			return;
		}
	}
}

int nMots = corpus.getSize();

int nUnites = 0;
for (def unit : analecCorpus.getToutesUnites()) {
	if (unit_type.length() > 0) {
		if (!unit.getType().equals(unit_type)) {
			continue
		}
	}

	if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
		// unit_property_value is used as a regular expression that must match
		// the whole property value. A unit with no value for the property
		// cannot match the filter; skipping it also avoids a
		// NullPointerException on matches().
		def value = unit.getProp(unit_property_name)
		if (value == null || !value.matches(unit_property_value)) {
			// ignoring this unit
			continue
		}
	}

	nUnites++
}

coef = (nUnites /nMots)
println "densité référentielle : nUnites/nMots = $nUnites/$nMots = $coef = ${coef*100}%"
if (nUnites >= nMots) {
	println "WARNING: possible encoding error. Number of units ($nUnites) is greater than number of words ($nMots)"
}
return ["result":coef, "data":["nUnites":nUnites, "nMots":nMots]]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/UnitsMacro.groovy (revision 1852) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils; |
|
9 |
import org.kohsuke.args4j.* |
|
10 |
import groovy.transform.Field |
|
11 |
import org.txm.Toolbox; |
|
12 |
import org.txm.rcp.swt.widget.parameters.* |
|
13 |
import org.txm.annotation.urs.* |
|
14 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
15 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
16 |
import org.txm.searchengine.cqp.corpus.* |
|
17 |
import visuAnalec.donnees.Structure; |
|
18 |
import visuAnalec.elements.Unite; |
|
19 |
|
|
20 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
21 |
println "Corpora selection is not a Corpus" |
|
22 |
return; |
|
23 |
} |
|
24 |
|
|
25 |
// BEGINNING OF PARAMETERS |
|
26 |
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION") |
|
27 |
String unit_type |
|
28 |
|
|
29 |
if (!ParametersDialog.open(this)) return; |
|
30 |
|
|
31 |
MainCorpus corpus = corpusViewSelection |
|
32 |
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient(); |
|
33 |
def word = corpus.getWordProperty() |
|
34 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
|
35 |
|
|
36 |
int n = 1; |
|
37 |
def units = analecCorpus.getUnites(unit_type) |
|
38 |
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() } |
Formats disponibles : Unified diff