Révision 1852

tmp/org.txm.analec.rcp/src/org/txm/macro/urs/AjoutDefinitudeMacro.groovy (revision 1852)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs
7

  
8
import org.apache.commons.lang.StringUtils;
9
import org.kohsuke.args4j.*
10

  
11
import groovy.transform.Field
12

  
13
import org.txm.Toolbox;
14
import org.txm.rcp.swt.widget.parameters.*
15
import org.txm.annotation.urs.*
16
import org.txm.searchengine.cqp.AbstractCqiClient;
17
import org.txm.searchengine.cqp.corpus.*
18

  
19
import visuAnalec.Message.StructureEvent;
20
import visuAnalec.Message.TypeModifStructure;
21
import visuAnalec.donnees.Structure;
22
import visuAnalec.elements.Unite;
23
import visuAnalec.vue.Vue
24

  
25
if (!(corpusViewSelection instanceof MainCorpus)) {
26
	println "Corpora selection is not a Corpus"
27
	return;
28
}
29

  
30
// BEGINNING OF PARAMETERS
31
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="Maillon")
32
String unit_type
33
@Field @Option(name="reset",usage="", widget="Boolean", required=true, def="true")
34
boolean reset
35

  
36
if (!ParametersDialog.open(this)) return;
37

  
38
MainCorpus corpus = corpusViewSelection
39
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
40
def word = corpus.getWordProperty()
41
def analecCorpus = URSCorpora.getCorpus(corpus);
42
Structure structure = analecCorpus.getStructure()
43
if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
44
	println "Error: corpus structure does not contains unit with name=$unit_type"
45
	return
46
}
47
def props = structure.getUniteProperties(unit_type)
48

  
49
String DEFINITUDE = "DEFINITUDE"
50
if (!props.contains(DEFINITUDE)) { // update the structure if needed
51
	analecCorpus.ajouterProp(Unite.class, unit_type, DEFINITUDE);
52
	analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "DEFINI");
53
	analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "INDEFINI");
54
	analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "DEMONSTRATIF");
55
	analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "AMBIGU");
56
	analecCorpus.ajouterVal(Unite.class, unit_type, DEFINITUDE, "NONE");
57
}
58

  
59
int nIgnored = 0 // number of ignored units
60
int nModified = 0 // number of modified units
61
int nDefini = 0 // number of "DEFINI" units
62
int nIndefini = 0 // number of "InDEFINI" units
63
int nDemonstratif = 0 // number of "DEMONSTRATIF" units
64
int nAmbigu = 0 // number of "AMBIGU" units
65
int nNone = 0 // number of "NONE" units
66

  
67
def units = analecCorpus.getUnites(unit_type)
68
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
69
// Determiner patterns, compiled once before the loop.
// Each alternation is wrapped in a single group so that '^' applies to EVERY
// determiner: written as /^(a)|(b)|(c)/ only the first alternative is anchored
// and the others match anywhere inside the form (e.g. "cela va" was tagged
// DEFINI because of the unanchored "la\s.+").
def definitePattern = ~/^(?:(?:le|Le|la|La|les|Les|au|Au|aux|Aux|du|Du)\s|[lL]').+/
// "Aucune" and "Autres" are listed explicitly: "Aucune ..." used to be caught
// only by accident through the unanchored "une\s", and "Autre" was listed twice.
def indefinitePattern = ~/^(?:un|Un|une|Une|chaque|Chaque|certains|Certains|certaines|Certaines|aucun|Aucun|aucune|Aucune|Aucunes|autre|Autre|autres|Autres|quelque|Quelque|quelques|Quelques)\s.+/
def demonstrativePattern = ~/^(?:ce|Ce|cet|Cet|cette|Cette|ces|Ces)\s.+/
def ambiguousPattern = ~/^(?:des|Des|de|De)\s.+/

for (Unite unit : units) { // process all units

	def prop = unit.getProp(DEFINITUDE);

	if (reset || prop == null || prop.length() == 0 || prop.equals("NONE")) {
		// Build the surface form only for units that are (re)tagged:
		// the word forms from getDeb() to getFin(), joined by single spaces.
		int[] pos = (unit.getDeb()..unit.getFin()) as int[]
		def form = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ")

		// possible additions to consider (possessives, tous/toutes): |(ses\s.+)|(Ses\s.+)|(son\s.+)|(Son\s.+)|(sa\s.+)|(Sa\s.+)|(leurs?\s.+)|(Leurs?\s.+)|(tous\s.+)|(Tous\s.+)|(toutes\s.+)|(Toutes\s.+)
		if (definitePattern.matcher(form).find()) {
			unit.getProps().put(DEFINITUDE, "DEFINI")
			nDefini++
		} else if (indefinitePattern.matcher(form).find()) {
			unit.getProps().put(DEFINITUDE, "INDEFINI")
			nIndefini++
		} else if (demonstrativePattern.matcher(form).find()) {
			unit.getProps().put(DEFINITUDE, "DEMONSTRATIF")
			nDemonstratif++
		} else if (ambiguousPattern.matcher(form).find()) {
			unit.getProps().put(DEFINITUDE, "AMBIGU")
			nAmbigu++
		} else {
			// no known determiner at the start of the form
			unit.getProps().put(DEFINITUDE, "NONE")
			nNone++;
		}
		nModified++

	} else {
		// the unit already carries a value and reset is off: leave it untouched
		nIgnored++
	}
}
103

  
104
println "nIgnored=$nIgnored"
105
println "nModified=$nModified"
106
println " nDefini=$nDefini"
107
println " nIndefini=$nIndefini"
108
println " nDemonstratif=$nDemonstratif"
109
println " nAmbigu=$nAmbigu"
110
println " nNone=$nNone"
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/CreationRelationsMacro.groovy (revision 1852)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// STANDARD DECLARATIONS
5
package org.txm.macro.urs
6

  
7
import org.kohsuke.args4j.*
8

  
9
import groovy.transform.Field
10

  
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.searchengine.cqp.corpus.*
14

  
15
import visuAnalec.donnees.Structure;
16
import visuAnalec.elements.Relation;
17
import visuAnalec.elements.Schema
18
import visuAnalec.elements.Unite;
19
import visuAnalec.vue.Vue
20

  
21
if (!(corpusViewSelection instanceof MainCorpus)) {
22
	println "Corpora selection is not a Corpus"
23
	return;
24
}
25

  
26
// BEGINNING OF PARAMETERS
27
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
28
		String unit_type
29
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE")
30
		String schema_type
31
if (!ParametersDialog.open(this)) return;
32

  
33
int nCreated = 0 // count the number of created RELATION
34

  
35
MainCorpus corpus = corpusViewSelection
36
def analecCorpus = URSCorpora.getCorpus(corpus); // analec corpus has the same name has the TXM corpus
37
Structure structure = analecCorpus.getStructure()
38
if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
39
	println "Error: corpus structure does not contains unit with name=$unit_type"
40
	return
41
}
42
if (!structure.getSchemas().contains(schema_type)) { // check if the structure contains the unit_type units
43
	println "Error: corpus structure does not contains schema with name=$schema_type"
44
	return
45
}
46
if (!structure.getRelations().contains("ANAPHORE")) { // update the structure if needed
47
	println "Creating the 'ANAPHORE' relation in the structure"
48
	structure.ajouterType(Relation.class, "ANAPHORE")
49
	analecCorpus.ajouterProp(Relation.class, "ANAPHORE", "TYPE")
50
	analecCorpus.ajouterVal(Relation.class, "ANAPHORE", "TYPE", "COREFERENTE")
51
	analecCorpus.ajouterVal(Relation.class, "ANAPHORE", "TYPE", "ASSOCIATIVE")
52
}
53
if (analecCorpus.getRelations("ANAPHORE").size() > 0) {
54
	println "Error: This macro can't update existing Relations"
55
	return
56
}
57

  
58
for (Schema schema : analecCorpus.getSchemas(schema_type)) { // parse all CHAINE
59
	def units = []
60
	for (Unite unit : schema.getUnitesSousjacentes()) { // keep only the 'unit_type' units
61
		if (unit.type.equals(unit_type)) units << unit
62
	}
63
	units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() } // sort them
64
	
65
	for (int i = 0 ; i < units.size() - 1 ; i++) { // build RELATIONS and don't process the last unit
66
		println "creating "+units[i+1]+", "+units[i]
67
		Relation relation = new Relation("ANAPHORE", units[i+1], units[i])
68
		relation.getProps().put("TYPE", "COREFERENTE")
69
		analecCorpus.addRelationLue(relation)  // add the new relation
70
		nCreated++;
71
	}
72
}
73

  
74
println "nCreated=$nCreated"
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/SchemaTypesMacro.groovy (revision 1852)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import org.txm.searchengine.cqp.corpus.*
13

  
14
if (!(corpusViewSelection instanceof MainCorpus)) {
15
	println "Corpora selection is not a Corpus"
16
	return;
17
}
18

  
19
MainCorpus corpus = corpusViewSelection
20
def analecCorpus = URSCorpora.getCorpus(corpus);
21

  
22
def schemas = analecCorpus.getTousSchemas()
23
def set = new HashMap()
24
for (def s : schemas.collect { it.getType() }) {
25
	if (!set.containsKey(s)) set[s] = 0;
26
	set[s] = set[s] +1
27
}
28
// Print the schema types ordered by increasing count.
// A one-argument closure given to Map.sort receives a Map.Entry, so the key
// must be derived from the entry itself (set[entry] is always null and leaves
// the map unsorted).
println "Schemas types: "+set.sort() { it -> it.value }
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/NombreDeChainesMacro.groovy (revision 1852)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import org.txm.searchengine.cqp.corpus.*
13

  
14

  
15
// BEGINNING OF PARAMETERS
16
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence")
17
String schema_type
18

  
19
@Field @Option(name="minimum_schema_size",usage="", widget="Integer", required=true, def="3")
20
int minimum_schema_size
21

  
22
@Field @Option(name="schema_property_name",usage="", widget="String", required=false, def="")
23
String schema_property_name
24

  
25
@Field @Option(name="schema_property_value",usage="", widget="String", required=false, def=".*")
26
String schema_property_value
27

  
28
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="Maillon")
29
String unit_type
30

  
31
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="")
32
String unit_property_name
33

  
34
@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
35
String unit_property_value
36

  
37
if (!(corpusViewSelection instanceof MainCorpus)) {
38
	println "Corpora selection is not a Corpus"
39
	return;
40
}
41

  
42
// Open the parameters input dialog box
43
if (!ParametersDialog.open(this)) return;
44
// END OF PARAMETERS
45

  
46
MainCorpus corpus = corpusViewSelection
47
def analecCorpus = URSCorpora.getCorpus(corpus)
48

  
49
// check Schema parameters
50
if (!analecCorpus.getStructure().getSchemas().contains(schema_type)) {
51
	println "No schema with name=$schema_type"
52
	return;
53
} else {
54
	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
55
		// test property existance
56
		def props = analecCorpus.getStructure().getSchemaProperties(schema_type);
57
		if (!props.contains(schema_property_name)) {
58
			println "Schema $schema_type has no property named $schema_property_name"
59
			return;
60
		}
61
	}
62
}
63

  
64
// check unit parameters
65
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
66
	println "No unit with name=$unit_type"
67
	return;
68
} else {
69
	if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
70
		// test property existance
71
		def props = analecCorpus.getStructure().getUniteProperties(unit_type);
72
		if (!props.contains(unit_property_name)) {
73
			println "Unit $unit_type has no property named $unit_property_name"
74
			return;
75
		}
76
	}
77
}
78

  
79
def schemas = analecCorpus.getSchemas(schema_type)
80

  
81
int nSchemas = 0;
82
for (def schema : schemas) {
83

  
84
	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
85
		if (!schema.getProp(schema_property_name).matches(schema_property_value)) {
86
			// ignoring this schema
87
			continue
88
		}
89
	}
90
	
91
	int nUnites = 0;
92
	for (def unit : schema.getUnitesSousjacentesNonTriees()) {
93
	if (unit_type.length() > 0) {
94
			if (!unit.getType().equals(unit_type)) {
95
				continue
96
			}
97
		}
98
		
99
		if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
100
			if (!unit.getProp(unit_property_name).matches(unit_property_value)) {
101
				// ignoring this schema
102
				continue
103
			}
104
		}
105
	
106
		nUnites++
107
	}
108
	
109
	if (nUnites < minimum_schema_size) continue;
110

  
111
	nSchemas++;
112
}
113

  
114
println "nombre de chaînes de référence d'un texte : $nSchemas"
115

  
116
["result":nSchemas, "data":schemas]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/DistanceInterMaillonnaireMacro.groovy (revision 1852)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.apache.commons.lang.StringUtils;
15

  
16
// BEGINNING OF PARAMETERS
17
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence")
18
String schema_type
19

  
20
@Field @Option(name="minimum_schema_size", usage="", widget="Integer", required=true, def="3")
21
int minimum_schema_size
22

  
23
@Field @Option(name="schema_property_name",usage="", widget="String", required=false, def="")
24
String schema_property_name
25

  
26
@Field @Option(name="schema_property_value",usage="", widget="String", required=false, def=".*")
27
String schema_property_value
28

  
29
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="Maillon")
30
String unit_type
31

  
32
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="")
33
String unit_property_name
34

  
35
@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
36
String unit_property_value
37

  
38
if (!(corpusViewSelection instanceof MainCorpus)) {
39
	println "Corpora selection is not a Corpus"
40
	return;
41
}
42

  
43
if (!ParametersDialog.open(this)) return;
44

  
45
MainCorpus corpus = corpusViewSelection
46
def analecCorpus = URSCorpora.getCorpus(corpus)
47

  
48
// check Schema parameters
49
if (!analecCorpus.getStructure().getSchemas().contains(schema_type)) {
50
	println "No schema with name=$schema_type"
51
	return;
52
} else {
53
	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
54
		// test property existance
55
		def props = analecCorpus.getStructure().getSchemaProperties(schema_type);
56
		if (!props.contains(schema_property_name)) {
57
			println "Schema $schema_type has no property named $schema_property_name"
58
			return;
59
		}
60
	}
61
}
62

  
63
// check unit parameters
64
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
65
	println "No unit with name=$unit_type"
66
	return;
67
} else {
68
	if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
69
		// test property existance
70
		def props = analecCorpus.getStructure().getUniteProperties(unit_type);
71
		if (!props.contains(unit_property_name)) {
72
			println "Unit $unit_type has no property named $unit_property_name"
73
			return;
74
		}
75
	}
76
}
77

  
78
def schemas = analecCorpus.getSchemas(schema_type)
79
def distances = 0;
80
def nDistances = 0
81
for (def schema : schemas) {
82

  
83
	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
84
		if (!schema.getProp(schema_property_name).matches(schema_property_value)) {
85
			// ignoring this schema
86
			continue
87
		}
88
	}
89
	
90
	def allUnites = schema.getUnitesSousjacentesNonTriees()
91
	int nUnites = allUnites.size()
92
	if (nUnites < minimum_schema_size) continue;
93

  
94
	def units = []
95
	for (def unit : allUnites) {
96
	
97
		if (unit_type.length() > 0) {
98
			if (!unit.getType().equals(unit_type)) {
99
				continue
100
			}
101
		}
102
		
103
		if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
104
			if (!unit.getProp(unit_property_name).matches(unit_property_value)) {
105
				// ignoring this schema
106
				continue
107
			}
108
		}
109
		
110
		units << unit
111
	}
112
	
113
	units.sort() { u1, u2 -> u1.getDeb() <=> u2.getDeb()}
114
		
115
	for (int i = 0 ; i < units.size() -1 ; i++) {
116
		distances +=  units[i+1].getDeb() - units[i].getFin() 
117
		nDistances++
118
	}
119
}
120

  
121
// Mean distance between consecutive units; 0 when no pair of units was
// measured (dividing by nDistances == 0 would throw an ArithmeticException).
coef = nDistances > 0 ? (distances / nDistances) : 0
122
println "distance moyenne inter-mayonnaise : $distances / $nDistances = $coef"
123

  
124
return ["result":coef, "data":["distances":distances, "nDistances":nDistances]]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/NatureDuPremierMaillonMacro.groovy (revision 1852)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.apache.commons.lang.StringUtils;
15

  
16
// BEGINNING OF PARAMETERS
17
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence")
18
String schema_type
19

  
20
@Field @Option(name="minimum_schema_size", usage="", widget="Integer", required=true, def="3")
21
int minimum_schema_size
22

  
23
@Field @Option(name="schema_property_name",usage="", widget="String", required=false, def="")
24
String schema_property_name
25

  
26
@Field @Option(name="schema_property_value",usage="", widget="String", required=false, def=".*")
27
String schema_property_value
28

  
29
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="Maillon")
30
String unit_type
31

  
32
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="")
33
String unit_property_name
34

  
35
@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
36
String unit_property_value
37

  
38
@Field @Option(name="word_property", usage="", widget="StringArray", metaVar="Catégorie	pos	fropos	frpos", required=false, def="Catégorie")
39
String word_property
40

  
41
if (!(corpusViewSelection instanceof MainCorpus)) {
42
	println "Corpora selection is not a Corpus"
43
	return;
44
}
45

  
46
// Open the parameters input dialog box
47
if (!ParametersDialog.open(this)) return;
48
// END OF PARAMETERS
49

  
50
MainCorpus corpus = corpusViewSelection
51
def analecCorpus = URSCorpora.getCorpus(corpus)
52

  
53
if (!analecCorpus.getStructure().getSchemas().contains(schema_type)) {
54
	println "No schema with name=$schema_type"
55
	return;
56
} else {
57
	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
58
		// test property existance
59
		def props = analecCorpus.getStructure().getSchemaProperties(schema_type);
60
		if (!props.contains(schema_property_name)) {
61
			println "Schema $schema_type has no property named $schema_property_name"
62
			return;
63
		}
64
	}
65
}
66

  
67
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
68
	println "No unit with name=$unit_type"
69
	return;
70
} else {
71
	if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
72
		// test property existance
73
		def props = analecCorpus.getStructure().getUniteProperties(unit_type);
74
		if (!props.contains(unit_property_name)) {
75
			println "Unit $unit_type has no property named $unit_property_name"
76
			return;
77
		}
78
	}
79
}
80

  
81
def CQI = CQPSearchEngine.getCqiClient()
82

  
83
def prop = corpus.getProperty(word_property)
84

  
85
def schemas = analecCorpus.getSchemas(schema_type)
86
def freqs = [:]
87

  
88
def distances = 0;
89
def nDistances = 0
90
for (def schema : schemas) {
91

  
92
	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
93
		if (!schema.getProp(schema_property_name).matches(schema_property_value)) {
94
			// ignoring this schema
95
			continue
96
		}
97
	}
98
	
99
	def allUnites = schema.getUnitesSousjacentesNonTriees()
100
	int nUnites = allUnites.size()
101
	if (nUnites < minimum_schema_size) continue;
102

  
103
	def units = []
104
	for (def unit : allUnites) {
105
	
106
		if (unit_type.length() > 0) {
107
			if (!unit.getType().equals(unit_type)) {
108
				continue
109
			}
110
		}
111
		
112
		if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
113
			if (!unit.getProp(unit_property_name).matches(unit_property_value)) {
114
				// ignoring this schema
115
				continue
116
			}
117
		}
118
		
119
		units << unit
120
	}
121
	
122
	units.sort() { u1, u2 ->
123
		return u1.getDeb() - u2.getDeb()
124
	}
125
	if (units.size() == 0) continue;
126
	
127
	def unit = units[0]
128
	
129
	String forme =  null;
130
	if (prop == null) { // word_property is the analec unit property to use
131
		forme = unit.getProp(word_property)
132
	} else {
133
		int[] pos = null;
134
		if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
135
		else pos = unit.getDeb()..unit.getFin()
136
			
137
		forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
138
	}
139
	
140
	if (!freqs.containsKey(forme)) freqs[forme] = 0;
141
	
142
	freqs[forme] = freqs[forme] + 1;
143
}
144

  
145
println "index des natures de premier maillon :"
146
int max = 0;
147
def result = "";
148
for (def forme : freqs.keySet().sort() {it -> -freqs[it]}) {
149
	println "$forme\t"+freqs[forme]
150
	if (max < freqs[forme]) {
151
		max = freqs[forme]
152
		result = "$forme: "+freqs[forme]
153
	}
154
}
155

  
156
["result": result, "data": freqs]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/LongueurMoyenneMacro.groovy (revision 1852)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5

  
6
// STANDARD DECLARATIONS
7
package org.txm.macro.urs
8

  
9
import org.kohsuke.args4j.*
10

  
11
import groovy.transform.Field
12

  
13
import org.txm.rcp.swt.widget.parameters.*
14
import org.txm.annotation.urs.*
15
import org.txm.searchengine.cqp.corpus.*
16
import org.txm.statsengine.r.core.RWorkspace;
17
import org.txm.Toolbox
18
import org.txm.rcp.commands.*
19

  
20
// BEGINNING OF PARAMETERS
21
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence")
22
String schema_type
23

  
24
@Field @Option(name="minimum_schema_size",usage="", widget="Integer", required=true, def="3")
25
int minimum_schema_size
26

  
27
@Field @Option(name="schema_property_name",usage="", widget="String", required=false, def="")
28
String schema_property_name
29

  
30
@Field @Option(name="schema_property_value",usage="", widget="String", required=false, def=".*")
31
String schema_property_value
32

  
33
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="Maillon")
34
String unit_type
35

  
36
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="")
37
String unit_property_name
38

  
39
@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
40
String unit_property_value
41

  
42
if (!(corpusViewSelection instanceof MainCorpus)) {
43
	println "Corpora selection is not a Corpus"
44
	return;
45
}
46

  
47
if (!ParametersDialog.open(this)) return;
48
// END OF PARAMETERS
49

  
50
MainCorpus corpus = corpusViewSelection
51
def analecCorpus = URSCorpora.getCorpus(corpus)
52

  
53
// check Schema parameters
54
if (!analecCorpus.getStructure().getSchemas().contains(schema_type)) {
55
	println "No schema with name=$schema_type"
56
	return;
57
} else {
58
	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
59
		// test property existance
60
		def props = analecCorpus.getStructure().getSchemaProperties(schema_type);
61
		if (!props.contains(schema_property_name)) {
62
			println "Schema $schema_type has no property named $schema_property_name"
63
			return;
64
		}
65
	}
66
}
67

  
68
// check unit parameters
69
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
70
	println "No unit with name=$unit_type"
71
	return;
72
} else {
73
	if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
74
		// test property existance
75
		def props = analecCorpus.getStructure().getUniteProperties(unit_type);
76
		if (!props.contains(unit_property_name)) {
77
			println "Unit $unit_type has no property named $unit_property_name"
78
			return;
79
		}
80
	}
81
}
82

  
83
def schemas = analecCorpus.getSchemas(schema_type)
84

  
85
int nSchemas = 0;
86

  
87
def lens = [:]
88
for (def schema : schemas) {
89

  
90
	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
91
		if (!schema.getProp(schema_property_name).matches(schema_property_value)) {
92
			// ignoring this schema
93
			continue
94
		}
95
	}
96
	
97
	int nUnites = 0;
98
	for (def unit : schema.getUnitesSousjacentesNonTriees()) {
99
		if (unit_type.length() > 0) {
100
			if (!unit.getType().equals(unit_type)) {
101
				continue
102
			}
103
		}
104
		
105
		if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
106
			if (!unit.getProp(unit_property_name).matches(unit_property_value)) {
107
				// ignoring this schema
108
				continue
109
			}
110
		}
111
	
112
		nUnites++
113
	}
114
	
115
	if (nUnites < minimum_schema_size) continue;
116
	
117
	if (!lens.containsKey(nUnites)) lens[nUnites] = 0;
118
	
119
	lens[nUnites] = lens[nUnites] + 1;
120
	nSchemas++;
121
}
122

  
123
//println "nSchemas=$nSchemas"
124
def freqs = lens.keySet();
125
freqs.sort();
126
int t = 0;
127
int n = 0;
128
//println "Fréquences ("+freqs.size()+")"
129
for (def f : freqs) {
130
	t += f * lens[f]
131
	n += lens[f]
132
}
133

  
134
if (n == 0) {
	// No schema passed the filters: t/n would throw "Division by zero" and
	// there would be no data for the histogram drawn below.
	println "No $schema_type schema matches the selection criteria"
	return ["result":0, "data":lens]
}
// mean number of units per retained schema
coef = (t/n)
135
def slens = lens.sort { a, b -> -a.value <=> -b.value ?: -a.key <=> -b.key }
136
def flens = []
137
slens.each { key, value -> value.times { flens << key } }
138
def nbins = flens.size()*2
139

  
140
def cfreq = 0
141
println "longueur moyenne des chaînes de référence : $t/$n = "+coef
142
println "index hiérarchique des longueurs de chaînes :\nlen\tfreq\tcfreq"
143
slens.each { println it.key+"	"+it.value+"	"+(cfreq+=it.value) }
144

  
145
def slens2 = slens.sort { a, b -> -a.key <=> -b.key }
146

  
147
def r = RWorkspace.getRWorkspaceInstance()
148

  
149
r.addVectorToWorkspace("len", slens2.keySet() as int[])
150
r.addVectorToWorkspace("freq", slens2.values() as int[])
151
r.addVectorToWorkspace("flen", flens as int[])
152

  
153
def corpusName = corpus.getName()
154

  
155
def file = File.createTempFile("txm", ".svg", new File(Toolbox.getTxmHomePath(), "results"))
156
println "SVG file: "+file.getAbsolutePath()
157
/// BEGINNING OF R SCRIPT
158
def script ="""
159
hist(flen, xaxt='n', col="gray", xlab="Length", breaks=$nbins, main="$corpusName Longueur des chaînes ($nbins bins)")
160
axis(side=1, at=len)
161
"""
162
/// END OF R SCRIPT
163

  
164
// execute R script
165
r.plot(file, script)
166

  
167
//display the SVG results graphic
168
monitor.syncExec(new Runnable() {
169
	@Override
170
	public void run() { OpenSVGGraph.OpenSVGFile(file.getAbsolutePath(), corpusName+" Longueur des chaînes") }
171
})
172

  
173
return ["result":coef, "data":lens]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/UnitTypesNotInSchemaMacro.groovy (revision 1852)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import org.txm.searchengine.cqp.corpus.*
13

  
14
if (!(corpusViewSelection instanceof MainCorpus)) {
15
	println "Corpora selection is not a Corpus"
16
	return;
17
}
18

  
19
// BEGINNING OF PARAMETERS
20
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence")
21
String schema_type
22
if (!ParametersDialog.open(this)) return;
23

  
24
MainCorpus corpus = corpusViewSelection
25
def analecCorpus = URSCorpora.getCorpus(corpus);
26

  
27
def unitesInSchema = new HashSet()
28
for (def schema : analecCorpus.getSchemas(schema_type)) {
29
	unitesInSchema.addAll(schema.getUnitesSousjacentes())
30
}
31
println "unites: "+analecCorpus.getToutesUnites().size()
32
println "unites in schema: "+unitesInSchema.size()
33

  
34
def set = new HashMap()
35
for (def u : analecCorpus.getToutesUnites()) {
36
	if (unitesInSchema.contains(u)) continue;
37
	
38
	if (!set.containsKey(u.getType())) set[u.getType()] = 0;
39
	set[u.getType()] = set[u.getType()] +1
40
}
41

  
42
// Print the types of the units found in no schema, ordered by increasing count.
// A one-argument closure given to Map.sort receives a Map.Entry, so the key
// must be derived from the entry itself (set[entry] is always null and leaves
// the map unsorted).
println "unites not in schema: "+set.sort() { it -> it.value }
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/Frpos2CategorieMacro.groovy (revision 1852)
1
// @author Bruno Oberlé (2017-04-01 21:50)
2

  
3
/*
4
Définit la catégorie grammaticale du maillon d'après le champ `frpos'
5
(tagset de TreeTagger).  Le script est adapté de
6
http://svn.code.sf.net/p/txm/code/trunk/plugins/Analec/AnalecRCP/src/org/txm/macro/analec/Fropos2CategorieMacro.groovy.
7

  
8
Voici la liste des catégories grammaticales retenues (manuel d'annotation de
9
Democrat):
10
- GN: Groupe Nominal (le petit chat, le chat, le même, ce chat etc.)
11
- POSS: Possessif (mon, ton son, ma, ta, sa, mes, tes, ses, notre, votre,
12
  leur, nos, vos, leurs)
13
- PR: Pronom (moi, toi, lui, elle, nous, vous, eux, elles, le tien, le mien,
14
  moi-même etc.)
15
- PR_CL_O: Pronom Clitique Objet (me, te, le, la, les, lui, leur, y, en)
16
- PR_CL_R: Pronom Clitique Réfléchi
17
- PR_CL_S: Pronom Clitique Sujet (je, tu, il, elle, on, nous, vous, ils,
18
  elles)
19
- PR_REL: Pronom Relatif (qui, que, quoi, dont, où, lequel, quiconque etc.)
20
- PR_WH: Pronom Interrogatif (qui, que, quoi, lequel etc.)
21

  
22
Le script ne peut pas désambiguïser les pronoms clitiques de même forme
23
(`nous' est-il un sujet, un objet ou un réfléchi?).  Dans ce cas, le script
24
opte pour le sujet (ou pour l'objet si l'ambiguïté n'est que entre objet et
25
réfléchi).
26

  
27
Quand il n'y a aucune information disponible (erreurs de l'étiqueteur), la
28
valeur est UNDEFINED.
29

  
30
L'algorithme est décrit ici:
31
https://groupes.renater.fr/wiki/democrat/prive/txm_annotation_exploitation
32

  
33
*/
34

  
35
package org.txm.macro.urs
36

  
37
import org.apache.commons.lang.*
38
import org.kohsuke.args4j.*
39
import groovy.transform.*
40
import org.txm.*
41
import org.txm.rcp.swt.widget.parameters.*
42
import org.txm.annotation.urs.*
43
import org.txm.searchengine.cqp.*
44
import org.txm.searchengine.cqp.corpus.*
45
import visuAnalec.Message.*
46
import visuAnalec.donnees.*
47
import visuAnalec.elements.*
48
import visuAnalec.vue.*
49

  
50
/**
 * Classifies a one-token personal pronoun as a subject, object or reflexive
 * clitic from its lower-cased surface form.
 *
 * The surface form is used rather than the lemma because the lemma of forms
 * such as "elle", "te" or "leur" is hard to predict.
 * Forms that are ambiguous between several functions (e.g. "nous", "vous",
 * "elle") are resolved by test order: subject first, then object, then
 * reflexive.
 *
 * Reads the script-binding variables CQI and word.
 *
 * @param position corpus position of the token
 * @param frpos    POS tag of the token (not used by the current rules)
 * @return "PR_CL_S", "PR_CL_O", "PR_CL_R", or null when the form is not a
 *         known clitic
 */
def testClitic(def position, def frpos) {
   def form = CQI.cpos2Str(word.getQualifiedName(), position)[0].toLowerCase()

   if (form in ["je", "j'", "tu", "t'", "il", "elle", "on", "vous", "nous", "ils", "elles"]) {
      return "PR_CL_S"
   }
   // "y" and "en" are object clitics according to the annotation manual
   // list given in the header of this macro.
   if (form in ["me", "m'", "te", "le", "l'", "la", "lui", "leur", "les", "y", "en"]) {
      return "PR_CL_O"
   }
   if (form in ["se", "s'"]) {
      return "PR_CL_R"
   }
   return null
}
79

  
80
/**
 * Decides whether a mention is a noun phrase or a relative pronoun by
 * scanning its POS tags from left to right: the first decisive tag wins.
 * - a noun first, as in `le petit chat que j'ai adopté'   -> "GN"
 * - a relative pronoun first, as in `dans lequel j'ai lu' -> "PR_REL"
 *
 * NOTE: in Democrat the relative clause is not annotated inside the mention,
 * so a GN never includes a relative pronoun there; other annotation schemes
 * may differ, hence the ordered scan.
 *
 * @param positions corpus positions of the mention tokens (not used)
 * @param Mention   POS tags of the mention tokens
 * @return "GN", "PR_REL", or null when no tag is decisive
 */
def testPhrase(def positions, def Mention) {
   for (def tag in Mention) {
      switch (tag) {
         case "NOM":
         case "NAM":
            return "GN"
         case "PRO:REL":
            return "PR_REL"
      }
   }
   return null
}
103

  
104
/**
 * Detects an interrogative pronoun: a token tagged "PRO" whose form is one of
 * the interrogative forms.
 *
 * The form is lower-cased before comparison so that sentence-initial
 * occurrences ("Qui", "Que"...) are recognized, as testClitic does.
 *
 * Reads the script-binding variables CQI and word.
 *
 * @param position corpus position of the token
 * @param mention  POS tag of the token
 * @return "PR_WH" or null
 */
def testWhPronoun(position, mention) {
   def form = CQI.cpos2Str(word.getQualifiedName(), position)[0].toLowerCase()
   if (mention == "PRO" && form in ["qui", "que", "quoi", "lequel"]) {
      return "PR_WH"
   }
   return null
}
112

  
113
/**
 * Applies the categorisation rules, in priority order, to one mention.
 *
 * When no rule matches, the category is "UNDEFINED" and the unmatched
 * pattern (POS tags + word forms) is recorded in the script-binding map
 * `errors' so that it can be reported at the end of the macro.
 *
 * Reads the script-binding variables CQI, word and errors.
 *
 * @param positions corpus positions of the mention tokens
 * @param Mention   POS tags of the mention tokens (array)
 * @return the category name; never null ("UNDEFINED" when no rule matched)
 */
def testRules(def positions, def Mention) {
   def catégorie = null

   // a possessive (mon, ma...)
   if (Mention.length == 1 && Mention.contains("DET:POS"))
      catégorie = "POSS"

   // a clitic (subject: je, tu...; object: me, te; reflexive: se)
   if (!catégorie && Mention.length == 1 && Mention.contains("PRO:PER"))
      catégorie = testClitic(positions[0], Mention[0])

   // an interrogative pronoun
   if (!catégorie && Mention.length == 1)
      catégorie = testWhPronoun(positions[0], Mention[0])

   // a noun phrase or a relative pronoun
   if (!catégorie)
      catégorie = testPhrase(positions, Mention)

   // some other kind of pronouns
   // "PRO:POS" is the TreeTagger tag for possessive pronouns; "PRO:POSS" is
   // still accepted for corpora tagged with that spelling.
   if (!catégorie
         && (   Mention.contains("PRO")
             || Mention.contains("PRO:POS")
             || Mention.contains("PRO:POSS")
             || Mention.contains("PRO:IND")
             || Mention.contains("PRO:DEM")
             || Mention.contains("PRO:PER") )
         && !Mention.contains("NOM")
         && !Mention.contains("NAM") )
      catégorie = "PRO"

   // End of the rules: none matched. Store the pattern to display it at the end.
   if (!catégorie) {
      catégorie = "UNDEFINED" // clear the field
      // Arrays hash by identity, so lists are used as map key / set element:
      // identical tag patterns are grouped and identical forms de-duplicated.
      def pattern = Mention.toList()
      def forms = CQI.cpos2Str(word.getQualifiedName(), positions).toList()
      if (!errors.containsKey(pattern)) errors.put(pattern, new HashSet())
      errors.get(pattern).add(forms)
   }

   return catégorie
}
153

  
154
//
155
// FIN DE LA DÉFINITION DES RÈGLES
156
//
157

  
158
// CORPS DU SCRIPT
159

  
160
// The macro only works on a main corpus selection.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return
}

// BEGINNING OF PARAMETERS
@Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
def unit_type
@Field @Option(name="pos_property_name", usage="", widget="String", required=true, def="pos")
def pos_property_name
@Field @Option(name="reset", usage="", widget="Boolean", required=true, def="true")
def reset
if (!ParametersDialog.open(this)) return

// The names below are assigned without declaration so that they live in the
// script binding and can be read from the rule methods (CQI, word, errors).
corpus = corpusViewSelection
CQI = CQPSearchEngine.getCqiClient()
word = corpus.getWordProperty()
posProperty = corpus.getProperty(pos_property_name)
if (posProperty == null) {
	println "Error: CQP corpus does not contains the word property with name=$pos_property_name"
	return
}
analecCorpus = URSCorpora.getCorpus(corpus)
vue = URSCorpora.getVue(corpus.getName())
structure = analecCorpus.getStructure()
// check that the structure declares the unit_type units
if (!structure.getUnites().contains(unit_type)) {
	println "Error: corpus structure does not contains unit with name=$unit_type"
	return
}

CATEGORIE = "CATEGORIE"
// If the annotation structure has no CATEGORIE property yet, create it with its values.
// FIXME: the original script (see also
// http://forge.cbp.ens-lyon.fr/redmine/issues/2065) uses
// analecCorpus.ajouterProp/Val, which did not work in the author's version of
// TXM-Analec --> structure.ajouterProp/Val is used instead.
if (!structure.getUniteProperties(unit_type).contains(CATEGORIE)) {
	// the property
	structure.ajouterProp(Unite.class, unit_type, CATEGORIE)
	// the values
	for (def value : ["GN", "POSS", "PRO", "PR_CL_O", "PR_CL_S", "PR_CL_R", "PR_REL", "PR_WH"]) {
		structure.ajouterVal(Unite.class, unit_type, CATEGORIE, value)
	}
}

def nModified = 0
def nIgnored = 0

errors = new HashMap()
def units = analecCorpus.getUnites(unit_type)
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
for (Unite unit : units) { // process all units

	def current = unit.getProp(CATEGORIE)
	if (!reset && current) continue // the unit already has a CATEGORIE

	// all corpus positions covered by the unit (a single one when deb == fin)
	def deb = unit.getDeb()
	def fin = unit.getFin()
	int[] positions = (deb..fin)

	def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
	def cat = testRules(positions, Mention)

	if (cat == null) {
		nIgnored++
		continue
	}
	// the original script called vue.setValeurChamp(unit, CATEGORIE, cat),
	// which did not work for the author of this version
	unit.getProps().put(CATEGORIE, cat)
	nModified++
}

println "Result:"
println "- $nModified units of type $unit_type have been modified."
println "- $nIgnored units of type $unit_type have not been modified.\n"

if (errors.size() > 0) {
	println "Some rules should be added to this macro to process the following remaining 'FROPOS / words' values:"
	errors.each { pattern, wordSets -> println "fropos="+pattern+"\twords="+wordSets.join(" | ") }
}

// update the view (see also
// http://forge.cbp.ens-lyon.fr/redmine/issues/2065)
URSCorpora.getVue(analecCorpus).retablirVueParDefaut()
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/CreationChainesMacro.groovy (revision 1852)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// STANDARD DECLARATIONS
5
package org.txm.macro.urs
6

  
7
import org.kohsuke.args4j.*
8
import groovy.transform.Field
9
import org.txm.rcp.swt.widget.parameters.*
10
import org.txm.annotation.urs.*
11
import org.txm.searchengine.cqp.corpus.*
12
import visuAnalec.donnees.Structure;
13
import visuAnalec.elements.Schema
14
import visuAnalec.elements.Unite;
15
import visuAnalec.vue.Vue
16

  
17
// The macro only works on a main corpus selection.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
String unit_type
if (!ParametersDialog.open(this)) return;

int nCreated = 0 // number of CHAINE schemas created
int nUpdated = 0 // number of CHAINE schemas that received new units

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus); // analec corpus has the same name as the TXM corpus
Structure structure = analecCorpus.getStructure()
if (!structure.getUnites().contains(unit_type)) { // the structure must declare the unit_type units
	println "Error: corpus structure does not contains unit with name=$unit_type"
	return
}
if (!structure.getSchemas().contains("CHAINE")) { // declare the schema if needed
	println "Creating the 'CHAINE' schema in the structure"
	analecCorpus.ajouterType(Schema.class, "CHAINE")
	// [property name, allowed values], declared in this order
	def chaineProperties = [
		["REF", []],
		["GENRE", ["INDETERMINABLE", "FEMININ", "MASCULIN"]],
		["NOMBRE", ["GROUPE_FLOU", "GROUPE_STRICT", "SINGULIER"]],
		["NB MAILLONS", []],
		["TYPE REFERENT", ["UNKNOWN", "CONCRET_OBJECT", "ABSTRACT_OBJECT", "TIME", "PRODUCT",
				"AMOUNT", "EVENT", "GPE", "ORG", "PERSON", "LIEU"]]
	]
	for (def declaration : chaineProperties) {
		def propName = declaration[0]
		analecCorpus.ajouterProp(Schema.class, "CHAINE", propName)
		for (def value : declaration[1]) {
			analecCorpus.ajouterVal(Schema.class, "CHAINE", propName, value)
		}
	}
}

String REF = "REF"
// the unit_type units must carry the REF property used to group them
if (!structure.getUniteProperties(unit_type).contains(REF)) {
	println "Error: $unit_type units have no proprerty named 'REF'"
	return
}

// group the units by REF value, in corpus order
def chaines = [:]
def units = analecCorpus.getUnites(unit_type)
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
for (Unite unit : units) {
	def ref = unit.getProp(REF)
	if (!chaines.containsKey(ref)) {
		chaines.put(ref, [])
	}
	chaines.get(ref).add(unit)
}

// update the CHAINE schemas that already exist
for (Schema schema : analecCorpus.getSchemas("CHAINE")) {
	String ref = schema.getProp(REF)
	if (!chaines.containsKey(ref)) continue

	int sizeBefore = schema.getContenu().size()
	for (def unit : chaines.get(ref)) {
		schema.ajouter(unit) // add the units to the existing chain
	}

	// refresh the recorded chain size
	schema.props.put("NB MAILLONS", Integer.toString(schema.contenu.size()))

	// this group is handled: only groups without a schema must remain
	chaines.remove(ref)

	// count the chain as updated only if it actually grew
	if (schema.getContenu().size() > sizeBefore) {
		nUpdated++
	}
}

// create a CHAINE schema for each remaining group
for (def group : chaines.entrySet()) {
	nCreated++;
	Schema schema = new Schema()
	schema.type = "CHAINE"
	schema.props.put("REF", group.getKey())
	schema.props.put("GENRE", "") // default value
	schema.props.put("NOMBRE", "") // default value
	schema.props.put("NB MAILLONS", Integer.toString(group.getValue().size()))
	schema.props.put("TYPE REFERENT", "") // default value

	for (def unit : group.getValue()) {
		schema.ajouter(unit)
	}

	analecCorpus.addSchemaLu(schema) // register the new schema
}

println "nUpdated=$nUpdated"
println "nCreated=$nCreated"
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/ResetAnnotationsMacro.groovy (revision 1852)
1
// STANDARD DECLARATIONS
2
package org.txm.macro.urs
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcp.swt.widget.parameters.*
7
import org.txm.annotation.urs.*
8
import org.txm.searchengine.cqp.corpus.*
9
import visuAnalec.elements.*
10

  
11
// Removes every unit, relation and schema of the selected corpus.
// The types declared in the annotation structure are left untouched.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

@Field @Option(name="I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS", usage="an example boolean", widget="Boolean", required=false, def="false")
def I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;

// Safety catch: nothing is deleted unless the user explicitly confirmed.
if (!I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS) {
	println "Aborted: check the I_AM_SURE_IWANT_TO_RESET_THE_ANNOTATIONS parameter to reset the annotations."
	return;
}

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);
def structure = analecCorpus.getStructure()

// Each element list is copied to an array before iterating, so that elements
// can be removed from the corpus while looping.
println "Removing unites..."
for (String type : structure.getTypes(Unite.class)) {
	for (Unite unite : analecCorpus.getUnites(type).toArray(new Unite[0])) {
		analecCorpus.supUnite(unite)
	}
}

println "Removing relations..."
for (String type : structure.getTypes(Relation.class)) {
	for (Relation relation : analecCorpus.getRelations(type).toArray(new Relation[0])) {
		analecCorpus.supRelation(relation)
	}
}

println "Removing schemas..."
for (String type : structure.getTypes(Schema.class)) {
	for (Schema schema : analecCorpus.getSchemas(type).toArray(new Schema[0])) {
		analecCorpus.supSchema(schema)
	}
}

println "Done. Save the corpus to finish the reset."
52

  
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/RelationsMacro.groovy (revision 1852)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs
7

  
8
import org.apache.commons.lang.StringUtils;
9
import org.apache.tools.ant.types.resources.selectors.InstanceOf;
10
import org.kohsuke.args4j.*
11

  
12
import groovy.transform.Field
13

  
14
import org.txm.Toolbox;
15
import org.txm.rcp.swt.widget.parameters.*
16
import org.txm.annotation.urs.*
17
import org.txm.searchengine.cqp.AbstractCqiClient;
18
import org.txm.searchengine.cqp.corpus.*
19

  
20
import visuAnalec.donnees.Structure;
21
import visuAnalec.elements.Relation
22
import visuAnalec.elements.Unite;
23

  
24
// Prints every relation of the given type with its properties and, when both
// ends are units, the word forms covered by each unit.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
@Field @Option(name="relation_type",usage="", widget="String", required=true, def="ANAPHORE")
String relation_type

if (!ParametersDialog.open(this)) return;

MainCorpus corpus = corpusViewSelection
// CQPSearchEngine is not covered by the imports of this macro, hence the
// fully-qualified name.
AbstractCqiClient CQI = org.txm.searchengine.cqp.CQPSearchEngine.getCqiClient();
def word = corpus.getWordProperty()
def analecCorpus = URSCorpora.getCorpus(corpus);

// Returns the word forms covered by a unit, joined with single spaces.
def unitForms = { unit ->
	int[] positions = (unit.getDeb()..unit.getFin())
	return StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), positions), " ")
}

int n = 1;
def relations = analecCorpus.getRelations(relation_type)
for (Relation relation : relations) {

	def unit1 = relation.getElt1();
	def unit2 = relation.getElt2();
	def props = relation.getProps()

	if (unit1 instanceof Unite && unit2 instanceof Unite) {
		def form1 = unitForms(unit1)
		def form2 = unitForms(unit2)
		println "$n - $props : $form1 -> $form2"
	} else {
		// at least one end is not a unit: only the properties are shown
		println "$n - $props"
	}
	n++
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/DensiteReferentielleMacro.groovy (revision 1852)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import org.txm.searchengine.cqp.corpus.*
13

  
14
// Computes the referential density of the selected corpus: the number of
// units of the given type (optionally filtered on a property value) divided
// by the number of words of the corpus.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

@Field @Option(name="unit_type",usage="", widget="String", required=false, def="Maillon")
String unit_type

@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="")
String unit_property_name

@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
String unit_property_value

if (!ParametersDialog.open(this)) return;
// END OF PARAMETERS

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);

// the property filter applies only when both its name and its value are set
boolean filterOnProperty = unit_property_name && unit_property_value

// check unit parameters
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
	println "No unit with name=$unit_type"
	return;
}
if (filterOnProperty) {
	// test property existence
	def props = analecCorpus.getStructure().getUniteProperties(unit_type);
	if (!props.contains(unit_property_name)) {
		println "Unit $unit_type has no property named $unit_property_name"
		return;
	}
}

int nMots = corpus.getSize();

// the value regex is compiled once instead of once per unit
def valuePattern = filterOnProperty ? ~unit_property_value : null

int nUnites = 0;
for (def unit : analecCorpus.getToutesUnites()) {
	if (unit_type.length() > 0 && !unit.getType().equals(unit_type)) {
		continue
	}

	if (filterOnProperty) {
		def value = unit.getProp(unit_property_name)
		// a unit without the property cannot match: ignore this unit
		if (value == null || !valuePattern.matcher(value).matches()) {
			continue
		}
	}

	nUnites++
}

// an empty corpus gives a density of 0 instead of a division by zero
coef = nMots > 0 ? (nUnites / nMots) : 0
println "densité référentielle : nUnites/nMots = $nUnites/$nMots = $coef = ${coef*100}%"
if (nUnites >= nMots) {
	println "WARNING: possible encoding error. Number of units ($nUnites) is greater than number of words ($nMots)"
}
return ["result":coef, "data":["nUnites":nUnites, "nMots":nMots]]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/UnitsMacro.groovy (revision 1852)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs
7

  
8
import org.apache.commons.lang.StringUtils;
9
import org.kohsuke.args4j.*
10
import groovy.transform.Field
11
import org.txm.Toolbox;
12
import org.txm.rcp.swt.widget.parameters.*
13
import org.txm.annotation.urs.*
14
import org.txm.searchengine.cqp.AbstractCqiClient;
15
import org.txm.searchengine.cqp.CQPSearchEngine
16
import org.txm.searchengine.cqp.corpus.*
17
import visuAnalec.donnees.Structure;
18
import visuAnalec.elements.Unite;
19

  
20
if (!(corpusViewSelection instanceof MainCorpus)) {
21
	println "Corpora selection is not a Corpus"
22
	return;
23
}
24

  
25
// BEGINNING OF PARAMETERS
26
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
27
String unit_type
28

  
29
if (!ParametersDialog.open(this)) return;
30

  
31
MainCorpus corpus = corpusViewSelection
32
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
33
def word = corpus.getWordProperty()
34
def analecCorpus = URSCorpora.getCorpus(corpus);
35

  
36
int n = 1;
37
def units = analecCorpus.getUnites(unit_type)
38
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff